Commit f45f195a authored by Leon Romanovsky
Browse files

RDMA/mlx4: Introduce a modern CQ creation interface

The uverbs CQ creation UAPI allows users to supply their own umem when
creating a CQ. Update mlx4 to support this model while preserving compatibility
with the legacy interface that allocates umem internally.

Link: https://patch.msgid.link/20260213-refactor-umem-v1-13-f3be85847922@nvidia.com


Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
parent 0e4b9841
Loading
Loading
Loading
Loading
+107 −84
Original line number Diff line number Diff line
@@ -136,7 +136,8 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
}

#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
			   const struct ib_cq_init_attr *attr,
			   struct uverbs_attr_bundle *attrs)
{
	struct ib_udata *udata = &attrs->driver_udata;
@@ -145,13 +146,16 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	int vector = attr->comp_vector;
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_cq *cq = to_mcq(ibcq);
	struct mlx4_uar *uar;
	struct mlx4_ib_create_cq ucmd;
	int cqe_size = dev->dev->caps.cqe_size;
	void *buf_addr;
	int shift;
	int n;
	int err;
	struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx4_ib_ucontext, ibucontext);

	if (entries < 1 || entries > dev->dev->caps.max_cqes)
	if (attr->cqe > dev->dev->caps.max_cqes)
		return -EINVAL;

	if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
@@ -161,44 +165,37 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->send_qp_list);
	INIT_LIST_HEAD(&cq->recv_qp_list);

	if (udata) {
		struct mlx4_ib_create_cq ucmd;
		int cqe_size = dev->dev->caps.cqe_size;
		int shift;
		int n;

		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
	if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
		err = -EFAULT;
		goto err_cq;
	}

	buf_addr = (void *)(unsigned long)ucmd.buf_addr;

		cq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
	if (!ibcq->umem)
		ibcq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
					 entries * cqe_size,
					 IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(cq->umem)) {
			err = PTR_ERR(cq->umem);
	if (IS_ERR(ibcq->umem)) {
		err = PTR_ERR(ibcq->umem);
		goto err_cq;
	}

		shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->umem, 0, &n);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->ibcq.umem, 0, &n);
	if (shift < 0) {
		err = shift;
			goto err_umem;
		goto err_cq;
	}

	err = mlx4_mtt_init(dev->dev, n, shift, &cq->buf.mtt);
	if (err)
			goto err_umem;
		goto err_cq;

		err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
	err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->ibcq.umem);
	if (err)
		goto err_mtt;

@@ -206,46 +203,21 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	if (err)
		goto err_mtt;

		uar = &context->uar;
		cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
	} else {
		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
		if (err)
			goto err_cq;

		cq->mcq.set_ci_db  = cq->db.db;
		cq->mcq.arm_db     = cq->db.db + 1;
		*cq->mcq.set_ci_db = 0;
		*cq->mcq.arm_db    = 0;

		err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
		if (err)
			goto err_db;

		buf_addr = &cq->buf.buf;

		uar = &dev->priv_uar;
		cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
	}

	if (dev->eq_table)
		vector = dev->eq_table[vector % ibdev->num_comp_vectors];

	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma,
			    &cq->mcq, vector, 0,
	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &context->uar,
			    cq->db.dma, &cq->mcq, vector, 0,
			    !!(cq->create_flags &
			       IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
			    buf_addr, !!udata);
			    buf_addr, true);
	if (err)
		goto err_dbmap;

	if (udata)
	cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
	else
		cq->mcq.comp = mlx4_ib_cq_comp;
	cq->mcq.event = mlx4_ib_cq_event;
	cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;

	if (udata)
	if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
		err = -EFAULT;
		goto err_cq_free;
@@ -257,21 +229,72 @@ int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
	mlx4_cq_free(dev->dev, &cq->mcq);

err_dbmap:
	if (udata)
	mlx4_ib_db_unmap_user(context, &cq->db);

err_mtt:
	mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
	/* UMEM is released by ib_core */

err_umem:
	ib_umem_release(cq->umem);
	if (!udata)
err_cq:
	return err;
}

/*
 * mlx4_ib_create_cq - create a CQ whose doorbell record and buffer are
 * allocated by the driver (mlx4_db_alloc() / mlx4_ib_alloc_cq_buf()) and
 * which is registered with dev->priv_uar and MLX4_RES_USAGE_DRIVER.
 *
 * @ibcq:  CQ object to initialise; converted with to_mcq().
 * @attr:  requested size (attr->cqe) and completion vector.
 * @attrs: not referenced in the body shown here.
 *
 * Returns 0 on success, -EINVAL when attr->cqe exceeds caps.max_cqes, or
 * the error returned by the allocation step that failed.
 *
 * NOTE(review): unlike mlx4_ib_create_user_cq(), nothing here checks
 * attr->flags or a lower bound on attr->cqe - presumably validated by the
 * caller; confirm.
 */
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct uverbs_attr_bundle *attrs)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_cq *cq = to_mcq(ibcq);
	void *buf_addr;
	int err;

	if (attr->cqe > dev->dev->caps.max_cqes)
		return -EINVAL;

	/*
	 * The allocated size is (requested + 1) rounded up to a power of two;
	 * the value stored in ibcq.cqe is one less than that size.
	 */
	entries      = roundup_pow_of_two(entries + 1);
	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	INIT_LIST_HEAD(&cq->send_qp_list);
	INIT_LIST_HEAD(&cq->recv_qp_list);

	err = mlx4_db_alloc(dev->dev, &cq->db, 1);
	if (err)
		return err;

	/*
	 * set_ci_db and arm_db are the first and second slots of the doorbell
	 * record just allocated; both start out as zero.
	 */
	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	*cq->mcq.set_ci_db = 0;
	*cq->mcq.arm_db    = 0;

	err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries);
	if (err)
		goto err_db;

	buf_addr = &cq->buf.buf;

	/* When an eq_table exists, translate the requested vector through it. */
	if (dev->eq_table)
		vector = dev->eq_table[vector % ibdev->num_comp_vectors];

	/*
	 * The argument that mlx4_ib_create_user_cq() derives from
	 * IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION is passed as 0 here, and the
	 * final argument is false where the user variant passes true.
	 */
	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &dev->priv_uar,
			    cq->db.dma, &cq->mcq, vector, 0, 0,
			    buf_addr, false);
	if (err)
		goto err_buf;

	cq->mcq.comp = mlx4_ib_cq_comp;
	cq->mcq.event = mlx4_ib_cq_event;
	cq->mcq.usage = MLX4_RES_USAGE_DRIVER;

	return 0;

	/* Unwind in reverse order of allocation: CQ buffer, then doorbell. */
err_buf:
	mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);

err_db:
	/*
	 * NOTE(review): `udata` is not declared in this function and no goto
	 * targets `err_cq`. The `if (!udata)` line and the `err_cq:` label
	 * look like removed-side lines left over from the diff rendering -
	 * confirm against the applied source file.
	 */
	if (!udata)
	mlx4_db_free(dev->dev, &cq->db);
err_cq:
	return err;
}

@@ -445,8 +468,8 @@ int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
	if (ibcq->uobject) {
		cq->buf      = cq->resize_buf->buf;
		cq->ibcq.cqe = cq->resize_buf->cqe;
		ib_umem_release(cq->umem);
		cq->umem     = cq->resize_umem;
		ib_umem_release(cq->ibcq.umem);
		cq->ibcq.umem     = cq->resize_umem;

		kfree(cq->resize_buf);
		cq->resize_buf = NULL;
@@ -506,11 +529,11 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
				struct mlx4_ib_ucontext,
				ibucontext),
			&mcq->db);
		/* UMEM is released by ib_core */
	} else {
		mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
		mlx4_db_free(dev->dev, &mcq->db);
	}
	ib_umem_release(mcq->umem);
	return 0;
}

+1 −0
Original line number Diff line number Diff line
@@ -2525,6 +2525,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
	.attach_mcast = mlx4_ib_mcg_attach,
	.create_ah = mlx4_ib_create_ah,
	.create_cq = mlx4_ib_create_cq,
	.create_user_cq = mlx4_ib_create_user_cq,
	.create_qp = mlx4_ib_create_qp,
	.create_srq = mlx4_ib_create_srq,
	.dealloc_pd = mlx4_ib_dealloc_pd,
+3 −1
Original line number Diff line number Diff line
@@ -121,7 +121,6 @@ struct mlx4_ib_cq {
	struct mlx4_db		db;
	spinlock_t		lock;
	struct mutex		resize_mutex;
	struct ib_umem	       *umem;
	struct ib_umem	       *resize_umem;
	int			create_flags;
	/* List of qps that it serves.*/
@@ -772,6 +771,9 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct uverbs_attr_bundle *attrs);
int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
			   const struct ib_cq_init_attr *attr,
			   struct uverbs_attr_bundle *attrs);
int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);