Commit d03fb5c6 authored by Daisuke Matsuda's avatar Daisuke Matsuda Committed by Jason Gunthorpe
Browse files

RDMA/rxe: Allow registering MRs for On-Demand Paging

Allow userspace to register an ODP-enabled MR, in which case the flag
IB_ACCESS_ON_DEMAND is passed to rxe_reg_user_mr(). However, there is no
RDMA operation enabled right now. They will be supported later in the
subsequent two patches.

rxe_odp_do_pagefault() is called to initialize an ODP-enabled MR. It syncs
process address space from the CPU page table to the driver page table
(dma_list/pfn_list in umem_odp) when called with RXE_PAGEFAULT_SNAPSHOT
flag. Additionally, It can be used to trigger page fault when pages being
accessed are not present or do not have proper read/write permissions, and
possibly to prefetch pages in the future.

Link: https://patch.msgid.link/r/20241220100936.2193541-4-matsuda-daisuke@fujitsu.com


Signed-off-by: default avatarDaisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent b6017923
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -92,6 +92,13 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
	dev_put(ndev);

	rxe->max_ucontext			= RXE_MAX_UCONTEXT;

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
		rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;

		/* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
		rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
	}
}

/* initialize port attributes */
+12 −0
Original line number Diff line number Diff line
@@ -184,4 +184,16 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
/* rxe_odp.c */
extern const struct mmu_interval_notifier_ops rxe_mn_ops;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
			 u64 iova, int access_flags, struct rxe_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline int
rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access_flags, struct rxe_mr *mr)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */

#endif /* RXE_LOC_H */
+8 −1
Original line number Diff line number Diff line
@@ -323,6 +323,9 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
		return err;
	}

	if (mr->umem->is_odp)
		return -EOPNOTSUPP;
	else
		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
}

@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
	struct page *page;
	u64 *va;

	/* ODP is not supported right now. WIP. */
	if (mr->umem->is_odp)
		return RESPST_ERR_UNSUPPORTED_OPCODE;

	/* See IBA oA19-28 */
	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
		rxe_dbg_mr(mr, "mr not in valid state\n");
+86 −0
Original line number Diff line number Diff line
@@ -36,3 +36,89 @@ static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
const struct mmu_interval_notifier_ops rxe_mn_ops = {
	.invalidate = rxe_ib_invalidate_range,
};

#define RXE_PAGEFAULT_RDONLY BIT(1)
#define RXE_PAGEFAULT_SNAPSHOT BIT(2)
static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
	u64 access_mask;
	int np;

	access_mask = ODP_READ_ALLOWED_BIT;
	if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
		access_mask |= ODP_WRITE_ALLOWED_BIT;

	/*
	 * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
	 * Callers must release the lock later to let invalidation handler
	 * do its work again.
	 */
	np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
					  access_mask, fault);
	return np;
}

static int rxe_odp_init_pages(struct rxe_mr *mr)
{
	struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
	int ret;

	ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
					    mr->umem->length,
					    RXE_PAGEFAULT_SNAPSHOT);

	if (ret >= 0)
		mutex_unlock(&umem_odp->umem_mutex);

	return ret >= 0 ? 0 : ret;
}

int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
			 u64 iova, int access_flags, struct rxe_mr *mr)
{
	struct ib_umem_odp *umem_odp;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		return -EOPNOTSUPP;

	rxe_mr_init(access_flags, mr);

	if (!start && length == U64_MAX) {
		if (iova != 0)
			return -EINVAL;
		if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return -EINVAL;

		/* Never reach here, for implicit ODP is not implemented. */
	}

	umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
				   &rxe_mn_ops);
	if (IS_ERR(umem_odp)) {
		rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
			   (int)PTR_ERR(umem_odp));
		return PTR_ERR(umem_odp);
	}

	umem_odp->private = mr;

	mr->umem = &umem_odp->umem;
	mr->access = access_flags;
	mr->ibmr.length = length;
	mr->ibmr.iova = iova;
	mr->page_offset = ib_umem_offset(&umem_odp->umem);

	err = rxe_odp_init_pages(mr);
	if (err) {
		ib_umem_odp_release(umem_odp);
		return err;
	}

	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_USER;

	return err;
}
+11 −4
Original line number Diff line number Diff line
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* ODP is not supported right now. WIP. */
	if (mr->umem->is_odp)
		return RESPST_ERR_UNSUPPORTED_OPCODE;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
@@ -702,6 +706,9 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		if (mr->umem->is_odp)
			err = RESPST_ERR_UNSUPPORTED_OPCODE;
		else
			err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
						  atmeth_comp(pkt),
						  atmeth_swap_add(pkt),
Loading