Commit 36680ef7 authored by Michael Guralnik, committed by Leon Romanovsky
Browse files

RDMA/mlx5: Switch from MR cache to FRMR pools



Use the new generic FRMR pools mechanism to optimize the performance of
memory registrations.
The move to the new generic FRMR pools will allow users who currently
configure the MR cache through its debugfs interface to use the netlink API
for FRMR pools instead, which will be added later in this series. This gives
more flexibility in configuring the kernel and also makes configuration
possible on machines where debugfs is not available.

Mlx5_ib will save the mkey index as the handle in FRMR pools, same as the
MR cache implementation.
Upon each memory registration mlx5_ib will try to pull a handle from FRMR
pools, and upon each deregistration it will push the handle back to its
appropriate pool.

Use the vendor key field in umr pool key to save the access mode of the
mkey.

Use the option for kernel-only FRMR pool to manage the mkeys used for
registration with DMAH as the translation between UAPI of DMAH and the
mkey property of st_index is non-trivial and changing dynamically.
Since the value for no PH is 0xff and not zero, switch between them in
the frmr_key to have a zero'ed kernel_vendor_key when not using DMAH.

Remove the limitation we had with MR cache for mkeys up to 2^20 dma
blocks and support mkeys up to HW limitations according to caps.

Remove all MR cache related code.

Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Link: https://patch.msgid.link/20260226-frmr_pools-v4-6-95360b54f15e@nvidia.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent 020d189d
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -4875,7 +4875,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)

/*
 * Cleanup stage: calls the mkey/FRMR cleanup first, then the UMR resource
 * cleanup and finally the UMR cleanup.
 *
 * NOTE(review): this is a diff rendering with the +/- markers stripped. Per
 * the commit message, mlx5_mkey_cache_cleanup() is presumably the removed
 * line and mlx5r_frmr_pools_cleanup() the line replacing it - only one of
 * the two exists in the actual source at any given revision.
 */
static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
	mlx5_mkey_cache_cleanup(dev);
	/* The FRMR pools API takes the embedded ib_device, not the mlx5 dev. */
	mlx5r_frmr_pools_cleanup(&dev->ib_dev);
	mlx5r_umr_resource_cleanup(dev);
	mlx5r_umr_cleanup(dev);
}
@@ -4893,9 +4893,10 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
	if (ret)
		return ret;

	ret = mlx5_mkey_cache_init(dev);
	ret = mlx5r_frmr_pools_init(&dev->ib_dev);
	if (ret)
		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
		mlx5_ib_warn(dev, "frmr pools init failed %d\n", ret);

	return ret;
}

+2 −84
Original line number Diff line number Diff line
@@ -641,25 +641,12 @@ enum mlx5_mkey_type {
/* Used for non-existent ph value */
#define MLX5_IB_NO_PH 0xff

/*
 * Set of mkey properties used as the key of an MR cache entry (it is embedded
 * as rb_key next to the rb_node in struct mlx5_cache_ent).
 *
 * NOTE(review): shown here as part of a diff; per the commit message
 * ("Remove all MR cache related code") this struct is presumably deleted.
 */
struct mlx5r_cache_rb_key {
	u8 ats:1;
	/* MLX5_IB_NO_PH (0xff) is used when there is no ph value. */
	u8 ph;
	u16 st_index;
	/* e.g. MLX5_MKC_ACCESS_MODE_KSM, as set by mlx5_odp_init_mkey_cache() */
	unsigned int access_mode;
	unsigned int access_flags;
	/* presumably the number of translation descriptors - TODO confirm */
	unsigned int ndescs;
};

/*
 * Driver-side bookkeeping for a single mkey.
 *
 * NOTE(review): diff rendering without +/- markers. The trailing comment and
 * the rb_key, cache_ent and cacheable members reference MR cache types, so
 * they are presumably the lines this commit removes - confirm against the
 * resulting header.
 */
struct mlx5_ib_mkey {
	u32 key;
	enum mlx5_mkey_type type;
	unsigned int ndescs;
	struct wait_queue_head wait;
	refcount_t usecount;
	/* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
	struct mlx5r_cache_rb_key rb_key;
	struct mlx5_cache_ent *cache_ent;
	u8 cacheable : 1;
};

#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -784,68 +771,6 @@ struct umr_common {
	struct mutex init_lock;
};

/*
 * Number of u32 mkeys that fit in one page once room for the list_head
 * linking the pages together has been set aside.
 */
#define NUM_MKEYS_PER_PAGE \
	((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))

/* One page worth of stored mkeys, chained to other pages through 'list'. */
struct mlx5_mkeys_page {
	u32 mkeys[NUM_MKEYS_PER_PAGE];
	struct list_head list;
};
/* Build-time check that the array plus the list_head fill the page exactly. */
static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE);

/*
 * Queue of mkeys kept as a list of pages.
 * NOTE(review): presumably the pages are struct mlx5_mkeys_page and 'ci' is
 * the index of the next slot to use - neither is visible here, confirm in
 * the queue helpers.
 */
struct mlx5_mkeys_queue {
	struct list_head pages_list;
	u32 num_pages;
	unsigned long ci;
	spinlock_t lock; /* sync list ops */
};

/*
 * One MR cache entry: a queue of stored mkeys that share the properties in
 * 'rb_key', linked through 'node' (the owning tree is presumably
 * mlx5_mkey_cache.rb_root - TODO confirm).
 *
 * NOTE(review): shown here as part of a diff; per the commit message the MR
 * cache, including this struct, is presumably removed in favour of the
 * generic FRMR pools.
 */
struct mlx5_cache_ent {
	struct mlx5_mkeys_queue	mkeys_queue;
	u32			pending;

	char                    name[4];

	struct rb_node		node;
	struct mlx5r_cache_rb_key rb_key;

	/* State flags; their exact semantics are defined by the cache code. */
	u8 is_tmp:1;
	u8 disabled:1;
	u8 fill_to_high_water:1;
	u8 tmp_cleanup_scheduled:1;

	/*
	 * - limit is the low water mark for stored mkeys, 2* limit is the
	 *   upper water mark.
	 */
	u32 in_use;
	u32 limit;

	/* Statistics */
	u32                     miss;

	/* Back-pointer to the owning device and the entry's deferred work. */
	struct mlx5_ib_dev     *dev;
	struct delayed_work	dwork;
};

/*
 * Context for one asynchronous create_mkey command issued on behalf of a
 * cache entry ('ent').
 */
struct mlx5r_async_create_mkey {
	/*
	 * The command input and output buffers overlap in this union.
	 * NOTE(review): in[] is a u32 array dimensioned with MLX5_ST_SZ_BYTES
	 * while out[] uses MLX5_ST_SZ_DW; if the former yields a byte count the
	 * in[] buffer is larger than strictly needed - confirm.
	 */
	union {
		u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
		u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
	};
	struct mlx5_async_work cb_work;
	struct mlx5_cache_ent *ent;
	u32 mkey;
};

/*
 * Per-device MR cache state (embedded as 'cache' in struct mlx5_ib_dev).
 * NOTE(review): presumably rb_root holds the mlx5_cache_ent entries under
 * rb_lock and fs_root is the debugfs directory mentioned in the commit
 * message - confirm in the cache implementation.
 */
struct mlx5_mkey_cache {
	struct workqueue_struct *wq;
	struct rb_root		rb_root;
	struct mutex		rb_lock;
	struct dentry		*fs_root;
	unsigned long		last_add;
};

struct mlx5_ib_port_resources {
	struct mlx5_ib_gsi_qp *gsi;
	struct work_struct pkey_change_work;
@@ -1182,8 +1107,6 @@ struct mlx5_ib_dev {
	struct mlx5_ib_resources	devr;

	atomic_t			mkey_var;
	struct mlx5_mkey_cache		cache;
	struct timer_list		delay_timer;
	/* Prevents soft lock on massive reg MRs */
	struct mutex			slow_path_mutex;
	struct ib_odp_caps	odp_caps;
@@ -1445,13 +1368,8 @@ int mlx5_ib_query_port_speed(struct ib_device *ibdev, u32 port_num,
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
			  u64 access_flags);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
struct mlx5_cache_ent *
mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
			      struct mlx5r_cache_rb_key rb_key,
			      bool persistent_entry);

int mlx5r_frmr_pools_init(struct ib_device *device);
void mlx5r_frmr_pools_cleanup(struct ib_device *device);
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
				       int access_flags, int access_mode,
				       int ndescs);
+184 −958

File changed.

Preview size limit exceeded, changes collapsed.

+0 −19
Original line number Diff line number Diff line
@@ -1875,25 +1875,6 @@ mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
	return err;
}

/*
 * Create the MR cache entry used for implicit ODP: KSM access mode,
 * mlx5_imr_ksm_entries descriptors and no ph value.
 *
 * Returns 0 on success or when the device does not report
 * IB_ODP_SUPPORT_IMPLICIT (nothing is created in that case), otherwise the
 * error encoded in the pointer returned by mlx5r_cache_create_ent_locked().
 *
 * NOTE(review): shown here as part of a diff with 19 removed lines, i.e.
 * this function is presumably deleted by the commit.
 */
int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
{
	struct mlx5r_cache_rb_key rb_key = {
		.access_mode = MLX5_MKC_ACCESS_MODE_KSM,
		.ndescs = mlx5_imr_ksm_entries,
		.ph = MLX5_IB_NO_PH,
	};
	struct mlx5_cache_ent *ent;

	/* Without implicit ODP support there is nothing to pre-create. */
	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
		return 0;

	/* 'true' is passed as the persistent_entry argument. */
	ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	return 0;
}

/* ODP-specific device ops: only the advise_mr callback is set here. */
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
	.advise_mr = mlx5_ib_advise_mr,
};
+1 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@

/* MLX5_MAX_UMR_PAGES is 1 << 16, i.e. 65536. */
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
/*
 * NOTE(review): added by this commit; presumably the shift bounding the
 * larger mkey sizes the commit message says are now supported ("up to HW
 * limitations according to caps") - confirm against its users.
 */
#define MLX5_MAX_UMR_EXTENDED_SHIFT 43

#define MLX5_IB_UMR_OCTOWORD	       16
#define MLX5_IB_UMR_XLT_ALIGNMENT      64