Commit 026a4259 authored by Mark Zhang's avatar Mark Zhang Committed by Leon Romanovsky
Browse files

RDMA/mlx5: Support plane device and driver APIs to add and delete it



This patch supports driver APIs "add_sub_dev" and "del_sub_dev", to
add and delete a plane device respectively.
A mlx5 plane device is a rdma SMI device; It provides the SMI capability
through user MAD for it's parent, the logical multi-plane aggregated
device. For a plane port:
- It supports QP0 only;
- When adding a plane device, all plane ports are added;
- For some commands like mad_ifc, both plane_index and native portnum
  is needed;
- When querying or modifying a plane port context, the native portnum
  must be used, as the query/modify_hca_vport_context command doesn't
  support plane port.

Signed-off-by: default avatarMark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/e933cd0562aece181f8657af2ca0f5b387d0f14e.1718553901.git.leon@kernel.org


Signed-off-by: default avatarLeon Romanovsky <leonro@nvidia.com>
parent a9e0faca
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -177,7 +177,7 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
	return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
}

int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
		     u16 opmod, u8 port)
{
	int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
@@ -195,12 +195,18 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,

	MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
	MLX5_SET(mad_ifc_in, in, op_mod, opmod);
	if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
		MLX5_SET(mad_ifc_in, in, plane_index, port);
		MLX5_SET(mad_ifc_in, in, port,
			 smi_to_native_portnum(dev, port));
	} else {
		MLX5_SET(mad_ifc_in, in, port, port);
	}

	data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
	memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));

	err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out);
	err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out);
	if (err)
		goto out;

+1 −1
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
			u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
		     u16 opmod, u8 port);
int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
+1 −1
Original line number Diff line number Diff line
@@ -69,7 +69,7 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
	if (ignore_bkey || !in_wc)
		op_modifier |= 0x2;

	return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
	return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier,
				port);
}

+113 −3
Original line number Diff line number Diff line
@@ -282,6 +282,14 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
	struct mlx5_ib_multiport_info *mpi;
	struct mlx5_ib_port *port;

	if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
		if (native_port_num)
			*native_port_num = smi_to_native_portnum(ibdev,
								 ib_port_num);
		return ibdev->mdev;

	}

	if (!mlx5_core_mp_enabled(ibdev->mdev) ||
	    ll != IB_LINK_LAYER_ETHERNET) {
		if (native_port_num)
@@ -1347,6 +1355,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,

	/* props being zeroed by the caller, avoid zeroing it here */

	if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
		port = smi_to_native_portnum(dev, port);

	err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
	if (err)
		goto out;
@@ -1362,7 +1373,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
	if (dev->num_plane) {
		props->port_cap_flags |= IB_PORT_SM_DISABLED;
		props->port_cap_flags &= ~IB_PORT_SM;
	}
	} else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
		props->port_cap_flags &= ~IB_PORT_CM_SUP;

	props->gid_tbl_len	= mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz	= 1 << MLX5_CAP_GEN(mdev, log_max_msg);
@@ -2812,7 +2824,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
		if (dev->num_plane) {
			dev->port_caps[port - 1].has_smi = false;
			continue;
		} else if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) {
		} else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) ||
			dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
			dev->port_caps[port - 1].has_smi = true;
			continue;
		}
@@ -3026,6 +3039,8 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
		return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD |
			RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA |
			RDMA_CORE_CAP_AF_IB;
	else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
		return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI;

	if (ll == IB_LINK_LAYER_INFINIBAND)
		return ret | RDMA_CORE_PORT_IBA_IB;
@@ -3062,6 +3077,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
		return err;

	if (ll == IB_LINK_LAYER_INFINIBAND) {
		if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
			port_num = smi_to_native_portnum(dev, port_num);

		err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
						   &rep);
		if (err)
@@ -3862,12 +3880,18 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
	return err;
}

static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
					     enum rdma_nl_dev_type type,
					     const char *name);
static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev);

static const struct ib_device_ops mlx5_ib_dev_ops = {
	.owner = THIS_MODULE,
	.driver_id = RDMA_DRIVER_MLX5,
	.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION,

	.add_gid = mlx5_ib_add_gid,
	.add_sub_dev = mlx5_ib_add_sub_dev,
	.alloc_mr = mlx5_ib_alloc_mr,
	.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
	.alloc_pd = mlx5_ib_alloc_pd,
@@ -3882,6 +3906,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
	.dealloc_pd = mlx5_ib_dealloc_pd,
	.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
	.del_gid = mlx5_ib_del_gid,
	.del_sub_dev = mlx5_ib_del_sub_dev,
	.dereg_mr = mlx5_ib_dereg_mr,
	.destroy_ah = mlx5_ib_destroy_ah,
	.destroy_cq = mlx5_ib_destroy_cq,
@@ -4171,7 +4196,9 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
	const char *name;

	if (!mlx5_lag_is_active(dev->mdev))
	if (dev->sub_dev_name)
		name = dev->sub_dev_name;
	else if (!mlx5_lag_is_active(dev->mdev))
		name = "mlx5_%d";
	else
		name = "mlx5_bond_%d";
@@ -4432,6 +4459,89 @@ const struct mlx5_ib_profile raw_eth_profile = {
		     NULL),
};

static const struct mlx5_ib_profile plane_profile = {
	STAGE_CREATE(MLX5_IB_STAGE_INIT,
		     mlx5_ib_stage_init_init,
		     mlx5_ib_stage_init_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
		     mlx5_ib_stage_caps_init,
		     mlx5_ib_stage_caps_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
		     mlx5_ib_stage_non_default_cb,
		     NULL),
	STAGE_CREATE(MLX5_IB_STAGE_QP,
		     mlx5_init_qp_table,
		     mlx5_cleanup_qp_table),
	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
		     mlx5_init_srq_table,
		     mlx5_cleanup_srq_table),
	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
		     mlx5_ib_dev_res_init,
		     mlx5_ib_dev_res_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_BFREG,
		     mlx5_ib_stage_bfrag_init,
		     mlx5_ib_stage_bfrag_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
		     mlx5_ib_stage_ib_reg_init,
		     mlx5_ib_stage_ib_reg_cleanup),
};

static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
					     enum rdma_nl_dev_type type,
					     const char *name)
{
	struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane;
	enum rdma_link_layer ll;
	int ret;

	if (mparent->smi_dev)
		return ERR_PTR(-EEXIST);

	ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev,
							port_type));
	if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane ||
	    ll != IB_LINK_LAYER_INFINIBAND ||
	    !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
		return ERR_PTR(-EOPNOTSUPP);

	mplane = ib_alloc_device(mlx5_ib_dev, ib_dev);
	if (!mplane)
		return ERR_PTR(-ENOMEM);

	mplane->port = kcalloc(mparent->num_plane * mparent->num_ports,
			       sizeof(*mplane->port), GFP_KERNEL);
	if (!mplane->port) {
		ret = -ENOMEM;
		goto fail_kcalloc;
	}

	mplane->ib_dev.type = type;
	mplane->mdev = mparent->mdev;
	mplane->num_ports = mparent->num_plane;
	mplane->sub_dev_name = name;

	ret = __mlx5_ib_add(mplane, &plane_profile);
	if (ret)
		goto fail_ib_add;

	mparent->smi_dev = mplane;
	return &mplane->ib_dev;

fail_ib_add:
	kfree(mplane->port);
fail_kcalloc:
	ib_dealloc_device(&mplane->ib_dev);
	return ERR_PTR(ret);
}

static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev)
{
	struct mlx5_ib_dev *mdev = to_mdev(sub_dev);

	to_mdev(sub_dev->parent)->smi_dev = NULL;
	__mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX);
}

static int mlx5r_mp_probe(struct auxiliary_device *adev,
			  const struct auxiliary_device_id *id)
{
+8 −0
Original line number Diff line number Diff line
@@ -1191,6 +1191,8 @@ struct mlx5_ib_dev {
#endif

	u8 num_plane;
	struct mlx5_ib_dev *smi_dev;
	const char *sub_dev_name;
};

static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1699,4 +1701,10 @@ static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
		  unsigned int index, const union ib_gid *gid,
		  const struct ib_gid_attr *attr);

static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
{
	return (port - 1) / dev->num_ports + 1;
}

#endif /* MLX5_IB_H */
Loading