Commit 89d75c4c authored by Paolo Abeni
Browse files
Tariq Toukan says:

====================
mlx5-next updates 2025-03-10

The following pull-request contains common mlx5 updates for your *net-next* tree.
Please pull and let me know of any problem.

* 'mlx5-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5: Add IFC bits for PPCNT recovery counters group
  net/mlx5: fs, add RDMA TRANSPORT steering domain support
  net/mlx5: Query ADV_RDMA capabilities
  net/mlx5: Limit non-privileged commands
  net/mlx5: Allow the throttle mechanism to be more dynamic
  net/mlx5: Add RDMA_CTRL HW capabilities
====================

Link: https://patch.msgid.link/1741608293-41436-1-git-send-email-tariqt@nvidia.com


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 5a1dddd2 f550694e
Loading
Loading
Loading
Loading
+104 −16
Original line number Diff line number Diff line
@@ -94,6 +94,11 @@ static u16 in_to_opcode(void *in)
	return MLX5_GET(mbox_in, in, opcode);
}

/* Read the uid field out of a command input mailbox. */
static u16 in_to_uid(void *in)
{
	u16 cmd_uid = MLX5_GET(mbox_in, in, uid);

	return cmd_uid;
}

/* Returns true for opcodes that might be triggered very frequently and throttle
 * the command interface. Limit their command slots usage.
 */
@@ -823,7 +828,7 @@ static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)

	opcode = in_to_opcode(in);
	op_mod = MLX5_GET(mbox_in, in, op_mod);
	uid    = MLX5_GET(mbox_in, in, uid);
	uid    = in_to_uid(in);
	status = MLX5_GET(mbox_out, out, status);

	if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY &&
@@ -1871,6 +1876,17 @@ static int is_manage_pages(void *in)
	return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES;
}

static bool mlx5_has_privileged_uid(struct mlx5_core_dev *dev)
{
	return !xa_empty(&dev->cmd.vars.privileged_uids);
}

/* Tell whether @uid has an entry in the device's privileged_uids XArray. */
static bool mlx5_cmd_is_privileged_uid(struct mlx5_core_dev *dev,
				       u16 uid)
{
	void *entry = xa_load(&dev->cmd.vars.privileged_uids, uid);

	return entry != NULL;
}

/*  Notes:
 *    1. Callback functions may not sleep
 *    2. Page queue commands do not support asynchronous completion
@@ -1881,7 +1897,9 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
{
	struct mlx5_cmd_msg *inb, *outb;
	u16 opcode = in_to_opcode(in);
	bool throttle_op;
	bool throttle_locked = false;
	bool unpriv_locked = false;
	u16 uid = in_to_uid(in);
	int pages_queue;
	gfp_t gfp;
	u8 token;
@@ -1890,12 +1908,17 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
	if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode))
		return -ENXIO;

	throttle_op = mlx5_cmd_is_throttle_opcode(opcode);
	if (throttle_op) {
		if (callback) {
			if (down_trylock(&dev->cmd.vars.throttle_sem))
				return -EBUSY;
		} else {
	if (!callback) {
		/* The semaphore is already held for callback commands. It was
		 * acquired in mlx5_cmd_exec_cb()
		 */
		if (uid && mlx5_has_privileged_uid(dev)) {
			if (!mlx5_cmd_is_privileged_uid(dev, uid)) {
				unpriv_locked = true;
				down(&dev->cmd.vars.unprivileged_sem);
			}
		} else if (mlx5_cmd_is_throttle_opcode(opcode)) {
			throttle_locked = true;
			down(&dev->cmd.vars.throttle_sem);
		}
	}
@@ -1941,8 +1964,11 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
out_in:
	free_msg(dev, inb);
out_up:
	if (throttle_op)
	if (throttle_locked)
		up(&dev->cmd.vars.throttle_sem);
	if (unpriv_locked)
		up(&dev->cmd.vars.unprivileged_sem);

	return err;
}

@@ -2104,18 +2130,22 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
	struct mlx5_async_work *work = _work;
	struct mlx5_async_ctx *ctx;
	struct mlx5_core_dev *dev;
	u16 opcode;
	bool throttle_locked;
	bool unpriv_locked;

	ctx = work->ctx;
	dev = ctx->dev;
	opcode = work->opcode;
	throttle_locked = work->throttle_locked;
	unpriv_locked = work->unpriv_locked;
	status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out);
	work->user_callback(status, work);
	/* Can't access "work" from this point on. It could have been freed in
	 * the callback.
	 */
	if (mlx5_cmd_is_throttle_opcode(opcode))
	if (throttle_locked)
		up(&dev->cmd.vars.throttle_sem);
	if (unpriv_locked)
		up(&dev->cmd.vars.unprivileged_sem);
	if (atomic_dec_and_test(&ctx->num_inflight))
		complete(&ctx->inflight_done);
}
@@ -2124,6 +2154,8 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
		     void *out, int out_size, mlx5_async_cbk_t callback,
		     struct mlx5_async_work *work)
{
	struct mlx5_core_dev *dev = ctx->dev;
	u16 uid;
	int ret;

	work->ctx = ctx;
@@ -2131,11 +2163,43 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
	work->opcode = in_to_opcode(in);
	work->op_mod = MLX5_GET(mbox_in, in, op_mod);
	work->out = out;
	work->throttle_locked = false;
	work->unpriv_locked = false;
	uid = in_to_uid(in);

	if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
		return -EIO;
	ret = cmd_exec(ctx->dev, in, in_size, out, out_size,

	if (uid && mlx5_has_privileged_uid(dev)) {
		if (!mlx5_cmd_is_privileged_uid(dev, uid)) {
			if (down_trylock(&dev->cmd.vars.unprivileged_sem)) {
				ret = -EBUSY;
				goto dec_num_inflight;
			}
			work->unpriv_locked = true;
		}
	} else if (mlx5_cmd_is_throttle_opcode(in_to_opcode(in))) {
		if (down_trylock(&dev->cmd.vars.throttle_sem)) {
			ret = -EBUSY;
			goto dec_num_inflight;
		}
		work->throttle_locked = true;
	}

	ret = cmd_exec(dev, in, in_size, out, out_size,
		       mlx5_cmd_exec_cb_handler, work, false);
	if (ret && atomic_dec_and_test(&ctx->num_inflight))
	if (ret)
		goto sem_up;

	return 0;

sem_up:
	if (work->throttle_locked)
		up(&dev->cmd.vars.throttle_sem);
	if (work->unpriv_locked)
		up(&dev->cmd.vars.unprivileged_sem);
dec_num_inflight:
	if (atomic_dec_and_test(&ctx->num_inflight))
		complete(&ctx->inflight_done);

	return ret;
@@ -2371,10 +2435,16 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev)
	sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds);
	sema_init(&cmd->vars.pages_sem, 1);
	sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2));
	sema_init(&cmd->vars.unprivileged_sem,
		  DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2));

	xa_init(&cmd->vars.privileged_uids);

	cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
	if (!cmd->pool)
		return -ENOMEM;
	if (!cmd->pool) {
		err = -ENOMEM;
		goto err_destroy_xa;
	}

	err = alloc_cmd_page(dev, cmd);
	if (err)
@@ -2408,6 +2478,8 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev)
	free_cmd_page(dev, cmd);
err_free_pool:
	dma_pool_destroy(cmd->pool);
err_destroy_xa:
	xa_destroy(&dev->cmd.vars.privileged_uids);
	return err;
}

@@ -2420,6 +2492,7 @@ void mlx5_cmd_disable(struct mlx5_core_dev *dev)
	destroy_msg_cache(dev);
	free_cmd_page(dev, cmd);
	dma_pool_destroy(cmd->pool);
	xa_destroy(&dev->cmd.vars.privileged_uids);
}

void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
@@ -2427,3 +2500,18 @@ void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
{
	dev->cmd.state = cmdif_state;
}

/* Register @uid in the device's privileged_uids XArray.
 *
 * The stored entry is the uid itself encoded as an XArray value, indexed by
 * the uid. Returns whatever xa_insert() returns.
 */
int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid)
{
	struct xarray *uids = &dev->cmd.vars.privileged_uids;
	void *entry = xa_mk_value(uid);

	return xa_insert(uids, uid, entry, GFP_KERNEL);
}
EXPORT_SYMBOL(mlx5_cmd_add_privileged_uid);

/* Drop @uid from the device's privileged_uids XArray.
 *
 * Emits a WARN if nothing was stored at that index.
 */
void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid)
{
	void *erased;

	erased = xa_erase(&dev->cmd.vars.privileged_uids, uid);
	WARN(!erased, "Privileged UID %u does not exist\n", uid);
}
EXPORT_SYMBOL(mlx5_cmd_remove_privileged_uid);
+1 −1
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns,
	esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
		  ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");

	root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
	root_ns = mlx5_get_flow_vport_namespace(dev, ns, vport->index);
	if (!root_ns) {
		esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
			 vport_num);
+3 −3
Original line number Diff line number Diff line
@@ -2828,7 +2828,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	egress_ns = mlx5_get_flow_vport_acl_namespace(master,
	egress_ns = mlx5_get_flow_vport_namespace(master,
						  MLX5_FLOW_NAMESPACE_ESW_EGRESS,
						  vport->index);
	if (!egress_ns)
+2 −0
Original line number Diff line number Diff line
@@ -1142,6 +1142,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
	case FS_FT_RDMA_RX:
	case FS_FT_RDMA_TX:
	case FS_FT_PORT_SEL:
	case FS_FT_RDMA_TRANSPORT_RX:
	case FS_FT_RDMA_TRANSPORT_TX:
		return mlx5_fs_cmd_get_fw_cmds();
	default:
		return mlx5_fs_cmd_get_stub_cmds();
+166 −12
Original line number Diff line number Diff line
@@ -1456,7 +1456,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
	struct mlx5_flow_table *ft;
	int autogroups_max_fte;

	ft = mlx5_create_flow_table(ns, ft_attr);
	ft = mlx5_create_vport_flow_table(ns, ft_attr, ft_attr->vport);
	if (IS_ERR(ft))
		return ft;

@@ -2764,9 +2764,9 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);

struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
							      enum mlx5_flow_namespace_type type,
							      int vport)
struct mlx5_flow_namespace *
mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev,
			      enum mlx5_flow_namespace_type type, int vport_idx)
{
	struct mlx5_flow_steering *steering = dev->priv.steering;

@@ -2775,25 +2775,43 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d

	switch (type) {
	case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
		if (vport >= steering->esw_egress_acl_vports)
		if (vport_idx >= steering->esw_egress_acl_vports)
			return NULL;
		if (steering->esw_egress_root_ns &&
		    steering->esw_egress_root_ns[vport])
			return &steering->esw_egress_root_ns[vport]->ns;
		    steering->esw_egress_root_ns[vport_idx])
			return &steering->esw_egress_root_ns[vport_idx]->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
		if (vport >= steering->esw_ingress_acl_vports)
		if (vport_idx >= steering->esw_ingress_acl_vports)
			return NULL;
		if (steering->esw_ingress_root_ns &&
		    steering->esw_ingress_root_ns[vport])
			return &steering->esw_ingress_root_ns[vport]->ns;
		    steering->esw_ingress_root_ns[vport_idx])
			return &steering->esw_ingress_root_ns[vport_idx]->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
		if (vport_idx >= steering->rdma_transport_rx_vports)
			return NULL;
		if (steering->rdma_transport_rx_root_ns &&
		    steering->rdma_transport_rx_root_ns[vport_idx])
			return &steering->rdma_transport_rx_root_ns[vport_idx]->ns;
		else
			return NULL;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
		if (vport_idx >= steering->rdma_transport_tx_vports)
			return NULL;

		if (steering->rdma_transport_tx_root_ns &&
		    steering->rdma_transport_tx_root_ns[vport_idx])
			return &steering->rdma_transport_tx_root_ns[vport_idx]->ns;
		else
			return NULL;
	default:
		return NULL;
	}
}
EXPORT_SYMBOL(mlx5_get_flow_vport_namespace);

static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
				       unsigned int prio,
@@ -3199,6 +3217,127 @@ static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
	return err;
}

static int
init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering,
				   int vport_idx)
{
	struct fs_prio *prio;

	steering->rdma_transport_rx_root_ns[vport_idx] =
		create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX);
	if (!steering->rdma_transport_rx_root_ns[vport_idx])
		return -ENOMEM;

	/* create 1 prio*/
	prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns,
			      MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
	return PTR_ERR_OR_ZERO(prio);
}

static int
init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering,
				   int vport_idx)
{
	struct fs_prio *prio;

	steering->rdma_transport_tx_root_ns[vport_idx] =
		create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX);
	if (!steering->rdma_transport_tx_root_ns[vport_idx])
		return -ENOMEM;

	/* create 1 prio*/
	prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns,
			      MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
	return PTR_ERR_OR_ZERO(prio);
}

/* Allocate the per-vport array of RDMA transport RX root namespaces and
 * initialize every slot.
 *
 * Returns 0 on success (rdma_transport_rx_vports is then set to the slot
 * count), -ENOMEM if the array cannot be allocated, or the error from the
 * first slot that fails to initialize. On failure the slots below the failing
 * index are cleaned up, the array is freed and the pointer is reset to NULL.
 */
static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering)
{
	int nr_vports = mlx5_eswitch_get_total_vports(steering->dev);
	int vport;
	int ret;

	/* In case eswitch not supported and working in legacy mode */
	if (!nr_vports)
		nr_vports = 1;

	steering->rdma_transport_rx_root_ns =
		kcalloc(nr_vports, sizeof(*steering->rdma_transport_rx_root_ns),
			GFP_KERNEL);
	if (!steering->rdma_transport_rx_root_ns)
		return -ENOMEM;

	for (vport = 0; vport < nr_vports; vport++) {
		ret = init_rdma_transport_rx_root_ns_one(steering, vport);
		if (ret)
			goto err_unwind;
	}

	steering->rdma_transport_rx_vports = nr_vports;
	return 0;

err_unwind:
	/* Walk back over the slots that were fully initialized. */
	for (vport--; vport >= 0; vport--)
		cleanup_root_ns(steering->rdma_transport_rx_root_ns[vport]);
	kfree(steering->rdma_transport_rx_root_ns);
	steering->rdma_transport_rx_root_ns = NULL;
	return ret;
}

/* Allocate the per-vport array of RDMA transport TX root namespaces and
 * initialize every slot.
 *
 * Returns 0 on success (rdma_transport_tx_vports is then set to the slot
 * count), -ENOMEM if the array cannot be allocated, or the error from the
 * first slot that fails to initialize. On failure the slots below the failing
 * index are cleaned up, the array is freed and the pointer is reset to NULL.
 */
static int init_rdma_transport_tx_root_ns(struct mlx5_flow_steering *steering)
{
	int nr_vports = mlx5_eswitch_get_total_vports(steering->dev);
	int vport;
	int ret;

	/* In case eswitch not supported and working in legacy mode */
	if (!nr_vports)
		nr_vports = 1;

	steering->rdma_transport_tx_root_ns =
		kcalloc(nr_vports, sizeof(*steering->rdma_transport_tx_root_ns),
			GFP_KERNEL);
	if (!steering->rdma_transport_tx_root_ns)
		return -ENOMEM;

	for (vport = 0; vport < nr_vports; vport++) {
		ret = init_rdma_transport_tx_root_ns_one(steering, vport);
		if (ret)
			goto err_unwind;
	}

	steering->rdma_transport_tx_vports = nr_vports;
	return 0;

err_unwind:
	/* Walk back over the slots that were fully initialized. */
	for (vport--; vport >= 0; vport--)
		cleanup_root_ns(steering->rdma_transport_tx_root_ns[vport]);
	kfree(steering->rdma_transport_tx_root_ns);
	steering->rdma_transport_tx_root_ns = NULL;
	return ret;
}

/* Tear down the RDMA transport RX and TX root namespace arrays.
 *
 * For each direction whose array pointer is non-NULL, every slot up to the
 * recorded vport count is passed to cleanup_root_ns(), the array is freed and
 * the pointer is reset to NULL.
 */
static void cleanup_rdma_transport_roots_ns(struct mlx5_flow_steering *steering)
{
	int slot;

	if (steering->rdma_transport_rx_root_ns) {
		slot = 0;
		while (slot < steering->rdma_transport_rx_vports) {
			cleanup_root_ns(steering->rdma_transport_rx_root_ns[slot]);
			slot++;
		}

		kfree(steering->rdma_transport_rx_root_ns);
		steering->rdma_transport_rx_root_ns = NULL;
	}

	if (steering->rdma_transport_tx_root_ns) {
		slot = 0;
		while (slot < steering->rdma_transport_tx_vports) {
			cleanup_root_ns(steering->rdma_transport_tx_root_ns[slot]);
			slot++;
		}

		kfree(steering->rdma_transport_tx_root_ns);
		steering->rdma_transport_tx_root_ns = NULL;
	}
}

/* FT and tc chains are stored in the same array so we can re-use the
 * mlx5_get_fdb_sub_ns() and tc api for FT chains.
 * When creating a new ns for each chain store it in the first available slot.
@@ -3631,6 +3770,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
	cleanup_root_ns(steering->rdma_rx_root_ns);
	cleanup_root_ns(steering->rdma_tx_root_ns);
	cleanup_root_ns(steering->egress_root_ns);
	cleanup_rdma_transport_roots_ns(steering);

	devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params,
			       ARRAY_SIZE(mlx5_fs_params));
@@ -3700,6 +3840,18 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev)
			goto err;
	}

	if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(dev, ft_support)) {
		err = init_rdma_transport_rx_root_ns(steering);
		if (err)
			goto err;
	}

	if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(dev, ft_support)) {
		err = init_rdma_transport_tx_root_ns(steering);
		if (err)
			goto err;
	}

	return 0;

err:
@@ -3850,8 +4002,10 @@ mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type
	struct mlx5_flow_namespace *ns;

	if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
		ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS ||
	    ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX ||
	    ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
		ns = mlx5_get_flow_vport_namespace(dev, ns_type, 0);
	else
		ns = mlx5_get_flow_namespace(dev, ns_type);
	if (!ns)
Loading