Commit 74934ddf authored by Patrisious Haddad's avatar Patrisious Haddad Committed by Leon Romanovsky
Browse files

RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace



This patch adds RDMA_TRANSPORT_RX and RDMA_TRANSPORT_TX as a new flow
table type for matcher creation.

Signed-off-by: default avatarPatrisious Haddad <phaddad@nvidia.com>
Reviewed-by: default avatarMark Bloch <mbloch@nvidia.com>
Signed-off-by: default avatarLeon Romanovsky <leonro@nvidia.com>
Link: https://patch.msgid.link/2287d8c50483e880450c7e8e08d9de34cdec1b14.1741261611.git.leon@kernel.org


Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
parent 17ade536
Loading
Loading
Loading
Loading
+141 −13
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_ucaps.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
@@ -690,7 +691,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
					   struct mlx5_ib_flow_prio *prio,
					   int priority,
					   int num_entries, int num_groups,
					   u32 flags)
					   u32 flags, u16 vport)
{
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_table *ft;
@@ -698,6 +699,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
	ft_attr.prio = priority;
	ft_attr.max_fte = num_entries;
	ft_attr.flags = flags;
	ft_attr.vport = vport;
	ft_attr.autogroup.max_num_groups = num_groups;
	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(ft))
@@ -792,7 +794,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
	ft = prio->flow_table;
	if (!ft)
		return _get_prio(dev, ns, prio, priority, max_table_size,
				 num_groups, flags);
				 num_groups, flags, 0);

	return prio;
}
@@ -935,7 +937,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
	prio = &dev->flow_db->opfcs[type];
	if (!prio->flow_table) {
		prio = _get_prio(dev, ns, prio, priority,
				 dev->num_ports * MAX_OPFC_RULES, 1, 0);
				 dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
		if (IS_ERR(prio)) {
			err = PTR_ERR(prio);
			goto free;
@@ -1413,17 +1415,51 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
	return ERR_PTR(err);
}

static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
					  enum mlx5_flow_namespace_type type,
					  u32 *flags, u16 *vport_idx,
					  u16 *vport,
					  struct mlx5_core_dev **ft_mdev,
					  u32 ib_port)
{
	struct mlx5_core_dev *esw_mdev;

	if (!is_mdev_switchdev_mode(dev->mdev))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
		return -EOPNOTSUPP;

	if (!dev->port[ib_port - 1].rep)
		return -EINVAL;

	esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
	if (esw_mdev != dev->mdev)
		return -EOPNOTSUPP;

	*flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
	*ft_mdev = esw_mdev;
	*vport = dev->port[ib_port - 1].rep->vport;
	*vport_idx = dev->port[ib_port - 1].rep->vport_index;

	return 0;
}

static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
		enum mlx5_flow_namespace_type ns_type,
		bool mcast)
		bool mcast, u32 ib_port)
{
	struct mlx5_core_dev *ft_mdev = dev->mdev;
	struct mlx5_flow_namespace *ns = NULL;
	struct mlx5_ib_flow_prio *prio = NULL;
	int max_table_size = 0;
	u16 vport_idx = 0;
	bool esw_encap;
	u32 flags = 0;
	u16 vport = 0;
	int priority;
	int ret;

	if (mcast)
		priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1507,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
			MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
		priority = user_priority;
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
		if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
			return ERR_PTR(-EINVAL);
		ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
						     &vport_idx, &vport,
						     &ft_mdev, ib_port);
		if (ret)
			return ERR_PTR(ret);

		if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
			max_table_size =
				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
					ft_mdev, log_max_ft_size));
		else
			max_table_size =
				BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
					ft_mdev, log_max_ft_size));
		priority = user_priority;
		break;
	default:
		break;
	}

	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);

	ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
	if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
	    ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
		ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
	else
		ns = mlx5_get_flow_namespace(ft_mdev, ns_type);

	if (!ns)
		return ERR_PTR(-EOPNOTSUPP);

@@ -1497,6 +1558,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
	case MLX5_FLOW_NAMESPACE_RDMA_TX:
		prio = &dev->flow_db->rdma_tx[priority];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
		prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
		break;
	case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
		prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
		break;
	default: return ERR_PTR(-EINVAL);
	}

@@ -1507,7 +1574,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
		return prio;

	return _get_prio(dev, ns, prio, priority, max_table_size,
			 MLX5_FS_MAX_TYPES, flags);
			 MLX5_FS_MAX_TYPES, flags, vport);
}

static struct mlx5_ib_flow_handler *
@@ -1626,7 +1693,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
	mutex_lock(&dev->flow_db->lock);

	ft_prio = _get_flow_table(dev, fs_matcher->priority,
				  fs_matcher->ns_type, mcast);
				  fs_matcher->ns_type, mcast,
				  fs_matcher->ib_port);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto unlock;
@@ -1742,6 +1810,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
		break;
	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
		*namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
		break;
	default:
		return -EINVAL;
	}
@@ -1831,7 +1905,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
		return -EINVAL;

	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
		return -EINVAL;

@@ -1848,7 +1923,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
			return -EINVAL;
		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
		    *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
			return -EINVAL;
	} else if (dest_qp) {
@@ -1869,14 +1945,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
			*dest_id = mqp->raw_packet_qp.rq.tirn;
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
	} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
		    fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
		   !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
		*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
	}

	if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
	    (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
	     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
		return -EINVAL;

	return 0;
@@ -2353,6 +2431,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
	return 0;
}

static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
{
	if (is_mdev_switchdev_mode(dev->mdev))
		return UCAP_ENABLED(enabled_caps,
				    RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);

	return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
}

static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
	struct uverbs_attr_bundle *attrs)
{
@@ -2401,6 +2488,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
		goto end;
	}

	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
		err = uverbs_copy_from(&obj->ib_port, attrs,
				       MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
		if (err)
			goto end;
		if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
			err = -EINVAL;
			goto end;
		}
		if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
		    obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
			err = -EINVAL;
			goto end;
		}
		if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
			err = -EOPNOTSUPP;
			goto end;
		}
	}

	uobj->object = obj;
	obj->mdev = dev->mdev;
	atomic_set(&obj->usecnt, 0);
@@ -2448,7 +2555,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(

	mutex_lock(&dev->flow_db->lock);

	ft_prio = _get_flow_table(dev, priority, ns_type, 0);
	ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
	if (IS_ERR(ft_prio)) {
		err = PTR_ERR(ft_prio);
		goto free_obj;
@@ -2834,6 +2941,9 @@ DECLARE_UVERBS_NAMED_METHOD(
			     UA_OPTIONAL),
	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
			     enum mlx5_ib_uapi_flow_table_type,
			     UA_OPTIONAL),
	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
			   UVERBS_ATTR_TYPE(u32),
			   UA_OPTIONAL));

DECLARE_UVERBS_NAMED_METHOD_DESTROY(
@@ -2904,8 +3014,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
	if (!dev->flow_db)
		return -ENOMEM;

	dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
					sizeof(struct mlx5_ib_flow_prio),
					GFP_KERNEL);
	if (!dev->flow_db->rdma_transport_rx)
		goto free_flow_db;

	dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
					sizeof(struct mlx5_ib_flow_prio),
					GFP_KERNEL);
	if (!dev->flow_db->rdma_transport_tx)
		goto free_rdma_transport_rx;

	mutex_init(&dev->flow_db->lock);

	ib_set_device_ops(&dev->ib_dev, &flow_ops);
	return 0;

free_rdma_transport_rx:
	kfree(dev->flow_db->rdma_transport_rx);
free_flow_db:
	kfree(dev->flow_db);
	return -ENOMEM;
}
+2 −0
Original line number Diff line number Diff line
@@ -40,6 +40,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
	 * is a safe assumption that all references are gone.
	 */
	mlx5_ib_fs_cleanup_anchor(dev);
	kfree(dev->flow_db->rdma_transport_tx);
	kfree(dev->flow_db->rdma_transport_rx);
	kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
+3 −0
Original line number Diff line number Diff line
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
	struct mlx5_core_dev	*mdev;
	atomic_t		usecnt;
	u8			match_criteria_enable;
	u32			ib_port;
};

struct mlx5_ib_steering_anchor {
@@ -307,6 +308,8 @@ struct mlx5_ib_flow_db {
	struct mlx5_ib_flow_prio	rdma_tx[MLX5_IB_NUM_FLOW_FT];
	struct mlx5_ib_flow_prio	opfcs[MLX5_IB_OPCOUNTER_MAX];
	struct mlx5_flow_table		*lag_demux_ft;
	struct mlx5_ib_flow_prio        *rdma_transport_rx;
	struct mlx5_ib_flow_prio        *rdma_transport_tx;
	/* Protect flow steering bypass flow tables
	 * when add/del flow rules.
	 * only single add/removal of flow steering rule could be done
+1 −0
Original line number Diff line number Diff line
@@ -239,6 +239,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
	MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
	MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
};

enum mlx5_ib_flow_matcher_destroy_attrs {
+2 −0
Original line number Diff line number Diff line
@@ -45,6 +45,8 @@ enum mlx5_ib_uapi_flow_table_type {
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB	= 0x2,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX	= 0x3,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX	= 0x4,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX	= 0x5,
	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX	= 0x6,
};

enum mlx5_ib_uapi_flow_action_packet_reformat_type {