Commit ee2a35fe authored by Jakub Kicinski
Browse files

Merge tag 'mlx5-updates-2023-10-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2023-10-10

1) Adham Faris, Increase max supported channels number to 256

2) Leon Romanovsky, Allow IPsec soft/hard limits in bytes

3) Shay Drory, Replace global mlx5_intf_lock with
   HCA devcom component lock

4) Wei Zhang, Optimize SF creation flow

During SF creation, HCA state gets changed from INVALID to
IN_USE step by step. Accordingly, FW sends vhca event to
driver to inform about this state change asynchronously.
Each vhca event is critical because all related SW/FW
operations are triggered by it.

Currently there is only a single mlx5 general event handler
which handles not only vhca events but also many other events.
This creates a huge bottleneck because all events are forced
to be handled serially.

Moreover, all SFs share the same table_lock, so they inevitably
impact each other when they are created in parallel.

This series will solve this issue by:

1. A dedicated vhca event handler is introduced to eliminate
   the mutual impact with other mlx5 events.
2. As many work queues as the max number of FW cmd threads are
   employed in the vhca event handler to fully utilize FW
   capability.
3. Redesign SF active work logic to completely remove
   table_lock.

With the above optimizations, SF creation time is reduced by 25%,
i.e. from 80s to 60s when creating 100 SFs.

Patches summary:

Patch 1 - implement dedicated vhca event handler with max FW
          cmd threads of work queues.
Patch 2 - remove table_lock by redesigning SF active work
          logic.

* tag 'mlx5-updates-2023-10-10' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: Allow IPsec soft/hard limits in bytes
  net/mlx5e: Increase max supported channels number to 256
  net/mlx5e: Preparations for supporting larger number of channels
  net/mlx5e: Refactor mlx5e_rss_init() and mlx5e_rss_free() API's
  net/mlx5e: Refactor mlx5e_rss_set_rxfh() and mlx5e_rss_get_rxfh()
  net/mlx5e: Refactor rx_res_init() and rx_res_free() APIs
  net/mlx5e: Use PTR_ERR_OR_ZERO() to simplify code
  net/mlx5: Use PTR_ERR_OR_ZERO() to simplify code
  net/mlx5: fix config name in Kconfig parameter documentation
  net/mlx5: Remove unused declaration
  net/mlx5: Replace global mlx5_intf_lock with HCA devcom component lock
  net/mlx5: Refactor LAG peer device lookout bus logic to mlx5 devcom
  net/mlx5: Avoid false positive lockdep warning by adding lock_class_key
  net/mlx5: Redesign SF active work to remove table_lock
  net/mlx5: Parallelize vhca event handling
====================

Link: https://lore.kernel.org/r/20231014171908.290428-1-saeed@kernel.org


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents d4b14c1d 627aa139
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -67,7 +67,7 @@ Enabling the driver and kconfig options
|    Enables :ref:`IPSec XFRM cryptography-offload acceleration <xfrm_device>`.


**CONFIG_MLX5_EN_MACSEC=(y/n)**
**CONFIG_MLX5_MACSEC=(y/n)**

|    Build support for MACsec cryptography-offload acceleration in the NIC.

+10 −95
Original line number Diff line number Diff line
@@ -38,8 +38,6 @@
#include "devlink.h"
#include "lag/lag.h"

/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
static DEFINE_IDA(mlx5_adev_ida);

static bool is_eth_rep_supported(struct mlx5_core_dev *dev)
@@ -337,9 +335,9 @@ static void del_adev(struct auxiliary_device *adev)

void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev)
{
	mutex_lock(&mlx5_intf_mutex);
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
	mutex_unlock(&mlx5_intf_mutex);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
}

bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev)
@@ -355,7 +353,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
	int ret = 0, i;

	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&mlx5_intf_mutex);
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
	for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
		if (!priv->adev[i]) {
@@ -400,7 +398,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
			break;
		}
	}
	mutex_unlock(&mlx5_intf_mutex);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
	return ret;
}

@@ -413,7 +411,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend)
	int i;

	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&mlx5_intf_mutex);
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) {
		if (!priv->adev[i])
			continue;
@@ -443,7 +441,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend)
		priv->adev[i] = NULL;
	}
	priv->flags |= MLX5_PRIV_FLAGS_DETACH;
	mutex_unlock(&mlx5_intf_mutex);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
}

int mlx5_register_device(struct mlx5_core_dev *dev)
@@ -451,10 +449,10 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
	int ret;

	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&mlx5_intf_mutex);
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
	ret = mlx5_rescan_drivers_locked(dev);
	mutex_unlock(&mlx5_intf_mutex);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
	if (ret)
		mlx5_unregister_device(dev);

@@ -464,10 +462,10 @@ int mlx5_register_device(struct mlx5_core_dev *dev)
void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&mlx5_intf_mutex);
	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
	dev->priv.flags = MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV;
	mlx5_rescan_drivers_locked(dev);
	mutex_unlock(&mlx5_intf_mutex);
	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
}

static int add_drivers(struct mlx5_core_dev *dev)
@@ -545,7 +543,6 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	lockdep_assert_held(&mlx5_intf_mutex);
	if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
		return 0;

@@ -565,85 +562,3 @@ bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev

	return (fsystem_guid && psystem_guid && fsystem_guid == psystem_guid);
}

static u32 mlx5_gen_pci_id(const struct mlx5_core_dev *dev)
{
	return (u32)((pci_domain_nr(dev->pdev->bus) << 16) |
		     (dev->pdev->bus->number << 8) |
		     PCI_SLOT(dev->pdev->devfn));
}

/* Decide whether @mdev is a suitable "next" physical device relative to @curr.
 *
 * Returns 1 when @mdev is a PF, is not @curr itself, and either
 * mlx5_same_hw_devs() reports a match with @curr or both devices produce the
 * same mlx5_gen_pci_id(). Returns 0 otherwise.
 */
static int _next_phys_dev(struct mlx5_core_dev *mdev,
			  const struct mlx5_core_dev *curr)
{
	if (!mlx5_core_is_pf(mdev) || mdev == curr)
		return 0;

	/* The PCI ids are only compared when the HW check does not match. */
	return mlx5_same_hw_devs(mdev, (struct mlx5_core_dev *)curr) ||
	       mlx5_gen_pci_id(mdev) == mlx5_gen_pci_id(curr);
}

/* Return the PCI driver data of @other, or NULL when @other is not bound to
 * the same driver as @this.
 */
static void *pci_get_other_drvdata(struct device *this, struct device *other)
{
	return this->driver == other->driver ?
	       pci_get_drvdata(to_pci_dev(other)) : NULL;
}

/* Device match callback used for the LAG peer lookup.
 *
 * @dev:  candidate device being examined.
 * @data: the struct mlx5_core_dev the search was started from.
 *
 * Returns 0 when @dev is bound to a different driver (or has no drvdata) or
 * when its mlx5 device does not support LAG; otherwise returns the verdict
 * of _next_phys_dev().
 */
static int next_phys_dev_lag(struct device *dev, const void *data)
{
	struct mlx5_core_dev *self = (struct mlx5_core_dev *)data;
	struct mlx5_core_dev *candidate = pci_get_other_drvdata(self->device, dev);

	if (!candidate || !mlx5_lag_is_supported(candidate))
		return 0;

	return _next_phys_dev(candidate, data);
}

/* Find another mlx5 core device on the PCI bus that satisfies @match.
 *
 * @dev:   device the search is performed for; passed to @match as its data
 *         argument. Only PFs are searched for; non-PF devices get NULL.
 * @match: predicate handed to bus_find_device().
 *
 * Returns the drvdata of the first matching PCI device, or NULL when @dev is
 * not a PF or nothing matches. No reference is kept on the returned device,
 * so the caller is responsible for keeping it alive (NOTE(review): callers
 * appear to rely on holding the intf mutex for this - confirm).
 */
static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
					       int (*match)(struct device *dev, const void *data))
{
	struct mlx5_core_dev *next_mdev;
	struct device *next;

	if (!mlx5_core_is_pf(dev))
		return NULL;

	next = bus_find_device(&pci_bus_type, NULL, dev, match);
	if (!next)
		return NULL;

	/* Read the drvdata while the reference taken by bus_find_device() is
	 * still held; only then drop it, so @next is never dereferenced after
	 * put_device().
	 */
	next_mdev = pci_get_drvdata(to_pci_dev(next));
	put_device(next);
	return next_mdev;
}

/* Look up the next physical device usable as a LAG peer of @dev, using
 * next_phys_dev_lag() as the match predicate.
 *
 * Must be called with intf_mutex held (checked via lockdep below).
 * Returns whatever mlx5_get_next_dev() returns for that predicate.
 */
struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
{
	lockdep_assert_held(&mlx5_intf_mutex);
	return mlx5_get_next_dev(dev, &next_phys_dev_lag);
}

/* Acquire the global mlx5_intf_mutex. */
void mlx5_dev_list_lock(void)
{
	mutex_lock(&mlx5_intf_mutex);
}
/* Release the global mlx5_intf_mutex. */
void mlx5_dev_list_unlock(void)
{
	mutex_unlock(&mlx5_intf_mutex);
}

/* Try to acquire the global mlx5_intf_mutex without blocking.
 *
 * Returns the result of mutex_trylock(): non-zero when the mutex was
 * acquired, 0 when it is already held.
 */
int mlx5_dev_list_trylock(void)
{
	return mutex_trylock(&mlx5_intf_mutex);
}
+3 −2
Original line number Diff line number Diff line
@@ -141,7 +141,7 @@ struct page_pool;
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW            0x2

#define MLX5E_MIN_NUM_CHANNELS         0x1
#define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE / 2)
#define MLX5E_MAX_NUM_CHANNELS         256
#define MLX5E_TX_CQ_POLL_BUDGET        128
#define MLX5E_TX_XSK_POLL_BUDGET       64
#define MLX5E_SQ_RECOVER_MIN_INTERVAL  500 /* msecs */
@@ -200,7 +200,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
	return is_kdump_kernel() ?
		MLX5E_MIN_NUM_CHANNELS :
		min_t(int, mlx5_comp_vectors_max(mdev), MLX5E_MAX_NUM_CHANNELS);
		min3(mlx5_comp_vectors_max(mdev), (u32)MLX5E_MAX_NUM_CHANNELS,
		     (u32)(1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)));
}

/* The maximum WQE size can be retrieved by max_wqe_sz_sq in
+0 −1
Original line number Diff line number Diff line
@@ -150,7 +150,6 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile,
					  struct dentry *dfs_root);
void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs);
struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs);
void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc);
struct mlx5e_tc_table *mlx5e_fs_get_tc(struct mlx5e_flow_steering *fs);
struct mlx5e_l2_table *mlx5e_fs_get_l2(struct mlx5e_flow_steering *fs);
struct mlx5_flow_namespace *mlx5e_fs_get_ns(struct mlx5e_flow_steering *fs, bool egress);
+22 −10
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
{
	unsigned int i;

	for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
	for (i = 0; i < indir->actual_table_size; i++)
		indir->table[i] = i % num_channels;
}

@@ -45,9 +45,9 @@ static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
}

int mlx5e_rqt_init_direct(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
			  bool indir_enabled, u32 init_rqn)
			  bool indir_enabled, u32 init_rqn, u32 indir_table_size)
{
	u16 max_size = indir_enabled ? MLX5E_INDIR_RQT_SIZE : 1;
	u16 max_size = indir_enabled ? indir_table_size : 1;

	return mlx5e_rqt_init(rqt, mdev, max_size, &init_rqn, 1);
}
@@ -68,11 +68,11 @@ static int mlx5e_calc_indir_rqns(u32 *rss_rqns, u32 *rqns, unsigned int num_rqns
{
	unsigned int i;

	for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) {
	for (i = 0; i < indir->actual_table_size; i++) {
		unsigned int ix = i;

		if (hfunc == ETH_RSS_HASH_XOR)
			ix = mlx5e_bits_invert(ix, ilog2(MLX5E_INDIR_RQT_SIZE));
			ix = mlx5e_bits_invert(ix, ilog2(indir->actual_table_size));

		ix = indir->table[ix];

@@ -94,7 +94,7 @@ int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
	u32 *rss_rqns;
	int err;

	rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
	rss_rqns = kvmalloc_array(indir->actual_table_size, sizeof(*rss_rqns), GFP_KERNEL);
	if (!rss_rqns)
		return -ENOMEM;

@@ -102,13 +102,25 @@ int mlx5e_rqt_init_indir(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
	if (err)
		goto out;

	err = mlx5e_rqt_init(rqt, mdev, MLX5E_INDIR_RQT_SIZE, rss_rqns, MLX5E_INDIR_RQT_SIZE);
	err = mlx5e_rqt_init(rqt, mdev, indir->max_table_size, rss_rqns,
			     indir->actual_table_size);

out:
	kvfree(rss_rqns);
	return err;
}

/* The RQT is sized to this multiple of the channel count (before rounding up
 * to a power of two).
 */
#define MLX5E_UNIFORM_SPREAD_RQT_FACTOR 2

/* Compute the indirection RQT size to use for @num_channels channels.
 *
 * The size is num_channels * MLX5E_UNIFORM_SPREAD_RQT_FACTOR rounded up to a
 * power of two, but not smaller than MLX5E_INDIR_MIN_RQT_SIZE, and finally
 * clamped to the device limit of 2^log_max_rqt_size entries.
 *
 * NOTE(review): roundup_pow_of_two() is documented as undefined for 0, so
 * @num_channels is presumably always non-zero here - confirm against callers.
 */
u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
{
	u32 rqt_size = max_t(u32, MLX5E_INDIR_MIN_RQT_SIZE,
			     roundup_pow_of_two(num_channels * MLX5E_UNIFORM_SPREAD_RQT_FACTOR));
	/* Shift an unsigned 1 so that a capability value of 31 cannot shift
	 * into the sign bit of an int.
	 */
	u32 max_cap_rqt_size = 1U << MLX5_CAP_GEN(mdev, log_max_rqt_size);

	return min_t(u32, rqt_size, max_cap_rqt_size);
}

void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
{
	mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
@@ -151,10 +163,10 @@ int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_
	u32 *rss_rqns;
	int err;

	if (WARN_ON(rqt->size != MLX5E_INDIR_RQT_SIZE))
	if (WARN_ON(rqt->size != indir->max_table_size))
		return -EINVAL;

	rss_rqns = kvmalloc_array(MLX5E_INDIR_RQT_SIZE, sizeof(*rss_rqns), GFP_KERNEL);
	rss_rqns = kvmalloc_array(indir->actual_table_size, sizeof(*rss_rqns), GFP_KERNEL);
	if (!rss_rqns)
		return -ENOMEM;

@@ -162,7 +174,7 @@ int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, unsigned int num_
	if (err)
		goto out;

	err = mlx5e_rqt_redirect(rqt, rss_rqns, MLX5E_INDIR_RQT_SIZE);
	err = mlx5e_rqt_redirect(rqt, rss_rqns, indir->actual_table_size);

out:
	kvfree(rss_rqns);
Loading