Commit 152ba35c authored by Jakub Kicinski
Browse files

Merge branch 'net-mlx5e-use-multiple-doorbells'

Tariq Toukan says:

====================
net/mlx5e: Use multiple doorbells

mlx5e uses a single MMIO-mapped doorbell per netdevice for all send and
receive operations. Writes to the doorbell go over the PCIe bus directly
to the device, which then services the indicated queues.

On certain architectures and with sufficiently high volume of doorbell
ringing (many cores, many active channels, small MTU, no GSO, etc.), the
MMIO-mapped doorbell address can become contended, leading to delays in
servicing writes to that address and a global slowdown of all traffic
for that netdevice.

mlx5 NICs have supported multiple doorbells for many years, and the
mlx5_ib driver for the same hardware has traditionally used multiple
doorbells.

This patch series extends the mlx5 Ethernet driver to also use multiple
doorbells to solve the MMIO contention issues. By allocating and using
more doorbells for all channel queues (TX and RX), the MMIO contention
on any particular doorbell address is reduced significantly.

The first patches are cleanups:
net/mlx5: Fix typo of MLX5_EQ_DOORBEL_OFFSET
net/mlx5: Remove unused 'offset' field from struct mlx5_sq_bfreg
net/mlx5e: Remove unused 'xsk' param of mlx5e_build_xdpsq_param

The next patch separates the global doorbell from Ethernet-specific
resources:
net/mlx5: Store the global doorbell in mlx5_priv

Next, plumbing to allow a different doorbell to be used for channel TX
and RX queues:
net/mlx5e: Prepare for using multiple TX doorbells
net/mlx5e: Prepare for using different CQ doorbells

Then, enable using multiple doorbells for channel queues:
net/mlx5e: Use multiple TX doorbells
net/mlx5e: Use multiple CQ doorbells

Finally, introduce a devlink parameter to control this:
devlink: Add a 'num_doorbells' driverinit param
net/mlx5e: Use the 'num_doorbells' devlink param

Some performance results, done with the Linux pktgen script, running b2b
over ConnectX-8 NICs:
samples/pktgen/pktgen_sample02_multiqueue.sh -i $NIC -s 64 -d $DST_IP \
  -m $MAC -t 64

Baseline (1 doorbell): 9 Mpps
This series (8 doorbells): 56 Mpps

Note that pktgen without 'burst' rings the doorbell after every packet,
while real packet TX using NAPI usually batches multiple pending packets
with the xmit_more mechanism. So this is in essence a micro-benchmark
showcasing the improvement of using multiple doorbells on platforms
affected by MMIO contention. Real life traffic usually sees little
movement either way.
====================

Link: https://patch.msgid.link/1758031904-634231-1-git-send-email-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents cbff0b1e 11bbcfb7
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -148,3 +148,6 @@ own name.
     - The max number of Virtual Functions (VFs) exposed by the PF.
       After reboot/PCI reset, the 'sriov_totalvfs' entry under the device's sysfs
       directory will report this value.
   * - ``num_doorbells``
     - u32
     - Controls the number of doorbells used by the device.
+9 −0
Original line number Diff line number Diff line
@@ -62,6 +62,15 @@ Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW
   echo 1 >/sys/bus/pci/rescan
   grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*

   * - ``num_doorbells``
     - driverinit
     - This controls the number of channel doorbells used by the netdev. In all
       cases, an additional doorbell is allocated and used for non-channel
       communication (e.g. for PTP, HWS, etc.). Supported values are:

       - 0: No channel-specific doorbells, use the global one for everything.
       - [1, max_num_channels]: Spread netdev channels equally across these
         doorbells.

The ``mlx5`` driver also implements the following driver-specific
parameters.
+2 −2
Original line number Diff line number Diff line
@@ -648,7 +648,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uar->map;
	void __iomem *uar_page = mdev->priv.bfreg.up->map;
	unsigned long irq_flags;
	int ret = 0;

@@ -923,7 +923,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		 cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uar->index;
	*index = dev->mdev->priv.bfreg.up->index;

	return 0;

+0 −1
Original line number Diff line number Diff line
@@ -145,7 +145,6 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
		mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
			      cq->cqn);

	cq->uar = dev->priv.uar;
	cq->irqn = eq->core.irqn;

	return 0;
+26 −0
Original line number Diff line number Diff line
@@ -530,6 +530,25 @@ mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id,
	return 0;
}

/*
 * Validate a requested value for the 'num_doorbells' devlink parameter.
 *
 * The requested u32 value is compared against the result of
 * mlx5e_get_max_num_channels() for the device behind @devlink. A request
 * larger than that limit is rejected with -EINVAL and an explanatory
 * message is attached to @extack; any other value is accepted and 0 is
 * returned. The @id argument is not used.
 */
static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id,
					       union devlink_param_value val,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(devlink);
	u32 limit = mlx5e_get_max_num_channels(dev);
	u32 requested = val.vu32;

	/* Anything up to and including the channel limit is acceptable. */
	if (requested <= limit)
		return 0;

	NL_SET_ERR_MSG_FMT_MOD(extack,
			       "Requested num_doorbells (%u) exceeds maximum number of channels (%u)",
			       requested, limit);
	return -EINVAL;
}

static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink)
{
	struct mlx5_core_dev *dev = devlink_priv(devlink);
@@ -609,6 +628,9 @@ static const struct devlink_param mlx5_devlink_eth_params[] = {
			     "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32,
			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
			     mlx5_devlink_hairpin_queue_size_validate),
	DEVLINK_PARAM_GENERIC(NUM_DOORBELLS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
			      mlx5_devlink_num_doorbells_validate),
};

static int mlx5_devlink_eth_params_register(struct devlink *devlink)
@@ -632,6 +654,10 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)

	mlx5_devlink_hairpin_params_init_values(devlink);

	value.vu32 = MLX5_DEFAULT_NUM_DOORBELLS;
	devl_param_driverinit_value_set(devlink,
					DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS,
					value);
	return 0;
}

Loading