Commit 3f1baa91 authored by Sankararaman Jayaraman, committed by Jakub Kicinski
Browse files

vmxnet3: Fix tx queue race condition with XDP



If XDP traffic runs on a CPU whose ID is greater than or equal to
the number of Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks queue 0 for transmission, as it uses reciprocal_scale()
instead of a simple modulo operation.

vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
returned queue without any locking, which can lead to race conditions
when multiple XDP xmits run in parallel on different CPUs.

This patch uses a simple modulo scheme when the current CPU equals or
exceeds the number of Tx queues on the NIC. It also adds locking in
the vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.

Fixes: 54f00cce ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent a8aa6a6d
Loading
Loading
Loading
Loading
+12 −2
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter)
	if (likely(cpu < tq_number))
		tq = &adapter->tx_queue[cpu];
	else
		tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)];
		tq = &adapter->tx_queue[cpu % tq_number];

	return tq;
}
@@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
	u32 buf_size;
	u32 dw2;

	spin_lock_irq(&tq->tx_lock);
	dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
	dw2 |= xdpf->len;
	ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill;
@@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,

	if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) {
		tq->stats.tx_ring_full++;
		spin_unlock_irq(&tq->tx_lock);
		return -ENOSPC;
	}

@@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
		tbi->dma_addr = dma_map_single(&adapter->pdev->dev,
					       xdpf->data, buf_size,
					       DMA_TO_DEVICE);
		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr))
		if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) {
			spin_unlock_irq(&tq->tx_lock);
			return -EFAULT;
		}
		tbi->map_type |= VMXNET3_MAP_SINGLE;
	} else { /* XDP buffer from page pool */
		page = virt_to_page(xdpf->data);
@@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter,
	dma_wmb();
	gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^
						  VMXNET3_TXD_GEN);
	spin_unlock_irq(&tq->tx_lock);

	/* No need to handle the case when tx_num_deferred doesn't reach
	 * threshold. Backend driver at hypervisor side will poll and reset
@@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
{
	struct vmxnet3_adapter *adapter = netdev_priv(dev);
	struct vmxnet3_tx_queue *tq;
	struct netdev_queue *nq;
	int i;

	if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state)))
@@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev,
	if (tq->stopped)
		return -ENETDOWN;

	nq = netdev_get_tx_queue(adapter->netdev, tq->qid);

	__netif_tx_lock(nq, smp_processor_id());
	for (i = 0; i < n; i++) {
		if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) {
			tq->stats.xdp_xmit_err++;
@@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev,
		}
	}
	tq->stats.xdp_xmit += i;
	__netif_tx_unlock(nq);

	return i;
}