Commit 38f83a57 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'virtio-net-support-af_xdp-zero-copy-tx'

Xuan Zhuo says:

====================
virtio-net: support AF_XDP zero copy (tx)

XDP socket(AF_XDP) is an excellent bypass kernel network framework. The zero
copy feature of xsk (XDP socket) needs to be supported by the driver. The
performance of zero copy is very good. mlx5 and intel ixgbe already support
this feature, This patch set allows virtio-net to support xsk's zerocopy xmit
feature.

At present, we have completed some preparation:

1. vq-reset (virtio spec and kernel code)
2. virtio-core premapped dma
3. virtio-net xdp refactor

So it is time for Virtio-Net to complete the support for the XDP Socket
Zerocopy.

Virtio-net can not increase the queue num at will, so xsk shares the queue with
kernel.

This patch set includes some refactor to the virtio-net to let that to support
AF_XDP.

The current configuration sets the virtqueue (vq) to premapped mode,
implying that all buffers submitted to this queue must be mapped ahead
of time. This presents a challenge for the virtnet send queue (sq): the
virtnet driver would be required to keep track of dma information for vq
size * 17, which can be substantial. However, if the premapped mode were
applied on a per-buffer basis, the complexity would be greatly reduced.
With AF_XDP enabled, AF_XDP buffers would become premapped, while kernel
skb buffers could remain unmapped.

We can distinguish them by sg_page(sg), When sg_page(sg) is NULL, this
indicates that the driver has performed DMA mapping in advance, allowing
the Virtio core to directly utilize sg_dma_address(sg) without
conducting any internal DMA mapping. Additionally, DMA unmap operations
for this buffer will be bypassed.

ENV: Qemu with vhost-user(polling mode).
Host CPU: Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz

testpmd> show port stats all

 ######################## NIC statistics for port 0 ########################
 RX-packets: 19531092064 RX-missed: 0     RX-bytes: 1093741155584
 RX-errors: 0
 RX-nombuf: 0
 TX-packets: 5959955552 TX-errors: 0     TX-bytes: 371030645664

 Throughput (since last show)
 Rx-pps:   8861574     Rx-bps:  3969985208
 Tx-pps:   8861493     Tx-bps:  3969962736
 ############################################################################

testpmd> show port stats all

  ######################## NIC statistics for port 0  ########################
  RX-packets: 68152727   RX-missed: 0          RX-bytes:  3816552712
  RX-errors: 0
  RX-nombuf:  0
  TX-packets: 68114967   TX-errors: 33216      TX-bytes:  3814438152

  Throughput (since last show)
  Rx-pps:      6333196          Rx-bps:   2837272088
  Tx-pps:      6333227          Tx-bps:   2837285936
  ############################################################################

But AF_XDP consumes more CPU for tx and rx napi(100% and 86%).
====================

Link: https://patch.msgid.link/20241112012928.102478-1-xuanzhuo@linux.alibaba.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 1c978616 37e0ca65
Loading
Loading
Loading
Loading
+297 −72
Original line number Diff line number Diff line
@@ -45,9 +45,6 @@ module_param(napi_tx, bool, 0644);
#define VIRTIO_XDP_TX		BIT(0)
#define VIRTIO_XDP_REDIR	BIT(1)

#define VIRTIO_XDP_FLAG		BIT(0)
#define VIRTIO_ORPHAN_FLAG	BIT(1)

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -86,6 +83,7 @@ struct virtnet_sq_free_stats {
	u64 bytes;
	u64 napi_packets;
	u64 napi_bytes;
	u64 xsk;
};

struct virtnet_sq_stats {
@@ -298,6 +296,10 @@ struct send_queue {

	/* Record whether sq is in reset state. */
	bool reset;

	struct xsk_buff_pool *xsk_pool;

	dma_addr_t xsk_hdr_dma_addr;
};

/* Internal representation of a receive virtqueue */
@@ -498,6 +500,8 @@ struct virtio_net_common_hdr {
	};
};

static struct virtio_net_common_hdr xsk_hdr;

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			       struct net_device *dev,
@@ -509,6 +513,14 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
					       struct sk_buff *curr_skb,
					       struct page *page, void *buf,
					       int len, int truesize);
static void virtnet_xsk_completed(struct send_queue *sq, int num);

enum virtnet_xmit_type {
	VIRTNET_XMIT_TYPE_SKB,
	VIRTNET_XMIT_TYPE_SKB_ORPHAN,
	VIRTNET_XMIT_TYPE_XDP,
	VIRTNET_XMIT_TYPE_XSK,
};

static int rss_indirection_table_alloc(struct virtio_net_ctrl_rss *rss, u16 indir_table_size)
{
@@ -529,67 +541,99 @@ static void rss_indirection_table_free(struct virtio_net_ctrl_rss *rss)
	kfree(rss->indirection_table);
}

static bool is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
}
/* We use the last two bits of the pointer to distinguish the xmit type. */
#define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))

static void *xdp_to_ptr(struct xdp_frame *ptr)
#define VIRTIO_XSK_FLAG_OFFSET 2

static enum virtnet_xmit_type virtnet_xmit_ptr_unpack(void **ptr)
{
	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
	unsigned long p = (unsigned long)*ptr;

	*ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);

	return p & VIRTNET_XMIT_TYPE_MASK;
}

static struct xdp_frame *ptr_to_xdp(void *ptr)
static void *virtnet_xmit_ptr_pack(void *ptr, enum virtnet_xmit_type type)
{
	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
	return (void *)((unsigned long)ptr | type);
}

static bool is_orphan_skb(void *ptr)
static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
			      enum virtnet_xmit_type type)
{
	return (unsigned long)ptr & VIRTIO_ORPHAN_FLAG;
	return virtqueue_add_outbuf(sq->vq, sq->sg, num,
				    virtnet_xmit_ptr_pack(data, type),
				    GFP_ATOMIC);
}

static void *skb_to_ptr(struct sk_buff *skb, bool orphan)
static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
{
	return (void *)((unsigned long)skb | (orphan ? VIRTIO_ORPHAN_FLAG : 0));
	return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
}

static struct sk_buff *ptr_to_skb(void *ptr)
static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	return (struct sk_buff *)((unsigned long)ptr & ~VIRTIO_ORPHAN_FLAG);
	sg_dma_address(sg) = addr;
	sg_dma_len(sg) = len;
}

static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
			    bool in_napi, struct virtnet_sq_free_stats *stats)
{
	struct xdp_frame *frame;
	struct sk_buff *skb;
	unsigned int len;
	void *ptr;

	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		if (!is_xdp_frame(ptr)) {
			struct sk_buff *skb = ptr_to_skb(ptr);
		switch (virtnet_xmit_ptr_unpack(&ptr)) {
		case VIRTNET_XMIT_TYPE_SKB:
			skb = ptr;

			pr_debug("Sent skb %p\n", skb);
			stats->napi_packets++;
			stats->napi_bytes += skb->len;
			napi_consume_skb(skb, in_napi);
			break;

		case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
			skb = ptr;

			if (is_orphan_skb(ptr)) {
			stats->packets++;
			stats->bytes += skb->len;
			} else {
				stats->napi_packets++;
				stats->napi_bytes += skb->len;
			}
			napi_consume_skb(skb, in_napi);
		} else {
			struct xdp_frame *frame = ptr_to_xdp(ptr);
			break;

		case VIRTNET_XMIT_TYPE_XDP:
			frame = ptr;

			stats->packets++;
			stats->bytes += xdp_get_frame_len(frame);
			xdp_return_frame(frame);
			break;

		case VIRTNET_XMIT_TYPE_XSK:
			stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
			stats->xsk++;
			break;
		}
	}
	netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
}

static void virtnet_free_old_xmit(struct send_queue *sq,
				  struct netdev_queue *txq,
				  bool in_napi,
				  struct virtnet_sq_free_stats *stats)
{
	__free_old_xmit(sq, txq, in_napi, stats);

	if (stats->xsk)
		virtnet_xsk_completed(sq, stats->xsk);
}

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
@@ -936,8 +980,7 @@ static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
	addr = dma->addr - sizeof(*dma) + offset;

	sg_init_table(rq->sg, 1);
	rq->sg[0].dma_address = addr;
	rq->sg[0].length = len;
	sg_fill_dma(rq->sg, addr, len);
}

static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
@@ -1020,7 +1063,7 @@ static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
{
	struct virtnet_sq_free_stats stats = {0};

	__free_old_xmit(sq, txq, in_napi, &stats);
	virtnet_free_old_xmit(sq, txq, in_napi, &stats);

	/* Avoid overhead when no packets have been processed
	 * happens when called speculatively from start_xmit.
@@ -1087,12 +1130,6 @@ static void check_sq_full_and_disable(struct virtnet_info *vi,
	}
}

static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
{
	sg->dma_address = addr;
	sg->length = len;
}

static struct xdp_buff *buf_to_xdp(struct virtnet_info *vi,
				   struct receive_queue *rq, void *buf, u32 len)
{
@@ -1373,7 +1410,8 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
		sg_init_table(rq->sg, 1);
		sg_fill_dma(rq->sg, addr, len);

		err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, xsk_buffs[i], gfp);
		err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1,
						    xsk_buffs[i], NULL, gfp);
		if (err)
			goto err;
	}
@@ -1387,6 +1425,120 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
	return err;
}

static void *virtnet_xsk_to_ptr(u32 len)
{
	unsigned long p;

	p = len << VIRTIO_XSK_FLAG_OFFSET;

	return virtnet_xmit_ptr_pack((void *)p, VIRTNET_XMIT_TYPE_XSK);
}

static int virtnet_xsk_xmit_one(struct send_queue *sq,
				struct xsk_buff_pool *pool,
				struct xdp_desc *desc)
{
	struct virtnet_info *vi;
	dma_addr_t addr;

	vi = sq->vq->vdev->priv;

	addr = xsk_buff_raw_get_dma(pool, desc->addr);
	xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);

	sg_init_table(sq->sg, 2);
	sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
	sg_fill_dma(sq->sg + 1, addr, desc->len);

	return virtqueue_add_outbuf_premapped(sq->vq, sq->sg, 2,
					      virtnet_xsk_to_ptr(desc->len),
					      GFP_ATOMIC);
}

static int virtnet_xsk_xmit_batch(struct send_queue *sq,
				  struct xsk_buff_pool *pool,
				  unsigned int budget,
				  u64 *kicks)
{
	struct xdp_desc *descs = pool->tx_descs;
	bool kick = false;
	u32 nb_pkts, i;
	int err;

	budget = min_t(u32, budget, sq->vq->num_free);

	nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
	if (!nb_pkts)
		return 0;

	for (i = 0; i < nb_pkts; i++) {
		err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
		if (unlikely(err)) {
			xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
			break;
		}

		kick = true;
	}

	if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
		(*kicks)++;

	return i;
}

static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
			     int budget)
{
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_sq_free_stats stats = {};
	struct net_device *dev = vi->dev;
	u64 kicks = 0;
	int sent;

	/* Avoid to wakeup napi meanless, so call __free_old_xmit instead of
	 * free_old_xmit().
	 */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

	if (stats.xsk)
		xsk_tx_completed(sq->xsk_pool, stats.xsk);

	sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);

	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
		check_sq_full_and_disable(vi, vi->dev, sq);

	if (sent) {
		struct netdev_queue *txq;

		txq = netdev_get_tx_queue(vi->dev, sq - vi->sq);
		txq_trans_cond_update(txq);
	}

	u64_stats_update_begin(&sq->stats.syncp);
	u64_stats_add(&sq->stats.packets, stats.packets);
	u64_stats_add(&sq->stats.bytes,   stats.bytes);
	u64_stats_add(&sq->stats.kicks,   kicks);
	u64_stats_add(&sq->stats.xdp_tx,  sent);
	u64_stats_update_end(&sq->stats.syncp);

	if (xsk_uses_need_wakeup(pool))
		xsk_set_tx_need_wakeup(pool);

	return sent;
}

static void xsk_wakeup(struct send_queue *sq)
{
	if (napi_if_scheduled_mark_missed(&sq->napi))
		return;

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
}

static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
{
	struct virtnet_info *vi = netdev_priv(dev);
@@ -1400,14 +1552,19 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)

	sq = &vi->sq[qid];

	if (napi_if_scheduled_mark_missed(&sq->napi))
	xsk_wakeup(sq);
	return 0;
}

	local_bh_disable();
	virtqueue_napi_schedule(&sq->napi, sq->vq);
	local_bh_enable();
static void virtnet_xsk_completed(struct send_queue *sq, int num)
{
	xsk_tx_completed(sq->xsk_pool, num);

	return 0;
	/* If this is called by rx poll, start_xmit and xdp xmit we should
	 * wakeup the tx napi to consume the xsk tx queue, because the tx
	 * interrupt may not be triggered.
	 */
	xsk_wakeup(sq);
}

static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
@@ -1450,8 +1607,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
			    skb_frag_size(frag), skb_frag_off(frag));
	}

	err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
				   xdp_to_ptr(xdpf), GFP_ATOMIC);
	err = virtnet_add_outbuf(sq, nr_frags + 1, xdpf, VIRTNET_XMIT_TYPE_XDP);
	if (unlikely(err))
		return -ENOSPC; /* Caller handle free/refcnt */

@@ -1524,7 +1680,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
	}

	/* Free up any pending old buffers before queueing new ones. */
	__free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
	virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
			      false, &stats);

	for (i = 0; i < n; i++) {
@@ -2453,7 +2609,7 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,

	virtnet_rq_init_one_sg(rq, buf, vi->hdr_len + GOOD_PACKET_LEN);

	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
@@ -2573,7 +2729,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
	virtnet_rq_init_one_sg(rq, buf, len);

	ctx = mergeable_len_to_ctx(len + room, headroom);
	err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
	err = virtqueue_add_inbuf_premapped(rq->vq, rq->sg, 1, buf, ctx, gfp);
	if (err < 0) {
		virtnet_rq_unmap(rq, buf, 0);
		put_page(virt_to_head_page(buf));
@@ -2982,7 +3138,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned int index = vq2txq(sq->vq);
	struct netdev_queue *txq;
	int opaque;
	int opaque, xsk_done = 0;
	bool done;

	if (unlikely(is_xdp_raw_buffer_queue(vi, index))) {
@@ -2994,6 +3150,10 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
	txq = netdev_get_tx_queue(vi->dev, index);
	__netif_tx_lock(txq, raw_smp_processor_id());
	virtqueue_disable_cb(sq->vq);

	if (sq->xsk_pool)
		xsk_done = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);
	else
		free_old_xmit(sq, txq, !!budget);

	if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) {
@@ -3005,6 +3165,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
		netif_tx_wake_queue(txq);
	}

	if (xsk_done >= budget) {
		__netif_tx_unlock(txq);
		return budget;
	}

	opaque = virtqueue_enable_cb_prepare(sq->vq);

	done = napi_complete_done(napi, 0);
@@ -3072,8 +3237,9 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
			return num_sg;
		num_sg++;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
				    skb_to_ptr(skb, orphan), GFP_ATOMIC);

	return virtnet_add_outbuf(sq, num_sg, skb,
				  orphan ? VIRTNET_XMIT_TYPE_SKB_ORPHAN : VIRTNET_XMIT_TYPE_SKB);
}

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -5078,7 +5244,7 @@ static int virtnet_set_coalesce(struct net_device *dev,
				struct netlink_ext_ack *extack)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int ret, queue_number, napi_weight;
	int ret, queue_number, napi_weight, i;
	bool update_napi = false;

	/* Can't change NAPI weight if the link is up */
@@ -5107,6 +5273,14 @@ static int virtnet_set_coalesce(struct net_device *dev,
		return ret;

	if (update_napi) {
		/* xsk xmit depends on the tx napi. So if xsk is active,
		 * prevent modifications to tx napi.
		 */
		for (i = queue_number; i < vi->max_queue_pairs; i++) {
			if (vi->sq[i].xsk_pool)
				return -EBUSY;
		}

		for (; queue_number < vi->max_queue_pairs; queue_number++)
			vi->sq[queue_number].napi.weight = napi_weight;
	}
@@ -5555,6 +5729,29 @@ static int virtnet_rq_bind_xsk_pool(struct virtnet_info *vi, struct receive_queu
	return err;
}

static int virtnet_sq_bind_xsk_pool(struct virtnet_info *vi,
				    struct send_queue *sq,
				    struct xsk_buff_pool *pool)
{
	int err, qindex;

	qindex = sq - vi->sq;

	virtnet_tx_pause(vi, sq);

	err = virtqueue_reset(sq->vq, virtnet_sq_free_unused_buf);
	if (err) {
		netdev_err(vi->dev, "reset tx fail: tx queue index: %d err: %d\n", qindex, err);
		pool = NULL;
	}

	sq->xsk_pool = pool;

	virtnet_tx_resume(vi, sq);

	return err;
}

static int virtnet_xsk_pool_enable(struct net_device *dev,
				   struct xsk_buff_pool *pool,
				   u16 qid)
@@ -5563,6 +5760,7 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
	struct receive_queue *rq;
	struct device *dma_dev;
	struct send_queue *sq;
	dma_addr_t hdr_dma;
	int err, size;

	if (vi->hdr_len > xsk_pool_get_headroom(pool))
@@ -5600,6 +5798,11 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
	if (!rq->xsk_buffs)
		return -ENOMEM;

	hdr_dma = virtqueue_dma_map_single_attrs(sq->vq, &xsk_hdr, vi->hdr_len,
						 DMA_TO_DEVICE, 0);
	if (virtqueue_dma_mapping_error(sq->vq, hdr_dma))
		return -ENOMEM;

	err = xsk_pool_dma_map(pool, dma_dev, 0);
	if (err)
		goto err_xsk_map;
@@ -5608,11 +5811,24 @@ static int virtnet_xsk_pool_enable(struct net_device *dev,
	if (err)
		goto err_rq;

	err = virtnet_sq_bind_xsk_pool(vi, sq, pool);
	if (err)
		goto err_sq;

	/* Now, we do not support tx offload(such as tx csum), so all the tx
	 * virtnet hdr is zero. So all the tx packets can share a single hdr.
	 */
	sq->xsk_hdr_dma_addr = hdr_dma;

	return 0;

err_sq:
	virtnet_rq_bind_xsk_pool(vi, rq, NULL);
err_rq:
	xsk_pool_dma_unmap(pool, 0);
err_xsk_map:
	virtqueue_dma_unmap_single_attrs(rq->vq, hdr_dma, vi->hdr_len,
					 DMA_TO_DEVICE, 0);
	return err;
}

@@ -5621,19 +5837,24 @@ static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid)
	struct virtnet_info *vi = netdev_priv(dev);
	struct xsk_buff_pool *pool;
	struct receive_queue *rq;
	struct send_queue *sq;
	int err;

	if (qid >= vi->curr_queue_pairs)
		return -EINVAL;

	sq = &vi->sq[qid];
	rq = &vi->rq[qid];

	pool = rq->xsk_pool;

	err = virtnet_rq_bind_xsk_pool(vi, rq, NULL);
	err |= virtnet_sq_bind_xsk_pool(vi, sq, NULL);

	xsk_pool_dma_unmap(pool, 0);

	virtqueue_dma_unmap_single_attrs(sq->vq, sq->xsk_hdr_dma_addr,
					 vi->hdr_len, DMA_TO_DEVICE, 0);
	kvfree(rq->xsk_buffs);

	return err;
@@ -5991,10 +6212,26 @@ static void free_receive_page_frags(struct virtnet_info *vi)

static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf)
{
	if (!is_xdp_frame(buf))
	struct virtnet_info *vi = vq->vdev->priv;
	struct send_queue *sq;
	int i = vq2rxq(vq);

	sq = &vi->sq[i];

	switch (virtnet_xmit_ptr_unpack(&buf)) {
	case VIRTNET_XMIT_TYPE_SKB:
	case VIRTNET_XMIT_TYPE_SKB_ORPHAN:
		dev_kfree_skb(buf);
	else
		xdp_return_frame(ptr_to_xdp(buf));
		break;

	case VIRTNET_XMIT_TYPE_XDP:
		xdp_return_frame(buf);
		break;

	case VIRTNET_XMIT_TYPE_XSK:
		xsk_tx_completed(sq->xsk_pool, 1);
		break;
	}
}

static void free_unused_bufs(struct virtnet_info *vi)
@@ -6168,15 +6405,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
	return -ENOMEM;
}

static void virtnet_rq_set_premapped(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++)
		/* error should never happen */
		BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq));
}

static int init_vqs(struct virtnet_info *vi)
{
	int ret;
@@ -6190,10 +6418,6 @@ static int init_vqs(struct virtnet_info *vi)
	if (ret)
		goto err_free;

	/* disable for big mode */
	if (!vi->big_packets || vi->mergeable_rx_bufs)
		virtnet_rq_set_premapped(vi);

	cpus_read_lock();
	virtnet_set_affinity(vi);
	cpus_read_unlock();
@@ -6455,7 +6679,8 @@ static int virtnet_probe(struct virtio_device *vdev)
		dev->hw_features |= NETIF_F_GRO_HW;

	dev->vlan_features = dev->features;
	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
		NETDEV_XDP_ACT_XSK_ZEROCOPY;

	/* MTU range: 68 - 65535 */
	dev->min_mtu = MIN_MTU;
+181 −175

File changed.

Preview size limit exceeded, changes collapsed.

+11 −2
Original line number Diff line number Diff line
@@ -56,6 +56,17 @@ int virtqueue_add_inbuf_ctx(struct virtqueue *vq,
			    void *ctx,
			    gfp_t gfp);

int virtqueue_add_inbuf_premapped(struct virtqueue *vq,
				  struct scatterlist *sg, unsigned int num,
				  void *data,
				  void *ctx,
				  gfp_t gfp);

int virtqueue_add_outbuf_premapped(struct virtqueue *vq,
				   struct scatterlist *sg, unsigned int num,
				   void *data,
				   gfp_t gfp);

int virtqueue_add_sgs(struct virtqueue *vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
@@ -82,8 +93,6 @@ bool virtqueue_enable_cb(struct virtqueue *vq);

unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);

int virtqueue_set_dma_premapped(struct virtqueue *_vq);

bool virtqueue_poll(struct virtqueue *vq, unsigned);

bool virtqueue_enable_cb_delayed(struct virtqueue *vq);