Commit b430f6c3 authored by Jakub Kicinski
Browse files

Merge branch 'virtio_udp_tunnel_08_07_2025' of https://github.com/pabeni/linux-devel

Paolo Abeni says:

====================
virtio: introduce GSO over UDP tunnel

Some virtualized deployments use UDP tunnels pervasively and are
negatively impacted by the lack of GSO support for this kind of traffic
in the virtual NIC driver.

The virtio_net specification recently introduced support for GSO over
UDP tunnel; this series updates the virtio implementation to support
that feature.

Currently the kernel virtio support limits the feature space to 64,
while the virtio specification allows for a larger number of features.
Specifically the GSO-over-UDP-tunnel-related virtio features use bits
65-69.

The first four patches in this series rework the virtio and vhost
feature support to cope with up to 128 bits. The limit is set by
a define and could be easily raised in future, as needed.

This implementation choice is aimed at keeping the code churn as
limited as possible. For the same reason, only the virtio_net driver is
reworked to leverage the extended feature space; all other
virtio/vhost drivers are unaffected, but could be upgraded to support
the extended feature space at a later time.

The last four patches bring in the actual GSO over UDP tunnel support.
As per specification, some additional fields are introduced into the
virtio net header to support the new offload. The presence of such
fields depends on the negotiated features.

New helpers are introduced to convert the UDP-tunneled skb metadata to
an extended virtio net header and vice versa. Such helpers are used by
the tun and virtio_net driver to cope with the newly supported offloads.

Tested with basic stream transfer with all the possible permutations of
host kernel/qemu/guest kernel with/without GSO over UDP tunnel support.
====================

Link: https://patch.msgid.link/cover.1751874094.git.pabeni@redhat.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 3321e97e bbca931f
Loading
Loading
Loading
Loading
+49 −9
Original line number Diff line number Diff line
@@ -186,7 +186,8 @@ struct tun_struct {
	struct net_device	*dev;
	netdev_features_t	set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
			  NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)
			  NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \
			  NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)

	int			align;
	int			vnet_hdr_sz;
@@ -925,6 +926,7 @@ static int tun_net_init(struct net_device *dev)
	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
			   TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
			   NETIF_F_HW_VLAN_STAG_TX;
	dev->hw_enc_features = dev->hw_features;
	dev->features = dev->hw_features;
	dev->vlan_features = dev->features &
			     ~(NETIF_F_HW_VLAN_CTAG_TX |
@@ -1698,7 +1700,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
	struct sk_buff *skb;
	size_t total_len = iov_iter_count(from);
	size_t len = total_len, align = tun->align, linear;
	struct virtio_net_hdr gso = { 0 };
	struct virtio_net_hdr_v1_hash_tunnel hdr;
	struct virtio_net_hdr *gso;
	int good_linear;
	int copylen;
	int hdr_len = 0;
@@ -1708,6 +1711,15 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
	int skb_xdp = 1;
	bool frags = tun_napi_frags_enabled(tfile);
	enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	netdev_features_t features = 0;

	/*
	 * Keep it easy and always zero the whole buffer, even if the
	 * tunnel-related field will be touched only when the feature
	 * is enabled and the hdr size is compatible.
	 */
	memset(&hdr, 0, sizeof(hdr));
	gso = (struct virtio_net_hdr *)&hdr;

	if (!(tun->flags & IFF_NO_PI)) {
		if (len < sizeof(pi))
@@ -1721,7 +1733,9 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
	if (tun->flags & IFF_VNET_HDR) {
		int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);

		hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso);
		features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
		hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
					     features, from, gso);
		if (hdr_len < 0)
			return hdr_len;

@@ -1755,7 +1769,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
		 * (e.g gso or jumbo packet), we will do it at after
		 * skb was created with generic XDP routine.
		 */
		skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
		skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
		err = PTR_ERR_OR_ZERO(skb);
		if (err)
			goto drop;
@@ -1799,7 +1813,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
		}
	}

	if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) {
	if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) {
		atomic_long_inc(&tun->rx_frame_errors);
		err = -EINVAL;
		goto free_skb;
@@ -2050,13 +2064,21 @@ static ssize_t tun_put_user(struct tun_struct *tun,
	}

	if (vnet_hdr_sz) {
		struct virtio_net_hdr gso;
		struct virtio_net_hdr_v1_hash_tunnel hdr;
		struct virtio_net_hdr *gso;

		ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
		ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
						&hdr);
		if (ret)
			return ret;

		ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
		/*
		 * Drop the packet if the configured header size is too small
		 * WRT the enabled offloads.
		 */
		gso = (struct virtio_net_hdr *)&hdr;
		ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features,
					 iter, gso);
		if (ret)
			return ret;
	}
@@ -2357,9 +2379,11 @@ static int tun_xdp_one(struct tun_struct *tun,
{
	unsigned int datasize = xdp->data_end - xdp->data;
	struct virtio_net_hdr *gso = xdp->data_hard_start;
	struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
	struct bpf_prog *xdp_prog;
	struct sk_buff *skb = NULL;
	struct sk_buff_head *queue;
	netdev_features_t features;
	u32 rxhash = 0, act;
	int buflen = xdp->frame_sz;
	int metasize = 0;
@@ -2425,7 +2449,9 @@ static int tun_xdp_one(struct tun_struct *tun,
	if (metasize > 0)
		skb_metadata_set(skb, metasize);

	if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
	features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz));
	tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso;
	if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) {
		atomic_long_inc(&tun->rx_frame_errors);
		kfree_skb(skb);
		ret = -EINVAL;
@@ -2811,6 +2837,8 @@ static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)

}

#define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6)

/* This is like a cut-down ethtool ops, except done via tun fd so no
 * privs required. */
static int set_offload(struct tun_struct *tun, unsigned long arg)
@@ -2840,6 +2868,18 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
			features |= NETIF_F_GSO_UDP_L4;
			arg &= ~(TUN_F_USO4 | TUN_F_USO6);
		}

		/*
		 * Tunnel offload is allowed only if some plain offload is
		 * available, too.
		 */
		if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) {
			features |= NETIF_F_GSO_UDP_TUNNEL;
			if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM)
				features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
			arg &= ~(TUN_F_UDP_TUNNEL_GSO |
				 TUN_F_UDP_TUNNEL_GSO_CSUM);
		}
	}

	/* This gives the user a way to test for new features in future by
+92 −9
Original line number Diff line number Diff line
@@ -6,6 +6,8 @@
#define TUN_VNET_LE     0x80000000
#define TUN_VNET_BE     0x40000000

#define TUN_VNET_TNL_SIZE	sizeof(struct virtio_net_hdr_v1_hash_tunnel)

static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
{
	bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) &&
@@ -107,16 +109,26 @@ static inline long tun_vnet_ioctl(int *vnet_hdr_sz, unsigned int *flags,
	}
}

static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
/*
 * Size, in bytes, of the virtio net header layout selected by @features:
 * the tunnel-extended layout (TUN_VNET_TNL_SIZE) when
 * NETIF_F_GSO_UDP_TUNNEL is set, the plain struct virtio_net_hdr otherwise.
 */
static inline unsigned int tun_vnet_parse_size(netdev_features_t features)
{
	return (features & NETIF_F_GSO_UDP_TUNNEL) ?
	       TUN_VNET_TNL_SIZE : sizeof(struct virtio_net_hdr);
}

static inline int __tun_vnet_hdr_get(int sz, unsigned int flags,
				     netdev_features_t features,
				     struct iov_iter *from,
				     struct virtio_net_hdr *hdr)
{
	unsigned int parsed_size = tun_vnet_parse_size(features);
	u16 hdr_len;

	if (iov_iter_count(from) < sz)
		return -EINVAL;

	if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
	if (!copy_from_iter_full(hdr, parsed_size, from))
		return -EFAULT;

	hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len);
@@ -129,32 +141,70 @@ static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
	if (hdr_len > iov_iter_count(from))
		return -EINVAL;

	iov_iter_advance(from, sz - sizeof(*hdr));
	iov_iter_advance(from, sz - parsed_size);

	return hdr_len;
}

static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
				   struct iov_iter *from,
				   struct virtio_net_hdr *hdr)
{
	return __tun_vnet_hdr_get(sz, flags, 0, from, hdr);
}

static inline int __tun_vnet_hdr_put(int sz, netdev_features_t features,
				     struct iov_iter *iter,
				     const struct virtio_net_hdr *hdr)
{
	unsigned int parsed_size = tun_vnet_parse_size(features);

	if (unlikely(iov_iter_count(iter) < sz))
		return -EINVAL;

	if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr)))
	if (unlikely(copy_to_iter(hdr, parsed_size, iter) != parsed_size))
		return -EFAULT;

	if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr))
	if (iov_iter_zero(sz - parsed_size, iter) != sz - parsed_size)
		return -EFAULT;

	return 0;
}

static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
				   const struct virtio_net_hdr *hdr)
{
	return __tun_vnet_hdr_put(sz, 0, iter, hdr);
}

/*
 * Apply the plain virtio net header @hdr to @skb, using the byte order
 * derived from the tun @flags. Returns whatever virtio_net_hdr_to_skb()
 * returns.
 */
static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb,
				      const struct virtio_net_hdr *hdr)
{
	int err;

	err = virtio_net_hdr_to_skb(skb, hdr,
				    tun_vnet_is_little_endian(flags));
	return err;
}

/*
 * Tun has no knowledge of the features negotiated by the guest, so they are
 * guessed from the configured virtio net header size: a header at least as
 * large as the tunnel-extended layout is taken as both UDP tunnel GSO
 * features being available; anything smaller yields no feature at all.
 */
static inline netdev_features_t tun_vnet_hdr_guest_features(int vnet_hdr_sz)
{
	netdev_features_t guessed = 0;

	if (vnet_hdr_sz >= TUN_VNET_TNL_SIZE)
		guessed = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM;

	return guessed;
}

/*
 * Tunnel-aware counterpart of tun_vnet_hdr_to_skb(): apply the extended
 * header @hdr to @skb. The two UDP tunnel bits of @features tell the virtio
 * helper whether tunnel GSO and tunnel GSO with checksum are enabled; the
 * byte order is derived from the tun @flags.
 */
static inline int
tun_vnet_hdr_tnl_to_skb(unsigned int flags, netdev_features_t features,
			struct sk_buff *skb,
			const struct virtio_net_hdr_v1_hash_tunnel *hdr)
{
	const netdev_features_t tnl = features & NETIF_F_GSO_UDP_TUNNEL;
	const netdev_features_t tnl_csum = features &
					   NETIF_F_GSO_UDP_TUNNEL_CSUM;

	return virtio_net_hdr_tnl_to_skb(skb, hdr, tnl, tnl_csum,
					 tun_vnet_is_little_endian(flags));
}

static inline int tun_vnet_hdr_from_skb(unsigned int flags,
					const struct net_device *dev,
					const struct sk_buff *skb,
@@ -183,4 +233,37 @@ static inline int tun_vnet_hdr_from_skb(unsigned int flags,
	return 0;
}

/*
 * Fill the tunnel-extended virtio net header @tnl_hdr from @skb.
 *
 * Tunnel offload is reported to the virtio helper as available only when
 * @dev has NETIF_F_GSO_UDP_TUNNEL set; a VLAN tag carried by the skb adds
 * VLAN_HLEN to the header length passed down.
 *
 * Returns 0 on success. When the virtio helper fails, a rate-limited
 * diagnostic with the GSO metadata and a dump of the start of the packet is
 * logged, a one-time warning is raised and -EINVAL is returned.
 */
static inline int
tun_vnet_hdr_tnl_from_skb(unsigned int flags,
			  const struct net_device *dev,
			  const struct sk_buff *skb,
			  struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr)
{
	bool has_tnl_offload = !!(dev->features & NETIF_F_GSO_UDP_TUNNEL);
	int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
	struct virtio_net_hdr_v1 *hdr;

	if (!virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload,
					 tun_vnet_is_little_endian(flags),
					 vlan_hlen))
		return 0;

	/* Conversion failed: report what was found, then reject the packet. */
	hdr = &tnl_hdr->hash_hdr.hdr;
	if (net_ratelimit()) {
		struct skb_shared_info *sinfo = skb_shinfo(skb);
		int hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len);

		netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
			   sinfo->gso_type,
			   tun_vnet16_to_cpu(flags, hdr->gso_size),
			   hdr_len);
		/* Dump at most the first 64 bytes starting at skb->head. */
		print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE,
			       16, 1, skb->head, min(hdr_len, 64),
			       true);
	}
	WARN_ON_ONCE(1);
	return -EINVAL;
}

#endif /* TUN_VNET_H */
+87 −23
Original line number Diff line number Diff line
@@ -35,6 +35,23 @@ module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/*
 * Offload bits in [VIRTIO_OFFLOAD_MAP_MIN, VIRTIO_OFFLOAD_MAP_MAX] do not
 * share their number with the matching feature bit: they map onto the
 * feature bits starting at VIRTIO_FEATURES_MAP_MIN, i.e. at a fixed
 * distance of VIRTIO_O2F_DELTA.
 */
#define VIRTIO_OFFLOAD_MAP_MIN	46
#define VIRTIO_OFFLOAD_MAP_MAX	47
#define VIRTIO_FEATURES_MAP_MIN	65
#define VIRTIO_O2F_DELTA	(VIRTIO_FEATURES_MAP_MIN - \
				 VIRTIO_OFFLOAD_MAP_MIN)

/* Tell whether @obit lies in the remapped offload range, bounds included. */
static bool virtio_is_mapped_offload(unsigned int obit)
{
	if (obit < VIRTIO_OFFLOAD_MAP_MIN)
		return false;

	return obit <= VIRTIO_OFFLOAD_MAP_MAX;
}

/*
 * Translate the offload bit @obit into the corresponding feature bit:
 * remapped offloads are shifted by VIRTIO_O2F_DELTA, any other bit is
 * returned unchanged.
 */
static unsigned int virtio_offload_to_feature(unsigned int obit)
{
	if (!virtio_is_mapped_offload(obit))
		return obit;

	return obit + VIRTIO_O2F_DELTA;
}

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128
@@ -62,7 +79,9 @@ static const unsigned long guest_offloads[] = {
	VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GUEST_USO4,
	VIRTIO_NET_F_GUEST_USO6,
	VIRTIO_NET_F_GUEST_HDRLEN
	VIRTIO_NET_F_GUEST_HDRLEN,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED,
};

#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
@@ -70,7 +89,9 @@ static const unsigned long guest_offloads[] = {
			(1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
			(1ULL << VIRTIO_NET_F_GUEST_UFO)  | \
			(1ULL << VIRTIO_NET_F_GUEST_USO4) | \
				(1ULL << VIRTIO_NET_F_GUEST_USO6))
			(1ULL << VIRTIO_NET_F_GUEST_USO6) | \
			(1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | \
			(1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED))

struct virtnet_stat_desc {
	char desc[ETH_GSTRING_LEN];
@@ -423,6 +444,13 @@ struct virtnet_info {
	/* Work struct for delayed refilling if we run low on memory. */
	struct delayed_work refill;

	/* UDP tunnel support */
	bool tx_tnl;

	bool rx_tnl;

	bool rx_tnl_csum;

	/* Is delayed refill enabled? */
	bool refill_enabled;

@@ -482,6 +510,7 @@ struct virtio_net_common_hdr {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf	mrg_hdr;
		struct virtio_net_hdr_v1_hash hash_v1_hdr;
		struct virtio_net_hdr_v1_hash_tunnel tnl_hdr;
	};
};

@@ -2545,14 +2574,21 @@ static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *
	if (dev->features & NETIF_F_RXHASH && vi->has_rss_hash_report)
		virtio_skb_set_hash(&hdr->hash_v1_hdr, skb);

	if (flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	hdr->hdr.flags = flags;
	if (virtio_net_handle_csum_offload(skb, &hdr->hdr, vi->rx_tnl_csum)) {
		net_warn_ratelimited("%s: bad csum: flags: %x, gso_type: %x rx_tnl_csum %d\n",
				     dev->name, hdr->hdr.flags,
				     hdr->hdr.gso_type, vi->rx_tnl_csum);
		goto frame_err;
	}

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
	if (virtio_net_hdr_tnl_to_skb(skb, &hdr->tnl_hdr, vi->rx_tnl,
				      vi->rx_tnl_csum,
				      virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
		net_warn_ratelimited("%s: bad gso: type: %x, size: %u, flags %x tunnel %d tnl csum %d\n",
				     dev->name, hdr->hdr.gso_type,
				     hdr->hdr.gso_size);
				     hdr->hdr.gso_size, hdr->hdr.flags,
				     vi->rx_tnl, vi->rx_tnl_csum);
		goto frame_err;
	}

@@ -3264,9 +3300,9 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)

static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtio_net_hdr_v1_hash_tunnel *hdr;
	int num_sg;
	unsigned hdr_len = vi->hdr_len;
	bool can_push;
@@ -3279,17 +3315,17 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan)
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */
	if (can_push)
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
		hdr = (struct virtio_net_hdr_v1_hash_tunnel *)(skb->data -
							       hdr_len);
	else
		hdr = &skb_vnet_common_hdr(skb)->mrg_hdr;
		hdr = &skb_vnet_common_hdr(skb)->tnl_hdr;

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev), false,
				    0))
	if (virtio_net_hdr_tnl_from_skb(skb, hdr, vi->tx_tnl,
					virtio_is_little_endian(vi->vdev), 0))
		return -EPROTO;

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;
		hdr->hash_hdr.hdr.num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
	if (can_push) {
@@ -6784,10 +6820,20 @@ static int virtnet_probe(struct virtio_device *vdev)
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_USO))
			dev->hw_features |= NETIF_F_GSO_UDP_L4;

		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO)) {
			dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
			dev->hw_enc_features = dev->hw_features;
		}
		if (dev->hw_features & NETIF_F_GSO_UDP_TUNNEL &&
		    virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM)) {
			dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
			dev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
		}

		dev->features |= NETIF_F_GSO_ROBUST;

		if (gso)
			dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
			dev->features |= dev->hw_features;
		/* (!csum && gso) case will be fixed by register_netdev() */
	}

@@ -6880,7 +6926,10 @@ static int virtnet_probe(struct virtio_device *vdev)
		dev->xdp_metadata_ops = &virtnet_xdp_metadata_ops;
	}

	if (vi->has_rss_hash_report)
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);
	else if (vi->has_rss_hash_report)
		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
	else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
		 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
@@ -6888,6 +6937,13 @@ static int virtnet_probe(struct virtio_device *vdev)
	else
		vi->hdr_len = sizeof(struct virtio_net_hdr);

	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM))
		vi->rx_tnl_csum = true;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
		vi->rx_tnl = true;
	if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO))
		vi->tx_tnl = true;

	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
		vi->any_header_sg = true;
@@ -7062,9 +7118,13 @@ static int virtnet_probe(struct virtio_device *vdev)
		netif_carrier_on(dev);
	}

	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++)
		if (virtio_has_feature(vi->vdev, guest_offloads[i]))
	for (i = 0; i < ARRAY_SIZE(guest_offloads); i++) {
		unsigned int fbit;

		fbit = virtio_offload_to_feature(guest_offloads[i]);
		if (virtio_has_feature(vi->vdev, fbit))
			set_bit(guest_offloads[i], &vi->guest_offloads);
	}
	vi->guest_offloads_capable = vi->guest_offloads;

	rtnl_unlock();
@@ -7194,6 +7254,10 @@ static struct virtio_device_id id_table[] = {

/*
 * List of virtio feature bits: the common VIRTNET_FEATURES set followed by
 * the UDP tunnel GSO bits, with and without checksum, for both the guest
 * and the host direction.
 * NOTE(review): presumably this is the feature table handed to the virtio
 * core for negotiation - the user of the array is not visible here; confirm.
 */
static unsigned int features[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
	VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
	VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
	VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM,
};

static unsigned int features_legacy[] = {
+75 −20
Original line number Diff line number Diff line
@@ -69,12 +69,14 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"

#define VHOST_DMA_IS_DONE(len) ((__force u32)(len) >= (__force u32)VHOST_DMA_DONE_LEN)

enum {
	VHOST_NET_FEATURES = VHOST_FEATURES |
static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
	VHOST_FEATURES |
	(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
	(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
	(1ULL << VIRTIO_F_ACCESS_PLATFORM) |
			 (1ULL << VIRTIO_F_RING_RESET)
	(1ULL << VIRTIO_F_RING_RESET),
	VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
	VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
};

enum {
@@ -1606,16 +1608,23 @@ static long vhost_net_reset_owner(struct vhost_net *n)
	return err;
}

static int vhost_net_set_features(struct vhost_net *n, u64 features)
static int vhost_net_set_features(struct vhost_net *n, const u64 *features)
{
	size_t vhost_hlen, sock_hlen, hdr_len;
	int i;

	hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
			       (1ULL << VIRTIO_F_VERSION_1))) ?
	hdr_len = virtio_features_test_bit(features, VIRTIO_NET_F_MRG_RXBUF) ||
		  virtio_features_test_bit(features, VIRTIO_F_VERSION_1) ?
		  sizeof(struct virtio_net_hdr_mrg_rxbuf) :
		  sizeof(struct virtio_net_hdr);
	if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {

	if (virtio_features_test_bit(features,
				     VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
	    virtio_features_test_bit(features,
				     VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO))
		hdr_len = sizeof(struct virtio_net_hdr_v1_hash_tunnel);

	if (virtio_features_test_bit(features, VHOST_NET_F_VIRTIO_NET_HDR)) {
		/* vhost provides vnet_hdr */
		vhost_hlen = hdr_len;
		sock_hlen = 0;
@@ -1625,18 +1634,19 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
		sock_hlen = hdr_len;
	}
	mutex_lock(&n->dev.mutex);
	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	if (virtio_features_test_bit(features, VHOST_F_LOG_ALL) &&
	    !vhost_log_access_ok(&n->dev))
		goto out_unlock;

	if ((features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) {
	if (virtio_features_test_bit(features, VIRTIO_F_ACCESS_PLATFORM)) {
		if (vhost_init_device_iotlb(&n->dev))
			goto out_unlock;
	}

	for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
		mutex_lock(&n->vqs[i].vq.mutex);
		n->vqs[i].vq.acked_features = features;
		virtio_features_copy(n->vqs[i].vq.acked_features_array,
				     features);
		n->vqs[i].vhost_hlen = vhost_hlen;
		n->vqs[i].sock_hlen = sock_hlen;
		mutex_unlock(&n->vqs[i].vq.mutex);
@@ -1673,12 +1683,13 @@ static long vhost_net_set_owner(struct vhost_net *n)
static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
			    unsigned long arg)
{
	u64 all_features[VIRTIO_FEATURES_DWORDS];
	struct vhost_net *n = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	struct vhost_vring_file backend;
	u64 features;
	int r;
	u64 features, count, copied;
	int r, i;

	switch (ioctl) {
	case VHOST_NET_SET_BACKEND:
@@ -1686,16 +1697,60 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
			return -EFAULT;
		return vhost_net_set_backend(n, backend.index, backend.fd);
	case VHOST_GET_FEATURES:
		features = VHOST_NET_FEATURES;
		features = vhost_net_features[0];
		if (copy_to_user(featurep, &features, sizeof features))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, featurep, sizeof features))
			return -EFAULT;
		if (features & ~VHOST_NET_FEATURES)
		if (features & ~vhost_net_features[0])
			return -EOPNOTSUPP;
		return vhost_net_set_features(n, features);

		virtio_features_from_u64(all_features, features);
		return vhost_net_set_features(n, all_features);
	case VHOST_GET_FEATURES_ARRAY:
		if (copy_from_user(&count, featurep, sizeof(count)))
			return -EFAULT;

		/* Copy the net features, up to the user-provided buffer size */
		argp += sizeof(u64);
		copied = min(count, VIRTIO_FEATURES_DWORDS);
		if (copy_to_user(argp, vhost_net_features,
				 copied * sizeof(u64)))
			return -EFAULT;

		/* Zero the trailing space provided by user-space, if any */
		if (clear_user(argp, size_mul(count - copied, sizeof(u64))))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES_ARRAY:
		if (copy_from_user(&count, featurep, sizeof(count)))
			return -EFAULT;

		virtio_features_zero(all_features);
		argp += sizeof(u64);
		copied = min(count, VIRTIO_FEATURES_DWORDS);
		if (copy_from_user(all_features, argp, copied * sizeof(u64)))
			return -EFAULT;

		/*
		 * Any feature specified by user-space above
		 * VIRTIO_FEATURES_MAX is not supported by definition.
		 */
		for (i = copied; i < count; ++i) {
			if (copy_from_user(&features, featurep + 1 + i,
					   sizeof(features)))
				return -EFAULT;
			if (features)
				return -EOPNOTSUPP;
		}

		for (i = 0; i < VIRTIO_FEATURES_DWORDS; i++)
			if (all_features[i] & ~vhost_net_features[i])
				return -EOPNOTSUPP;

		return vhost_net_set_features(n, all_features);
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_NET_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
+1 −1
Original line number Diff line number Diff line
@@ -372,7 +372,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
	vq->log_used = false;
	vq->log_addr = -1ull;
	vq->private_data = NULL;
	vq->acked_features = 0;
	virtio_features_zero(vq->acked_features_array);
	vq->acked_backend_features = 0;
	vq->log_base = NULL;
	vq->error_ctx = NULL;
Loading