Commit eae38f09 authored by David S. Miller
Browse files

Merge branch 'vxlan-skb-drop-reasons'



Menglong Dong says:

====================
net: vxlan: add skb drop reasons support

In this series, we add skb drop reasons support to VXLAN, and the following
new skb drop reasons are introduced:

  SKB_DROP_REASON_VXLAN_INVALID_HDR
  SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND
  SKB_DROP_REASON_VXLAN_ENTRY_EXISTS
  SKB_DROP_REASON_VXLAN_NO_REMOTE
  SKB_DROP_REASON_MAC_INVALID_SOURCE
  SKB_DROP_REASON_IP_TUNNEL_ECN
  SKB_DROP_REASON_TUNNEL_TXINFO
  SKB_DROP_REASON_LOCAL_MAC

We add some helper functions in this series, which capture the drop
reasons from pskb_may_pull_reason() and return them:

  pskb_network_may_pull_reason()
  pskb_inet_may_pull_reason()

And we also make the following functions return skb drop reasons:

  skb_vlan_inet_prepare()
  vxlan_remcsum()
  vxlan_snoop()
  vxlan_set_mac()

Changes since v6:
- fix some typos in the documentation for SKB_DROP_REASON_TUNNEL_TXINFO

Changes since v5:
- fix some typos in the documentation for SKB_DROP_REASON_TUNNEL_TXINFO

Changes since v4:
- make skb_vlan_inet_prepare() return drop reasons, instead of introducing
  a wrapper for it in the 3rd patch.
- modify the documentation for SKB_DROP_REASON_LOCAL_MAC and
  SKB_DROP_REASON_TUNNEL_TXINFO.

Changes since v3:
- rename SKB_DROP_REASON_VXLAN_INVALID_SMAC to
  SKB_DROP_REASON_MAC_INVALID_SOURCE in the 6th patch

Changes since v2:
- move all the drop reasons of VXLAN to the "core", instead of introducing
  the VXLAN drop reason subsystem
- add the 6th patch, which captures the drop reasons from vxlan_snoop()
- move the commits for vxlan_remcsum() and vxlan_set_mac() after
  vxlan_rcv() to update the call of them accordingly
- fix some format problems

Changes since v1:
- document all the drop reasons that we introduce
- rename the drop reasons to make them more descriptive, as Ido advised
- remove the 2nd patch, which introduces the SKB_DR_RESET
- add the 4th patch, which adds skb_vlan_inet_prepare_reason() helper
- introduce the 6th patch, which makes vxlan_set_mac return drop reasons
- introduce the 10th patch, which uses VXLAN_DROP_NO_REMOTE as the drop
  reason, as Ido advised
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents c531f226 790961d8
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -317,7 +317,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
	__be32 saddr;
	int err;

	if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB)))
	if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB)))
		return -EINVAL;

	if (!sock)
@@ -387,7 +387,7 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
	__be16 sport;
	int err;

	if (!skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB)))
	if (skb_vlan_inet_prepare(skb, skb->protocol != htons(ETH_P_TEB)))
		return -EINVAL;

	if (!sock)
+2 −2
Original line number Diff line number Diff line
@@ -827,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
	__be16 sport;
	int err;

	if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
	if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	if (!gs4)
@@ -937,7 +937,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
	__be16 sport;
	int err;

	if (!skb_vlan_inet_prepare(skb, inner_proto_inherit))
	if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
		return -EINVAL;

	if (!gs6)
+71 −40
Original line number Diff line number Diff line
@@ -1437,9 +1437,10 @@ static int vxlan_fdb_get(struct sk_buff *skb,
 * and Tunnel endpoint.
 * Return a drop reason if packet is bogus and should be dropped,
 * SKB_NOT_DROPPED_YET otherwise.
 */
static bool vxlan_snoop(struct net_device *dev,
			union vxlan_addr *src_ip, const u8 *src_mac,
			u32 src_ifindex, __be32 vni)
static enum skb_drop_reason vxlan_snoop(struct net_device *dev,
					union vxlan_addr *src_ip,
					const u8 *src_mac, u32 src_ifindex,
					__be32 vni)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;
@@ -1447,7 +1448,7 @@ static bool vxlan_snoop(struct net_device *dev,

	/* Ignore packets from invalid src-address */
	if (!is_valid_ether_addr(src_mac))
		return true;
		return SKB_DROP_REASON_MAC_INVALID_SOURCE;

#if IS_ENABLED(CONFIG_IPV6)
	if (src_ip->sa.sa_family == AF_INET6 &&
@@ -1461,15 +1462,15 @@ static bool vxlan_snoop(struct net_device *dev,

		if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) &&
			   rdst->remote_ifindex == ifindex))
			return false;
			return SKB_NOT_DROPPED_YET;

		/* Don't migrate static entries, drop packets */
		if (f->state & (NUD_PERMANENT | NUD_NOARP))
			return true;
			return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;

		/* Don't override an fdb with nexthop with a learnt entry */
		if (rcu_access_pointer(f->nh))
			return true;
			return SKB_DROP_REASON_VXLAN_ENTRY_EXISTS;

		if (net_ratelimit())
			netdev_info(dev,
@@ -1497,7 +1498,7 @@ static bool vxlan_snoop(struct net_device *dev,
		spin_unlock(&vxlan->hash_lock[hash_index]);
	}

	return false;
	return SKB_NOT_DROPPED_YET;
}

static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
@@ -1551,9 +1552,11 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
#endif
}

static bool vxlan_remcsum(struct vxlanhdr *unparsed,
			  struct sk_buff *skb, u32 vxflags)
static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed,
					  struct sk_buff *skb,
					  u32 vxflags)
{
	enum skb_drop_reason reason;
	size_t start, offset;

	if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
@@ -1562,15 +1565,17 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed,
	start = vxlan_rco_start(unparsed->vx_vni);
	offset = start + vxlan_rco_offset(unparsed->vx_vni);

	if (!pskb_may_pull(skb, offset + sizeof(u16)))
		return false;
	reason = pskb_may_pull_reason(skb, offset + sizeof(u16));
	if (reason)
		return reason;

	skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
			    !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
out:
	unparsed->vx_flags &= ~VXLAN_HF_RCO;
	unparsed->vx_vni &= VXLAN_VNI_MASK;
	return true;

	return SKB_NOT_DROPPED_YET;
}

static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
@@ -1604,7 +1609,7 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
	unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}

static bool vxlan_set_mac(struct vxlan_dev *vxlan,
static enum skb_drop_reason vxlan_set_mac(struct vxlan_dev *vxlan,
					  struct vxlan_sock *vs,
					  struct sk_buff *skb, __be32 vni)
{
@@ -1617,7 +1622,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
		return false;
		return SKB_DROP_REASON_LOCAL_MAC;

	/* Get address from the outer IP header */
	if (vxlan_get_sk_family(vs) == AF_INET) {
@@ -1630,11 +1635,11 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
#endif
	}

	if ((vxlan->cfg.flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, ifindex, vni))
		return false;
	if (!(vxlan->cfg.flags & VXLAN_F_LEARN))
		return SKB_NOT_DROPPED_YET;

	return true;
	return vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source,
			   ifindex, vni);
}

static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
@@ -1671,13 +1676,15 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
	struct vxlan_metadata _md;
	struct vxlan_metadata *md = &_md;
	__be16 protocol = htons(ETH_P_TEB);
	enum skb_drop_reason reason;
	bool raw_proto = false;
	void *oiph;
	__be32 vni = 0;
	int nh;

	/* Need UDP and VXLAN header to be present */
	if (!pskb_may_pull(skb, VXLAN_HLEN))
	reason = pskb_may_pull_reason(skb, VXLAN_HLEN);
	if (reason)
		goto drop;

	unparsed = *vxlan_hdr(skb);
@@ -1686,6 +1693,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
			   ntohl(vxlan_hdr(skb)->vx_flags),
			   ntohl(vxlan_hdr(skb)->vx_vni));
		reason = SKB_DROP_REASON_VXLAN_INVALID_HDR;
		/* Return non vxlan pkt */
		goto drop;
	}
@@ -1699,8 +1707,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
	vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);

	vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode);
	if (!vxlan)
	if (!vxlan) {
		reason = SKB_DROP_REASON_VXLAN_VNI_NOT_FOUND;
		goto drop;
	}

	/* For backwards compatibility, only allow reserved fields to be
	 * used by VXLAN extensions if explicitly requested.
@@ -1713,12 +1723,16 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
	}

	if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
				   !net_eq(vxlan->net, dev_net(vxlan->dev))))
				   !net_eq(vxlan->net, dev_net(vxlan->dev)))) {
		reason = SKB_DROP_REASON_NOMEM;
		goto drop;
	}

	if (vs->flags & VXLAN_F_REMCSUM_RX)
		if (unlikely(!vxlan_remcsum(&unparsed, skb, vs->flags)))
	if (vs->flags & VXLAN_F_REMCSUM_RX) {
		reason = vxlan_remcsum(&unparsed, skb, vs->flags);
		if (unlikely(reason))
			goto drop;
	}

	if (vxlan_collect_metadata(vs)) {
		IP_TUNNEL_DECLARE_FLAGS(flags) = { };
@@ -1728,8 +1742,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
		tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), flags,
					 key32_to_tunnel_id(vni), sizeof(*md));

		if (!tun_dst)
		if (!tun_dst) {
			reason = SKB_DROP_REASON_NOMEM;
			goto drop;
		}

		md = ip_tunnel_info_opts(&tun_dst->u.tun_info);

@@ -1753,11 +1769,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
		 * is more robust and provides a little more security in
		 * adding extensions to VXLAN.
		 */
		reason = SKB_DROP_REASON_VXLAN_INVALID_HDR;
		goto drop;
	}

	if (!raw_proto) {
		if (!vxlan_set_mac(vxlan, vs, skb, vni))
		reason = vxlan_set_mac(vxlan, vs, skb, vni);
		if (reason)
			goto drop;
	} else {
		skb_reset_mac_header(skb);
@@ -1773,7 +1791,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)

	skb_reset_network_header(skb);

	if (!pskb_inet_may_pull(skb)) {
	reason = pskb_inet_may_pull_reason(skb);
	if (reason) {
		DEV_STATS_INC(vxlan->dev, rx_length_errors);
		DEV_STATS_INC(vxlan->dev, rx_errors);
		vxlan_vnifilter_count(vxlan, vni, vninode,
@@ -1785,6 +1804,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
	oiph = skb->head + nh;

	if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
		reason = SKB_DROP_REASON_IP_TUNNEL_ECN;
		DEV_STATS_INC(vxlan->dev, rx_frame_errors);
		DEV_STATS_INC(vxlan->dev, rx_errors);
		vxlan_vnifilter_count(vxlan, vni, vninode,
@@ -1799,6 +1819,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
		dev_core_stats_rx_dropped_inc(vxlan->dev);
		vxlan_vnifilter_count(vxlan, vni, vninode,
				      VXLAN_VNI_STATS_RX_DROPS, 0);
		reason = SKB_DROP_REASON_DEV_READY;
		goto drop;
	}

@@ -1811,8 +1832,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
	return 0;

drop:
	reason = reason ?: SKB_DROP_REASON_NOT_SPECIFIED;
	/* Consume bad packet */
	kfree_skb(skb);
	kfree_skb_reason(skb, reason);
	return 0;
}

@@ -2268,7 +2290,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
	rcu_read_lock();
	dev = skb->dev;
	if (unlikely(!(dev->flags & IFF_UP))) {
		kfree_skb(skb);
		kfree_skb_reason(skb, SKB_DROP_REASON_DEV_READY);
		goto drop;
	}

@@ -2319,7 +2341,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
			DEV_STATS_INC(dev, tx_errors);
			vxlan_vnifilter_count(vxlan, vni, NULL,
					      VXLAN_VNI_STATS_TX_ERRORS, 0);
			kfree_skb(skb);
			kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_INVALID_HDR);

			return -ENOENT;
		}
@@ -2352,13 +2374,16 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	bool use_cache;
	bool udp_sum = false;
	bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
	enum skb_drop_reason reason;
	bool no_eth_encap;
	__be32 vni = 0;

	no_eth_encap = flags & VXLAN_F_GPE && skb->protocol != htons(ETH_P_TEB);
	if (!skb_vlan_inet_prepare(skb, no_eth_encap))
	reason = skb_vlan_inet_prepare(skb, no_eth_encap);
	if (reason)
		goto drop;

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	old_iph = ip_hdr(skb);

	info = skb_tunnel_info(skb);
@@ -2462,6 +2487,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
					   tos, use_cache ? dst_cache : NULL);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			reason = SKB_DROP_REASON_IP_OUTNOROUTES;
			goto tx_error;
		}

@@ -2513,8 +2539,10 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
				      vni, md, flags, udp_sum);
		if (err < 0)
		if (err < 0) {
			reason = SKB_DROP_REASON_NOMEM;
			goto tx_error;
		}

		udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
				    pkey->u.ipv4.dst, tos, ttl, df,
@@ -2534,6 +2562,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		if (IS_ERR(ndst)) {
			err = PTR_ERR(ndst);
			ndst = NULL;
			reason = SKB_DROP_REASON_IP_OUTNOROUTES;
			goto tx_error;
		}

@@ -2574,8 +2603,10 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		skb_scrub_packet(skb, xnet);
		err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
				      vni, md, flags, udp_sum);
		if (err < 0)
		if (err < 0) {
			reason = SKB_DROP_REASON_NOMEM;
			goto tx_error;
		}

		udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
				     &saddr, &pkey->u.ipv6.dst, tos, ttl,
@@ -2590,7 +2621,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
drop:
	dev_core_stats_tx_dropped_inc(dev);
	vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0);
	dev_kfree_skb(skb);
	kfree_skb_reason(skb, reason);
	return;

tx_error:
@@ -2602,7 +2633,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	dst_release(ndst);
	DEV_STATS_INC(dev, tx_errors);
	vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0);
	kfree_skb(skb);
	kfree_skb_reason(skb, reason);
}

static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
@@ -2708,7 +2739,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
			if (info && info->mode & IP_TUNNEL_INFO_TX)
				vxlan_xmit_one(skb, dev, vni, NULL, false);
			else
				kfree_skb(skb);
				kfree_skb_reason(skb, SKB_DROP_REASON_TUNNEL_TXINFO);
			return NETDEV_TX_OK;
		}
	}
@@ -2771,7 +2802,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
			dev_core_stats_tx_dropped_inc(dev);
			vxlan_vnifilter_count(vxlan, vni, NULL,
					      VXLAN_VNI_STATS_TX_DROPS, 0);
			kfree_skb(skb);
			kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE);
			return NETDEV_TX_OK;
		}
	}
@@ -2794,7 +2825,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
		if (fdst)
			vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
		else
			kfree_skb(skb);
			kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE);
	}

	return NETDEV_TX_OK;
+1 −1
Original line number Diff line number Diff line
@@ -1712,7 +1712,7 @@ netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan,
		vxlan_xmit_one(skb, vxlan->dev, src_vni,
			       rcu_dereference(fremote->rd), false);
	else
		kfree_skb(skb);
		kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE);

	return NETDEV_TX_OK;
}
+7 −1
Original line number Diff line number Diff line
@@ -3130,9 +3130,15 @@ static inline int skb_inner_network_offset(const struct sk_buff *skb)
	return skb_inner_network_header(skb) - skb->data;
}

/*
 * Drop-reason flavour of pskb_network_may_pull(): ask pskb_may_pull_reason()
 * for @len bytes counted from the skb's network header offset and hand its
 * verdict back unchanged to the caller.
 */
static inline enum skb_drop_reason
pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len)
{
	/* Length to pull, measured from skb->data: network offset plus @len. */
	unsigned int pull_len = skb_network_offset(skb) + len;

	return pskb_may_pull_reason(skb, pull_len);
}

static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
{
	return pskb_may_pull(skb, skb_network_offset(skb) + len);
	return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET;
}

/*
Loading