Commit f8337efa authored by Petr Machata, committed by Jakub Kicinski
Browse files

vxlan: Support MC routing in the underlay



Locally-generated MC packets have so far not been subject to MC routing.
Instead an MC-enabled installation would maintain the MC routing tables,
and separately from that the list of interfaces to send packets to as part
of the VXLAN FDB and MDB.

In a previous patch, the ip_mr_output() and ip6_mr_output() routines were
added for IPv4 and IPv6. All locally generated MC traffic is now passed
through these functions. For reasons of backward compatibility, an SKB
(IPCB / IP6CB) flag guards the actual MC routing.

This patch adds logic to set the flag, and the UAPI to enable the behavior.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 96e8f5a9
Loading
Loading
Loading
Loading
+20 −2
Original line number Diff line number Diff line
@@ -2451,6 +2451,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	rcu_read_lock();
	if (addr_family == AF_INET) {
		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
		u16 ipcb_flags = 0;
		struct rtable *rt;
		__be16 df = 0;
		__be32 saddr;
@@ -2467,6 +2468,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			goto tx_error;
		}

		if (flags & VXLAN_F_MC_ROUTE)
			ipcb_flags |= IPSKB_MCROUTE;

		if (!info) {
			/* Bypass encapsulation if the destination is local */
			err = encap_bypass_if_local(skb, dev, vxlan, AF_INET,
@@ -2522,11 +2526,13 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

		udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr,
				    pkey->u.ipv4.dst, tos, ttl, df,
				    src_port, dst_port, xnet, !udp_sum, 0);
				    src_port, dst_port, xnet, !udp_sum,
				    ipcb_flags);
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
		struct in6_addr saddr;
		u16 ip6cb_flags = 0;

		if (!ifindex)
			ifindex = sock6->sock->sk->sk_bound_dev_if;
@@ -2542,6 +2548,9 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			goto tx_error;
		}

		if (flags & VXLAN_F_MC_ROUTE)
			ip6cb_flags |= IP6SKB_MCROUTE;

		if (!info) {
			u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags;

@@ -2587,7 +2596,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
				     &saddr, &pkey->u.ipv6.dst, tos, ttl,
				     pkey->label, src_port, dst_port, !udp_sum,
				     0);
				     ip6cb_flags);
#endif
	}
	vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
@@ -3402,6 +3411,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
	[IFLA_VXLAN_LOCALBYPASS]	= NLA_POLICY_MAX(NLA_U8, 1),
	[IFLA_VXLAN_LABEL_POLICY]       = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX),
	[IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)),
	[IFLA_VXLAN_MC_ROUTE]		= NLA_POLICY_MAX(NLA_U8, 1),
};

static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -4315,6 +4325,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
			return err;
	}

	if (data[IFLA_VXLAN_MC_ROUTE]) {
		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE,
				    VXLAN_F_MC_ROUTE, changelink,
				    true, extack);
		if (err)
			return err;
	}

	if (tb[IFLA_MTU]) {
		if (changelink) {
			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+4 −1
Original line number Diff line number Diff line
@@ -332,6 +332,7 @@ struct vxlan_dev {
#define VXLAN_F_VNIFILTER               0x20000
#define VXLAN_F_MDB			0x40000
#define VXLAN_F_LOCALBYPASS		0x80000
#define VXLAN_F_MC_ROUTE		0x100000

/* Flags that are used in the receive path. These flags must match in
 * order for a socket to be shareable
@@ -353,7 +354,9 @@ struct vxlan_dev {
					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
					 VXLAN_F_COLLECT_METADATA  |	\
					 VXLAN_F_VNIFILTER         |    \
					 VXLAN_F_LOCALBYPASS)
					 VXLAN_F_LOCALBYPASS       |	\
					 VXLAN_F_MC_ROUTE          |	\
					 0)

struct net_device *vxlan_dev_create(struct net *net, const char *name,
				    u8 name_assign_type, struct vxlan_config *conf);
+1 −0
Original line number Diff line number Diff line
@@ -1398,6 +1398,7 @@ enum {
	IFLA_VXLAN_LOCALBYPASS,
	IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */
	IFLA_VXLAN_RESERVED_BITS,
	IFLA_VXLAN_MC_ROUTE,
	__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)