Commit 01072dea authored by Paolo Abeni's avatar Paolo Abeni
Browse files

Merge branch 'vxlan-join-leave-mc-group-when-reconfigured'

Petr Machata says:

====================
vxlan: Join / leave MC group when reconfigured

When a vxlan netdevice is brought up, if its default remote is a multicast
address, the device joins the indicated group.

Therefore when the multicast remote address changes, the device should
leave the current group and subscribe to the new one. Similarly when the
interface used for endpoint communication is changed in a situation when
multicast remote is configured. This is currently not done.

Both vxlan_igmp_join() and vxlan_igmp_leave() can however fail. So it is
possible that with such fix, the netdevice will end up in an inconsistent
situation where the old group is not joined anymore, but joining the
new group fails. Should we join the new group first, and leave the old one
second, we might end up in the opposite situation, where both groups are
joined. Undoing any of this during rollback is going to be similarly
problematic.

One solution would be to just forbid the change when the netdevice is up.
However in vnifilter mode, changing the group address is allowed, and these
problems are simply ignored (see vxlan_vni_update_group()):

 # ip link add name br up type bridge vlan_filtering 1
 # ip link add vx1 up master br type vxlan external vnifilter local 192.0.2.1 dev lo dstport 4789
 # bridge vni add dev vx1 vni 200 group 224.0.0.1
 # tcpdump -i lo &
 # bridge vni add dev vx1 vni 200 group 224.0.0.2
 18:55:46.523438 IP 0.0.0.0 > 224.0.0.22: igmp v3 report, 1 group record(s)
 18:55:46.943447 IP 0.0.0.0 > 224.0.0.22: igmp v3 report, 1 group record(s)
 # bridge vni
 dev               vni                group/remote
 vx1               200                224.0.0.2

Having two different modes of operation for conceptually the same interface
is silly, so in this patchset, just do what the vnifilter code does and
deal with the errors by crossing fingers real hard.
====================

Link: https://patch.msgid.link/cover.1739548836.git.petrm@nvidia.com


Signed-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 43130d02 eae1e92a
Loading
Loading
Loading
Loading
+19 −5
Original line number Diff line number Diff line
@@ -3936,7 +3936,7 @@ static void vxlan_config_apply(struct net_device *dev,
}

static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
			       struct vxlan_config *conf, bool changelink,
			       struct vxlan_config *conf,
			       struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -3947,7 +3947,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
	if (ret)
		return ret;

	vxlan_config_apply(dev, conf, lowerdev, src_net, changelink);
	vxlan_config_apply(dev, conf, lowerdev, src_net, false);

	return 0;
}
@@ -3965,7 +3965,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
	int err;

	dst = &vxlan->default_dst;
	err = vxlan_dev_configure(net, dev, conf, false, extack);
	err = vxlan_dev_configure(net, dev, conf, extack);
	if (err)
		return err;

@@ -4419,6 +4419,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct netlink_ext_ack *extack)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	bool rem_ip_changed, change_igmp;
	struct net_device *lowerdev;
	struct vxlan_config conf;
	struct vxlan_rdst *dst;
@@ -4442,8 +4443,13 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
	if (err)
		return err;

	rem_ip_changed = !vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip);
	change_igmp = vxlan->dev->flags & IFF_UP &&
		      (rem_ip_changed ||
		       dst->remote_ifindex != conf.remote_ifindex);

	/* handle default dst entry */
	if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
	if (rem_ip_changed) {
		u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);

		spin_lock_bh(&vxlan->hash_lock[hash_index]);
@@ -4487,6 +4493,9 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
		}
	}

	if (change_igmp && vxlan_addr_multicast(&dst->remote_ip))
		err = vxlan_multicast_leave(vxlan);

	if (conf.age_interval != vxlan->cfg.age_interval)
		mod_timer(&vxlan->age_timer, jiffies);

@@ -4494,7 +4503,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
	if (lowerdev && lowerdev != dst->remote_dev)
		dst->remote_dev = lowerdev;
	vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
	return 0;

	if (!err && change_igmp &&
	    vxlan_addr_multicast(&dst->remote_ip))
		err = vxlan_multicast_join(vxlan);

	return err;
}

static void vxlan_dellink(struct net_device *dev, struct list_head *head)
+0 −10
Original line number Diff line number Diff line
@@ -291,16 +291,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
	check_tc_version
fi

require_command()
{
	local cmd=$1; shift

	if [[ ! -x "$(command -v "$cmd")" ]]; then
		echo "SKIP: $cmd not installed"
		exit $ksft_skip
	fi
}

# IPv6 support was added in v3.0
check_mtools_version()
{
+19 −0
Original line number Diff line number Diff line
@@ -450,6 +450,25 @@ kill_process()
	{ kill $pid && wait $pid; } 2>/dev/null
}

check_command()
{
	local cmd=$1; shift

	if [[ ! -x "$(command -v "$cmd")" ]]; then
		log_test_skip "$cmd not installed"
		return $EXIT_STATUS
	fi
}

require_command()
{
	local cmd=$1; shift

	if ! check_command "$cmd"; then
		exit $EXIT_STATUS
	fi
}

ip_link_add()
{
	local name=$1; shift
+98 −13
Original line number Diff line number Diff line
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Check FDB default-remote handling across "ip link set".
ALL_TESTS="
	test_set_remote
	test_change_mc_remote
"
source lib.sh

check_remotes()
{
	local what=$1; shift
	local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)

	echo -ne "expected two remotes after $what\t"
	if [[ $N != 2 ]]; then
		echo "[FAIL]"
		EXIT_STATUS=1
	else
		echo "[ OK ]"
	fi
	((N == 2))
	check_err $? "expected 2 remotes after $what, got $N"
}

ip link add name vx up type vxlan id 2000 dstport 4789
# Check FDB default-remote handling across "ip link set".
test_set_remote()
{
	RET=0

	ip_link_add vx up type vxlan id 2000 dstport 4789
	bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
	bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
	check_remotes "fdb append"
@@ -25,5 +29,86 @@ check_remotes "fdb append"
	ip link set dev vx type vxlan remote 192.0.2.30
	check_remotes "link set"

ip link del dev vx
	log_test 'FDB default-remote handling across "ip link set"'
}

fmt_remote()
{
	local addr=$1; shift

	if [[ $addr == 224.* ]]; then
		echo "group $addr"
	else
		echo "remote $addr"
	fi
}

change_remote()
{
	local remote=$1; shift

	ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
}

check_membership()
{
	local check_vec=("$@")

	local memberships
	memberships=$(
	    netstat -n --groups |
		sed -n '/^v1\b/p' |
		grep -o '[^ ]*$'
	)
	check_err $? "Couldn't obtain group memberships"

	local item
	for item in "${check_vec[@]}"; do
		eval "local $item"
		echo "$memberships" | grep -q "\b$group\b"
		check_err_fail $fail $? "$group is_ex reported in IGMP query response"
	done
}

test_change_mc_remote()
{
	check_command netstat || return

	ip_link_add v1 up type veth peer name v2
	ip_link_set_up v2

	RET=0

	ip_link_add vx up type vxlan dstport 4789 \
		local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000

	check_membership "group=224.1.1.1 fail=0" \
			 "group=224.1.1.2 fail=1" \
			 "group=224.1.1.3 fail=1"

	log_test "MC group report after VXLAN creation"

	RET=0

	change_remote 224.1.1.2
	check_membership "group=224.1.1.1 fail=1" \
			 "group=224.1.1.2 fail=0" \
			 "group=224.1.1.3 fail=1"

	log_test "MC group report after changing VXLAN remote MC->MC"

	RET=0

	change_remote 192.0.2.2
	check_membership "group=224.1.1.1 fail=1" \
			 "group=224.1.1.2 fail=1" \
			 "group=224.1.1.3 fail=1"

	log_test "MC group report after changing VXLAN remote MC->UC"
}

trap defer_scopes_cleanup EXIT

tests_run

exit $EXIT_STATUS