Commit ab4b6e4e authored by Paolo Abeni
Browse files

Merge branch 'net-bpf-fix-null-ptr-deref-in-xdp_master_redirect-for-bonding-and-add-selftest'

Jiayuan Chen says:

====================
net,bpf: fix null-ptr-deref in xdp_master_redirect() for bonding and add selftest

From: Jiayuan Chen <jiayuan.chen@shopee.com>

This series has gone through several rounds of discussion and the
maintainers hold different views on where the fix should live (in the
generic xdp_master_redirect() path vs. inside bonding). I respect all
of the suggestions, but I would like to get the crash fixed first, so
this version takes the approach of checking whether the master device
is up in xdp_master_redirect(), as suggested by Daniel Borkmann. If a
different shape is preferred later it can be done as a follow-up, but
the null-ptr-deref should not linger.

syzkaller reported a kernel panic; the full decoded trace is here:
https://syzkaller.appspot.com/bug?extid=80e046b8da2820b6ba73

Problem Description

bond_rr_gen_slave_id() dereferences bond->rr_tx_counter without a NULL
check. rr_tx_counter is a per-CPU counter that bonding only allocates
in bond_open() when the mode is round-robin. If the bond device was
never brought up, rr_tx_counter stays NULL.

The XDP redirect path can still reach that code on a bond that was
never opened: bpf_master_redirect_enabled_key is a global static key,
so as soon as any bond device has native XDP attached, the
XDP_TX -> xdp_master_redirect() interception is enabled for every
slave system-wide. The path xdp_master_redirect() ->
bond_xdp_get_xmit_slave() -> bond_xdp_xmit_roundrobin_slave_get() ->
bond_rr_gen_slave_id() then runs against a bond that has no
rr_tx_counter and crashes.

Solution

Patch 1: Fix this in the generic xdp_master_redirect() by skipping
master interception when the master device is not running. Returning
XDP_TX keeps the original XDP_TX behaviour on the receiving slave, and
avoids calling into any master ->ndo_xdp_get_xmit_slave() on a device
that has not fully initialized its XDP state. This is not specific to
bonding: any current or future master that defers XDP state allocation
to ->ndo_open() is protected.
Patch 2: Add a selftest that reproduces the above scenario.

v6: https://lore.kernel.org/netdev/20260410113726.368111-1-jiayuan.chen@linux.dev/T/#t
v5: https://lore.kernel.org/netdev/20260309030659.xxxxx-1-jiayuan.chen@linux.dev/
v4: https://lore.kernel.org/netdev/20260304074301.35482-1-jiayuan.chen@linux.dev/
v3: https://lore.kernel.org/netdev/20260228021918.141002-1-jiayuan.chen@linux.dev/T/#t
v2: https://lore.kernel.org/netdev/20260227092254.272603-1-jiayuan.chen@linux.dev/T/#t
v1: https://lore.kernel.org/netdev/20260224112545.37888-1-jiayuan.chen@linux.dev/T/#t

[1] https://syzkaller.appspot.com/bug?extid=80e046b8da2820b6ba73
====================

Link: https://patch.msgid.link/20260411005524.201200-1-jiayuan.chen@linux.dev


Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 2cd7e697 8dd1bdde
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -4395,6 +4395,8 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
	struct net_device *master, *slave;

	master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
	/* A master that is not up may not have set up its XDP state yet (for
	 * bonding, rr_tx_counter is only allocated in bond_open()). Skip the
	 * master interception and keep the plain XDP_TX behaviour on the
	 * receiving slave instead of calling ->ndo_xdp_get_xmit_slave().
	 */
	if (unlikely(!(master->flags & IFF_UP)))
		return XDP_TX;
	slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
	if (slave && slave != xdp->rxq->dev) {
		/* The target device is different from the receiving device, so
+94 −2
Original line number Diff line number Diff line
@@ -191,13 +191,18 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
	return -1;
}

static void bonding_cleanup(struct skeletons *skeletons)
/*
 * Call restore_root_netns(), then destroy every BPF link recorded in
 * skeletons->links, last entry first, leaving skeletons->nlinks at zero.
 */
static void link_cleanup(struct skeletons *skeletons)
{
	/* NOTE(review): bonding_cleanup() already calls restore_root_netns()
	 * right before calling this helper, and the hunk summary reports two
	 * removed lines. This call looks like a removed diff line that lost
	 * its '-' marker - confirm against the patched file.
	 */
	restore_root_netns();
	/* Pop links off the end of the array until none remain. */
	while (skeletons->nlinks) {
		skeletons->nlinks--;
		bpf_link__destroy(skeletons->links[skeletons->nlinks]);
	}
}

static void bonding_cleanup(struct skeletons *skeletons)
{
	restore_root_netns();
	link_cleanup(skeletons);
	ASSERT_OK(system("ip link delete bond1"), "delete bond1");
	ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
	ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
@@ -493,6 +498,90 @@ static void test_xdp_bonding_nested(struct skeletons *skeletons)
	system("ip link del bond_nest2");
}

/*
 * Test that XDP redirect via xdp_master_redirect() does not crash when
 * the bond master device is not up. When bond is in round-robin mode but
 * never opened, rr_tx_counter is NULL.
 */
static void test_xdp_bonding_redirect_no_up(struct skeletons *skeletons)
{
	struct nstoken *nstoken = NULL;
	int xdp_pass_fd;
	int veth1_ifindex;
	int err;
	char pkt[ETH_HLEN + 1];
	struct xdp_md ctx_in = {};

	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
			    .data_in = &pkt,
			    .data_size_in = sizeof(pkt),
			    .ctx_in = &ctx_in,
			    .ctx_size_in = sizeof(ctx_in),
			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
			    .repeat = 1,
			    .batch_size = 1,
		);

	/* We can't use bonding_setup() because bond will be active */
	SYS(out, "ip netns add ns_rr_no_up");
	nstoken = open_netns("ns_rr_no_up");
	if (!ASSERT_OK_PTR(nstoken, "open ns_rr_no_up"))
		goto out;

	/* bond0: active-backup, UP with slave veth0.
	 * Attaching native XDP to bond0 enables bpf_master_redirect_enabled_key
	 * globally.
	 */
	SYS(out, "ip link add bond0 type bond mode active-backup");
	SYS(out, "ip link add veth0 type veth peer name veth0p");
	SYS(out, "ip link set veth0 master bond0");
	SYS(out, "ip link set bond0 up");
	SYS(out, "ip link set veth0p up");

	/* bond1: round-robin, never UP -> rr_tx_counter stays NULL */
	SYS(out, "ip link add bond1 type bond mode balance-rr");
	SYS(out, "ip link add veth1 type veth peer name veth1p");
	SYS(out, "ip link set veth1 master bond1");

	veth1_ifindex = if_nametoindex("veth1");
	if (!ASSERT_GT(veth1_ifindex, 0, "veth1_ifindex"))
		goto out;

	/* Attach native XDP to bond0 -> enables global redirect key */
	if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond0"))
		goto out;

	/* Attach generic XDP (XDP_TX) to veth1.
	 * When packets arrive at veth1 via netif_receive_skb, do_xdp_generic()
	 * runs this program. XDP_TX + bond slave triggers xdp_master_redirect().
	 */
	err = bpf_xdp_attach(veth1_ifindex,
			     bpf_program__fd(skeletons->xdp_tx->progs.xdp_tx),
			     XDP_FLAGS_SKB_MODE, NULL);
	if (!ASSERT_OK(err, "attach generic XDP to veth1"))
		goto out;

	/* Run BPF_PROG_TEST_RUN with XDP_PASS live frames on veth1.
	 * XDP_PASS frames become SKBs with skb->dev = veth1, entering
	 * netif_receive_skb -> do_xdp_generic -> xdp_master_redirect.
	 * Without the fix, bond_rr_gen_slave_id() dereferences NULL
	 * rr_tx_counter and crashes.
	 */
	xdp_pass_fd = bpf_program__fd(skeletons->xdp_dummy->progs.xdp_dummy_prog);

	memset(pkt, 0, sizeof(pkt));
	ctx_in.data_end = sizeof(pkt);
	ctx_in.ingress_ifindex = veth1_ifindex;

	err = bpf_prog_test_run_opts(xdp_pass_fd, &opts);
	ASSERT_OK(err, "xdp_pass test_run should not crash");

out:
	link_cleanup(skeletons);
	close_netns(nstoken);
	SYS_NOFAIL("ip netns del ns_rr_no_up");
}

static void test_xdp_bonding_features(struct skeletons *skeletons)
{
	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
@@ -738,6 +827,9 @@ void serial_test_xdp_bonding(void)
	if (test__start_subtest("xdp_bonding_redirect_multi"))
		test_xdp_bonding_redirect_multi(&skeletons);

	if (test__start_subtest("xdp_bonding_redirect_no_up"))
		test_xdp_bonding_redirect_no_up(&skeletons);

out:
	xdp_dummy__destroy(skeletons.xdp_dummy);
	xdp_tx__destroy(skeletons.xdp_tx);