Commit 8e5f1bb8 authored by Kuniyuki Iwashima's avatar Kuniyuki Iwashima Committed by Jakub Kicinski
Browse files

ipv6: Narrow down RCU critical section in inet6_rtm_newroute().



Commit 169fd627 ("ipv6: Get rid of RTNL for SIOCADDRT and
RTM_NEWROUTE.") added rcu_read_lock() covering
ip6_route_info_create_nh() and __ip6_ins_rt() to guarantee that
nexthop and netdev will not go away.

However, as reported by syzkaller [0], ip_tun_build_state() calls
dst_cache_init() with GFP_KERNEL during the RCU critical section.

ip6_route_info_create_nh() fetches nexthop or netdev depending on
whether RTA_NH_ID is set, and struct fib6_info holds a refcount
of either of them by nexthop_get() or netdev_get_by_index().

netdev_get_by_index() looks up a dev and calls dev_hold() under RCU.

So, we need RCU only around nexthop_find_by_id() and nexthop_get()
( and a few more nexthop code).

Let's add rcu_read_lock() there and remove rcu_read_lock() in
ip6_route_add() and ip6_route_multipath_add().

Now these functions called from fib6_add() need RCU:

  - inet6_rt_notify()
  - fib6_drop_pcpu_from() (via fib6_purge_rt())
  - rt6_flush_exceptions() (via fib6_purge_rt())
  - ip6_ignore_linkdown() (via rt6_multipath_rebalance())

All callers of inet6_rt_notify() need RCU, so rcu_read_lock() is
added there.

[0]:
[ BUG: Invalid wait context ]
6.15.0-rc4-syzkaller-00746-g836b313a14a3 #0 Tainted: G W
 ----------------------------
syz-executor234/5832 is trying to lock:
ffffffff8e021688 (pcpu_alloc_mutex){+.+.}-{4:4}, at:
pcpu_alloc_noprof+0x284/0x16b0 mm/percpu.c:1782
other info that might help us debug this:
context-{5:5}
1 lock held by syz-executor234/5832:
 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at: rcu_lock_acquire
include/linux/rcupdate.h:331 [inline]
 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at: rcu_read_lock
include/linux/rcupdate.h:841 [inline]
 0: ffffffff8df3b860 (rcu_read_lock){....}-{1:3}, at:
ip6_route_add+0x4d/0x2f0 net/ipv6/route.c:3913
stack backtrace:
CPU: 0 UID: 0 PID: 5832 Comm: syz-executor234 Tainted: G W
6.15.0-rc4-syzkaller-00746-g836b313a14a3 #0 PREEMPT(full)
Tainted: [W]=WARN
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 04/29/2025
Call Trace:
 <TASK>
 dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120
 print_lock_invalid_wait_context kernel/locking/lockdep.c:4831 [inline]
 check_wait_context kernel/locking/lockdep.c:4903 [inline]
 __lock_acquire+0xbcf/0xd20 kernel/locking/lockdep.c:5185
 lock_acquire+0x120/0x360 kernel/locking/lockdep.c:5866
 __mutex_lock_common kernel/locking/mutex.c:601 [inline]
 __mutex_lock+0x182/0xe80 kernel/locking/mutex.c:746
 pcpu_alloc_noprof+0x284/0x16b0 mm/percpu.c:1782
 dst_cache_init+0x37/0xc0 net/core/dst_cache.c:145
 ip_tun_build_state+0x193/0x6b0 net/ipv4/ip_tunnel_core.c:687
 lwtunnel_build_state+0x381/0x4c0 net/core/lwtunnel.c:137
 fib_nh_common_init+0x129/0x460 net/ipv4/fib_semantics.c:635
 fib6_nh_init+0x15e4/0x2030 net/ipv6/route.c:3669
 ip6_route_info_create_nh+0x139/0x870 net/ipv6/route.c:3866
 ip6_route_add+0xf6/0x2f0 net/ipv6/route.c:3915
 inet6_rtm_newroute+0x284/0x1c50 net/ipv6/route.c:5732
 rtnetlink_rcv_msg+0x7cc/0xb70 net/core/rtnetlink.c:6955
 netlink_rcv_skb+0x219/0x490 net/netlink/af_netlink.c:2534
 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline]
 netlink_unicast+0x758/0x8d0 net/netlink/af_netlink.c:1339
 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1883
 sock_sendmsg_nosec net/socket.c:712 [inline]
 __sock_sendmsg+0x219/0x270 net/socket.c:727
 ____sys_sendmsg+0x505/0x830 net/socket.c:2566
 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2620
 __sys_sendmsg net/socket.c:2652 [inline]
 __do_sys_sendmsg net/socket.c:2657 [inline]
 __se_sys_sendmsg net/socket.c:2655 [inline]
 __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2655
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xf6/0x210 arch/x86/entry/syscall_64.c:94

Fixes: 169fd627 ("ipv6: Get rid of RTNL for SIOCADDRT and RTM_NEWROUTE.")
Reported-by: default avatar <syzbot+bcc12d6799364500fbec@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=bcc12d6799364500fbec


Reported-by: default avatarEric Dumazet <edumazet@google.com>
Closes: https://lore.kernel.org/netdev/CANn89i+r1cGacVC_6n3-A-WSkAa_Nr+pmxJ7Gt+oP-P9by2aGw@mail.gmail.com/


Signed-off-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250516022759.44392-4-kuniyu@amazon.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent f0a56c17
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -1027,8 +1027,9 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
			.table = table
		};

		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from,
					 &arg);
		rcu_read_lock();
		nexthop_for_each_fib6_nh(f6i->nh, fib6_nh_drop_pcpu_from, &arg);
		rcu_read_unlock();
	} else {
		struct fib6_nh *fib6_nh;

@@ -1221,7 +1222,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
			fib6_nsiblings++;
		}
		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
		rcu_read_lock();
		rt6_multipath_rebalance(temp_sibling);
		rcu_read_unlock();
	}

	/*
@@ -1264,7 +1267,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
					sibling->fib6_nsiblings--;
				rt->fib6_nsiblings = 0;
				list_del_rcu(&rt->fib6_siblings);
				rcu_read_lock();
				rt6_multipath_rebalance(next_sibling);
				rcu_read_unlock();
				return err;
			}
		}
+18 −13
Original line number Diff line number Diff line
@@ -1820,12 +1820,14 @@ static int rt6_nh_flush_exceptions(struct fib6_nh *nh, void *arg)

void rt6_flush_exceptions(struct fib6_info *f6i)
{
	if (f6i->nh)
		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions,
					 f6i);
	else
	if (f6i->nh) {
		rcu_read_lock();
		nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_flush_exceptions, f6i);
		rcu_read_unlock();
	} else {
		fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
	}
}

/* Find cached rt in the hash table inside passed in rt
 * Caller has to hold rcu_read_lock()
@@ -3841,6 +3843,8 @@ static int ip6_route_info_create_nh(struct fib6_info *rt,
	if (cfg->fc_nh_id) {
		struct nexthop *nh;

		rcu_read_lock();

		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
		if (!nh) {
			err = -EINVAL;
@@ -3860,6 +3864,8 @@ static int ip6_route_info_create_nh(struct fib6_info *rt,

		rt->nh = nh;
		fib6_nh = nexthop_fib6_nh(rt->nh);

		rcu_read_unlock();
	} else {
		int addr_type;

@@ -3895,6 +3901,7 @@ static int ip6_route_info_create_nh(struct fib6_info *rt,
	fib6_info_release(rt);
	return err;
out_free:
	rcu_read_unlock();
	ip_fib_metrics_put(rt->fib6_metrics);
	kfree(rt);
	return err;
@@ -3910,16 +3917,12 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	rcu_read_lock();

	err = ip6_route_info_create_nh(rt, cfg, extack);
	if (err)
		goto unlock;
		return err;

	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
	fib6_info_release(rt);
unlock:
	rcu_read_unlock();

	return err;
}
@@ -5534,8 +5537,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
	if (err)
		return err;

	rcu_read_lock();

	err = ip6_route_mpath_info_create_nh(&rt6_nh_list, extack);
	if (err)
		goto cleanup;
@@ -5627,8 +5628,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
	}

cleanup:
	rcu_read_unlock();

	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, list) {
		fib6_info_release(nh->fib6_info);
		list_del(&nh->list);
@@ -6410,6 +6409,8 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	rcu_read_lock();

	skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
	if (!skb)
		goto errout;
@@ -6422,10 +6423,14 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
		kfree_skb(skb);
		goto errout;
	}

	rcu_read_unlock();

	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, GFP_ATOMIC);
	return;
errout:
	rcu_read_unlock();
	rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}