Commit eecbb169 authored by Jakub Kicinski
Browse files

Merge branch 'ipv6-drop-rtnl-from-mcast-c-and-anycast-c'

Kuniyuki Iwashima says:

====================
ipv6: Drop RTNL from mcast.c and anycast.c

This is a prep series for RCU conversion of RTM_NEWNEIGH, which needs
RTNL during neigh_table.{pconstructor,pdestructor}() touching IPv6
multicast code.

Currently, IPv6 multicast code is protected by lock_sock() and
inet6_dev->mc_lock, and RTNL is not actually needed.

In addition, anycast code is also in the same situation and does not
need RTNL at all.

This series removes RTNL from net/ipv6/{mcast.c,anycast.c} and finally
removes setsockopt_needs_rtnl() from do_ipv6_setsockopt().

v2: https://lore.kernel.org/20250624202616.526600-1-kuni1840@gmail.com
v1: https://lore.kernel.org/20250616233417.1153427-1-kuni1840@gmail.com
====================

Link: https://patch.msgid.link/20250702230210.3115355-1-kuni1840@gmail.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 01af0001 db38443d
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -3332,7 +3332,7 @@ int dev_get_iflink(const struct net_device *dev);
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
			  struct net_device_path_stack *stack);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
					unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
+18 −20
Original line number Diff line number Diff line
@@ -1267,33 +1267,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	__dev_get_by_flags - find any device with given flags
 * dev_get_by_flags_rcu - find any device with given flags
 * @net: the applicable net namespace
 * @if_flags: IFF_* values
 * @mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. Must be called inside
 *	rtnl_lock(), and result refcount is unchanged.
 * Search for any interface with the given flags.
 *
 * Context: rcu_read_lock() must be held.
 * Returns: NULL if a device is not found or a pointer to the device.
 *	    dev_get_by_flags_rcu() takes a reference on the returned device
 *	    with dev_hold().
 */

struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ASSERT_RTNL();
	struct net_device *dev;

	ret = NULL;
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
	/* A device matches when its flags equal @if_flags on every bit
	 * that is set in @mask; bits outside @mask are ignored.
	 */
	for_each_netdev_rcu(net, dev) {
		if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) {
			/* Return with a reference held; the anycast join path
			 * in this change drops it with dev_put().
			 */
			dev_hold(dev);
			return dev;
		}
	}
	return ret;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_flags);
EXPORT_IPV6_MOD(dev_get_by_flags_rcu);

/**
 *	dev_valid_name - check if name is okay for network device
+4 −8
Original line number Diff line number Diff line
@@ -2229,32 +2229,29 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
	in6_ifa_put(ifp);
}

/* Join to solicited addr multicast group.
 * caller must hold RTNL */
/* Join to solicited addr multicast group. */
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
	struct in6_addr maddr;

	/* Nothing is joined on devices flagged IFF_LOOPBACK or IFF_NOARP. */
	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
	if (READ_ONCE(dev->flags) & (IFF_LOOPBACK | IFF_NOARP))
		return;

	/* maddr is filled in from @addr by addrconf_addr_solict_mult() and
	 * is the group handed to ipv6_dev_mc_inc() for @dev.
	 */
	addrconf_addr_solict_mult(addr, &maddr);
	ipv6_dev_mc_inc(dev, &maddr);
}

/* caller must hold RTNL */
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
	struct in6_addr maddr;

	/* Same device filter as addrconf_join_solict(): IFF_LOOPBACK and
	 * IFF_NOARP devices are left untouched.
	 */
	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
	if (READ_ONCE(idev->dev->flags) & (IFF_LOOPBACK | IFF_NOARP))
		return;

	/* maddr is filled in from @addr by addrconf_addr_solict_mult() and
	 * is the group passed to __ipv6_dev_mc_dec() for @idev.
	 */
	addrconf_addr_solict_mult(addr, &maddr);
	__ipv6_dev_mc_dec(idev, &maddr);
}

/* caller must hold RTNL */
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
	struct in6_addr addr;
@@ -2267,7 +2264,6 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
	__ipv6_dev_ac_inc(ifp->idev, &addr);
}

/* caller must hold RTNL */
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
	struct in6_addr addr;
@@ -3865,7 +3861,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister)
	 *	   Do not dev_put!
	 */
	if (unregister) {
		idev->dead = 1;
		WRITE_ONCE(idev->dead, 1);

		/* protected by rtnl_lock */
		RCU_INIT_POINTER(dev->ip6_ptr, NULL);
+58 −42
Original line number Diff line number Diff line
@@ -47,6 +47,9 @@
static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(acaddr_hash_lock);

/* Fetch an RCU-managed anycast list pointer (idev->ac_list or an entry's
 * aca_next) on the update side; lockdep checks that idev->lock is held.
 */
#define ac_dereference(ptr, idev)					\
	rcu_dereference_protected(ptr, lockdep_is_held(&(idev)->lock))

static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);

static u32 inet6_acaddr_hash(const struct net *net,
@@ -64,14 +67,11 @@ static u32 inet6_acaddr_hash(const struct net *net,
int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_ac_socklist *pac = NULL;
	struct net *net = sock_net(sk);
	struct net_device *dev = NULL;
	struct inet6_dev *idev;
	struct ipv6_ac_socklist *pac;
	struct net *net = sock_net(sk);
	int	ishost = !net->ipv6.devconf_all->forwarding;
	int	err = 0;

	ASSERT_RTNL();
	int err = 0, ishost;

	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
		return -EPERM;
@@ -79,32 +79,43 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
		return -EINVAL;

	if (ifindex)
		dev = __dev_get_by_index(net, ifindex);
		dev = dev_get_by_index(net, ifindex);

	if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
		return -EINVAL;
	if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) {
		err = -EINVAL;
		goto error;
	}

	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
	if (!pac)
		return -ENOMEM;
	if (!pac) {
		err = -ENOMEM;
		goto error;
	}

	pac->acl_next = NULL;
	pac->acl_addr = *addr;

	ishost = !READ_ONCE(net->ipv6.devconf_all->forwarding);

	if (ifindex == 0) {
		struct rt6_info *rt;

		rcu_read_lock();
		rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
		if (rt) {
			dev = rt->dst.dev;
			dev = dst_dev(&rt->dst);
			dev_hold(dev);
			ip6_rt_put(rt);
		} else if (ishost) {
			rcu_read_unlock();
			err = -EADDRNOTAVAIL;
			goto error;
		} else {
			/* router, no matching interface: just pick one */
			dev = __dev_get_by_flags(net, IFF_UP,
			dev = dev_get_by_flags_rcu(net, IFF_UP,
						   IFF_UP | IFF_LOOPBACK);
		}
		rcu_read_unlock();
	}

	if (!dev) {
@@ -112,7 +123,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
		goto error;
	}

	idev = __in6_dev_get(dev);
	idev = in6_dev_get(dev);
	if (!idev) {
		if (ifindex)
			err = -ENODEV;
@@ -120,8 +131,9 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
			err = -EADDRNOTAVAIL;
		goto error;
	}

	/* reset ishost, now that we have a specific device */
	ishost = !idev->cnf.forwarding;
	ishost = !READ_ONCE(idev->cnf.forwarding);

	pac->acl_ifindex = dev->ifindex;

@@ -134,7 +146,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
		if (ishost)
			err = -EADDRNOTAVAIL;
		if (err)
			goto error;
			goto error_idev;
	}

	err = __ipv6_dev_ac_inc(idev, addr);
@@ -144,7 +156,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
		pac = NULL;
	}

error_idev:
	in6_dev_put(idev);
error:
	dev_put(dev);

	if (pac)
		sock_kfree_s(sk, pac, sizeof(*pac));
	return err;
@@ -155,12 +171,10 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 */
int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net_device *dev;
	struct ipv6_ac_socklist *pac, *prev_pac;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);

	ASSERT_RTNL();
	struct net_device *dev;

	prev_pac = NULL;
	for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) {
@@ -176,9 +190,11 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
	else
		np->ipv6_ac_list = pac->acl_next;

	dev = __dev_get_by_index(net, pac->acl_ifindex);
	if (dev)
	dev = dev_get_by_index(net, pac->acl_ifindex);
	if (dev) {
		ipv6_dev_ac_dec(dev, &pac->acl_addr);
		dev_put(dev);
	}

	sock_kfree_s(sk, pac, sizeof(*pac));
	return 0;
@@ -187,21 +203,20 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
void __ipv6_sock_ac_close(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct net_device *dev = NULL;
	struct ipv6_ac_socklist *pac;
	struct net *net = sock_net(sk);
	int	prev_index;
	int prev_index = 0;

	ASSERT_RTNL();
	pac = np->ipv6_ac_list;
	np->ipv6_ac_list = NULL;

	prev_index = 0;
	while (pac) {
		struct ipv6_ac_socklist *next = pac->acl_next;

		if (pac->acl_ifindex != prev_index) {
			dev = __dev_get_by_index(net, pac->acl_ifindex);
			dev_put(dev);
			dev = dev_get_by_index(net, pac->acl_ifindex);
			prev_index = pac->acl_ifindex;
		}
		if (dev)
@@ -209,6 +224,8 @@ void __ipv6_sock_ac_close(struct sock *sk)
		sock_kfree_s(sk, pac, sizeof(*pac));
		pac = next;
	}

	dev_put(dev);
}

void ipv6_sock_ac_close(struct sock *sk)
@@ -217,9 +234,8 @@ void ipv6_sock_ac_close(struct sock *sk)

	if (!np->ipv6_ac_list)
		return;
	rtnl_lock();

	__ipv6_sock_ac_close(sk);
	rtnl_unlock();
}

static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
@@ -319,16 +335,14 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
	struct net *net;
	int err;

	ASSERT_RTNL();

	write_lock_bh(&idev->lock);
	if (idev->dead) {
		err = -ENODEV;
		goto out;
	}

	for (aca = rtnl_dereference(idev->ac_list); aca;
	     aca = rtnl_dereference(aca->aca_next)) {
	for (aca = ac_dereference(idev->ac_list, idev); aca;
	     aca = ac_dereference(aca->aca_next, idev)) {
		if (ipv6_addr_equal(&aca->aca_addr, addr)) {
			aca->aca_users++;
			err = 0;
@@ -380,12 +394,10 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
{
	struct ifacaddr6 *aca, *prev_aca;

	ASSERT_RTNL();

	write_lock_bh(&idev->lock);
	prev_aca = NULL;
	for (aca = rtnl_dereference(idev->ac_list); aca;
	     aca = rtnl_dereference(aca->aca_next)) {
	for (aca = ac_dereference(idev->ac_list, idev); aca;
	     aca = ac_dereference(aca->aca_next, idev)) {
		if (ipv6_addr_equal(&aca->aca_addr, addr))
			break;
		prev_aca = aca;
@@ -414,14 +426,18 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
	return 0;
}

/* called with rtnl_lock() */
/* Resolve @dev to its inet6_dev and forward to __ipv6_dev_ac_dec().
 * Returns -ENODEV when @dev has no inet6_dev, otherwise the result of
 * __ipv6_dev_ac_dec().
 */
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr)
{
	struct inet6_dev *idev = __in6_dev_get(dev);
	struct inet6_dev *idev = in6_dev_get(dev);
	int err;

	if (!idev)
		return -ENODEV;
	return __ipv6_dev_ac_dec(idev, addr);

	err = __ipv6_dev_ac_dec(idev, addr);
	/* Pairs with the in6_dev_get() at the top of the function. */
	in6_dev_put(idev);

	return err;
}

void ipv6_ac_destroy_dev(struct inet6_dev *idev)
@@ -429,7 +445,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
	struct ifacaddr6 *aca;

	write_lock_bh(&idev->lock);
	while ((aca = rtnl_dereference(idev->ac_list)) != NULL) {
	while ((aca = ac_dereference(idev->ac_list, idev)) != NULL) {
		rcu_assign_pointer(idev->ac_list, aca->aca_next);
		write_unlock_bh(&idev->lock);

+2 −26
Original line number Diff line number Diff line
@@ -117,26 +117,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
	return opt;
}

/* Report whether @optname is one of the socket options around which
 * do_ipv6_setsockopt() takes rtnl_lock()/rtnl_unlock().
 */
static bool setsockopt_needs_rtnl(int optname)
{
	/* Options whose handlers ran under RTNL. */
	static const int rtnl_opts[] = {
		IPV6_ADDRFORM,
		IPV6_ADD_MEMBERSHIP,
		IPV6_DROP_MEMBERSHIP,
		IPV6_JOIN_ANYCAST,
		IPV6_LEAVE_ANYCAST,
		MCAST_JOIN_GROUP,
		MCAST_LEAVE_GROUP,
		MCAST_JOIN_SOURCE_GROUP,
		MCAST_LEAVE_SOURCE_GROUP,
		MCAST_BLOCK_SOURCE,
		MCAST_UNBLOCK_SOURCE,
		MCAST_MSFILTER,
	};
	unsigned int i;

	for (i = 0; i < sizeof(rtnl_opts) / sizeof(rtnl_opts[0]); i++) {
		if (rtnl_opts[i] == optname)
			return true;
	}

	return false;
}

static int copy_group_source_from_sockptr(struct group_source_req *greqs,
		sockptr_t optval, int optlen)
{
@@ -395,9 +375,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int val, valbool;
	int retv = -ENOPROTOOPT;
	bool needs_rtnl = setsockopt_needs_rtnl(optname);
	int val, valbool;

	if (sockptr_is_null(optval))
		val = 0;
@@ -562,8 +541,7 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
		return 0;
	}
	}
	if (needs_rtnl)
		rtnl_lock();

	sockopt_lock_sock(sk);

	/* Another thread has converted the socket into IPv4 with
@@ -969,8 +947,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,

unlock:
	sockopt_release_sock(sk);
	if (needs_rtnl)
		rtnl_unlock();

	return retv;

Loading