Commit 58ec6857 authored by Jakub Kicinski
Browse files

Merge branch 'ipv4-preliminary-work-for-per-netns-rtnl'

Eric Dumazet says:

====================
ipv4: preliminary work for per-netns RTNL

Inspired by 9b8ca048 ("ipv4: avoid quadratic behavior in
FIB insertion of common address") and per-netns RTNL conversion
started by Kuniyuki this week.

ip_fib_check_default() can use RCU instead of a shared spinlock.

fib_info_lock can be removed, RTNL is already used.

fib_info_devhash[] can be removed in favor of a single
pointer in net_device.
====================

Link: https://patch.msgid.link/20241004134720.579244-1-edumazet@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 1405981b a3f5f4c2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ unsigned_int allmulti
bool                                uc_promisc                                                      
unsigned_char                       nested_level                                                    
struct_in_device*                   ip_ptr                  read_mostly         read_mostly         __in_dev_get
struct hlist_head                   fib_nh_head
struct_inet6_dev*                   ip6_ptr                 read_mostly         read_mostly         __in6_dev_get
struct_vlan_info*                   vlan_info                                                       
struct_dsa_port*                    dsa_ptr                                                         
+3 −0
Original line number Diff line number Diff line
@@ -2211,6 +2211,9 @@ struct net_device {

	/* Protocol-specific pointers */
	struct in_device __rcu	*ip_ptr;
	/** @fib_nh_head: nexthops associated with this netdev */
	struct hlist_head	fib_nh_head;

#if IS_ENABLED(CONFIG_VLAN_8021Q)
	struct vlan_info __rcu	*vlan_info;
#endif
+28 −49
Original line number Diff line number Diff line
@@ -50,17 +50,12 @@

#include "fib_lookup.h"

static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_hash_bits;
static unsigned int fib_info_cnt;

#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

/* for_nexthops and change_nexthops only used when nexthop object
 * is not set in a fib_info. The logic within can reference fib_nh.
 */
@@ -260,12 +255,11 @@ EXPORT_SYMBOL_GPL(free_fib_info);

void fib_release_info(struct fib_info *fi)
{
	spin_lock_bh(&fib_info_lock);
	ASSERT_RTNL();
	if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
		hlist_del(&fi->fib_hash);

		/* Paired with READ_ONCE() in fib_create_info(). */
		WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
		fib_info_cnt--;

		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
@@ -275,14 +269,13 @@ void fib_release_info(struct fib_info *fi)
			change_nexthops(fi) {
				if (!nexthop_nh->fib_nh_dev)
					continue;
				hlist_del(&nexthop_nh->nh_hash);
				hlist_del_rcu(&nexthop_nh->nh_hash);
			} endfor_nexthops(fi)
		}
		/* Paired with READ_ONCE() from fib_table_lookup() */
		WRITE_ONCE(fi->fib_dead, 1);
		fib_info_put(fi);
	}
	spin_unlock_bh(&fib_info_lock);
}

static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
@@ -322,17 +315,9 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
	return 0;
}

static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	return hash_32(val, DEVINDEX_HASHBITS);
}

static struct hlist_head *
fib_info_devhash_bucket(const struct net_device *dev)
static struct hlist_head *fib_nh_head(struct net_device *dev)
{
	u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;

	return &fib_info_devhash[fib_devindex_hashfn(val)];
	return &dev->fib_nh_head;
}

static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -362,10 +347,10 @@ static inline unsigned int fib_info_hashfn(struct fib_info *fi)
				fi->fib_priority);

	if (fi->nh) {
		val ^= fib_devindex_hashfn(fi->nh->id);
		val ^= fi->nh->id;
	} else {
		for_nexthops(fi) {
			val ^= fib_devindex_hashfn(nh->fib_nh_oif);
			val ^= nh->fib_nh_oif;
		} endfor_nexthops(fi)
	}

@@ -380,7 +365,7 @@ static struct fib_info *fib_find_info_nh(struct net *net,
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn_1(fib_devindex_hashfn(cfg->fc_nh_id),
	hash = fib_info_hashfn_1(cfg->fc_nh_id,
				 cfg->fc_protocol, cfg->fc_scope,
				 (__force u32)cfg->fc_prefsrc,
				 cfg->fc_priority);
@@ -436,28 +421,23 @@ static struct fib_info *fib_find_info(struct fib_info *nfi)
}

/* Check, that the gateway is already configured.
 * Used only by redirect accept routine.
 * Used only by redirect accept routine, under rcu_read_lock();
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct fib_nh *nh;

	spin_lock(&fib_info_lock);
	head = fib_nh_head(dev);

	head = fib_info_devhash_bucket(dev);

	hlist_for_each_entry(nh, head, nh_hash) {
		if (nh->fib_nh_dev == dev &&
		    nh->fib_nh_gw4 == gw &&
	hlist_for_each_entry_rcu(nh, head, nh_hash) {
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (nh->fib_nh_gw4 == gw &&
		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
			spin_unlock(&fib_info_lock);
			return 0;
		}
	}

	spin_unlock(&fib_info_lock);

	return -1;
}

@@ -1276,7 +1256,7 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
	unsigned int old_size = fib_info_hash_size;
	unsigned int i;

	spin_lock_bh(&fib_info_lock);
	ASSERT_RTNL();
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	fib_info_hash_size = new_size;
@@ -1313,8 +1293,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
		}
	}

	spin_unlock_bh(&fib_info_lock);

	kvfree(old_info_hash);
	kvfree(old_laddrhash);
}
@@ -1390,6 +1368,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	ASSERT_RTNL();
	if (cfg->fc_type > RTN_MAX)
		goto err_inval;

@@ -1432,8 +1411,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,

	err = -ENOBUFS;

	/* Paired with WRITE_ONCE() in fib_release_info() */
	if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
	if (fib_info_cnt >= fib_info_hash_size) {
		unsigned int new_size = fib_info_hash_size << 1;
		struct hlist_head *new_info_hash;
		struct hlist_head *new_laddrhash;
@@ -1592,7 +1570,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,

	refcount_set(&fi->fib_treeref, 1);
	refcount_set(&fi->fib_clntref, 1);
	spin_lock_bh(&fib_info_lock);

	fib_info_cnt++;
	hlist_add_head(&fi->fib_hash,
		       &fib_info_hash[fib_info_hashfn(fi)]);
@@ -1610,11 +1588,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,

			if (!nexthop_nh->fib_nh_dev)
				continue;
			head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
			hlist_add_head(&nexthop_nh->nh_hash, head);
			head = fib_nh_head(nexthop_nh->fib_nh_dev);
			hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
		} endfor_nexthops(fi)
	}
	spin_unlock_bh(&fib_info_lock);
	return fi;

err_inval:
@@ -1964,11 +1941,11 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)

void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	struct hlist_head *head = fib_info_devhash_bucket(dev);
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_nh *nh;

	hlist_for_each_entry(nh, head, nh_hash) {
		if (nh->fib_nh_dev == dev)
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
	}
}
@@ -1983,7 +1960,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	struct hlist_head *head = fib_info_devhash_bucket(dev);
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_info *prev_fi = NULL;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_nh *nh;
@@ -1997,7 +1974,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
		int dead;

		BUG_ON(!fi->fib_nhs);
		if (nh->fib_nh_dev != dev || fi == prev_fi)
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
@@ -2147,7 +2125,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
	}

	prev_fi = NULL;
	head = fib_info_devhash_bucket(dev);
	head = fib_nh_head(dev);
	ret = 0;

	hlist_for_each_entry(nh, head, nh_hash) {
@@ -2155,7 +2133,8 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
		int alive;

		BUG_ON(!fi->fib_nhs);
		if (nh->fib_nh_dev != dev || fi == prev_fi)
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (fi == prev_fi)
			continue;

		prev_fi = fi;