Commit 5eb902b8 authored by Kui-Feng Lee, committed by David S. Miller
Browse files

net/ipv6: Remove expired routes with a separated list of routes.



FIB6 GC walks the trees of fib6_tables to remove expired routes. Walking a
tree can be expensive when a table holds a large number of routes, even if
most of them are permanent. Keeping the routes that have an expiration time
on a separate list, and checking only that list, avoids this potential issue.

Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 60df43d3
Loading
Loading
Loading
Loading
+45 −1
Original line number Diff line number Diff line
@@ -173,6 +173,9 @@ struct fib6_info {

	refcount_t			fib6_ref;
	unsigned long			expires;

	struct hlist_node		gc_link;

	struct dst_metrics		*fib6_metrics;
#define fib6_pmtu		fib6_metrics->metrics[RTAX_MTU-1]

@@ -241,12 +244,18 @@ static inline bool fib6_requires_src(const struct fib6_info *rt)
	return rt->fib6_src.plen > 0;
}

/* Make @f6i non-expiring: clear RTF_EXPIRES in fib6_flags and reset the
 * expires timestamp to 0.
 *
 * This only touches the route itself; it does not unlink the route from
 * any list.
 *
 * The callers should hold f6i->fib6_table->tb6_lock if a route has ever
 * been added to a table before.
 */
static inline void fib6_clean_expires(struct fib6_info *f6i)
{
	f6i->fib6_flags &= ~RTF_EXPIRES;
	f6i->expires = 0;
}

/* The callers should hold f6i->fib6_table->tb6_lock if a route has ever
 * been added to a table before.
 */
static inline void fib6_set_expires(struct fib6_info *f6i,
				    unsigned long expires)
{
@@ -327,9 +336,11 @@ static inline bool fib6_info_hold_safe(struct fib6_info *f6i)

/* Drop one reference on @f6i; a NULL @f6i is ignored.
 *
 * When the reference count reaches zero the route is passed to call_rcu()
 * with fib6_info_destroy_rcu() as the callback. At that point the route is
 * expected to be off its table's GC list already, so
 * DEBUG_NET_WARN_ON_ONCE() flags the case where gc_link is still hashed.
 *
 * The reference count must be decremented exactly once per call, hence the
 * single refcount_dec_and_test() in the condition.
 */
static inline void fib6_info_release(struct fib6_info *f6i)
{
	if (f6i && refcount_dec_and_test(&f6i->fib6_ref)) {
		DEBUG_NET_WARN_ON_ONCE(!hlist_unhashed(&f6i->gc_link));
		call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
	}
}

enum fib6_walk_state {
#ifdef CONFIG_IPV6_SUBTREES
@@ -382,6 +393,7 @@ struct fib6_table {
	struct inet_peer_base	tb6_peers;
	unsigned int		flags;
	unsigned int		fib_seq;
	struct hlist_head       tb6_gc_hlist;	/* GC candidates */
#define RT6_TABLE_HAS_DFLT_ROUTER	BIT(0)
};

@@ -498,6 +510,38 @@ void fib6_gc_cleanup(void);

int fib6_init(void);

/* Add the route to the gc list if it is not already there
 *
 * The callers should hold f6i->fib6_table->tb6_lock.
 */
static inline void fib6_add_gc_list(struct fib6_info *f6i)
{
	/* If fib6_node is null, the f6i is not in (or removed from) the
	 * table.
	 *
	 * There is a gap between finding the f6i from the table and
	 * calling this function without the protection of the tb6_lock.
	 * This check makes sure the f6i is not added to the gc list when
	 * it is not on the table.
	 */
	if (!rcu_dereference_protected(f6i->fib6_node,
				       lockdep_is_held(&f6i->fib6_table->tb6_lock)))
		return;

	if (hlist_unhashed(&f6i->gc_link))
		hlist_add_head(&f6i->gc_link, &f6i->fib6_table->tb6_gc_hlist);
}

/* Take @f6i off the gc list; a route that is not on the list is left
 * untouched.
 *
 * The callers should hold f6i->fib6_table->tb6_lock.
 */
static inline void fib6_remove_gc_list(struct fib6_info *f6i)
{
	/* Not linked, so there is nothing to undo. */
	if (hlist_unhashed(&f6i->gc_link))
		return;

	/* The _init variant re-initialises gc_link after unlinking it. */
	hlist_del_init(&f6i->gc_link);
}

struct ipv6_route_iter {
	struct seq_net_private p;
	struct fib6_walker w;
+34 −7
Original line number Diff line number Diff line
@@ -1255,6 +1255,7 @@ static void
cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
		     bool del_rt, bool del_peer)
{
	struct fib6_table *table;
	struct fib6_info *f6i;

	f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr,
@@ -1264,8 +1265,15 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
		if (del_rt)
			ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
		else {
			if (!(f6i->fib6_flags & RTF_EXPIRES))
			if (!(f6i->fib6_flags & RTF_EXPIRES)) {
				table = f6i->fib6_table;
				spin_lock_bh(&table->tb6_lock);

				fib6_set_expires(f6i, expires);
				fib6_add_gc_list(f6i);

				spin_unlock_bh(&table->tb6_lock);
			}
			fib6_info_release(f6i);
		}
	}
@@ -2706,6 +2714,7 @@ EXPORT_SYMBOL_GPL(addrconf_prefix_rcv_add_addr);
void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
{
	struct prefix_info *pinfo;
	struct fib6_table *table;
	__u32 valid_lft;
	__u32 prefered_lft;
	int addr_type, err;
@@ -2782,11 +2791,20 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
			if (valid_lft == 0) {
				ip6_del_rt(net, rt, false);
				rt = NULL;
			} else if (addrconf_finite_timeout(rt_expires)) {
			} else {
				table = rt->fib6_table;
				spin_lock_bh(&table->tb6_lock);

				if (addrconf_finite_timeout(rt_expires)) {
					/* not infinity */
					fib6_set_expires(rt, jiffies + rt_expires);
					fib6_add_gc_list(rt);
				} else {
					fib6_clean_expires(rt);
					fib6_remove_gc_list(rt);
				}

				spin_unlock_bh(&table->tb6_lock);
			}
		} else if (valid_lft) {
			clock_t expires = 0;
@@ -4741,6 +4759,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
			       unsigned long expires, u32 flags,
			       bool modify_peer)
{
	struct fib6_table *table;
	struct fib6_info *f6i;
	u32 prio;

@@ -4761,10 +4780,18 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
				      ifp->rt_priority, ifp->idev->dev,
				      expires, flags, GFP_KERNEL);
	} else {
		if (!expires)
		table = f6i->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		if (!expires) {
			fib6_clean_expires(f6i);
		else
			fib6_remove_gc_list(f6i);
		} else {
			fib6_set_expires(f6i, expires);
			fib6_add_gc_list(f6i);
		}

		spin_unlock_bh(&table->tb6_lock);

		fib6_info_release(f6i);
	}
+55 −5
Original line number Diff line number Diff line
@@ -160,6 +160,8 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
	INIT_LIST_HEAD(&f6i->fib6_siblings);
	refcount_set(&f6i->fib6_ref, 1);

	INIT_HLIST_NODE(&f6i->gc_link);

	return f6i;
}

@@ -246,6 +248,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
				   net->ipv6.fib6_null_entry);
		table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
		inet_peer_base_init(&table->tb6_peers);
		INIT_HLIST_HEAD(&table->tb6_gc_hlist);
	}

	return table;
@@ -1055,6 +1058,9 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
				    lockdep_is_held(&table->tb6_lock));
		}
	}

	fib6_clean_expires(rt);
	fib6_remove_gc_list(rt);
}

/*
@@ -1115,10 +1121,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
					rt->fib6_nsiblings = 0;
				if (!(iter->fib6_flags & RTF_EXPIRES))
					return -EEXIST;
				if (!(rt->fib6_flags & RTF_EXPIRES))
				if (!(rt->fib6_flags & RTF_EXPIRES)) {
					fib6_clean_expires(iter);
				else
					fib6_remove_gc_list(iter);
				} else {
					fib6_set_expires(iter, rt->expires);
					fib6_add_gc_list(iter);
				}

				if (rt->fib6_pmtu)
					fib6_metric_set(iter, RTAX_MTU,
@@ -1477,6 +1486,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
		if (rt->nh)
			list_add(&rt->nh_list, &rt->nh->f6i_list);
		__fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));

		if (rt->fib6_flags & RTF_EXPIRES)
			fib6_add_gc_list(rt);

		fib6_start_gc(info->nl_net, rt);
	}

@@ -2280,9 +2293,8 @@ static void fib6_flush_trees(struct net *net)
 *	Garbage collection
 */

static int fib6_age(struct fib6_info *rt, void *arg)
static int fib6_age(struct fib6_info *rt, struct fib6_gc_args *gc_args)
{
	struct fib6_gc_args *gc_args = arg;
	unsigned long now = jiffies;

	/*
@@ -2307,6 +2319,42 @@ static int fib6_age(struct fib6_info *rt, void *arg)
	return 0;
}

static void fib6_gc_table(struct net *net,
			  struct fib6_table *tb6,
			  struct fib6_gc_args *gc_args)
{
	struct fib6_info *rt;
	struct hlist_node *n;
	struct nl_info info = {
		.nl_net = net,
		.skip_notify = false,
	};

	hlist_for_each_entry_safe(rt, n, &tb6->tb6_gc_hlist, gc_link)
		if (fib6_age(rt, gc_args) == -1)
			fib6_del(rt, &info);
}

/* Run garbage collection over every fib6 table of @net.
 *
 * All FIB6_TABLE_HASHSZ buckets of net->ipv6.fib_table_hash are walked
 * inside an RCU read-side section. Each table found is handed to
 * fib6_gc_table() with its tb6_lock held (BH-disabling variant).
 */
static void fib6_gc_all(struct net *net, struct fib6_gc_args *gc_args)
{
	unsigned int bucket;

	rcu_read_lock();
	for (bucket = 0; bucket < FIB6_TABLE_HASHSZ; bucket++) {
		struct hlist_head *chain = &net->ipv6.fib_table_hash[bucket];
		struct fib6_table *tb6;

		hlist_for_each_entry_rcu(tb6, chain, tb6_hlist) {
			/* The gc list of a table is only touched under
			 * that table's lock.
			 */
			spin_lock_bh(&tb6->tb6_lock);
			fib6_gc_table(net, tb6, gc_args);
			spin_unlock_bh(&tb6->tb6_lock);
		}
	}
	rcu_read_unlock();
}

void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
	struct fib6_gc_args gc_args;
@@ -2322,7 +2370,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
			  net->ipv6.sysctl.ip6_rt_gc_interval;
	gc_args.more = 0;

	fib6_clean_all(net, fib6_age, &gc_args);
	fib6_gc_all(net, &gc_args);
	now = jiffies;
	net->ipv6.ip6_rt_last_gc = now;

@@ -2382,6 +2430,7 @@ static int __net_init fib6_net_init(struct net *net)
	net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
	INIT_HLIST_HEAD(&net->ipv6.fib6_main_tbl->tb6_gc_hlist);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_local_tbl = kzalloc(sizeof(*net->ipv6.fib6_local_tbl),
@@ -2394,6 +2443,7 @@ static int __net_init fib6_net_init(struct net *net)
	net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
		RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
	inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
	INIT_HLIST_HEAD(&net->ipv6.fib6_local_tbl->tb6_gc_hlist);
#endif
	fib6_tables_init(net);

+9 −1
Original line number Diff line number Diff line
@@ -1237,6 +1237,7 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
	struct ndisc_options ndopts;
	struct fib6_info *rt = NULL;
	struct inet6_dev *in6_dev;
	struct fib6_table *table;
	u32 defrtr_usr_metric;
	unsigned int pref = 0;
	__u32 old_if_flags;
@@ -1410,8 +1411,15 @@ static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
		inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE);
	}

	if (rt)
	if (rt) {
		table = rt->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		fib6_set_expires(rt, jiffies + (HZ * lifetime));
		fib6_add_gc_list(rt);

		spin_unlock_bh(&table->tb6_lock);
	}
	if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
	    ra_msg->icmph.icmp6_hop_limit) {
		if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
+11 −2
Original line number Diff line number Diff line
@@ -931,6 +931,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct fib6_table *table;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;
@@ -989,10 +990,18 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
		table = rt->fib6_table;
		spin_lock_bh(&table->tb6_lock);

		if (!addrconf_finite_timeout(lifetime)) {
			fib6_clean_expires(rt);
		else
			fib6_remove_gc_list(rt);
		} else {
			fib6_set_expires(rt, jiffies + HZ * lifetime);
			fib6_add_gc_list(rt);
		}

		spin_unlock_bh(&table->tb6_lock);

		fib6_info_release(rt);
	}