Commit f7f52738 authored by Gilad Naaman, committed by Jakub Kicinski
Browse files

neighbour: Create netdev->neighbour association



Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent a01a67ab
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -188,4 +188,5 @@ u64 max_pacing_offload_horizon
struct_napi_config*                 napi_config
unsigned_long                       gro_flush_timeout
u32                                 napi_defer_hard_irqs
struct hlist_head                   neighbours[2]
=================================== =========================== =================== =================== ===================================================================================
+7 −0
Original line number Diff line number Diff line
@@ -52,6 +52,7 @@
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>
#include <net/neighbour_tables.h>

struct netpoll_info;
struct device;
@@ -2032,6 +2033,9 @@ enum netdev_reg_state {
 *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
 *				allow to avoid NIC hard IRQ, on busy queues.
 *
 *	@neighbours:	List heads pointing to this device's neighbours'
 *			dev_list, one per address-family.
 *
 *	FIXME: cleanup struct net_device such that network protocol info
 *	moves out.
 */
@@ -2440,6 +2444,9 @@ struct net_device {
	 */
	struct net_shaper_hierarchy *net_shaper_hierarchy;
#endif

	struct hlist_head neighbours[NEIGH_NR_TABLES];

	u8			priv[] ____cacheline_aligned
				       __counted_by(priv_len);
} ____cacheline_aligned;
+2 −7
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/rtnetlink.h>
#include <net/neighbour_tables.h>

/*
 * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@ struct neigh_statistics {

struct neighbour {
	struct hlist_node	hash;
	struct hlist_node	dev_list;
	struct neigh_table	*tbl;
	struct neigh_parms	*parms;
	unsigned long		confirmed;
@@ -236,13 +238,6 @@ struct neigh_table {
	struct pneigh_entry	**phash_buckets;
};

enum {
	NEIGH_ARP_TABLE = 0,
	NEIGH_ND_TABLE = 1,
	NEIGH_NR_TABLES,
	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
};

static inline int neigh_parms_family(struct neigh_parms *p)
{
	return p->tbl->family;
+12 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_NEIGHBOUR_TABLES_H
#define _NET_NEIGHBOUR_TABLES_H

/*
 * Indices of the neighbour tables.
 *
 * NEIGH_NR_TABLES counts the real tables (ARP and ND) and is therefore
 * usable as an array size; NEIGH_LINK_TABLE deliberately aliases that
 * count, so it is never a valid index into such an array.
 */
enum {
	NEIGH_ARP_TABLE,
	NEIGH_ND_TABLE,
	NEIGH_NR_TABLES,
	NEIGH_LINK_TABLE = NEIGH_NR_TABLES,	/* Pseudo table for neigh_xmit */
};

#endif
+58 −38
Original line number Diff line number Diff line
@@ -60,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
 * Return the head of @dev's neighbour list for the given address family.
 *
 * @dev:    device whose neighbours[] array is indexed; it is not checked
 *          for NULL here.
 * @family: AF_INET selects the NEIGH_ARP_TABLE slot, AF_INET6 selects the
 *          NEIGH_ND_TABLE slot.
 *
 * Any other family triggers DEBUG_NET_WARN_ON_ONCE() and is then handled
 * exactly like AF_INET, so the returned pointer always refers to one of
 * the two slots of dev->neighbours.
 */
static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int idx = NEIGH_ARP_TABLE;

	if (family == AF_INET6)
		idx = NEIGH_ND_TABLE;
	else if (family != AF_INET)
		DEBUG_NET_WARN_ON_ONCE(1); /* unexpected family: keep the ARP slot */

	return &dev->neighbours[idx];
}

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

@@ -211,6 +230,7 @@ bool neigh_remove_one(struct neighbour *n)
	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		neigh_mark_dead(n);
		retval = true;
	}
@@ -351,35 +371,30 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
	struct hlist_head *dev_head;
	struct hlist_node *tmp;
	struct neighbour *n;

		neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
			if (dev && n->dev != dev)
				continue;
	dev_head = neigh_get_dev_table(dev, tbl->family);

	hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
		if (skip_perm && n->nud_state & NUD_PERMANENT)
			continue;

		hlist_del_rcu(&n->hash);
		hlist_del_rcu(&n->dev_list);
		write_lock(&n->lock);
		neigh_del_timer(n);
		neigh_mark_dead(n);
		if (refcount_read(&n->refcnt) != 1) {
			/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
			 * We must destroy neighbour entry,
			 * but someone still uses it.
			 *
			 * The destroy will be delayed until
			 * the last user releases us, but
			 * we must kill timers etc. and move
			 * it to safe state.
			 */
			__skb_queue_purge(&n->arp_queue);
			n->arp_queue_len_bytes = 0;
@@ -394,7 +409,6 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
		neigh_cleanup_and_release(n);
	}
}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
@@ -655,6 +669,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
	if (want_ref)
		neigh_hold(n);
	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);

	hlist_add_head_rcu(&n->dev_list,
			   neigh_get_dev_table(dev, tbl->family));

	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
@@ -935,6 +953,7 @@ static void neigh_periodic_work(struct work_struct *work)
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
@@ -3054,6 +3073,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
			release = cb(n);
			if (release) {
				hlist_del_rcu(&n->hash);
				hlist_del_rcu(&n->dev_list);
				neigh_mark_dead(n);
			}
			write_unlock(&n->lock);