Commit d539d8fb authored by Kuniyuki Iwashima, committed by Jakub Kicinski
Browse files

neighbour: Free pneigh_entry after RCU grace period.



We will convert RTM_GETNEIGH to RCU.

neigh_get() looks up pneigh_entry by pneigh_lookup() and passes
it to pneigh_fill_info().

Then, we must ensure that the entry is alive till pneigh_fill_info()
completes, but read_lock_bh(&tbl->lock) in pneigh_lookup() does not
guarantee that.

Also, we will convert all readers of tbl->phash_buckets[] to RCU.

Let's use call_rcu() to free pneigh_entry and update phash_buckets[]
and ->next by rcu_assign_pointer().

pneigh_ifdown_and_unlock() uses list_head to avoid overwriting
->next and moving RCU iterators to another list.

pndisc_destructor() (only IPv6 ndisc uses this) uses a mutex, so it
is not delayed to call_rcu(), where we cannot sleep.  This is fine
because the mcast code works with RCU and ipv6_dev_mc_dec() frees
mcast objects after RCU grace period.

While at it, we change the return type of pneigh_ifdown_and_unlock()
to void.

Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250716221221.442239-8-kuniyu@google.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent d63382ae
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -180,6 +180,10 @@ struct pneigh_entry {
	possible_net_t		net;
	struct net_device	*dev;
	netdevice_tracker	dev_tracker;
	union {
		struct list_head	free_node;
		struct rcu_head		rcu;
	};
	u32			flags;
	u8			protocol;
	bool			permanent;
+28 −17
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool skip_perm);

@@ -810,6 +810,14 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
	return n;
}

/*
 * RCU callback for a pneigh_entry that has been queued with call_rcu().
 *
 * @rcu: the rcu_head embedded in the entry being released.
 *
 * Drops the reference on the entry's net_device through its tracker and
 * then frees the entry itself.
 */
static void pneigh_destroy(struct rcu_head *rcu)
{
	struct pneigh_entry *entry;

	/* Recover the enclosing entry from its embedded rcu_head. */
	entry = container_of(rcu, struct pneigh_entry, rcu);

	netdev_put(entry->dev, &entry->dev_tracker);
	kfree(entry);
}

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
@@ -828,10 +836,11 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		    net_eq(pneigh_net(n), net)) {
			rcu_assign_pointer(*np, n->next);
			write_unlock_bh(&tbl->lock);

			if (tbl->pdestructor)
				tbl->pdestructor(n);
			netdev_put(n->dev, &n->dev_tracker);
			kfree(n);

			call_rcu(&n->rcu, pneigh_destroy);
			return 0;
		}
	}
@@ -839,11 +848,12 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				     struct net_device *dev,
				     bool skip_perm)
{
	struct pneigh_entry *n, __rcu **np, *freelist = NULL;
	struct pneigh_entry *n, __rcu **np;
	LIST_HEAD(head);
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
@@ -853,24 +863,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				goto skip;
			if (!dev || n->dev == dev) {
				rcu_assign_pointer(*np, n->next);
				rcu_assign_pointer(n->next, freelist);
				freelist = n;
				list_add(&n->free_node, &head);
				continue;
			}
skip:
			np = &n->next;
		}
	}

	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = rcu_dereference_protected(n->next, 1);
		n->next = NULL;

	while (!list_empty(&head)) {
		n = list_first_entry(&head, typeof(*n), free_node);
		list_del(&n->free_node);

		if (tbl->pdestructor)
			tbl->pdestructor(n);
		netdev_put(n->dev, &n->dev_tracker);
		kfree(n);

		call_rcu(&n->rcu, pneigh_destroy);
	}
	return -ENOENT;
}

static inline void neigh_parms_put(struct neigh_parms *parms)