Commit a82dc19d authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

net: avoid potential race between netdev_get_by_index_lock() and netns switch



netdev_get_by_index_lock() performs following steps:

  rcu_lock();
  dev = lookup(netns, ifindex);
  dev_get(dev);
  rcu_unlock();
  [... lock & validate the dev ...]
  return dev

Validation right now only checks if the device is registered but since
the lookup is netns-aware we must also protect against the device
switching netns right after we dropped the RCU lock. Otherwise
the caller in netns1 may get a pointer to a device which has just
switched to netns2.

We can't hold the lock for the entire netns change process (because of
the NETDEV_UNREGISTER notifier), and there's no existing marking to
indicate that the netns is unlisted because of netns move, so add one.

AFAIU none of the existing netdev_get_by_index_lock() callers can
suffer from this problem (NAPI code double checks the netns membership
and other callers are either under rtnl_lock or not ns-sensitive),
so this patch does not have to be treated as a fix.

Reviewed-by: default avatarJoe Damato <jdamato@fastly.com>
Acked-by: default avatarStanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250408195956.412733-2-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 420aabef
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -1952,6 +1952,7 @@ enum netdev_reg_state {
 *	@priv_destructor:	Called from unregister
 *	@npinfo:		XXX: need comments on this one
 * 	@nd_net:		Network namespace this network device is inside
 *				protected by @lock
 *
 * 	@ml_priv:	Mid-layer private
 *	@ml_priv_type:  Mid-layer private type
@@ -2359,6 +2360,9 @@ struct net_device {

	bool dismantle;

	/** @moving_ns: device is changing netns, protected by @lock */
	bool moving_ns;

	enum {
		RTNL_LINK_INITIALIZED,
		RTNL_LINK_INITIALIZING,
@@ -2521,7 +2525,7 @@ struct net_device {
	 *	@net_shaper_hierarchy, @reg_state, @threaded
	 *
	 * Double protects:
	 *	@up
	 *	@up, @moving_ns, @nd_net
	 *
	 * Double ops protects:
	 *	@real_num_rx_queues, @real_num_tx_queues
+18 −7
Original line number Diff line number Diff line
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
	dev_hold(dev);
	rcu_read_unlock();

	dev = __netdev_put_lock(dev);
	dev = __netdev_put_lock(dev, net);
	if (!dev)
		return NULL;

@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
 * This helper is intended for locking net_device after it has been looked up
 * using a lockless lookup helper. Lock prevents the instance from going away.
 */
struct net_device *__netdev_put_lock(struct net_device *dev)
struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
	netdev_lock(dev);
	if (dev->reg_state > NETREG_REGISTERED) {
	if (dev->reg_state > NETREG_REGISTERED ||
	    dev->moving_ns || !net_eq(dev_net(dev), net)) {
		netdev_unlock(dev);
		dev_put(dev);
		return NULL;
@@ -1070,7 +1071,7 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
	if (!dev)
		return NULL;

	return __netdev_put_lock(dev);
	return __netdev_put_lock(dev, net);
}

struct net_device *
@@ -1090,7 +1091,7 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
		dev_hold(dev);
		rcu_read_unlock();

		dev = __netdev_put_lock(dev);
		dev = __netdev_put_lock(dev, net);
		if (dev)
			return dev;

@@ -12157,7 +12158,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
	netif_close(dev);
	/* And unlink it from device chain */
	unlist_netdevice(dev);
	netdev_unlock_ops(dev);

	if (!netdev_need_ops_lock(dev))
		netdev_lock(dev);
	dev->moving_ns = true;
	netdev_unlock(dev);

	synchronize_net();

@@ -12193,7 +12198,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
	move_netdevice_notifiers_dev_net(dev, net);

	/* Actually switch the network namespace */
	netdev_lock(dev);
	dev_net_set(dev, net);
	netdev_unlock(dev);
	dev->ifindex = new_ifindex;

	if (new_name[0]) {
@@ -12219,7 +12226,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
	err = netdev_change_owner(dev, net_old, net);
	WARN_ON(err);

	netdev_lock_ops(dev);
	netdev_lock(dev);
	dev->moving_ns = false;
	if (!netdev_need_ops_lock(dev))
		netdev_unlock(dev);

	/* Add the device back in the hashes */
	list_netdevice(dev);
	/* Notify protocols, that a new device appeared. */
+1 −1
Original line number Diff line number Diff line
@@ -30,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);

struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
struct net_device *__netdev_put_lock(struct net_device *dev);
struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
		    unsigned long *index);