Commit fd07ba16 authored by Cosmin Ratiu, committed by Jakub Kicinski
Browse files

IB/IPoIB: Allow using netdevs that require the instance lock



After the last patch removing vlan_rwsem, it is an incremental step to
allow ipoib to work with netdevs that require the instance lock.
In several places, netdev_lock() is changed to netdev_lock_ops_to_full()
which takes care of not acquiring the lock again when the netdev is
already locked.

In ipoib_ib_tx_timeout_work() and __ipoib_ib_dev_flush() for HEAVY
flushes, the netdev lock is acquired/released. This is needed because
these functions end up calling .ndo_stop()/.ndo_open() on subinterfaces,
and the device may expect the netdev instance lock to be held.

ipoib_set_mode() now explicitly acquires ops lock while manipulating the
features, mtu and tx queues.

Finally, ipoib_napi_enable()/ipoib_napi_disable() now use the *_locked
variants of the napi_enable()/napi_disable() calls and optionally
acquire the netdev lock themselves depending on the dev they operate on.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1747829342-1018757-4-git-send-email-tariqt@nvidia.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 463e5176
Loading
Loading
Loading
Loading
+14 −5
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@

#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/netdev_lock.h>
#include <rdma/ib_cache.h>

#include "ipoib.h"
@@ -781,16 +782,20 @@ static void ipoib_napi_enable(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	napi_enable(&priv->recv_napi);
	napi_enable(&priv->send_napi);
	netdev_lock_ops_to_full(dev);
	napi_enable_locked(&priv->recv_napi);
	napi_enable_locked(&priv->send_napi);
	netdev_unlock_full_to_ops(dev);
}

static void ipoib_napi_disable(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	napi_disable(&priv->recv_napi);
	napi_disable(&priv->send_napi);
	netdev_lock_ops_to_full(dev);
	napi_disable_locked(&priv->recv_napi);
	napi_disable_locked(&priv->send_napi);
	netdev_unlock_full_to_ops(dev);
}

int ipoib_ib_dev_stop_default(struct net_device *dev)
@@ -1240,10 +1245,14 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
		ipoib_ib_dev_down(dev);

	if (level == IPOIB_FLUSH_HEAVY) {
		netdev_lock_ops(dev);
		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
			ipoib_ib_dev_stop(dev);

		if (ipoib_ib_dev_open(dev))
		result = ipoib_ib_dev_open(dev);
		netdev_unlock_ops(dev);

		if (result)
			return;

		if (netif_queue_stopped(dev))
+17 −10
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@
#include <linux/jhash.h>
#include <net/arp.h>
#include <net/addrconf.h>
#include <net/netdev_lock.h>
#include <net/pkt_sched.h>
#include <linux/inetdevice.h>
#include <rdma/ib_cache.h>
@@ -200,10 +201,10 @@ int ipoib_open(struct net_device *dev)
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		netdev_lock(dev);
		netdev_lock_ops_to_full(dev);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			ipoib_schedule_ifupdown_task(cpriv->dev, true);
		netdev_unlock(dev);
		netdev_unlock_full_to_ops(dev);
	} else if (priv->parent) {
		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);

@@ -238,10 +239,10 @@ static int ipoib_stop(struct net_device *dev)
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		netdev_lock(dev);
		netdev_lock_ops_to_full(dev);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			ipoib_schedule_ifupdown_task(cpriv->dev, false);
		netdev_unlock(dev);
		netdev_unlock_full_to_ops(dev);
	}

	return 0;
@@ -566,9 +567,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
		ipoib_warn(priv, "enabling connected mode "
			   "will cause multicast packet drops\n");
		netdev_lock_ops(dev);
		netdev_update_features(dev);
		dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
		netif_set_mtu(dev, ipoib_cm_max_mtu(dev));
		netif_set_real_num_tx_queues(dev, 1);
		netdev_unlock_ops(dev);
		rtnl_unlock();
		priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;

@@ -578,9 +581,11 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)

	if (!strcmp(buf, "datagram\n")) {
		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
		netdev_lock_ops(dev);
		netdev_update_features(dev);
		dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
		netif_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
		netif_set_real_num_tx_queues(dev, dev->num_tx_queues);
		netdev_unlock_ops(dev);
		rtnl_unlock();
		ipoib_flush_paths(dev);
		return (!rtnl_trylock()) ? -EBUSY : 0;
@@ -1247,6 +1252,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work)
	int err;

	rtnl_lock();
	netdev_lock_ops(priv->dev);

	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
		goto unlock;
@@ -1261,6 +1267,7 @@ void ipoib_ib_tx_timeout_work(struct work_struct *work)

	netif_tx_wake_all_queues(priv->dev);
unlock:
	netdev_unlock_ops(priv->dev);
	rtnl_unlock();

}
@@ -2404,10 +2411,10 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
	netif_addr_unlock_bh(netdev);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		netdev_lock(priv->dev);
		netdev_lock_ops_to_full(priv->dev);
		list_for_each_entry(child_priv, &priv->child_intfs, list)
			set_base_guid(child_priv, gid);
		netdev_unlock(priv->dev);
		netdev_unlock_full_to_ops(priv->dev);
	}
}

@@ -2450,10 +2457,10 @@ static int ipoib_set_mac(struct net_device *dev, void *addr)
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		netdev_lock(dev);
		netdev_lock_ops_to_full(dev);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			queue_work(ipoib_workqueue, &cpriv->flush_light);
		netdev_unlock(dev);
		netdev_unlock_full_to_ops(dev);
	}
	queue_work(ipoib_workqueue, &priv->flush_light);