Commit 325eb217 authored by Michael Chan, committed by Jakub Kicinski
Browse files

bnxt_en: bring back rtnl_lock() in the bnxt_open() path



Error recovery, PCIe AER, resume, and TX timeout will invoke bnxt_open()
with netdev_lock only.  This will cause RTNL assert failures in
netif_set_real_num_tx_queues() and in the other helpers on the open path
that assert RTNL.

Example error recovery assert:

RTNL: assertion failed at net/core/dev.c (3178)
WARNING: CPU: 3 PID: 3392 at net/core/dev.c:3178 netif_set_real_num_tx_queues+0x1fd/0x210

Call Trace:
 <TASK>
 ? __pfx_bnxt_msix+0x10/0x10 [bnxt_en]
 __bnxt_open_nic+0x1ef/0xb20 [bnxt_en]
 bnxt_open+0xda/0x130 [bnxt_en]
 bnxt_fw_reset_task+0x21f/0x780 [bnxt_en]
 process_scheduled_works+0x9d/0x400

For now, bring back rtnl_lock() in all these code paths that can invoke
bnxt_open().  In the bnxt_queue_start() error path, we don't have
rtnl_lock held so we just change it to call netif_close() instead of
bnxt_reset_task() for simplicity.  This error path is unlikely so it
should be fine.

Fixes: 004b5008 ("eth: bnxt: remove most dependencies on RTNL")
Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Link: https://patch.msgid.link/20250514062908.2766677-1-michael.chan@broadcom.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 92ec4855
Loading
Loading
Loading
Loading
+29 −7
Original line number Diff line number Diff line
@@ -14013,13 +14013,28 @@ static void bnxt_unlock_sp(struct bnxt *bp)
	netdev_unlock(bp->dev);
}

/* Same as bnxt_lock_sp() with additional rtnl_lock.
 *
 * Clears BNXT_STATE_IN_SP_TASK in bp->state, then takes rtnl_lock followed
 * by the netdev instance lock of bp->dev.  Pair with bnxt_rtnl_unlock_sp().
 */
static void bnxt_rtnl_lock_sp(struct bnxt *bp)
{
	/* NOTE(review): the bit is presumably dropped before blocking on the
	 * locks so that a lock holder waiting for the SP task to finish can
	 * make progress -- confirm against bnxt_lock_sp().
	 */
	clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
	rtnl_lock();
	netdev_lock(bp->dev);
}

/* Counterpart of bnxt_rtnl_lock_sp(): sets BNXT_STATE_IN_SP_TASK again in
 * bp->state, then drops the netdev instance lock of bp->dev and rtnl_lock,
 * i.e. in the reverse order of acquisition.
 */
static void bnxt_rtnl_unlock_sp(struct bnxt *bp)
{
	/* The bit is restored while the locks are still held. */
	set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
	netdev_unlock(bp->dev);
	rtnl_unlock();
}

/* Only called from bnxt_sp_task().
 *
 * Runs bnxt_reset_task(bp, silent) if the device is in BNXT_STATE_OPEN.
 *
 * NOTE(review): this listing is a diff view whose +/- markers were lost
 * (the hunk summary reports +29 -7).  The bnxt_lock_sp()/bnxt_unlock_sp()
 * calls below are presumably the removed lines and the bnxt_rtnl_*_sp()
 * calls their replacements, matching the commit message's intent of
 * bringing rtnl_lock back on paths that can reach bnxt_open() -- confirm
 * against the actual tree.
 */
static void bnxt_reset(struct bnxt *bp, bool silent)
{
	bnxt_lock_sp(bp);
	bnxt_rtnl_lock_sp(bp);
	/* Skip the reset entirely when the device is not open. */
	if (test_bit(BNXT_STATE_OPEN, &bp->state))
		bnxt_reset_task(bp, silent);
	bnxt_unlock_sp(bp);
	bnxt_rtnl_unlock_sp(bp);
}

/* Only called from bnxt_sp_task() */
@@ -14027,9 +14042,9 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
{
	int i;

	bnxt_lock_sp(bp);
	bnxt_rtnl_lock_sp(bp);
	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
		bnxt_unlock_sp(bp);
		bnxt_rtnl_unlock_sp(bp);
		return;
	}
	/* Disable and flush TPA before resetting the RX ring */
@@ -14068,7 +14083,7 @@ static void bnxt_rx_ring_reset(struct bnxt *bp)
	}
	if (bp->flags & BNXT_FLAG_TPA)
		bnxt_set_tpa(bp, true);
	bnxt_unlock_sp(bp);
	bnxt_rtnl_unlock_sp(bp);
}

static void bnxt_fw_fatal_close(struct bnxt *bp)
@@ -14960,15 +14975,17 @@ static void bnxt_fw_reset_task(struct work_struct *work)
		bp->fw_reset_state = BNXT_FW_RESET_STATE_OPENING;
		fallthrough;
	case BNXT_FW_RESET_STATE_OPENING:
		while (!netdev_trylock(bp->dev)) {
		while (!rtnl_trylock()) {
			bnxt_queue_fw_reset_work(bp, HZ / 10);
			return;
		}
		netdev_lock(bp->dev);
		rc = bnxt_open(bp->dev);
		if (rc) {
			netdev_err(bp->dev, "bnxt_open() failed during FW reset\n");
			bnxt_fw_reset_abort(bp, rc);
			netdev_unlock(bp->dev);
			rtnl_unlock();
			goto ulp_start;
		}

@@ -14988,6 +15005,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
			bnxt_dl_health_fw_status_update(bp, true);
		}
		netdev_unlock(bp->dev);
		rtnl_unlock();
		bnxt_ulp_start(bp, 0);
		bnxt_reenable_sriov(bp);
		netdev_lock(bp->dev);
@@ -15936,7 +15954,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
		   rc);
	napi_enable_locked(&bnapi->napi);
	bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons);
	bnxt_reset_task(bp, true);
	netif_close(dev);
	return rc;
}

@@ -16752,6 +16770,7 @@ static int bnxt_resume(struct device *device)
	struct bnxt *bp = netdev_priv(dev);
	int rc = 0;

	rtnl_lock();
	netdev_lock(dev);
	rc = pci_enable_device(bp->pdev);
	if (rc) {
@@ -16796,6 +16815,7 @@ static int bnxt_resume(struct device *device)

resume_exit:
	netdev_unlock(bp->dev);
	rtnl_unlock();
	bnxt_ulp_start(bp, rc);
	if (!rc)
		bnxt_reenable_sriov(bp);
@@ -16961,6 +16981,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
	int err;

	netdev_info(bp->dev, "PCI Slot Resume\n");
	rtnl_lock();
	netdev_lock(netdev);

	err = bnxt_hwrm_func_qcaps(bp);
@@ -16978,6 +16999,7 @@ static void bnxt_io_resume(struct pci_dev *pdev)
		netif_device_attach(netdev);

	netdev_unlock(netdev);
	rtnl_unlock();
	bnxt_ulp_start(bp, err);
	if (!err)
		bnxt_reenable_sriov(bp);