Commit 72a813a4 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'mlxsw-new-reset-flow'

Petr Machata says:

====================
mlxsw: Add support for new reset flow

Ido Schimmel writes:

This patchset changes mlxsw to issue a PCI reset during probe and
devlink reload so that the PCI firmware could be upgraded without a
reboot.

Unlike the old version of this patchset [1], in this version the driver
no longer tries to issue a PCI reset by triggering a PCI link toggle on
its own, but instead calls the PCI core to issue the reset.

The PCI APIs require the device lock to be held which is why patches

Patches #7 adds reset method quirk for NVIDIA Spectrum devices.

Patch #8 adds a debug level print in PCI core so that device ready delay
will be printed even if it is shorter than one second.

Patches #9-#11 are straightforward preparations in mlxsw.

Patch #12 finally implements the new reset flow in mlxsw.

Patch #13 adds PCI reset handlers in mlxsw to avoid user space from
resetting the device from underneath an unaware driver. Instead, the
driver is gracefully de-initialized before the PCI reset and then
initialized again after it.

Patch #14 adds a PCI reset selftest to make sure this code path does not
regress.

[1] https://lore.kernel.org/netdev/cover.1679502371.git.petrm@nvidia.com/


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4dce97b1 af51d6bd
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1484,8 +1484,8 @@ operations:
      dont-validate: [ strict ]
      flags: [ admin-perm ]
      do:
        pre: devlink-nl-pre-doit
        post: devlink-nl-post-doit
        pre: devlink-nl-pre-doit-dev-lock
        post: devlink-nl-post-doit-dev-lock
        request:
          attributes:
            - bus-name
+83 −7
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ struct mlxsw_pci {
	const struct pci_device_id *id;
	enum mlxsw_pci_cqe_v max_cqe_ver; /* Maximal supported CQE version */
	u8 num_sdq_cqs; /* Number of CQs used for SDQs */
	bool skip_reset;
};

static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q)
@@ -1476,11 +1477,47 @@ static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
	return -EBUSY;
}

static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
			      const struct pci_device_id *id)
static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
{
	struct pci_dev *pdev = mlxsw_pci->pdev;
	char mrsr_pl[MLXSW_REG_MRSR_LEN];
	int err;

	mlxsw_reg_mrsr_pack(mrsr_pl,
			    MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
	if (err)
		return err;

	device_lock_assert(&pdev->dev);

	pci_cfg_access_lock(pdev);
	pci_save_state(pdev);

	err = __pci_reset_function_locked(pdev);
	if (err)
		pci_err(pdev, "PCI function reset failed with %d\n", err);

	pci_restore_state(pdev);
	pci_cfg_access_unlock(pdev);

	return err;
}

static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
{
	char mrsr_pl[MLXSW_REG_MRSR_LEN];

	mlxsw_reg_mrsr_pack(mrsr_pl, MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET);
	return mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
}

static int
mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
{
	struct pci_dev *pdev = mlxsw_pci->pdev;
	char mcam_pl[MLXSW_REG_MCAM_LEN];
	bool pci_reset_supported;
	u32 sys_status;
	int err;

@@ -1491,11 +1528,27 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
		return err;
	}

	mlxsw_reg_mrsr_pack(mrsr_pl);
	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
	/* PCI core already issued a PCI reset, do not issue another reset. */
	if (mlxsw_pci->skip_reset)
		return 0;

	mlxsw_reg_mcam_pack(mcam_pl,
			    MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
	err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
	if (err)
		return err;

	mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
			      &pci_reset_supported);

	if (pci_reset_supported) {
		pci_dbg(pdev, "Starting PCI reset flow\n");
		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
	} else {
		pci_dbg(pdev, "Starting software reset flow\n");
		err = mlxsw_pci_reset_sw(mlxsw_pci);
	}

	err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
	if (err) {
		dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
@@ -1537,9 +1590,9 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
	if (!mbox)
		return -ENOMEM;

	err = mlxsw_pci_sw_reset(mlxsw_pci, mlxsw_pci->id);
	err = mlxsw_pci_reset(mlxsw_pci, mlxsw_pci->id);
	if (err)
		goto err_sw_reset;
		goto err_reset;

	err = mlxsw_pci_alloc_irq_vectors(mlxsw_pci);
	if (err < 0) {
@@ -1672,7 +1725,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
err_query_fw:
	mlxsw_pci_free_irq_vectors(mlxsw_pci);
err_alloc_irq:
err_sw_reset:
err_reset:
mbox_put:
	mlxsw_cmd_mbox_free(mbox);
	return err;
@@ -2059,11 +2112,34 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
	kfree(mlxsw_pci);
}

static void mlxsw_pci_reset_prepare(struct pci_dev *pdev)
{
	struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);

	mlxsw_core_bus_device_unregister(mlxsw_pci->core, false);
}

static void mlxsw_pci_reset_done(struct pci_dev *pdev)
{
	struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev);

	mlxsw_pci->skip_reset = true;
	mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus,
				       mlxsw_pci, false, NULL, NULL);
	mlxsw_pci->skip_reset = false;
}

static const struct pci_error_handlers mlxsw_pci_err_handler = {
	.reset_prepare = mlxsw_pci_reset_prepare,
	.reset_done = mlxsw_pci_reset_done,
};

int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
{
	pci_driver->probe = mlxsw_pci_probe;
	pci_driver->remove = mlxsw_pci_remove;
	pci_driver->shutdown = mlxsw_pci_remove;
	pci_driver->err_handler = &mlxsw_pci_err_handler;
	return pci_register_driver(pci_driver);
}
EXPORT_SYMBOL(mlxsw_pci_driver_register);
+14 −2
Original line number Diff line number Diff line
@@ -10122,6 +10122,15 @@ mlxsw_reg_mgir_unpack(char *payload, u32 *hw_rev, char *fw_info_psid,

MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);

enum mlxsw_reg_mrsr_command {
	/* Switch soft reset, does not reset PCI firmware. */
	MLXSW_REG_MRSR_COMMAND_SOFTWARE_RESET = 1,
	/* Reset will be done when PCI link will be disabled.
	 * This command will reset PCI firmware also.
	 */
	MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE = 6,
};

/* reg_mrsr_command
 * Reset/shutdown command
 * 0 - do nothing
@@ -10130,10 +10139,11 @@ MLXSW_REG_DEFINE(mrsr, MLXSW_REG_MRSR_ID, MLXSW_REG_MRSR_LEN);
 */
MLXSW_ITEM32(reg, mrsr, command, 0x00, 0, 4);

static inline void mlxsw_reg_mrsr_pack(char *payload)
static inline void mlxsw_reg_mrsr_pack(char *payload,
				       enum mlxsw_reg_mrsr_command command)
{
	MLXSW_REG_ZERO(mrsr, payload);
	mlxsw_reg_mrsr_command_set(payload, 1);
	mlxsw_reg_mrsr_command_set(payload, command);
}

/* MLCR - Management LED Control Register
@@ -10584,6 +10594,8 @@ MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
	/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
	MLXSW_REG_MCAM_MCIA_128B = 34,
	/* If set, MRSR.command=6 is supported. */
	MLXSW_REG_MCAM_PCI_RESET = 48,
};

#define MLXSW_REG_BYTES_PER_DWORD 0x4
+3 −0
Original line number Diff line number Diff line
@@ -1219,6 +1219,9 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout)
	if (delay > PCI_RESET_WAIT)
		pci_info(dev, "ready %dms after %s\n", delay - 1,
			 reset_type);
	else
		pci_dbg(dev, "ready %dms after %s\n", delay - 1,
			reset_type);

	return 0;
}
+13 −0
Original line number Diff line number Diff line
@@ -3786,6 +3786,19 @@ static void quirk_no_pm_reset(struct pci_dev *dev)
DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, PCI_ANY_ID,
			       PCI_CLASS_DISPLAY_VGA, 8, quirk_no_pm_reset);

/*
 * Spectrum-{1,2,3,4} devices report that a D3hot->D0 transition causes a reset
 * (i.e., they advertise NoSoftRst-). However, this transition does not have
 * any effect on the device: It continues to be operational and network ports
 * remain up. Advertising this support makes it seem as if a PM reset is viable
 * for these devices. Mark it as unavailable to skip it when testing reset
 * methods.
 */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcb84, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf6c, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf70, quirk_no_pm_reset);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MELLANOX, 0xcf80, quirk_no_pm_reset);

/*
 * Thunderbolt controllers with broken MSI hotplug signaling:
 * Entire 1st generation (Light Ridge, Eagle Ridge, Light Peak) and part
Loading