Commit eaea5130 authored by Wen Xiong's avatar Wen Xiong Committed by Martin K. Petersen
Browse files

scsi: qla2xxx: Enable/disable IRQD_NO_BALANCING during reset



A dynamic remove/add storage adapter test hits EEH on PowerPC:

  EEH: [c00000000004f77c] __eeh_send_failure_event+0x7c/0x160
  EEH: [c000000000048464] eeh_dev_check_failure.part.0+0x254/0x660
  EEH: [c000000000934e0c] __pci_read_msi_msg+0x1ac/0x280
  EEH: [c000000000100f68] pseries_msi_compose_msg+0x28/0x40
  EEH: [c00000000020e1cc] irq_chip_compose_msi_msg+0x5c/0x90
  EEH: [c000000000214b1c] msi_domain_set_affinity+0xbc/0x100
  EEH: [c000000000206be4] irq_do_set_affinity+0x214/0x2c0
  EEH: [c000000000206e04] irq_set_affinity_locked+0x174/0x230
  EEH: [c000000000207044] irq_set_affinity+0x64/0xa0
  EEH: [c000000000212890] write_irq_affinity.constprop.0.isra.0+0x130/0x150
  EEH: [c00000000068868c] proc_reg_write+0xfc/0x160
  EEH: [c0000000005adb48] vfs_write+0xf8/0x4e0
  EEH: [c0000000005ae234] ksys_write+0x84/0x140
  EEH: [c00000000002e994] system_call_exception+0x164/0x310
  EEH: [c00000000000bfe8] system_call_vectored_common+0xe8/0x278

The irqbalance daemon kicks in before qla2xxx->slot_reset is invoked
during the EEH recovery process.

  irqbalance daemon
  ->irq_set_affinity()
  ->msi_domain_set_affinity()
  ->irq_chip_set_affinity_parent()
  ->xive_irq_set_affinity()
  ->pseries_msi_compose_msg()
  ->__pci_read_msi_msg()
  ->irq_chip_compose_msi_msg()

In __pci_read_msi_msg(), the first MSI-X vector is set to all F by the
irqbalance daemon.  pci_write_msg_msix: index=0, lo=ffffffff hi=ffffffff

IRQ balancing is not required during adapter reset.

Enable the "IRQ_NO_BALANCING" bit before starting adapter reset and
disable it after calling pci_restore_state(). The irqbalance daemon is
disabled for this short period of time (~2s).

Co-developed-by: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
Signed-off-by: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
Signed-off-by: Wen Xiong <wenxiong@linux.ibm.com>
Link: https://patch.msgid.link/20251028142427.3969819-3-wenxiong@linux.ibm.com


Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 6ac3484f
Loading
Loading
Loading
Loading
+30 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/crash_dump.h>
#include <linux/trace_events.h>
#include <linux/trace.h>
#include <linux/irq.h>

#include <scsi/scsi_tcq.h>
#include <scsi/scsicam.h>
@@ -7776,6 +7777,31 @@ static void qla_pci_error_cleanup(scsi_qla_host_t *vha)
}


/**
 * qla2xxx_set_affinity_nobalance - set or clear IRQ_NO_BALANCING on base vectors
 * @pdev: pci_dev struct for a qla2xxx device
 * @flag: true sets the "IRQ_NO_BALANCING" status flag for the msix
 *	  interrupts, false clears it
 *
 * Walks the first QLA_BASE_VECTORS interrupt vectors of @pdev and sets or
 * clears the "IRQ_NO_BALANCING" status flag on each of them. This is meant
 * to keep the irqbalance daemon from kicking in during adapter reset.
 *
 * Vectors for which pci_irq_vector() returns an error are skipped.
 */
static void qla2xxx_set_affinity_nobalance(struct pci_dev *pdev, bool flag)
{
	int irq, i;

	for (i = 0; i < QLA_BASE_VECTORS; i++) {
		irq = pci_irq_vector(pdev, i);
		/* A negative value is an errno, not a Linux IRQ number. */
		if (irq < 0)
			continue;

		if (flag)
			irq_set_status_flags(irq, IRQ_NO_BALANCING);
		else
			irq_clear_status_flags(irq, IRQ_NO_BALANCING);
	}
}

static pci_ers_result_t
qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
@@ -7794,6 +7820,8 @@ qla2xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
		goto out;
	}

	qla2xxx_set_affinity_nobalance(pdev, false);

	switch (state) {
	case pci_channel_io_normal:
		qla_pci_set_eeh_busy(vha);
@@ -7935,6 +7963,8 @@ qla2xxx_pci_slot_reset(struct pci_dev *pdev)
	ql_dbg(ql_dbg_aer, base_vha, 0x900e,
	    "Slot Reset returning %x.\n", ret);

	qla2xxx_set_affinity_nobalance(pdev, true);

	return ret;
}