Commit 751dc837 authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

genirq: Introduce common irq_force_complete_move() implementation



CONFIG_GENERIC_PENDING_IRQ requires an architecture specific implementation
of irq_force_complete_move() for CPU hotplug. At the moment, only x86
implements this unconditionally, but for RISC-V irq_force_complete_move()
is only needed when the RISC-V IMSIC driver is in use and not needed
otherwise.

To allow runtime configuration of this mechanism, introduce a common
irq_force_complete_move() implementation in the interrupt core code, which
only invokes the completion function, when a interrupt chip in the
hierarchy implements it.

Switch X86 over to the new mechanism. No functional change intended.

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarAnup Patel <apatel@ventanamicro.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/all/20250217085657.789309-5-apatel@ventanamicro.com
parent fe35ecee
Loading
Loading
Loading
Loading
+108 −123
Original line number Diff line number Diff line
@@ -888,8 +888,109 @@ static int apic_set_affinity(struct irq_data *irqd,
	return err ? err : IRQ_SET_MASK_OK;
}

static void free_moved_vector(struct apic_chip_data *apicd)
{
	unsigned int vector = apicd->prev_vector;
	unsigned int cpu = apicd->prev_cpu;
	bool managed = apicd->is_managed;

	/*
	 * Managed interrupts are usually not migrated away
	 * from an online CPU, but CPU isolation 'managed_irq'
	 * can make that happen.
	 * 1) Activation does not take the isolation into account
	 *    to keep the code simple
	 * 2) Migration away from an isolated CPU can happen when
	 *    a non-isolated CPU which is in the calculated
	 *    affinity mask comes online.
	 */
	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
	irq_matrix_free(vector_matrix, cpu, vector, managed);
	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
	hlist_del_init(&apicd->clist);
	apicd->prev_vector = 0;
	apicd->move_in_progress = 0;
}

/*
 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 */
static void apic_force_complete_move(struct irq_data *irqd)
{
	unsigned int cpu = smp_processor_id();
	struct apic_chip_data *apicd;
	unsigned int vector;

	guard(raw_spinlock)(&vector_lock);
	apicd = apic_chip_data(irqd);
	if (!apicd)
		return;

	/*
	 * If prev_vector is empty or the descriptor is neither currently
	 * nor previously on the outgoing CPU no action required.
	 */
	vector = apicd->prev_vector;
	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
		return;

	/*
	 * This is tricky. If the cleanup of the old vector has not been
	 * done yet, then the following setaffinity call will fail with
	 * -EBUSY. This can leave the interrupt in a stale state.
	 *
	 * All CPUs are stuck in stop machine with interrupts disabled so
	 * calling __irq_complete_move() would be completely pointless.
	 *
	 * 1) The interrupt is in move_in_progress state. That means that we
	 *    have not seen an interrupt since the io_apic was reprogrammed to
	 *    the new vector.
	 *
	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
	 *    have not been processed yet.
	 */
	if (apicd->move_in_progress) {
		/*
		 * In theory there is a race:
		 *
		 * set_ioapic(new_vector) <-- Interrupt is raised before update
		 *			      is effective, i.e. it's raised on
		 *			      the old vector.
		 *
		 * So if the target cpu cannot handle that interrupt before
		 * the old vector is cleaned up, we get a spurious interrupt
		 * and in the worst case the ioapic irq line becomes stale.
		 *
		 * But in case of cpu hotplug this should be a non issue
		 * because if the affinity update happens right before all
		 * cpus rendezvous in stop machine, there is no way that the
		 * interrupt can be blocked on the target cpu because all cpus
		 * loops first with interrupts enabled in stop machine, so the
		 * old vector is not yet cleaned up when the interrupt fires.
		 *
		 * So the only way to run into this issue is if the delivery
		 * of the interrupt on the apic/system bus would be delayed
		 * beyond the point where the target cpu disables interrupts
		 * in stop machine. I doubt that it can happen, but at least
		 * there is a theoretical chance. Virtualization might be
		 * able to expose this, but AFAICT the IOAPIC emulation is not
		 * as stupid as the real hardware.
		 *
		 * Anyway, there is nothing we can do about that at this point
		 * w/o refactoring the whole fixup_irq() business completely.
		 * We print at least the irq number and the old vector number,
		 * so we have the necessary information when a problem in that
		 * area arises.
		 */
		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
			irqd->irq, vector);
	}
	free_moved_vector(apicd);
}

#else
# define apic_set_affinity		NULL
# define apic_force_complete_move	NULL
#endif

static int apic_retrigger_irq(struct irq_data *irqd)
@@ -927,35 +1028,12 @@ static struct irq_chip lapic_controller = {
	.irq_ack			= apic_ack_edge,
	.irq_set_affinity		= apic_set_affinity,
	.irq_compose_msi_msg		= x86_vector_msi_compose_msg,
	.irq_force_complete_move	= apic_force_complete_move,
	.irq_retrigger			= apic_retrigger_irq,
};

#ifdef CONFIG_SMP

static void free_moved_vector(struct apic_chip_data *apicd)
{
	unsigned int vector = apicd->prev_vector;
	unsigned int cpu = apicd->prev_cpu;
	bool managed = apicd->is_managed;

	/*
	 * Managed interrupts are usually not migrated away
	 * from an online CPU, but CPU isolation 'managed_irq'
	 * can make that happen.
	 * 1) Activation does not take the isolation into account
	 *    to keep the code simple
	 * 2) Migration away from an isolated CPU can happen when
	 *    a non-isolated CPU which is in the calculated
	 *    affinity mask comes online.
	 */
	trace_vector_free_moved(apicd->irq, cpu, vector, managed);
	irq_matrix_free(vector_matrix, cpu, vector, managed);
	per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
	hlist_del_init(&apicd->clist);
	apicd->prev_vector = 0;
	apicd->move_in_progress = 0;
}

static void __vector_cleanup(struct vector_cleanup *cl, bool check_irr)
{
	struct apic_chip_data *apicd;
@@ -1068,99 +1146,6 @@ void irq_complete_move(struct irq_cfg *cfg)
		__vector_schedule_cleanup(apicd);
}

/*
 * Called from fixup_irqs() with @desc->lock held and interrupts disabled.
 */
void irq_force_complete_move(struct irq_desc *desc)
{
	unsigned int cpu = smp_processor_id();
	struct apic_chip_data *apicd;
	struct irq_data *irqd;
	unsigned int vector;

	/*
	 * The function is called for all descriptors regardless of which
	 * irqdomain they belong to. For example if an IRQ is provided by
	 * an irq_chip as part of a GPIO driver, the chip data for that
	 * descriptor is specific to the irq_chip in question.
	 *
	 * Check first that the chip_data is what we expect
	 * (apic_chip_data) before touching it any further.
	 */
	irqd = irq_domain_get_irq_data(x86_vector_domain,
				       irq_desc_get_irq(desc));
	if (!irqd)
		return;

	raw_spin_lock(&vector_lock);
	apicd = apic_chip_data(irqd);
	if (!apicd)
		goto unlock;

	/*
	 * If prev_vector is empty or the descriptor is neither currently
	 * nor previously on the outgoing CPU no action required.
	 */
	vector = apicd->prev_vector;
	if (!vector || (apicd->cpu != cpu && apicd->prev_cpu != cpu))
		goto unlock;

	/*
	 * This is tricky. If the cleanup of the old vector has not been
	 * done yet, then the following setaffinity call will fail with
	 * -EBUSY. This can leave the interrupt in a stale state.
	 *
	 * All CPUs are stuck in stop machine with interrupts disabled so
	 * calling __irq_complete_move() would be completely pointless.
	 *
	 * 1) The interrupt is in move_in_progress state. That means that we
	 *    have not seen an interrupt since the io_apic was reprogrammed to
	 *    the new vector.
	 *
	 * 2) The interrupt has fired on the new vector, but the cleanup IPIs
	 *    have not been processed yet.
	 */
	if (apicd->move_in_progress) {
		/*
		 * In theory there is a race:
		 *
		 * set_ioapic(new_vector) <-- Interrupt is raised before update
		 *			      is effective, i.e. it's raised on
		 *			      the old vector.
		 *
		 * So if the target cpu cannot handle that interrupt before
		 * the old vector is cleaned up, we get a spurious interrupt
		 * and in the worst case the ioapic irq line becomes stale.
		 *
		 * But in case of cpu hotplug this should be a non issue
		 * because if the affinity update happens right before all
		 * cpus rendezvous in stop machine, there is no way that the
		 * interrupt can be blocked on the target cpu because all cpus
		 * loops first with interrupts enabled in stop machine, so the
		 * old vector is not yet cleaned up when the interrupt fires.
		 *
		 * So the only way to run into this issue is if the delivery
		 * of the interrupt on the apic/system bus would be delayed
		 * beyond the point where the target cpu disables interrupts
		 * in stop machine. I doubt that it can happen, but at least
		 * there is a theoretical chance. Virtualization might be
		 * able to expose this, but AFAICT the IOAPIC emulation is not
		 * as stupid as the real hardware.
		 *
		 * Anyway, there is nothing we can do about that at this point
		 * w/o refactoring the whole fixup_irq() business completely.
		 * We print at least the irq number and the old vector number,
		 * so we have the necessary information when a problem in that
		 * area arises.
		 */
		pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
			irqd->irq, vector);
	}
	free_moved_vector(apicd);
unlock:
	raw_spin_unlock(&vector_lock);
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Note, this is not accurate accounting, but at least good enough to
+3 −2
Original line number Diff line number Diff line
@@ -486,6 +486,7 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
 * @ipi_send_mask:	send an IPI to destination cpus in cpumask
 * @irq_nmi_setup:	function called from core code before enabling an NMI
 * @irq_nmi_teardown:	function called from core code after disabling an NMI
 * @irq_force_complete_move:	optional function to force complete pending irq move
 * @flags:		chip specific flags
 */
struct irq_chip {
@@ -537,6 +538,8 @@ struct irq_chip {
	int		(*irq_nmi_setup)(struct irq_data *data);
	void		(*irq_nmi_teardown)(struct irq_data *data);

	void		(*irq_force_complete_move)(struct irq_data *data);

	unsigned long	flags;
};

@@ -619,11 +622,9 @@ static inline void irq_move_irq(struct irq_data *data)
		__irq_move_irq(data);
}
void irq_move_masked_irq(struct irq_data *data);
void irq_force_complete_move(struct irq_desc *desc);
#else
static inline void irq_move_irq(struct irq_data *data) { }
static inline void irq_move_masked_irq(struct irq_data *data) { }
static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif

extern int no_irq_affinity;
+2 −0
Original line number Diff line number Diff line
@@ -442,6 +442,7 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
	return desc->pending_mask;
}
bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
void irq_force_complete_move(struct irq_desc *desc);
#else /* CONFIG_GENERIC_PENDING_IRQ */
static inline bool irq_can_move_pcntxt(struct irq_data *data)
{
@@ -467,6 +468,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
{
	return false;
}
static inline void irq_force_complete_move(struct irq_desc *desc) { }
#endif /* !CONFIG_GENERIC_PENDING_IRQ */

#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
+10 −0
Original line number Diff line number Diff line
@@ -35,6 +35,16 @@ bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
	return true;
}

void irq_force_complete_move(struct irq_desc *desc)
{
	for (struct irq_data *d = irq_desc_get_irq_data(desc); d; d = d->parent_data) {
		if (d->chip && d->chip->irq_force_complete_move) {
			d->chip->irq_force_complete_move(d);
			return;
		}
	}
}

void irq_move_masked_irq(struct irq_data *idata)
{
	struct irq_desc *desc = irq_data_to_desc(idata);