Commit ce0b5eed authored by Thomas Gleixner's avatar Thomas Gleixner
Browse files

x86/irq: Plug vector setup race



Hogan reported a vector setup race, which overwrites the interrupt
descriptor in the per CPU vector array resulting in a disfunctional device.

CPU0				CPU1
				interrupt is raised in APIC IRR
				but not handled
  free_irq()
    per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;

  request_irq()			common_interrupt()
  				  d = this_cpu_read(vector_irq[vector]);

    per_cpu(vector_irq, CPU1)[vector] = desc;

    				  if (d == VECTOR_SHUTDOWN)
				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);

free_irq() cannot observe the pending vector in the CPU1 APIC as there is
no way to query the remote CPUs APIC IRR.

This requires that request_irq() uses the same vector/CPU as the one which
was freed, but this also can be triggered by a spurious interrupt.

Interestingly enough this problem managed to be hidden for more than a
decade.

Prevent this by reevaluating vector_irq under the vector lock, which is
held by the interrupt activation code when vector_irq is updated.

To avoid ifdeffery or IS_ENABLED() nonsense, move the
[un]lock_vector_lock() declarations out under the
CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when
CONFIG_X86_LOCAL_APIC=y.

The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by
CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the
Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n
fail.

Can we just get rid of this !APIC nonsense once and forever?

Fixes: 9345005f ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs")
Reported-by: default avatarHogan Wang <hogan.wang@huawei.com>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Tested-by: default avatarHogan Wang <hogan.wang@huawei.com>
Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx
parent 49f84878
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -92,8 +92,6 @@ struct irq_cfg {

extern struct irq_cfg *irq_cfg(unsigned int irq);
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
#ifdef CONFIG_SMP
extern void vector_schedule_cleanup(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
@@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg);
static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif

extern void apic_ack_edge(struct irq_data *data);
#else	/*  CONFIG_IRQ_DOMAIN_HIERARCHY */
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */

#ifdef CONFIG_X86_LOCAL_APIC
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
#else
static inline void lock_vector_lock(void) {}
static inline void unlock_vector_lock(void) {}
#endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
#endif

/* Statistics */
extern atomic_t irq_err_count;
+48 −15
Original line number Diff line number Diff line
@@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,
		__handle_irq(desc, regs);
}

static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
static struct irq_desc *reevaluate_vector(int vector)
{
	struct irq_desc *desc;
	int ret = 0;
	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);

	desc = __this_cpu_read(vector_irq[vector]);
	if (likely(!IS_ERR_OR_NULL(desc))) {
		handle_irq(desc, regs);
	} else {
		ret = -EINVAL;
		if (desc == VECTOR_UNUSED) {
			pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
					     __func__, smp_processor_id(),
					     vector);
		} else {
	if (!IS_ERR_OR_NULL(desc))
		return desc;

	if (desc == VECTOR_UNUSED)
		pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
	else
		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
	return NULL;
}

static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
{
	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);

	if (likely(!IS_ERR_OR_NULL(desc))) {
		handle_irq(desc, regs);
		return true;
	}

	return ret;
	/*
	 * Reevaluate with vector_lock held to prevent a race against
	 * request_irq() setting up the vector:
	 *
	 * CPU0				CPU1
	 *				interrupt is raised in APIC IRR
	 *				but not handled
	 * free_irq()
	 *   per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
	 *
	 * request_irq()		common_interrupt()
	 *				  d = this_cpu_read(vector_irq[vector]);
	 *
	 * per_cpu(vector_irq, CPU1)[vector] = desc;
	 *
	 *				  if (d == VECTOR_SHUTDOWN)
	 *				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
	 *
	 * This requires that the same vector on the same target CPU is
	 * handed out or that a spurious interrupt hits that CPU/vector.
	 */
	lock_vector_lock();
	desc = reevaluate_vector(vector);
	unlock_vector_lock();

	if (!desc)
		return false;

	handle_irq(desc, regs);
	return true;
}

/*
@@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
	/* entry code tells RCU that we're not quiescent.  Check it. */
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");

	if (unlikely(call_irq_handler(vector, regs)))
	if (unlikely(!call_irq_handler(vector, regs)))
		apic_eoi();

	set_irq_regs(old_regs);