Commit f83a30f4 authored by Rodrigo Vivi's avatar Rodrigo Vivi
Browse files

drm/xe: Fix an invalid locking wait context bug

We cannot have spin locks around xe_irq_reset, since it will
call the intel_display_power_is_enabled() function, and
that needs a mutex lock. Hence causing the undesired
"[ BUG: Invalid wait context ]"

We cannot convert i915's power domain lock to spin lock
due to the nested dependency of non-atomic context waits.

So, let's move the xe_irq_reset functions from the
critical area, while still ensuring that we are protecting
the irq.enabled and ensuring the right serialization
in the irq handlers.

v2: On the first version, I had missed the fact that
irq.enabled is checked on the xe/display glue layer,
and that i915 display code is actually using the irq
spin lock properly. So, this got changed to a version
suggested by Matthew Auld.

v3: do not use lockdep_assert for display glue.
    do not save restore irq from inside IRQ or we can
    get bogus irq restore warnings

Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/463


Suggested-by: default avatarMatthew Auld <matthew.auld@intel.com>
Reviewed-by: default avatarMatthew Auld <matthew.auld@intel.com>
Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
parent fda48d15
Loading
Loading
Loading
Loading
+26 −6
Original line number Diff line number Diff line
@@ -308,6 +308,13 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg)
	unsigned long intr_dw[2];
	u32 identity[32];

	spin_lock(&xe->irq.lock);
	if (!xe->irq.enabled) {
		spin_unlock(&xe->irq.lock);
		return IRQ_NONE;
	}
	spin_unlock(&xe->irq.lock);

	master_ctl = xelp_intr_disable(xe);
	if (!master_ctl) {
		xelp_intr_enable(xe, false);
@@ -366,6 +373,13 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)

	/* TODO: This really shouldn't be copied+pasted */

	spin_lock(&xe->irq.lock);
	if (!xe->irq.enabled) {
		spin_unlock(&xe->irq.lock);
		return IRQ_NONE;
	}
	spin_unlock(&xe->irq.lock);

	master_tile_ctl = dg1_intr_disable(xe);
	if (!master_tile_ctl) {
		dg1_intr_enable(xe, false);
@@ -563,10 +577,14 @@ void xe_irq_shutdown(struct xe_device *xe)

void xe_irq_suspend(struct xe_device *xe)
{
	int irq = to_pci_dev(xe->drm.dev)->irq;

	spin_lock_irq(&xe->irq.lock);
	xe->irq.enabled = false;
	xe_irq_reset(xe);
	xe->irq.enabled = false; /* no new irqs */
	spin_unlock_irq(&xe->irq.lock);

	synchronize_irq(irq); /* flush irqs */
	xe_irq_reset(xe); /* turn irqs off */
}

void xe_irq_resume(struct xe_device *xe)
@@ -574,13 +592,15 @@ void xe_irq_resume(struct xe_device *xe)
	struct xe_gt *gt;
	int id;

	spin_lock_irq(&xe->irq.lock);
	/*
	 * lock not needed:
	 * 1. no irq will arrive before the postinstall
	 * 2. display is not yet resumed
	 */
	xe->irq.enabled = true;
	xe_irq_reset(xe);
	xe_irq_postinstall(xe);
	xe_irq_postinstall(xe); /* turn irqs on */

	for_each_gt(gt, xe, id)
		xe_irq_enable_hwe(gt);

	spin_unlock_irq(&xe->irq.lock);
}