Commit 75360a9a authored by Oliver Upton's avatar Oliver Upton Committed by Marc Zyngier
Browse files

KVM: arm64: vgic-v3: Reinstate IRQ lock ordering for LPI xarray



Zenghui reports that running a KVM guest with an assigned device and
lockdep enabled produces an unfriendly splat due to an inconsistent irq
context when taking the lpi_xa's spinlock.

This is no good as in rare cases the last reference to an LPI can get
dropped after injection of a cached LPI translation. In this case,
vgic_put_irq() will release the IRQ struct and take the lpi_xa's
spinlock to erase it from the xarray.

Reinstate the IRQ ordering and update the lockdep hint accordingly. Note
that there is no irqsave equivalent of might_lock(), so just explictly
grab and release the spinlock on lockdep kernels.

Reported-by: default avatarZenghui Yu <yuzenghui@huawei.com>
Closes: https://lore.kernel.org/kvmarm/b4d7cb0f-f007-0b81-46d1-998b15cc14bc@huawei.com/


Fixes: 982f31bb ("KVM: arm64: vgic-v3: Don't require IRQs be disabled for LPI xarray lock")
Signed-off-by: default avatarOliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251107184847.1784820-2-oupton@kernel.org


Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
parent 50e7cce8
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
static int iter_mark_lpis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long intid, flags;
	struct vgic_irq *irq;
	unsigned long intid;
	int nr_lpis = 0;

	xa_lock_irqsave(&dist->lpi_xa, flags);

	xa_for_each(&dist->lpi_xa, intid, irq) {
		if (!vgic_try_get_irq_ref(irq))
			continue;

		xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
		__xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
		nr_lpis++;
	}

	xa_unlock_irqrestore(&dist->lpi_xa, flags);

	return nr_lpis;
}

static void iter_unmark_lpis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long intid, flags;
	struct vgic_irq *irq;
	unsigned long intid;

	xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) {
		xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
		xa_lock_irqsave(&dist->lpi_xa, flags);
		__xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
		xa_unlock_irqrestore(&dist->lpi_xa, flags);

		/* vgic_put_irq() expects to be called outside of the xa_lock */
		vgic_put_irq(kvm, irq);
	}
}
+1 −1
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;

	xa_init(&dist->lpi_xa);
	xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}

/* CREATION */
+4 −3
Original line number Diff line number Diff line
@@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq;
	unsigned long flags;
	int ret;

	/* In this case there is no put, since we keep the reference. */
@@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	if (!irq)
		return ERR_PTR(-ENOMEM);

	ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
	ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT);
	if (ret) {
		kfree(irq);
		return ERR_PTR(ret);
@@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	irq->target_vcpu = vcpu;
	irq->group = 1;

	xa_lock(&dist->lpi_xa);
	xa_lock_irqsave(&dist->lpi_xa, flags);

	/*
	 * There could be a race with another vgic_add_lpi(), so we need to
@@ -125,7 +126,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
	}

out_unlock:
	xa_unlock(&dist->lpi_xa);
	xa_unlock_irqrestore(&dist->lpi_xa, flags);

	if (ret)
		return ERR_PTR(ret);
+15 −8
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_dist->lpi_xa.xa_lock
 *           vgic_dist->lpi_xa.xa_lock		must be taken with IRQs disabled
 *             vgic_cpu->ap_list_lock		must be taken with IRQs disabled
 *               vgic_irq->irq_lock		must be taken with IRQs disabled
 *
@@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long flags;

	if (irq->intid >= VGIC_MIN_LPI)
		might_lock(&dist->lpi_xa.xa_lock);
	/*
	 * Normally the lock is only taken when the refcount drops to 0.
	 * Acquire/release it early on lockdep kernels to make locking issues
	 * in rare release paths a bit more obvious.
	 */
	if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) {
		guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
	}

	if (!__vgic_put_irq(kvm, irq))
		return;

	xa_lock(&dist->lpi_xa);
	xa_lock_irqsave(&dist->lpi_xa, flags);
	vgic_release_lpi_locked(dist, irq);
	xa_unlock(&dist->lpi_xa);
	xa_unlock_irqrestore(&dist->lpi_xa, flags);
}

static void vgic_release_deleted_lpis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long intid;
	unsigned long flags, intid;
	struct vgic_irq *irq;

	xa_lock(&dist->lpi_xa);
	xa_lock_irqsave(&dist->lpi_xa, flags);

	xa_for_each(&dist->lpi_xa, intid, irq) {
		if (irq->pending_release)
			vgic_release_lpi_locked(dist, irq);
	}

	xa_unlock(&dist->lpi_xa);
	xa_unlock_irqrestore(&dist->lpi_xa, flags);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)