Commit 7d768563 authored by Paolo Bonzini
Browse files

Merge branch 'kvm-pi-fix-lockdep' into HEAD

parents b6262dd6 c0b8dcab
Loading
Loading
Loading
Loading
+30 −7
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
 */
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);

/*
 * Lockdep subclass passed when the per-CPU wakeup lock is acquired on the
 * "sched out" path, so that lockdep can distinguish that acquisition from
 * the one made in regular/IRQ context.
 */
#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING

static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
	return &(to_vmx(vcpu)->pi_desc);
@@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
	 * current pCPU if the task was migrated.
	 */
	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
		raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);

		/*
		 * In addition to taking the wakeup lock for the regular/IRQ
		 * context, tell lockdep it is being taken for the "sched out"
		 * context as well.  vCPU loads happen in task context, and
		 * this is taking the lock of the *previous* CPU, i.e. can race
		 * with both the scheduler and the wakeup handler.
		 */
		raw_spin_lock(spinlock);
		spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
		list_del(&vmx->pi_wakeup_list);
		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
		spin_release(&spinlock->dep_map, _RET_IP_);
		raw_spin_unlock(spinlock);
	}

	dest = cpu_physical_id(cpu);
@@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct pi_desc old, new;
	unsigned long flags;

	local_irq_save(flags);
	lockdep_assert_irqs_disabled();

	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
	/*
	 * Acquire the wakeup lock using the "sched out" context to work around
	 * a lockdep false positive.  When this is called, schedule() holds
	 * various per-CPU scheduler locks.  When the wakeup handler runs, it
	 * holds this CPU's wakeup lock while calling try_to_wake_up(), which
	 * can eventually take the aforementioned scheduler locks, which causes
	 * lockdep to assume there is a deadlock.
	 *
	 * Deadlock can't actually occur because IRQs are disabled for the
	 * entirety of the sched_out critical section, i.e. the wakeup handler
	 * can't run while the scheduler locks are held.
	 */
	raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
			     PI_LOCK_SCHED_OUT);
	list_add_tail(&vmx->pi_wakeup_list,
		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
@@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
	 */
	if (pi_test_on(&new))
		__apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);

	local_irq_restore(flags);
}

static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)