Commit 73554b29 authored by Jim Mattson's avatar Jim Mattson Committed by Sean Christopherson
Browse files

KVM: x86/pmu: Synthesize at most one PMI per VM-exit



When the irq_work callback, kvm_pmi_trigger_fn(), is invoked during a
VM-exit that also invokes __kvm_perf_overflow() as a result of
instruction emulation, kvm_pmu_deliver_pmi() will be called twice
before the next VM-entry.

Calling kvm_pmu_deliver_pmi() twice is unlikely to be problematic now that
KVM sets the LVTPC mask bit when delivering a PMI.  But using IRQ work to
trigger the PMI is still broken, albeit very theoretically.

E.g. if the self-IPI to trigger IRQ work is be delayed long enough for the
vCPU to be migrated to a different pCPU, then it's possible for
kvm_pmi_trigger_fn() to race with the kvm_pmu_deliver_pmi() from
KVM_REQ_PMI and still generate two PMIs.

KVM could set the mask bit using an atomic operation, but that'd just be
piling on unnecessary code to workaround what is effectively a hack.  The
*only* reason KVM uses IRQ work is to ensure the PMI is treated as a wake
event, e.g. if the vCPU just executed HLT.

Remove the irq_work callback for synthesizing a PMI, and all of the
logic for invoking it. Instead, to prevent a vcpu from leaving C0 with
a PMI pending, add a check for KVM_REQ_PMI to kvm_vcpu_has_events().

Fixes: 9cd803d4 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: default avatarJim Mattson <jmattson@google.com>
Tested-by: default avatarMingwei Zhang <mizhang@google.com>
Tested-by: default avatarDapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: default avatarMingwei Zhang <mizhang@google.com>
Link: https://lore.kernel.org/r/20230925173448.3518223-2-mizhang@google.com


[sean: massage changelog]
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
parent a16eb25b
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -528,7 +528,6 @@ struct kvm_pmu {
	u64 raw_event_mask;
	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
	struct irq_work irq_work;

	/*
	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
+1 −26
Original line number Diff line number Diff line
@@ -93,14 +93,6 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops)
#undef __KVM_X86_PMU_OP
}

static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
@@ -124,20 +116,7 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
	}

	if (!pmc->intr || skip_pmi)
		return;

	/*
	 * Inject PMI. If vcpu was in a guest mode during NMI PMI
	 * can be ejected on a guest mode re-entry. Otherwise we can't
	 * be sure that vcpu wasn't executing hlt instruction at the
	 * time of vmexit and is not going to re-enter guest mode until
	 * woken up. So we should wake it, but this is impossible from
	 * NMI context. Do it from irq work instead.
	 */
	if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
		irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
	else
	if (pmc->intr && !skip_pmi)
		kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
}

@@ -675,9 +654,6 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	static_call(kvm_x86_pmu_reset)(vcpu);
}

@@ -687,7 +663,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)

	memset(pmu, 0, sizeof(*pmu));
	static_call(kvm_x86_pmu_init)(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	pmu->event_count = 0;
	pmu->need_cleanup = false;
	kvm_pmu_refresh(vcpu);
+3 −0
Original line number Diff line number Diff line
@@ -12843,6 +12843,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
		return true;
#endif

	if (kvm_test_request(KVM_REQ_PMI, vcpu))
		return true;

	if (kvm_arch_interrupt_allowed(vcpu) &&
	    (kvm_cpu_has_interrupt(vcpu) ||
	    kvm_guest_apic_has_interrupt(vcpu)))