Commit 7649412a authored by Sean Christopherson's avatar Sean Christopherson
Browse files

KVM: x86: Load guest/host PKRU outside of the fastpath run loop



Move KVM's swapping of PKRU outside of the fastpath loop, as there is no
KVM code anywhere in the fastpath that accesses guest/userspace memory,
i.e. that can consume protection keys.

As documented by commit 1be0e61c ("KVM, pkeys: save/restore PKRU when
guest/host switches"), KVM just needs to ensure the host's PKRU is loaded
when KVM (or the kernel at-large) may access userspace memory.  And at the
time of commit 1be0e61c, KVM didn't have a fastpath, and PKU was
strictly contained to VMX, i.e. there was no reason to swap PKRU outside
of vmx_vcpu_run().

Over time, the "need" to swap PKRU close to VM-Enter was likely falsely
solidified by the association with XFEATUREs in commit 37486135
("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c"),
and XFEATURE swapping was in turn moved close to VM-Enter/VM-Exit as a
KVM hack-a-fix ution for an #MC handler bug by commit 1811d979
("x86/kvm: move kvm_load/put_guest_xcr0 into atomic context").

Deferring the PKRU loads shaves ~40 cycles off the fastpath for Intel,
and ~60 cycles for AMD.  E.g. using INVD in KVM-Unit-Test's vmexit.c,
with extra hacks to enable CR4.PKE and PKRU=(-1u & ~0x3), latency numbers
for AMD Turin go from ~1560 => ~1500, and for Intel Emerald Rapids, go
from ~810 => ~770.

Reviewed-by: default avatarRick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: default avatarJon Kohler <jon@nutanix.com>
Link: https://patch.msgid.link/20251118222328.2265758-5-seanjc@google.com


Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
parent 75c69c82
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -4250,7 +4250,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
		svm_set_dr6(vcpu, DR6_ACTIVE_LOW);

	clgi();
	kvm_load_guest_xsave_state(vcpu);

	/*
	 * Hardware only context switches DEBUGCTL if LBR virtualization is
@@ -4293,7 +4292,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
	    vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
		update_debugctlmsr(vcpu->arch.host_debugctl);

	kvm_load_host_xsave_state(vcpu);
	stgi();

	/* Any pending NMI will happen here */
+0 −4
Original line number Diff line number Diff line
@@ -7473,8 +7473,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vmx_set_interrupt_shadow(vcpu, 0);

	kvm_load_guest_xsave_state(vcpu);

	pt_guest_enter(vmx);

	atomic_switch_perf_msrs(vmx);
@@ -7518,8 +7516,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)

	pt_guest_exit(vmx);

	kvm_load_host_xsave_state(vcpu);

	if (is_guest_mode(vcpu)) {
		/*
		 * Track VMLAUNCH/VMRESUME that have made past guest state
+10 −4
Original line number Diff line number Diff line
@@ -1235,7 +1235,7 @@ static void kvm_load_host_xfeatures(struct kvm_vcpu *vcpu)
	}
}

void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
static void kvm_load_guest_pkru(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.guest_state_protected)
		return;
@@ -1246,9 +1246,8 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
	     kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
		wrpkru(vcpu->arch.pkru);
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_guest_xsave_state);

void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
static void kvm_load_host_pkru(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.guest_state_protected)
		return;
@@ -1261,7 +1260,6 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
			wrpkru(vcpu->arch.host_pkru);
	}
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_load_host_xsave_state);

#ifdef CONFIG_X86_64
static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu)
@@ -11303,6 +11301,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

	guest_timing_enter_irqoff();

	/*
	 * Swap PKRU with hardware breakpoints disabled to minimize the number
	 * of flows where non-KVM code can run with guest state loaded.
	 */
	kvm_load_guest_pkru(vcpu);

	for (;;) {
		/*
		 * Assert that vCPU vs. VM APICv state is consistent.  An APICv
@@ -11331,6 +11335,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
		++vcpu->stat.exits;
	}

	kvm_load_host_pkru(vcpu);

	/*
	 * Do this here before restoring debug registers on the host.  And
	 * since we do this before handling the vmexit, a DR access vmexit
+0 −2
Original line number Diff line number Diff line
@@ -636,8 +636,6 @@ static inline void kvm_machine_check(void)
#endif
}

void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
int kvm_spec_ctrl_test_value(u64 value);
int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
			      struct x86_exception *e);