Commit 428afac5 authored by Sean Christopherson
Browse files

KVM: x86: Move bulk of emergency virtualization logic to virt subsystem



Move the majority of the code related to disabling hardware virtualization
in emergency from KVM into the virt subsystem so that virt can take full
ownership of the state of SVM/VMX.  This will allow refcounting usage of
SVM/VMX so that KVM and the TDX subsystem can enable VMX without stomping
on each other.

To route the emergency callback to the "right" vendor code, and to avoid
mixing vendor and generic code, implement a x86_virt_ops structure to
track the emergency callback, along with the SVM vs. VMX (vs. "none")
feature that is active.

To avoid having to choose between SVM and VMX, simply refuse to enable
either if both are somehow supported.  No known CPU supports both SVM and
VMX, and it's comically unlikely such a CPU will ever exist.

Leave KVM's clearing of loaded VMCSes and MSR_VM_HSAVE_PA in KVM, via a
callback explicitly scoped to KVM.  Loading VMCSes and saving/restoring
host state are firmly tied to running VMs, and thus are (a) KVM's
responsibility and (b) operations that are still exclusively reserved for
KVM (as far as in-tree code is concerned).  I.e. the contract being
established is that non-KVM subsystems can utilize virtualization, but for
all intents and purposes cannot act as full-blown hypervisors.

Reviewed-by: Chao Gao <chao.gao@intel.com>
Tested-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Sagi Shahar <sagis@google.com>
Link: https://patch.msgid.link/20260214012702.2368778-9-seanjc@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent 32d76cdf
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -40,7 +40,8 @@
#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/reboot.h>
#include <asm/virt.h>

#include <hyperv/hvhdk.h>

#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+0 −11
Original line number Diff line number Diff line
@@ -25,17 +25,6 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS	0
#define MRR_APM		1

typedef void (cpu_emergency_virt_cb)(void);
#if IS_ENABLED(CONFIG_KVM_X86)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
#else
static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_disable_virtualization(void) {}
#endif /* CONFIG_KVM_X86 */

typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
void run_crash_ipi_callback(struct pt_regs *regs);
+7 −2
Original line number Diff line number Diff line
@@ -4,6 +4,8 @@

#include <asm/reboot.h>

typedef void (cpu_emergency_virt_cb)(void);

#if IS_ENABLED(CONFIG_KVM_X86)
extern bool virt_rebooting;

@@ -12,17 +14,20 @@ void __init x86_virt_init(void);
#if IS_ENABLED(CONFIG_KVM_INTEL)
int x86_vmx_enable_virtualization_cpu(void);
int x86_vmx_disable_virtualization_cpu(void);
void x86_vmx_emergency_disable_virtualization_cpu(void);
#endif

#if IS_ENABLED(CONFIG_KVM_AMD)
int x86_svm_enable_virtualization_cpu(void);
int x86_svm_disable_virtualization_cpu(void);
void x86_svm_emergency_disable_virtualization_cpu(void);
#endif

int x86_virt_emergency_disable_virtualization_cpu(void);

void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback);
void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback);
#else
static __always_inline void x86_virt_init(void) {}
static inline int x86_virt_emergency_disable_virtualization_cpu(void) { return -ENOENT; }
#endif

#endif /* _ASM_X86_VIRT_H */
+2 −1
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@
#include <asm/crash.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
#include <asm/virt.h>

/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -111,7 +112,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)

	crash_smp_send_stop();

	cpu_emergency_disable_virtualization();
	x86_virt_emergency_disable_virtualization_cpu();

	/*
	 * Disable Intel PT to stop its logging
+7 −56
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <asm/cpu.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/virt.h>

#include <linux/ctype.h>
#include <linux/mc146818rtc.h>
@@ -532,51 +533,6 @@ static inline void kb_wait(void)
static inline void nmi_shootdown_cpus_on_restart(void);

#if IS_ENABLED(CONFIG_KVM_X86)
/* RCU-protected callback to disable virtualization prior to reboot. */
static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;

/*
 * Install @callback as the emergency virtualization callback.  Only a single
 * callback can be registered at a time: if one is already installed, WARN
 * (once) and leave the existing callback in place.
 */
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
		return;

	rcu_assign_pointer(cpu_emergency_virt_callback, callback);
}
EXPORT_SYMBOL_FOR_KVM(cpu_emergency_register_virt_callback);

/*
 * Remove @callback as the emergency virtualization callback.  WARN (once) and
 * do nothing if @callback is not the currently registered callback.
 */
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
		return;

	rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
	/*
	 * Wait for an RCU grace period so that any reader that fetched the old
	 * pointer under rcu_read_lock() has finished before returning.
	 */
	synchronize_rcu();
}
EXPORT_SYMBOL_FOR_KVM(cpu_emergency_unregister_virt_callback);

/*
 * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
 * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
 * GIF=0, i.e. if the crash occurred between CLGI and STGI.
 *
 * Invokes the registered emergency callback, if any; does nothing when no
 * callback is registered.
 */
void cpu_emergency_disable_virtualization(void)
{
	cpu_emergency_virt_cb *callback;

	/*
	 * IRQs must be disabled as KVM enables virtualization in hardware via
	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
	 * virtualization stays disabled.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Fetch and run the callback inside an RCU read-side critical section,
	 * pairing with the synchronize_rcu() done on unregistration.
	 */
	rcu_read_lock();
	callback = rcu_dereference(cpu_emergency_virt_callback);
	if (callback)
		callback();
	rcu_read_unlock();
}

static void emergency_reboot_disable_virtualization(void)
{
	local_irq_disable();
@@ -588,17 +544,12 @@ static void emergency_reboot_disable_virtualization(void)
	 * We can't take any locks and we may be on an inconsistent state, so
	 * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
	 *
	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
	 * other CPUs may have virtualization enabled.
	 * Safely force _this_ CPU out of VMX/SVM operation, and if necessary,
	 * blast NMIs to force other CPUs out of VMX/SVM as well.
	 */
	if (rcu_access_pointer(cpu_emergency_virt_callback)) {
		/* Safely force _this_ CPU out of VMX/SVM operation. */
		cpu_emergency_disable_virtualization();

		/* Disable VMX/SVM and halt on other CPUs. */
	if (!x86_virt_emergency_disable_virtualization_cpu())
		nmi_shootdown_cpus_on_restart();
}
}
#else
static void emergency_reboot_disable_virtualization(void) { }
#endif /* CONFIG_KVM_X86 */
@@ -875,10 +826,10 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
		shootdown_callback(cpu, regs);

	/*
	 * Prepare the CPU for reboot _after_ invoking the callback so that the
	 * callback can safely use virtualization instructions, e.g. VMCLEAR.
	 * Disable virtualization, as both VMX and SVM can block INIT and thus
	 * prevent AP bringup, e.g. in a kdump kernel or in firmware.
	 */
	cpu_emergency_disable_virtualization();
	x86_virt_emergency_disable_virtualization_cpu();

	atomic_dec(&waiting_for_crash_ipi);

Loading