Commit 77ab80c6 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge branch 'kvm-tdx-enter-exit' into HEAD

This series introduces callbacks to facilitate the entry of a TD VCPU
and the corresponding save/restore of host state.

A TD VCPU is entered via the SEAMCALL TDH.VP.ENTER. The TDX Module manages
the save/restore of guest state and, in conjunction with the SEAMCALL
interface, handles certain aspects of host state. However, there are
specific elements of the host state that require additional attention, as
detailed in the Intel TDX ABI documentation for TDH.VP.ENTER.

TDX is quite different from VMX in this regard.  For VMX, the host VMM is
heavily involved in restoring, managing and saving guest CPU state, whereas
for TDX this is handled by the TDX Module.  In that way, the TDX Module can
protect the confidentiality and integrity of TD CPU state.

The TDX Module does not save/restore all host CPU state because the host
VMM can do it more efficiently and selectively.  CPU state referred to
below is host CPU state.  Often values are already held in memory so no
explicit save is needed, and restoration may not be needed if the kernel
is not using a feature.

TDX does not support PAUSE-loop exiting.  According to the TDX Module
Base arch. spec., hypercalls are expected to be used instead.  Note that
the Linux TDX guest supports existing hypercalls via TDG.VP.VMCALL.

This series requires TDX module 1.5.06.00.0744, or later, due to removal
of the workarounds for the lack of the NO_RBP_MOD feature required by the
kernel. NO_RBP_MOD is now required.
parents fcbe3482 484612f1
Loading
Loading
Loading
Loading
+10 −2
Original line number Diff line number Diff line
@@ -606,8 +606,15 @@ struct kvm_pmu {
struct kvm_pmu_ops;

enum {
	KVM_DEBUGREG_BP_ENABLED = 1,
	KVM_DEBUGREG_WONT_EXIT = 2,
	KVM_DEBUGREG_BP_ENABLED		= BIT(0),
	KVM_DEBUGREG_WONT_EXIT		= BIT(1),
	/*
	 * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
	 * hardware on exit from or enter to guest. KVM needn't switch them.
	 * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
	 * exit, host values need to be restored.
	 */
	KVM_DEBUGREG_AUTO_SWITCH	= BIT(2),
};

struct kvm_mtrr {
@@ -2328,6 +2335,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
void kvm_user_return_msr_update_cache(unsigned int index, u64 val);

static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
+1 −0
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
        return level - 1;
}

u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+69 −0
Original line number Diff line number Diff line
@@ -3,9 +3,78 @@
#define __KVM_X86_VMX_COMMON_H

#include <linux/kvm_host.h>
#include <asm/posted_intr.h>

#include "mmu.h"

union vmx_exit_reason {
	struct {
		u32	basic			: 16;
		u32	reserved16		: 1;
		u32	reserved17		: 1;
		u32	reserved18		: 1;
		u32	reserved19		: 1;
		u32	reserved20		: 1;
		u32	reserved21		: 1;
		u32	reserved22		: 1;
		u32	reserved23		: 1;
		u32	reserved24		: 1;
		u32	reserved25		: 1;
		u32	bus_lock_detected	: 1;
		u32	enclave_mode		: 1;
		u32	smi_pending_mtf		: 1;
		u32	smi_from_vmx_root	: 1;
		u32	reserved30		: 1;
		u32	failed_vmentry		: 1;
	};
	u32 full;
};

struct vcpu_vt {
	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Used if this vCPU is waiting for PI notification wakeup. */
	struct list_head pi_wakeup_list;

	union vmx_exit_reason exit_reason;

	unsigned long	exit_qualification;
	u32		exit_intr_info;

	/*
	 * If true, guest state has been loaded into hardware, and host state
	 * saved into vcpu_{vt,vmx,tdx}.  If false, host state is loaded into
	 * hardware.
	 */
	bool		guest_state_loaded;

#ifdef CONFIG_X86_64
	u64		msr_host_kernel_gs_base;
#endif

	unsigned long	host_debugctlmsr;
};

#ifdef CONFIG_KVM_INTEL_TDX

static __always_inline bool is_td(struct kvm *kvm)
{
	return kvm->arch.vm_type == KVM_X86_TDX_VM;
}

static __always_inline bool is_td_vcpu(struct kvm_vcpu *vcpu)
{
	return is_td(vcpu->kvm);
}

#else

static inline bool is_td(struct kvm *kvm) { return false; }
static inline bool is_td_vcpu(struct kvm_vcpu *vcpu) { return false; }

#endif

static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa)
{
	/* For TDX the direct mask is the shared mask. */
+44 −4
Original line number Diff line number Diff line
@@ -10,6 +10,10 @@
#include "tdx.h"
#include "tdx_arch.h"

#ifdef CONFIG_KVM_INTEL_TDX
static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));
#endif

static void vt_disable_virtualization_cpu(void)
{
	/* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
@@ -141,6 +145,42 @@ static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
	vmx_update_cpu_dirty_logging(vcpu);
}

static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu)) {
		tdx_prepare_switch_to_guest(vcpu);
		return;
	}

	vmx_prepare_switch_to_guest(vcpu);
}

static void vt_vcpu_put(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu)) {
		tdx_vcpu_put(vcpu);
		return;
	}

	vmx_vcpu_put(vcpu);
}

static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu))
		return tdx_vcpu_pre_run(vcpu);

	return vmx_vcpu_pre_run(vcpu);
}

static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
	if (is_td_vcpu(vcpu))
		return tdx_vcpu_run(vcpu, force_immediate_exit);

	return vmx_vcpu_run(vcpu, force_immediate_exit);
}

static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
{
	if (is_td_vcpu(vcpu)) {
@@ -245,9 +285,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.vcpu_free = vt_vcpu_free,
	.vcpu_reset = vt_vcpu_reset,

	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
	.vcpu_load = vt_vcpu_load,
	.vcpu_put = vmx_vcpu_put,
	.vcpu_put = vt_vcpu_put,

	.update_exception_bitmap = vmx_update_exception_bitmap,
	.get_feature_msr = vmx_get_feature_msr,
@@ -281,8 +321,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
	.flush_tlb_gva = vt_flush_tlb_gva,
	.flush_tlb_guest = vt_flush_tlb_guest,

	.vcpu_pre_run = vmx_vcpu_pre_run,
	.vcpu_run = vmx_vcpu_run,
	.vcpu_pre_run = vt_vcpu_pre_run,
	.vcpu_run = vt_vcpu_run,
	.handle_exit = vmx_handle_exit,
	.skip_emulated_instruction = vmx_skip_emulated_instruction,
	.update_emulated_instruction = vmx_update_emulated_instruction,
+5 −5
Original line number Diff line number Diff line
@@ -275,7 +275,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
{
	struct vmcs_host_state *dest, *src;

	if (unlikely(!vmx->guest_state_loaded))
	if (unlikely(!vmx->vt.guest_state_loaded))
		return;

	src = &prev->host_state;
@@ -425,7 +425,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
		 * tables also changed, but KVM should not treat EPT Misconfig
		 * VM-Exits as writes.
		 */
		WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION);
		WARN_ON_ONCE(vmx->vt.exit_reason.basic != EXIT_REASON_EPT_VIOLATION);

		/*
		 * PML Full and EPT Violation VM-Exits both use bit 12 to report
@@ -4622,7 +4622,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
	/* update exit information fields: */
	vmcs12->vm_exit_reason = vm_exit_reason;
	if (to_vmx(vcpu)->exit_reason.enclave_mode)
	if (vmx_get_exit_reason(vcpu).enclave_mode)
		vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
	vmcs12->exit_qualification = exit_qualification;

@@ -6126,7 +6126,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
	 * nested VM-Exit.  Pass the original exit reason, i.e. don't hardcode
	 * EXIT_REASON_VMFUNC as the exit reason.
	 */
	nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
	nested_vmx_vmexit(vcpu, vmx->vt.exit_reason.full,
			  vmx_get_intr_info(vcpu),
			  vmx_get_exit_qual(vcpu));
	return 1;
@@ -6571,7 +6571,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	union vmx_exit_reason exit_reason = vmx->exit_reason;
	union vmx_exit_reason exit_reason = vmx->vt.exit_reason;
	unsigned long exit_qual;
	u32 exit_intr_info;

Loading