Commit a24dbf98 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-vmx-6.15' of https://github.com/kvm-x86/linux into HEAD

KVM VMX changes for 6.15

 - Fix a bug where KVM unnecessarily reads XFD_ERR from hardware and thus
   modifies the vCPU's XFD_ERR on a #NM due to CR0.TS=1.

 - Pass XFD_ERR as a pseudo-payload when injecting #NM as a preparatory step
   for upcoming FRED virtualization support.

 - Decouple the EPT entry RWX protection bit macros from the EPT Violation bits
   as a general cleanup, and in anticipation of adding support for emulating
   Mode-Based Execution (MBEC).

 - Reject KVM_RUN if userspace manages to gain control and stuff invalid guest
   state while KVM is in the middle of emulating nested VM-Enter.

 - Add a macro to handle KVM's sanity checks on entry/exit VMCS control pairs
   in anticipation of adding sanity checks for secondary exit controls (the
   primary field is out of bits).
parents 783e9cd0 0c3566b6
Loading
Loading
Loading
Loading
+16 −12
Original line number Diff line number Diff line
@@ -580,18 +580,22 @@ enum vm_entry_failure_code {
/*
 * Exit Qualifications for EPT Violations
 */
#define EPT_VIOLATION_ACC_READ_BIT	0
#define EPT_VIOLATION_ACC_WRITE_BIT	1
#define EPT_VIOLATION_ACC_INSTR_BIT	2
#define EPT_VIOLATION_RWX_SHIFT		3
#define EPT_VIOLATION_GVA_IS_VALID_BIT	7
#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
#define EPT_VIOLATION_ACC_READ		(1 << EPT_VIOLATION_ACC_READ_BIT)
#define EPT_VIOLATION_ACC_WRITE		(1 << EPT_VIOLATION_ACC_WRITE_BIT)
#define EPT_VIOLATION_ACC_INSTR		(1 << EPT_VIOLATION_ACC_INSTR_BIT)
#define EPT_VIOLATION_RWX_MASK		(VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
#define EPT_VIOLATION_GVA_IS_VALID	(1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
#define EPT_VIOLATION_GVA_TRANSLATED	(1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
#define EPT_VIOLATION_ACC_READ		BIT(0)
#define EPT_VIOLATION_ACC_WRITE		BIT(1)
#define EPT_VIOLATION_ACC_INSTR		BIT(2)
#define EPT_VIOLATION_PROT_READ		BIT(3)
#define EPT_VIOLATION_PROT_WRITE	BIT(4)
#define EPT_VIOLATION_PROT_EXEC		BIT(5)
#define EPT_VIOLATION_PROT_MASK		(EPT_VIOLATION_PROT_READ  | \
					 EPT_VIOLATION_PROT_WRITE | \
					 EPT_VIOLATION_PROT_EXEC)
#define EPT_VIOLATION_GVA_IS_VALID	BIT(7)
#define EPT_VIOLATION_GVA_TRANSLATED	BIT(8)

/*
 * Convert the RWX protection bits of an EPT entry into the corresponding
 * EPT_VIOLATION_PROT_* bits of an EPT Violation exit qualification.  The
 * entry's bits are isolated with VMX_EPT_RWX_MASK and shifted left by 3 so
 * that they land on bits 5:3, i.e. on EPT_VIOLATION_PROT_{READ,WRITE,EXEC}.
 */
#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)

/*
 * Verify at build time that converting a full RWX mask yields exactly the
 * three EPT_VIOLATION_PROT_* bits, i.e. that the hardcoded shift above agrees
 * with the bit definitions.
 */
static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
	      (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));

/*
 * Exit Qualifications for NOTIFY VM EXIT
+1 −2
Original line number Diff line number Diff line
@@ -510,8 +510,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
		 * Note, pte_access holds the raw RWX bits from the EPTE, not
		 * ACC_*_MASK flags!
		 */
		walker->fault.exit_qualification |= (pte_access & VMX_EPT_RWX_MASK) <<
						     EPT_VIOLATION_RWX_SHIFT;
		walker->fault.exit_qualification |= EPT_VIOLATION_RWX_TO_PROT(pte_access);
	}
#endif
	walker->fault.address = addr;
+75 −31
Original line number Diff line number Diff line
@@ -2578,6 +2578,34 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
	return  ctl_opt & allowed;
}

/*
 * Sanity check that, for every element of @pairs, the VM-Entry control is set
 * in @entry_controls if and only if the paired VM-Exit control is set in
 * @exit_controls.
 *
 * For each inconsistent pair, warn (once) with the offending bits, and clear
 * both controls of the pair from @entry_controls and @exit_controls.  Note,
 * both arguments are modified in place and so must be lvalues.
 *
 * Evaluates to -EIO if at least one pair is inconsistent and
 * error_on_inconsistent_vmcs_config is set, and to 0 otherwise.  All pairs
 * are processed even after an inconsistency is found.
 *
 * typeof() is used for the per-pair locals so that the macro is not tied to
 * a specific control width; the BUILD_BUG_ON()s enforce that the fields in
 * @pairs have the same size as the controls being checked.
 */
#define vmx_check_entry_exit_pairs(pairs, entry_controls, exit_controls)	\
({										\
	int i, r = 0;								\
										\
	BUILD_BUG_ON(sizeof(pairs[0].entry_control) != sizeof(entry_controls));	\
	BUILD_BUG_ON(sizeof(pairs[0].exit_control)  != sizeof(exit_controls));	\
										\
	for (i = 0; i < ARRAY_SIZE(pairs); i++) {				\
		typeof(entry_controls) n_ctrl = pairs[i].entry_control;		\
		typeof(exit_controls) x_ctrl = pairs[i].exit_control;		\
										\
		if (!(entry_controls & n_ctrl) == !(exit_controls & x_ctrl))	\
			continue;						\
										\
		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, "		\
			     "entry = %llx (%llx), exit = %llx (%llx)\n",	\
			     (u64)(entry_controls & n_ctrl), (u64)n_ctrl,	\
			     (u64)(exit_controls & x_ctrl), (u64)x_ctrl);	\
										\
		if (error_on_inconsistent_vmcs_config)				\
			r = -EIO;						\
										\
		entry_controls &= ~n_ctrl;					\
		exit_controls &= ~x_ctrl;					\
	}									\
	r;									\
})

static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
			     struct vmx_capability *vmx_cap)
{
@@ -2589,7 +2617,6 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
	u32 _vmentry_control = 0;
	u64 basic_msr;
	u64 misc_msr;
	int i;

	/*
	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
@@ -2693,23 +2720,10 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
				&_vmentry_control))
		return -EIO;

	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;

		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
			continue;

		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);

		if (error_on_inconsistent_vmcs_config)
	if (vmx_check_entry_exit_pairs(vmcs_entry_exit_pairs,
				       _vmentry_control, _vmexit_control))
		return -EIO;

		_vmentry_control &= ~n_ctrl;
		_vmexit_control &= ~x_ctrl;
	}

	/*
	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
	 * can't be used due to an errata where VM Exit may incorrectly clear
@@ -5211,6 +5225,12 @@ bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
	       (kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
}

/*
 * Returns true if the guest's XFD value is non-zero and the guest's CR0.TS
 * is clear.  A zero XFD short-circuits the check, i.e. CR0 is consulted only
 * when XFD is non-zero.
 */
static bool is_xfd_nm_fault(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.guest_fpu.fpstate->xfd)
		return false;

	return !kvm_is_cr0_bit_set(vcpu, X86_CR0_TS);
}

static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5237,7 +5257,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
	 * point.
	 */
	if (is_nm_fault(intr_info)) {
		kvm_queue_exception(vcpu, NM_VECTOR);
		kvm_queue_exception_p(vcpu, NM_VECTOR,
				      is_xfd_nm_fault(vcpu) ? vcpu->arch.guest_fpu.xfd_err : 0);
		return 1;
	}

@@ -5817,7 +5838,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
	error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
		      ? PFERR_FETCH_MASK : 0;
	/* ept page table entry is present? */
	error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK)
	error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK)
		      ? PFERR_PRESENT_MASK : 0;

	if (error_code & EPT_VIOLATION_GVA_IS_VALID)
@@ -5871,11 +5892,35 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
	return 1;
}

static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
/*
 * Returns true if emulation is required (due to the vCPU having invalid state
 * with unrestricted guest mode disabled) and KVM can't faithfully emulate the
 * current vCPU state.
 */
static bool vmx_unhandleable_emulation_required(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	return vmx->emulation_required && !vmx->rmode.vm86_active &&
	if (!vmx->emulation_required)
		return false;

	/*
	 * It is architecturally impossible for emulation to be required when a
	 * nested VM-Enter is pending completion, as VM-Enter will VM-Fail if
	 * guest state is invalid and unrestricted guest is disabled, i.e. KVM
	 * should synthesize VM-Fail instead of emulating L2 code.  This path is
	 * only reachable if userspace modifies L2 guest state after KVM has
	 * performed the nested VM-Enter consistency checks.
	 */
	if (vmx->nested.nested_run_pending)
		return true;

	/*
	 * KVM only supports emulating exceptions if the vCPU is in Real Mode.
	 * If emulation is required, KVM can't perform a successful VM-Enter to
	 * inject the exception.
	 */
	return !vmx->rmode.vm86_active &&
	       (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
}

@@ -5898,7 +5943,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
		if (!kvm_emulate_instruction(vcpu, 0))
			return 0;

		if (vmx_emulation_required_with_pending_exception(vcpu)) {
		if (vmx_unhandleable_emulation_required(vcpu)) {
			kvm_prepare_emulation_failure_exit(vcpu);
			return 0;
		}
@@ -5922,7 +5967,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)

int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	if (vmx_emulation_required_with_pending_exception(vcpu)) {
	if (vmx_unhandleable_emulation_required(vcpu)) {
		kvm_prepare_emulation_failure_exit(vcpu);
		return 0;
	}
@@ -6997,16 +7042,15 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
	 * MSR value is not clobbered by the host activity before the guest
	 * has chance to consume it.
	 *
	 * Do not blindly read xfd_err here, since this exception might
	 * be caused by L1 interception on a platform which doesn't
	 * support xfd at all.
	 *
	 * Do it conditionally upon guest_fpu::xfd. xfd_err matters
	 * only when xfd contains a non-zero value.
	 * Update the guest's XFD_ERR if and only if XFD is enabled, as the #NM
	 * interception may have been caused by L1 interception.  Per the SDM,
	 * XFD_ERR is not modified for non-XFD #NM, i.e. if CR0.TS=1.
	 *
	 * Queuing exception is done in vmx_handle_exit. See comment there.
	 * Note, XFD_ERR is updated _before_ the #NM interception check, i.e.
	 * unlike CR2 and DR6, the value is not a payload that is attached to
	 * the #NM exception.
	 */
	if (vcpu->arch.guest_fpu.fpstate->xfd)
	if (is_xfd_nm_fault(vcpu))
		rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}