Commit 14b63209 authored by Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "x86 and selftests fixes.

  x86:

   - When emulating a guest TLB flush for a nested guest, flush vpid01,
     not vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. if
     L2 and L1 are sharing VPID '0' (from L1's perspective).

   - Fix a bug in the SNP initialization flow where KVM would return '0'
     to userspace instead of -errno on failure.

   - Move Intel PT virtualization in host/guest mode (i.e. outputting
     host trace to a host buffer and guest trace to a guest buffer)
     behind CONFIG_BROKEN.

   - Fix a memory leak on failure of KVM_SEV_SNP_LAUNCH_START.

   - Fix a bug where KVM fails to inject an interrupt from the IRR after
     KVM_SET_LAPIC.

  Selftests:

   - Increase the timeout for the memslot performance selftest to avoid
     false failures on arm64 and nested x86 platforms.

   - Fix a goof in the guest_memfd selftest where a for-loop initialized
     a bit mask to zero instead of BIT(0).

   - Disable strict aliasing when building KVM selftests to prevent the
     compiler from treating things like casting "u64 *" to "uint64_t *"
     as undefined behavior, which can lead to nasty, hard-to-debug
     failures.

   - Force -march=x86-64-v2 for KVM x86 selftests if and only if the
     uarch is supported by the compiler.

   - Fix broken compilation of kvm selftests after a header sync in
     tools/"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Bury Intel PT virtualization (guest/host mode) behind CONFIG_BROKEN
  KVM: x86: Unconditionally set irr_pending when updating APICv state
  kvm: svm: Fix gctx page leak on invalid inputs
  KVM: selftests: use X86_MEMTYPE_WB instead of VMX_BASIC_MEM_TYPE_WB
  KVM: SVM: Propagate error from snp_guest_req_init() to userspace
  KVM: nVMX: Treat vpid01 as current if L2 is active, but with VPID disabled
  KVM: selftests: Don't force -march=x86-64-v2 if it's unsupported
  KVM: selftests: Disable strict aliasing
  KVM: selftests: fix unintentional noop test in guest_memfd_test.c
  KVM: selftests: memslot_perf_test: increase guest sync timeout
parents 5456ec9d aa0d42ca
Loading
Loading
Loading
Loading
+18 −11
Original line number Diff line number Diff line
@@ -2629,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (apic->apicv_active) {
		/* irr_pending is always true when apicv is activated. */
		apic->irr_pending = true;
		apic->isr_count = 1;
	} else {
	/*
		 * Don't clear irr_pending, searching the IRR can race with
		 * updates from the CPU as APICv is still active from hardware's
		 * perspective.  The flag will be cleared as appropriate when
		 * KVM injects the interrupt.
	 * When APICv is enabled, KVM must always search the IRR for a pending
	 * IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
	 * isn't running.  If APICv is disabled, KVM _should_ search the IRR
	 * for a pending IRQ.  But KVM currently doesn't ensure *all* hardware,
	 * e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
	 * the IRR at this time could race with IRQ delivery from hardware that
	 * still sees APICv as being enabled.
	 *
	 * FIXME: Ensure other vCPUs and devices observe the change in APICv
	 *        state prior to updating KVM's metadata caches, so that KVM
	 *        can safely search the IRR and set irr_pending accordingly.
	 */
	apic->irr_pending = true;

	if (apic->apicv_active)
		apic->isr_count = 1;
	else
		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
	}

	apic->highest_isr_cache = -1;
}

+9 −6
Original line number Diff line number Diff line
@@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
		goto e_free;

	/* This needs to happen after SEV/SNP firmware initialization. */
	if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
	if (vm_type == KVM_X86_SNP_VM) {
		ret = snp_guest_req_init(kvm);
		if (ret)
			goto e_free;
	}

	INIT_LIST_HEAD(&sev->regions_list);
	INIT_LIST_HEAD(&sev->mirror_vms);
@@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
	if (sev->snp_context)
		return -EINVAL;

	sev->snp_context = snp_context_create(kvm, argp);
	if (!sev->snp_context)
		return -ENOTTY;

	if (params.flags)
		return -EINVAL;

@@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
	if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
		return -EINVAL;

	sev->snp_context = snp_context_create(kvm, argp);
	if (!sev->snp_context)
		return -ENOTTY;

	start.gctx_paddr = __psp_pa(sev->snp_context);
	start.policy = params.policy;
	memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
+25 −5
Original line number Diff line number Diff line
@@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
	kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);

	/*
	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
	 * full TLB flush from the guest's perspective.  This is required even
	 * if VPID is disabled in the host as KVM may need to synchronize the
	 * MMU in response to the guest TLB flush.
	 * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
	 * same VPID as the host, and so architecturally, linear and combined
	 * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit.  KVM
	 * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
	 * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01.  This
	 * is required even if VPID is disabled in KVM, as a TLB flush (there
	 * are no VPIDs) still occurs from L1's perspective, and KVM may need to
	 * synchronize the MMU in response to the guest TLB flush.
	 *
	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
	 * EPT is a special snowflake, as guest-physical mappings aren't
@@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,

	vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);

	/*
	 * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
	 * same VPID as the host.  Emulate this behavior by using vpid01 for L2
	 * if VPID is disabled in vmcs12.  Note, if VPID is disabled, VM-Enter
	 * and VM-Exit are architecturally required to flush VPID=0, but *only*
	 * VPID=0.  I.e. using vpid02 would be ok (so long as KVM emulates the
	 * required flushes), but doing so would cause KVM to over-flush.  E.g.
	 * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled,
	 * and then runs L2 X again, then KVM can and should retain TLB entries
	 * for VPID12=1.
	 */
	if (enable_vpid) {
		if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
			vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
@@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
		return nested_vmx_fail(vcpu,
			VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);

	/*
	 * Always flush the effective vpid02, i.e. never flush the current VPID
	 * and never explicitly flush vpid01.  INVVPID targets a VPID, not a
	 * VMCS, and so whether or not the current vmcs12 has VPID enabled is
	 * irrelevant (and there may not be a loaded vmcs12).
	 */
	vpid02 = nested_get_vpid02(vcpu);
	switch (type) {
	case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+4 −2
Original line number Diff line number Diff line
@@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444);
static unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
module_param(ple_window_max, uint, 0444);

/* Default is SYSTEM mode, 1 for host-guest mode */
/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */
int __read_mostly pt_mode = PT_MODE_SYSTEM;
#ifdef CONFIG_BROKEN
module_param(pt_mode, int, S_IRUGO);
#endif

struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;

@@ -3216,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)

static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
{
	if (is_guest_mode(vcpu))
	if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
		return nested_get_vpid02(vcpu);
	return to_vmx(vcpu)->vpid;
}
+6 −4
Original line number Diff line number Diff line
@@ -241,16 +241,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
	-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
	-fno-builtin-memcmp -fno-builtin-memcpy \
	-fno-builtin-memset -fno-builtin-strnlen \
	-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
	-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
	-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
	$(KHDR_INCLUDES)
	-fno-stack-protector -fno-PIE -fno-strict-aliasing \
	-I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
	-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \
	-I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
ifeq ($(ARCH),s390)
	CFLAGS += -march=z10
endif
ifeq ($(ARCH),x86)
ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
	CFLAGS += -march=x86-64-v2
endif
endif
ifeq ($(ARCH),arm64)
tools_dir := $(top_srcdir)/tools
arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
Loading