Commit cd802e7e authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull KVM fixes from Paolo Bonzini:
 "ARM:

   - Avoid use of uninitialized memcache pointer in user_mem_abort()

   - Always set HCR_EL2.xMO bits when running in VHE, allowing
     interrupts to be taken while TGE=0 and fixing an ugly bug on
     AmpereOne that occurs when taking an interrupt while clearing the
     xMO bits (AC03_CPU_36)

   - Prevent VMMs from hiding support for AArch64 at any EL virtualized
     by KVM

   - Save/restore the host value for HCRX_EL2 instead of restoring an
     incorrect fixed value

   - Make host_stage2_set_owner_locked() check that the entire requested
     range is memory rather than just the first page

  RISC-V:

   - Add missing reset of smstateen CSRs

  x86:

   - Forcibly leave SMM on SHUTDOWN interception on AMD CPUs to avoid
     causing problems due to KVM stuffing INIT on SHUTDOWN (KVM needs to
     sanitize the VMCB as its state is undefined after SHUTDOWN,
     emulating INIT is the least awful choice).

   - Track the valid sync/dirty fields in kvm_run as a u64 to ensure
     KVM doesn't goof a sanity check in the future.

   - Free obsolete roots when (re)loading the MMU to fix a bug where
     pre-faulting memory can get stuck due to always encountering a
     stale root.

   - When dumping GHCB state, use KVM's snapshot instead of the raw GHCB
     page to print state, so that KVM doesn't print stale/wrong
     information.

   - When changing memory attributes (e.g. shared <=> private), add
     potential hugepage ranges to the mmu_invalidate_range_{start,end}
     set so that KVM doesn't create a shared/private hugepage when the
     corresponding attributes will become mixed (the attributes are
     committed *after* KVM finishes the invalidation).

   - Rework the SRSO mitigation to enable BP_SPEC_REDUCE only when KVM
     has at least one active VM. Enabling BP_SPEC_REDUCE whenever KVM
     is loaded led to very measurable performance regressions for
     non-KVM workloads"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Set/clear SRSO's BP_SPEC_REDUCE on 0 <=> 1 VM count transitions
  KVM: arm64: Fix memory check in host_stage2_set_owner_locked()
  KVM: arm64: Kill HCRX_HOST_FLAGS
  KVM: arm64: Properly save/restore HCRX_EL2
  KVM: arm64: selftest: Don't try to disable AArch64 support
  KVM: arm64: Prevent userspace from disabling AArch64 support at any virtualisable EL
  KVM: arm64: Force HCR_EL2.xMO to 1 at all times in VHE mode
  KVM: arm64: Fix uninitialized memcache pointer in user_mem_abort()
  KVM: x86/mmu: Prevent installing hugepages when mem attributes are changing
  KVM: SVM: Update dump_ghcb() to use the GHCB snapshot fields
  KVM: RISC-V: reset smstateen CSRs
  KVM: x86/mmu: Check and free obsolete roots in kvm_mmu_reload()
  KVM: x86: Check that the high 32bits are clear in kvm_arch_vcpu_ioctl_run()
  KVM: SVM: Forcibly leave SMM mode on SHUTDOWN interception
parents ecb9194d add20321
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@
	mrs	x0, id_aa64mmfr1_el1
	ubfx	x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
	cbz	x0, .Lskip_hcrx_\@
	mov_q	x0, HCRX_HOST_FLAGS
	mov_q	x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)

        /* Enable GCS if supported */
	mrs_s	x1, SYS_ID_AA64PFR1_EL1
+1 −2
Original line number Diff line number Diff line
@@ -100,9 +100,8 @@
			 HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)

#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
#define MPAMHCR_HOST_FLAGS	0

/* TCR_EL2 Registers bits */
+6 −7
Original line number Diff line number Diff line
@@ -235,6 +235,8 @@ static inline void __deactivate_traps_mpam(void)

static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);

	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
	write_sysreg(1 << 15, hstr_el2);

@@ -245,11 +247,8 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
	 * EL1 instead of being trapped to EL2.
	 */
	if (system_supports_pmuv3()) {
		struct kvm_cpu_context *hctxt;

		write_sysreg(0, pmselr_el0);

		hctxt = host_data_ptr(host_ctxt);
		ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
		write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
		vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
@@ -269,6 +268,7 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
			hcrx &= ~clr;
		}

		ctxt_sys_reg(hctxt, HCRX_EL2) = read_sysreg_s(SYS_HCRX_EL2);
		write_sysreg_s(hcrx, SYS_HCRX_EL2);
	}

@@ -278,19 +278,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)

static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);

	write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);

	write_sysreg(0, hstr_el2);
	if (system_supports_pmuv3()) {
		struct kvm_cpu_context *hctxt;

		hctxt = host_data_ptr(host_ctxt);
		write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
		vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
	}

	if (cpus_have_final_cap(ARM64_HAS_HCX))
		write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
		write_sysreg_s(ctxt_sys_reg(hctxt, HCRX_EL2), SYS_HCRX_EL2);

	__deactivate_traps_hfgxtr(vcpu);
	__deactivate_traps_mpam();
+1 −1
Original line number Diff line number Diff line
@@ -503,7 +503,7 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
	int ret;

	if (!addr_is_memory(addr))
	if (!range_is_memory(addr, addr + size))
		return -EPERM;

	ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
+21 −15
Original line number Diff line number Diff line
@@ -429,23 +429,27 @@ u64 __vgic_v3_get_gic_config(void)
	/*
	 * To check whether we have a MMIO-based (GICv2 compatible)
	 * CPU interface, we need to disable the system register
	 * view. To do that safely, we have to prevent any interrupt
	 * from firing (which would be deadly).
	 * view.
	 *
	 * Note that this only makes sense on VHE, as interrupts are
	 * already masked for nVHE as part of the exception entry to
	 * EL2.
	 */
	if (has_vhe())
		flags = local_daif_save();

	/*
	 * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
	 * that to be able to set ICC_SRE_EL1.SRE to 0, all the
	 * interrupt overrides must be set. You've got to love this.
	 *
	 * As we always run VHE with HCR_xMO set, no extra xMO
	 * manipulation is required in that case.
	 *
	 * To safely disable SRE, we have to prevent any interrupt
	 * from firing (which would be deadly). This only makes sense
	 * on VHE, as interrupts are already masked for nVHE as part
	 * of the exception entry to EL2.
	 */
	if (has_vhe()) {
		flags = local_daif_save();
	} else {
		sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
		isb();
	}

	write_gicreg(0, ICC_SRE_EL1);
	isb();

@@ -453,11 +457,13 @@ u64 __vgic_v3_get_gic_config(void)

	write_gicreg(sre, ICC_SRE_EL1);
	isb();
	sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
	isb();

	if (has_vhe())
	if (has_vhe()) {
		local_daif_restore(flags);
	} else {
		sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
		isb();
	}

	val  = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
	val |= read_gicreg(ICH_VTR_EL2);
Loading