Commit 4ea7c171 authored by Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "Arm:

   - Fix trapping regression when no in-kernel irqchip is present

   - Check host-provided, untrusted ranges and offsets in pKVM

   - Fix regression restoring the ID_PFR1_EL1 register

   - Fix vgic ITS locking issues when LPIs are not directly injected

  Arm selftests:

   - Correct target CPU programming in vgic_lpi_stress selftest

   - Fix exposure of SCTLR2_EL2 and ZCR_EL2 in get-reg-list selftest

  RISC-V:

   - Fix check for local interrupts on riscv32

   - Read HGEIP CSR on the correct cpu when checking for IMSIC
     interrupts

   - Remove automatic I/O mapping from kvm_arch_prepare_memory_region()

  x86:

   - Inject #UD if the guest attempts to execute SEAMCALL or TDCALL as
     KVM doesn't support virtualizing the instructions, but the
     instructions are gated only by VMXON. That is, they will VM-Exit
     instead of taking a #UD and until now this resulted in KVM exiting
     to userspace with an emulation error.

   - Unload the "FPU" when emulating INIT of XSTATE features if and only
     if the FPU is actually loaded, instead of trying to predict when
     KVM will emulate an INIT (CET support missed the MP_STATE path).
     Add sanity checks to detect and harden against similar bugs in the
     future.

   - Unregister KVM's GALog notifier (for AVIC) when kvm-amd.ko is
     unloaded.

   - Use a raw spinlock for svm->ir_list_lock as the lock is taken
     during schedule(), and "normal" spinlocks are sleepable locks when
     PREEMPT_RT=y.

   - Remove guest_memfd bindings on memslot deletion when a gmem file is
     dying to fix a use-after-free race found by syzkaller.

   - Fix a goof in the EPT Violation handler where KVM checks the wrong
     variable when determining if the reported GVA is valid.

   - Fix and simplify the handling of LBR virtualization on AMD, which
     was made buggy and unnecessarily complicated by nested VM support.

  Misc:

   - Update Oliver's email address"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: nSVM: Fix and simplify LBR virtualization handling with nested
  KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()
  KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated
  MAINTAINERS: Switch myself to using kernel.org address
  KVM: arm64: vgic-v3: Release reserved slot outside of lpi_xa's lock
  KVM: arm64: vgic-v3: Reinstate IRQ lock ordering for LPI xarray
  KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip
  KVM: arm64: Set ID_{AA64PFR0,PFR1}_EL1.GIC when GICv3 is configured
  KVM: arm64: Make all 32bit ID registers fully writable
  KVM: VMX: Fix check for valid GVA on an EPT violation
  KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying
  KVM: SVM: switch to raw spinlock for svm->ir_list_lock
  KVM: SVM: Make avic_ga_log_notifier() local to avic.c
  KVM: SVM: Unregister KVM's GALog notifier on kvm-amd.ko exit
  KVM: SVM: Initialize per-CPU svm_data at the end of hardware setup
  KVM: x86: Call out MSR_IA32_S_CET is not handled by XSAVES
  KVM: x86: Harden KVM against imbalanced load/put of guest FPU state
  KVM: x86: Unload "FPU" state on INIT if and only if its currently in-use
  KVM: arm64: Check the untrusted offset in FF-A memory share
  KVM: arm64: Check range args for pKVM mem transitions
  ...
parents e9a6fb0b 8a482141
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -605,7 +605,8 @@ Oleksij Rempel <o.rempel@pengutronix.de>
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oupton@google.com>
Oliver Upton <oupton@kernel.org> <oliver.upton@linux.dev>
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>
+1 −1
Original line number Diff line number Diff line
@@ -13659,7 +13659,7 @@ F: virt/kvm/*
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M:	Marc Zyngier <maz@kernel.org>
M:	Oliver Upton <oliver.upton@linux.dev>
M:	Oliver Upton <oupton@kernel.org>
R:	Joey Gouly <joey.gouly@arm.com>
R:	Suzuki K Poulose <suzuki.poulose@arm.com>
R:	Zenghui Yu <yuzenghui@huawei.com>
+7 −2
Original line number Diff line number Diff line
@@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id,
	struct ffa_mem_region_attributes *ep_mem_access;
	struct ffa_composite_mem_region *reg;
	struct ffa_mem_region *buf;
	u32 offset, nr_ranges;
	u32 offset, nr_ranges, checked_offset;
	int ret = 0;

	if (addr_mbz || npages_mbz || fraglen > len ||
@@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id,
		goto out_unlock;
	}

	if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) {
	if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}

	if (fraglen < checked_offset) {
		ret = FFA_RET_INVALID_PARAMETERS;
		goto out_unlock;
	}
+28 −0
Original line number Diff line number Diff line
@@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void)
	return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
}

/*
 * Check that the nr_pages pages starting at pfn all fall inside the PA-range.
 *
 * The test subtracts from the limit instead of adding pfn and nr_pages
 * together, so it cannot be defeated by an overflowing sum. It is therefore a
 * requirement before using a pfn/nr_pages pair from an untrusted source.
 *
 * Returns true when the whole range is in bounds, false otherwise.
 */
static bool pfn_range_is_valid(u64 pfn, u64 nr_pages)
{
	u64 nr_pfns = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT);

	/* The first page must itself be below the limit. */
	if (pfn >= nr_pfns)
		return false;

	/* nr_pfns - pfn cannot underflow here: pfn < nr_pfns was just checked. */
	return nr_pages <= nr_pfns - pfn;
}

struct kvm_mem_range {
	u64 start;
	u64 end;
@@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
	void *virt = __hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

@@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
	u64 virt = (u64)__hyp_va(phys);
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	hyp_lock_component();

@@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages)
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED);
	if (!ret)
@@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
	u64 size = PAGE_SIZE * nr_pages;
	int ret;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	host_lock_component();
	ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED);
	if (!ret)
@@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu
	if (prot & ~KVM_PGTABLE_PROT_RWX)
		return -EINVAL;

	if (!pfn_range_is_valid(pfn, nr_pages))
		return -EINVAL;

	ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
	if (ret)
		return ret;
+38 −33
Original line number Diff line number Diff line
@@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
	.val = 0,				\
}

/* sys_reg_desc initialiser for known cpufeature ID registers */
#define AA32_ID_SANITISED(name) {		\
	ID_DESC(name),				\
	.visibility = aa32_id_visibility,	\
	.val = 0,				\
}

/* sys_reg_desc initialiser for writable ID registers */
#define ID_WRITABLE(name, mask) {		\
	ID_DESC(name),				\
	.val = mask,				\
}

/*
 * 32bit ID regs are fully writable when the guest is 32bit
 * capable. Nothing in the KVM code should rely on 32bit features
 * anyway, only 64bit, so let the VMM do its worst.
 */
#define AA32_ID_WRITABLE(name) {		\
	ID_DESC(name),				\
	.visibility = aa32_id_visibility,	\
	.val = GENMASK(31, 0),			\
}

/* sys_reg_desc initialiser for cpufeature ID registers that need filtering */
#define ID_FILTERED(sysreg, name, mask) {	\
	ID_DESC(sysreg),				\
@@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {

	/* AArch64 mappings of the AArch32 ID registers */
	/* CRm=1 */
	AA32_ID_SANITISED(ID_PFR0_EL1),
	AA32_ID_SANITISED(ID_PFR1_EL1),
	AA32_ID_WRITABLE(ID_PFR0_EL1),
	AA32_ID_WRITABLE(ID_PFR1_EL1),
	{ SYS_DESC(SYS_ID_DFR0_EL1),
	  .access = access_id_reg,
	  .get_user = get_id_reg,
	  .set_user = set_id_dfr0_el1,
	  .visibility = aa32_id_visibility,
	  .reset = read_sanitised_id_dfr0_el1,
	  .val = ID_DFR0_EL1_PerfMon_MASK |
		 ID_DFR0_EL1_CopDbg_MASK, },
	  .val = GENMASK(31, 0) },
	ID_HIDDEN(ID_AFR0_EL1),
	AA32_ID_SANITISED(ID_MMFR0_EL1),
	AA32_ID_SANITISED(ID_MMFR1_EL1),
	AA32_ID_SANITISED(ID_MMFR2_EL1),
	AA32_ID_SANITISED(ID_MMFR3_EL1),
	AA32_ID_WRITABLE(ID_MMFR0_EL1),
	AA32_ID_WRITABLE(ID_MMFR1_EL1),
	AA32_ID_WRITABLE(ID_MMFR2_EL1),
	AA32_ID_WRITABLE(ID_MMFR3_EL1),

	/* CRm=2 */
	AA32_ID_SANITISED(ID_ISAR0_EL1),
	AA32_ID_SANITISED(ID_ISAR1_EL1),
	AA32_ID_SANITISED(ID_ISAR2_EL1),
	AA32_ID_SANITISED(ID_ISAR3_EL1),
	AA32_ID_SANITISED(ID_ISAR4_EL1),
	AA32_ID_SANITISED(ID_ISAR5_EL1),
	AA32_ID_SANITISED(ID_MMFR4_EL1),
	AA32_ID_SANITISED(ID_ISAR6_EL1),
	AA32_ID_WRITABLE(ID_ISAR0_EL1),
	AA32_ID_WRITABLE(ID_ISAR1_EL1),
	AA32_ID_WRITABLE(ID_ISAR2_EL1),
	AA32_ID_WRITABLE(ID_ISAR3_EL1),
	AA32_ID_WRITABLE(ID_ISAR4_EL1),
	AA32_ID_WRITABLE(ID_ISAR5_EL1),
	AA32_ID_WRITABLE(ID_MMFR4_EL1),
	AA32_ID_WRITABLE(ID_ISAR6_EL1),

	/* CRm=3 */
	AA32_ID_SANITISED(MVFR0_EL1),
	AA32_ID_SANITISED(MVFR1_EL1),
	AA32_ID_SANITISED(MVFR2_EL1),
	AA32_ID_WRITABLE(MVFR0_EL1),
	AA32_ID_WRITABLE(MVFR1_EL1),
	AA32_ID_WRITABLE(MVFR2_EL1),
	ID_UNALLOCATED(3,3),
	AA32_ID_SANITISED(ID_PFR2_EL1),
	AA32_ID_WRITABLE(ID_PFR2_EL1),
	ID_HIDDEN(ID_DFR1_EL1),
	AA32_ID_SANITISED(ID_MMFR5_EL1),
	AA32_ID_WRITABLE(ID_MMFR5_EL1),
	ID_UNALLOCATED(3,7),

	/* AArch64 ID registers */
@@ -5606,11 +5609,13 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)

	guard(mutex)(&kvm->arch.config_lock);

	if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) &&
	      irqchip_in_kernel(kvm) &&
	      kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) {
		kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK;
		kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK;
	if (!irqchip_in_kernel(kvm)) {
		u64 val;

		val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;
		kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
		val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC;
		kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val);
	}

	if (vcpu_has_nv(vcpu)) {
Loading