Commit 0e886324 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Rework heuristics for resolving the fault IPA (HPFAR_EL2 v. re-walk
     stage-1 page tables) to align with the architecture. This avoids
     possibly taking an SEA at EL2 on the page table walk or using an
     architecturally UNKNOWN fault IPA

   - Use acquire/release semantics in the KVM FF-A proxy to avoid
     reading a stale value for the FF-A version

   - Fix KVM guest driver to match PV CPUID hypercall ABI

   - Use Inner Shareable Normal Write-Back mappings at stage-1 in KVM
     selftests, which is the only memory type for which atomic
     instructions are architecturally guaranteed to work

  s390:

   - Don't use %pK for debug printing and tracepoints

  x86:

   - Use a separate subclass when acquiring KVM's per-CPU posted
     interrupts wakeup lock in the scheduled out path, i.e. when adding
     a vCPU on the list of vCPUs to wake, to work around a false positive
     deadlock. The schedule out code runs with a scheduler lock that the
     wakeup handler takes in the opposite order; but it does so with
     IRQs disabled and cannot run concurrently with a wakeup

   - Explicitly zero-initialize on-stack CPUID unions

   - Allow building irqbypass.ko as a module when kvm.ko is a module

   - Wrap relatively expensive sanity check with KVM_PROVE_MMU

   - Acquire SRCU in KVM_GET_MP_STATE to protect guest memory accesses

  selftests:

   - Add more scenarios to the MONITOR/MWAIT test

   - Add option to rseq test to override /dev/cpu_dma_latency

   - Bring list of exit reasons up to date

   - Clean up the Makefile so that tests valid on all architectures are
     listed only once

  Other:

   - Documentation fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
  KVM: arm64: Use acquire/release to communicate FF-A version negotiation
  KVM: arm64: selftests: Explicitly set the page attrs to Inner-Shareable
  KVM: arm64: selftests: Introduce and use hardware-definition macros
  KVM: VMX: Use separate subclasses for PI wakeup lock to squash false positive
  KVM: VMX: Assert that IRQs are disabled when putting vCPU on PI wakeup list
  KVM: x86: Explicitly zero-initialize on-stack CPUID unions
  KVM: Allow building irqbypass.ko as as module when kvm.ko is a module
  KVM: x86/mmu: Wrap sanity check on number of TDP MMU pages with KVM_PROVE_MMU
  KVM: selftests: Add option to rseq test to override /dev/cpu_dma_latency
  KVM: x86: Acquire SRCU in KVM_GET_MP_STATE to protect guest memory accesses
  Documentation: kvm: remove KVM_CAP_MIPS_TE
  Documentation: kvm: organize capabilities in the right section
  Documentation: kvm: fix some definition lists
  Documentation: kvm: drop "Capability" heading from capabilities
  Documentation: kvm: give correct name for KVM_CAP_SPAPR_MULTITCE
  Documentation: KVM: KVM_GET_SUPPORTED_CPUID now exposes TSC_DEADLINE
  selftests: kvm: list once tests that are valid on all architectures
  selftests: kvm: bring list of exit reasons up to date
  selftests: kvm: revamp MONITOR/MWAIT tests
  KVM: arm64: Don't translate FAR if invalid/unsafe
  ...
parents bec7dcbc c478032d
Loading
Loading
Loading
Loading
+383 −406

File changed.

Preview size limit exceeded, changes collapsed.

+42 −2
Original line number Diff line number Diff line
@@ -121,6 +121,15 @@
#define ESR_ELx_FSC_SEA_TTW(n)	(0x14 + (n))
#define ESR_ELx_FSC_SECC	(0x18)
#define ESR_ELx_FSC_SECC_TTW(n)	(0x1c + (n))
#define ESR_ELx_FSC_ADDRSZ	(0x00)

/*
 * Annoyingly, the negative levels for Address size faults aren't laid out
 * contiguously (or in the desired order)
 */
#define ESR_ELx_FSC_ADDRSZ_nL(n)	((n) == -1 ? 0x25 : 0x2C)
#define ESR_ELx_FSC_ADDRSZ_L(n)		((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
						   (ESR_ELx_FSC_ADDRSZ + (n)))

/* Status codes for individual page table levels */
#define ESR_ELx_FSC_ACCESS_L(n)	(ESR_ELx_FSC_ACCESS + (n))
@@ -161,8 +170,6 @@
#define ESR_ELx_Xs_MASK		(GENMASK_ULL(4, 0))

/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ	(0x00)
#define ESR_ELx_FSC_ADDRSZ_L(n)	(ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV		(UL(1) << 24)
#define ESR_ELx_COND_SHIFT	(20)
#define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -464,6 +471,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
	       (esr == ESR_ELx_FSC_ACCESS_L(0));
}

/*
 * Return true when the fault status code held in @esr (the bits selected by
 * ESR_ELx_FSC) matches one of the Address size fault encodings for
 * translation levels -1 through 3.
 */
static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
{
	switch (esr & ESR_ELx_FSC) {
	case ESR_ELx_FSC_ADDRSZ_L(-1):
	case ESR_ELx_FSC_ADDRSZ_L(0):
	case ESR_ELx_FSC_ADDRSZ_L(1):
	case ESR_ELx_FSC_ADDRSZ_L(2):
	case ESR_ELx_FSC_ADDRSZ_L(3):
		return true;
	default:
		return false;
	}
}

/*
 * Return true when the fault status code held in @esr (the bits selected by
 * ESR_ELx_FSC) matches ESR_ELx_FSC_SEA_TTW() for any of the levels -1
 * through 3.
 */
static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
{
	switch (esr & ESR_ELx_FSC) {
	case ESR_ELx_FSC_SEA_TTW(-1):
	case ESR_ELx_FSC_SEA_TTW(0):
	case ESR_ELx_FSC_SEA_TTW(1):
	case ESR_ELx_FSC_SEA_TTW(2):
	case ESR_ELx_FSC_SEA_TTW(3):
		return true;
	default:
		return false;
	}
}

/*
 * Return true when the fault status code held in @esr (the bits selected by
 * ESR_ELx_FSC) matches ESR_ELx_FSC_SECC_TTW() for any of the levels -1
 * through 3.
 */
static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
{
	switch (esr & ESR_ELx_FSC) {
	case ESR_ELx_FSC_SECC_TTW(-1):
	case ESR_ELx_FSC_SECC_TTW(0):
	case ESR_ELx_FSC_SECC_TTW(1):
	case ESR_ELx_FSC_SECC_TTW(2):
	case ESR_ELx_FSC_SECC_TTW(3):
		return true;
	default:
		return false;
	}
}

/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
static inline bool esr_iss_is_eretax(unsigned long esr)
{
+6 −1
Original line number Diff line number Diff line
@@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc

static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
	u64 hpfar = vcpu->arch.fault.hpfar_el2;

	if (unlikely(!(hpfar & HPFAR_EL2_NS)))
		return INVALID_GPA;

	return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
}

static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@
 * Was this synchronous external abort a RAS notification?
 * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
 */
static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
static inline int kvm_handle_guest_sea(void)
{
	/* apei_claim_sea(NULL) expects to mask interrupts itself */
	lockdep_assert_irqs_enabled();
+48 −22
Original line number Diff line number Diff line
@@ -12,6 +12,16 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

/*
 * Report whether the fault described by @esr is one for which the caller may
 * go on to translate the faulting address.
 *
 * Returns false when esr_fsc_is_sea_ttw() or esr_fsc_is_secc_ttw() matches,
 * or when the fault status code is ESR_ELx_FSC_EXTABT with ESR_ELx_FnV set;
 * returns true in every other case.
 */
static inline bool __fault_safe_to_translate(u64 esr)
{
	if (esr_fsc_is_sea_ttw(esr))
		return false;

	if (esr_fsc_is_secc_ttw(esr))
		return false;

	if ((esr & ESR_ELx_FSC) == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV))
		return false;

	return true;
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
	int ret;
@@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
	return true;
}

static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
/*
 * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR.
 */
static inline bool __hpfar_valid(u64 esr)
{
	u64 hpfar, far;

	far = read_sysreg_el2(SYS_FAR);

	/*
	 * The HPFAR can be invalid if the stage 2 fault did not
	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
	 * bit is clear) and one of the two following cases are true:
	 *   1. The fault was due to a permission fault
	 *   2. The processor carries errata 834220
	 * CPUs affected by ARM erratum #834220 may incorrectly report a
	 * stage-2 translation fault when a stage-1 permission fault occurs.
	 *
	 * Therefore, for all non S1PTW faults where we either have a
	 * permission fault or the errata workaround is enabled, we
	 * resolve the IPA using the AT instruction.
	 * Re-walk the page tables to determine if a stage-1 fault actually
	 * occurred.
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     esr_fsc_is_permission_fault(esr))) {
		if (!__translate_far_to_hpfar(far, &hpfar))
	if (cpus_have_final_cap(ARM64_WORKAROUND_834220) &&
	    esr_fsc_is_translation_fault(esr))
		return false;
	} else {
		hpfar = read_sysreg(hpfar_el2);

	if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr))
		return true;

	if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr))
		return true;

	return esr_fsc_is_addr_sz_fault(esr);
}

	fault->far_el2 = far;
	fault->hpfar_el2 = hpfar;
/*
 * Fill in @fault for the exception described by @esr.
 *
 * far_el2 is always loaded from the FAR system register. hpfar_el2 starts
 * out as 0 and is only replaced once a value has been obtained, either by
 * reading HPFAR_EL2 directly or by translating the FAR.
 *
 * Returns false only when __translate_far_to_hpfar() fails; returns true
 * otherwise, including the case where no HPFAR value could be obtained
 * (hpfar_el2 is then left at 0, i.e. without HPFAR_EL2_NS set).
 */
static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
	u64 hpfar;

	fault->far_el2		= read_sysreg_el2(SYS_FAR);
	fault->hpfar_el2	= 0;

	/*
	 * Three outcomes: take the register value as-is, bail out early
	 * leaving hpfar_el2 == 0 (hpfar is never assigned on that path), or
	 * derive the value from the FAR.
	 */
	if (__hpfar_valid(esr))
		hpfar = read_sysreg(hpfar_el2);
	else if (unlikely(!__fault_safe_to_translate(esr)))
		return true;
	else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar))
		return false;

	/*
	 * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid
	 * HPFAR value.
	 */
	fault->hpfar_el2 = hpfar | HPFAR_EL2_NS;
	return true;
}

Loading