Commit 11e5ea52 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Marc Zyngier
Browse files

KVM: arm64: Use helpers to classify exception types reported via ESR



Currently, we rely on the fact that exceptions can be trivially
classified by applying a mask/value pair to the syndrome value reported
via the ESR register, but this will no longer be true once we enable
support for 5 level paging.

So introduce a couple of helpers that encapsulate this mask/value pair
matching, and wire them up in the code. No functional change intended,
the actual handling of translation level -1 will be added in a
subsequent patch.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
Acked-by: default avatarMark Rutland <mark.rutland@arm.com>
[maz: folded in changes suggested by Mark]
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20231128140400.3132145-2-ardb@google.com
parent 10a0cc3b
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
	return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
}

static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
}

static inline bool esr_fsc_is_permission_fault(unsigned long esr)
{
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
}

static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
{
	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
}

const char *esr_get_class_string(unsigned long esr);
#endif /* __ASSEMBLY */

+16 −20
Original line number Diff line number Diff line
@@ -404,24 +404,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
}

static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
static inline
bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
	return esr_fsc_is_permission_fault(kvm_vcpu_get_esr(vcpu));
}

static __always_inline s8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
static inline
bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
{
	/*
	 * Note: With the introduction of FEAT_LPA2 an extra level of
	 * translation (level -1) is added. This level (obviously) doesn't
	 * follow the previous convention of encoding the 4 levels in the 2 LSBs
	 * of the FSC so this function breaks if the fault is for level -1.
	 *
	 * However, stage2 tables always use concatenated tables for first level
	 * lookup and therefore it is guaranteed that the level will be between
	 * 0 and 3, and this function continues to work.
	 */
	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
	return esr_fsc_is_translation_fault(kvm_vcpu_get_esr(vcpu));
}

static inline
u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
{
	unsigned long esr = kvm_vcpu_get_esr(vcpu);

	BUG_ON(!esr_fsc_is_permission_fault(esr));
	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
}

static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
@@ -464,12 +465,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
		 * first), then a permission fault to allow the flags
		 * to be set.
		 */
		switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
		case ESR_ELx_FSC_PERM:
			return true;
		default:
			return false;
		}
		return kvm_vcpu_trap_is_permission_fault(vcpu);
	}

	if (kvm_vcpu_trap_is_iabt(vcpu))
+1 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
	 */
	if (!(esr & ESR_ELx_S1PTW) &&
	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
	     (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
	     esr_fsc_is_permission_fault(esr))) {
		if (!__translate_far_to_hpfar(far, &hpfar))
			return false;
	} else {
+1 −1
Original line number Diff line number Diff line
@@ -591,7 +591,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
		bool valid;

		valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
		valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
			kvm_vcpu_dabt_isvalid(vcpu) &&
			!kvm_vcpu_abt_issea(vcpu) &&
			!kvm_vcpu_abt_iss1tw(vcpu);
+17 −18
Original line number Diff line number Diff line
@@ -1376,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
			  bool fault_is_perm)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
@@ -1390,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	s8 fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;

	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
	if (fault_is_perm)
		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
	if (fault_is_perm && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}
@@ -1411,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * only exception to this is when dirty logging is enabled at runtime
	 * and a write fault needs to collapse a block entry into a table.
	 */
	if (fault_status != ESR_ELx_FSC_PERM ||
	    (logging_active && write_fault)) {
	if (!fault_is_perm || (logging_active && write_fault)) {
		ret = kvm_mmu_topup_memory_cache(memcache,
						 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
		if (ret)
@@ -1529,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * backed by a THP and thus use block mapping if possible.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
		if (fault_status ==  ESR_ELx_FSC_PERM &&
		    fault_granule > PAGE_SIZE)
		if (fault_is_perm && fault_granule > PAGE_SIZE)
			vma_pagesize = fault_granule;
		else
			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
@@ -1543,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
		}
	}

	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
	if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
		/* Check the VMM hasn't introduced a new disallowed VMA */
		if (mte_allowed) {
			sanitise_mte_tags(kvm, pfn, vma_pagesize);
@@ -1569,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
	 * kvm_pgtable_stage2_map() should be called to change block size.
	 */
	if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
	if (fault_is_perm && vma_pagesize == fault_granule)
		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
	else
		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
@@ -1620,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
	unsigned long fault_status;
	unsigned long esr;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
@@ -1628,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
	esr = kvm_vcpu_get_esr(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	if (fault_status == ESR_ELx_FSC_FAULT) {
	if (esr_fsc_is_permission_fault(esr)) {
		/* Beyond sanitised PARange (which is the IPA limit) */
		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
			kvm_inject_size_fault(vcpu);
@@ -1668,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	if (fault_status != ESR_ELx_FSC_FAULT &&
	    fault_status != ESR_ELx_FSC_PERM &&
	    fault_status != ESR_ELx_FSC_ACCESS) {
	if (!esr_fsc_is_translation_fault(esr) &&
	    !esr_fsc_is_permission_fault(esr) &&
	    !esr_fsc_is_access_flag_fault(esr)) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1732,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));

	if (fault_status == ESR_ELx_FSC_ACCESS) {
	if (esr_fsc_is_access_flag_fault(esr)) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
			     esr_fsc_is_permission_fault(esr));
	if (ret == 0)
		ret = 1;
out: