Commit 069a05e5 authored by Marc Zyngier
Browse files

KVM: arm64: nv: Handle VNCR_EL2-triggered faults



As VNCR_EL2.BADDR contains a VA, it is bound to trigger faults.

These faults can have multiple sources:

- We haven't mapped anything on the host: we need to compute the
  resulting translation, populate a TLB, and eventually map
  the corresponding page

- The permissions are out of whack: we need to tell the guest about
  this state of affairs

Note that the kernel doesn't support S1POE for itself yet, so
the particular case of a VNCR page mapped with no permissions
or with write-only permissions is not correctly handled yet.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-10-maz@kernel.org


Signed-off-by: Marc Zyngier <maz@kernel.org>
parent 6fb75733
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -99,6 +99,8 @@
#define ESR_ELx_AET_CE		(UL(6) << ESR_ELx_AET_SHIFT)

/* Shared ISS field definitions for Data/Instruction aborts */
/*
 * VNCR: single-bit flag at ISS bit 13. Use ESR_ELx_VNCR (the mask), not
 * ESR_ELx_VNCR_SHIFT (the bit position), when testing a syndrome value.
 * NOTE(review): presumably set when the abort was caused by an access
 * redirected through VNCR_EL2 -- confirm against the Arm ARM.
 */
#define ESR_ELx_VNCR_SHIFT	(13)
#define ESR_ELx_VNCR		(UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_SET_SHIFT	(11)
#define ESR_ELx_SET_MASK	(UL(3) << ESR_ELx_SET_SHIFT)
#define ESR_ELx_FnV_SHIFT	(10)
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@
#define KVM_REQ_RESYNC_PMU_EL0		KVM_ARCH_REQ(7)
#define KVM_REQ_NESTED_S2_UNMAP		KVM_ARCH_REQ(8)
#define KVM_REQ_GUEST_HYP_IRQ_PENDING	KVM_ARCH_REQ(9)
/*
 * Raised on a vcpu by kvm_translate_vncr() once its VNCR pseudo-TLB entry
 * has been populated. NOTE(review): the request is presumably serviced by
 * mapping the L1 VNCR page on the host -- handler not visible here.
 */
#define KVM_REQ_MAP_L1_VNCR_EL2		KVM_ARCH_REQ(10)

#define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
				     KVM_DIRTY_LOG_INITIALLY_SET)
+1 −0
Original line number Diff line number Diff line
@@ -335,5 +335,6 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,

/* VNCR management */
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
/* Exit handler for data aborts taken on VNCR_EL2-based accesses (ESR_ELx_EC_DABT_CUR) */
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);

#endif /* __ARM64_KVM_NESTED_H */
+1 −0
Original line number Diff line number Diff line
@@ -317,6 +317,7 @@ static exit_handle_fn arm_exit_handlers[] = {
	[ESR_ELx_EC_ERET]	= kvm_handle_eret,
	[ESR_ELx_EC_IABT_LOW]	= kvm_handle_guest_abort,
	[ESR_ELx_EC_DABT_LOW]	= kvm_handle_guest_abort,
	[ESR_ELx_EC_DABT_CUR]	= kvm_handle_vncr_abort,
	[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
	[ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
	[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
+159 −0
Original line number Diff line number Diff line
@@ -883,6 +883,165 @@ int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
	return 0;
}

static u64 read_vncr_el2(struct kvm_vcpu *vcpu)
{
	return (u64)sign_extend64(__vcpu_sys_reg(vcpu, VNCR_EL2), 48);
}

static int kvm_translate_vncr(struct kvm_vcpu *vcpu)
{
	bool write_fault, writable;
	unsigned long mmu_seq;
	struct vncr_tlb *vt;
	struct page *page;
	u64 va, pfn, gfn;
	int ret;

	vt = vcpu->arch.vncr_tlb;

	vt->wi = (struct s1_walk_info) {
		.regime	= TR_EL20,
		.as_el0	= false,
		.pan	= false,
	};
	vt->wr = (struct s1_walk_result){};
	vt->valid = false;

	guard(srcu)(&vcpu->kvm->srcu);

	va =  read_vncr_el2(vcpu);

	ret = __kvm_translate_va(vcpu, &vt->wi, &vt->wr, va);
	if (ret)
		return ret;

	write_fault = kvm_is_write_fault(vcpu);

	mmu_seq = vcpu->kvm->mmu_invalidate_seq;
	smp_rmb();

	gfn = vt->wr.pa >> PAGE_SHIFT;
	pfn = kvm_faultin_pfn(vcpu, gfn, write_fault, &writable, &page);
	if (is_error_noslot_pfn(pfn) || (write_fault && !writable))
		return -EFAULT;

	scoped_guard(write_lock, &vcpu->kvm->mmu_lock) {
		if (mmu_invalidate_retry(vcpu->kvm, mmu_seq))
			return -EAGAIN;

		vt->gva = va;
		vt->hpa = pfn << PAGE_SHIFT;
		vt->valid = true;
		vt->cpu = -1;

		kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
	}

	kvm_release_faultin_page(vcpu->kvm, page, false, vt->wr.pw);
	if (vt->wr.pw)
		mark_page_dirty(vcpu->kvm, gfn);

	return 0;
}

/*
 * Inject a permission fault into the guest hypervisor for the VNCR page.
 *
 * The syndrome is the one recorded for the current exit, with its fault
 * status code replaced by a permission fault at the level found in the
 * cached walk result, so that the level reflects the guest's own tables.
 */
static void inject_vncr_perm(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *tlb = vcpu->arch.vncr_tlb;
	u64 syndrome = kvm_vcpu_get_esr(vcpu);
	u64 perm_fsc;

	perm_fsc = FIELD_PREP(ESR_ELx_FSC,
			      ESR_ELx_FSC_PERM_L(tlb->wr.level));
	syndrome = (syndrome & ~ESR_ELx_FSC) | perm_fsc;

	kvm_inject_nested_sync(vcpu, syndrome);
}

/*
 * Check whether the vcpu's VNCR pseudo-TLB entry matches the current
 * guest context.
 *
 * Must be called with mmu_lock held for read.
 *
 * Returns true (a hit) when the entry is valid, was created for the
 * current VNCR_EL2 address and, for a non-global mapping, was created
 * under the ASID that is currently selected by TCR_EL2.A1. Returns false
 * (a miss) otherwise.
 */
static bool kvm_vncr_tlb_lookup(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;

	lockdep_assert_held_read(&vcpu->kvm->mmu_lock);

	if (!vt->valid)
		return false;

	if (read_vncr_el2(vcpu) != vt->gva)
		return false;

	if (vt->wr.nG) {
		u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		u64 ttbr = ((tcr & TCR_A1) ?
			    vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			    vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		u16 asid;

		asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
		/* Only the low 8 bits are significant without 16-bit ASIDs */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			asid &= GENMASK(7, 0);

		/* A non-global entry only hits for the ASID it was created with */
		return asid == vt->wr.asid;
	}

	return true;
}

int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
	u64 esr = kvm_vcpu_get_esr(vcpu);

	BUG_ON(!(esr & ESR_ELx_VNCR_SHIFT));

	if (esr_fsc_is_permission_fault(esr)) {
		inject_vncr_perm(vcpu);
	} else if (esr_fsc_is_translation_fault(esr)) {
		bool valid;
		int ret;

		scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
			valid = kvm_vncr_tlb_lookup(vcpu);

		if (!valid)
			ret = kvm_translate_vncr(vcpu);
		else
			ret = -EPERM;

		switch (ret) {
		case -EAGAIN:
		case -ENOMEM:
			/* Let's try again... */
			break;
		case -EFAULT:
		case -EINVAL:
		case -ENOENT:
		case -EACCES:
			/*
			 * Translation failed, inject the corresponding
			 * exception back to EL2.
			 */
			BUG_ON(!vt->wr.failed);

			esr &= ~ESR_ELx_FSC;
			esr |= FIELD_PREP(ESR_ELx_FSC, vt->wr.fst);

			kvm_inject_nested_sync(vcpu, esr);
			break;
		case -EPERM:
			/* Hack to deal with POE until we get kernel support */
			inject_vncr_perm(vcpu);
			break;
		case 0:
			break;
		}
	} else {
		WARN_ONCE(1, "Unhandled VNCR abort, ESR=%llx\n", esr);
	}

	return 1;
}

/*
 * Our emulated CPU doesn't support all the possible features. For the
 * sake of simplicity (and probably mental sanity), wipe out a number