Commit 2a359e07 authored by Marc Zyngier
Browse files

KVM: arm64: nv: Handle mapping of VNCR_EL2 at EL2



Now that we can handle faults triggered through VNCR_EL2, we need
to map the corresponding page at EL2. But where, you'll ask?

Since each CPU in the system can run a vcpu, we need a per-CPU
mapping. For that, we carve out an NR_CPUS-sized range in the fixmap,
giving us a per-CPU VA at which to map the guest's VNCR page.

The mapping occurs both on vcpu load and on the back of a fault,
both generating a request that will take care of the mapping.
That mapping will also get dropped on vcpu put.

Yes, this is a bit heavy-handed, but it is simple. Eventually,
we may want to have a per-VM, per-CPU mapping, which would avoid
all the TLBI overhead.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-11-maz@kernel.org


Signed-off-by: Marc Zyngier <maz@kernel.org>
parent 069a05e5
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -48,6 +48,12 @@ enum fixed_addresses {
	FIX_EARLYCON_MEM_BASE,
	FIX_TEXT_POKE0,

#ifdef CONFIG_KVM
	/* One slot per CPU, mapping the guest's VNCR page at EL2. */
	FIX_VNCR_END,
	FIX_VNCR = FIX_VNCR_END + NR_CPUS,
#endif

#ifdef CONFIG_ACPI_APEI_GHES
	/* Used for GHES mapping from assorted contexts */
	FIX_APEI_GHES_IRQ,
+1 −0
Original line number Diff line number Diff line
@@ -658,6 +658,7 @@ struct kvm_host_data {
#define KVM_HOST_DATA_FLAG_TRBE_ENABLED			4
#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED	5
#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT		6
#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED		7
	unsigned long flags;

	struct kvm_cpu_context host_ctxt;
+7 −0
Original line number Diff line number Diff line
@@ -337,4 +337,11 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);

/*
 * Convert a CPU number into the fixmap index used to map the guest's
 * VNCR page on that CPU. Indices are handed out downwards from FIX_VNCR,
 * one per CPU; a CPU number that is not below NR_CPUS is a BUG.
 * The argument is evaluated exactly once.
 */
#define vncr_fixmap(c)						\
	({							\
		u32 __cpu = (c);				\
								\
		BUG_ON(!(__cpu < NR_CPUS));			\
		FIX_VNCR - __cpu;				\
	})

#endif /* __ARM64_KVM_NESTED_H */
+89 −9
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>

#include <asm/fixmap.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
@@ -704,23 +705,35 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
{
	/*
	 * The vCPU kept its reference on the MMU after the last put, keep
	 * rolling with it.
	 * If the vCPU kept its reference on the MMU after the last put,
	 * keep rolling with it.
	 */
	if (vcpu->arch.hw_mmu)
		return;

	if (is_hyp_ctxt(vcpu)) {
		if (!vcpu->arch.hw_mmu)
			vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
	} else {
		write_lock(&vcpu->kvm->mmu_lock);
		if (!vcpu->arch.hw_mmu) {
			scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
				vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
		write_unlock(&vcpu->kvm->mmu_lock);
		}

		if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
			kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
	}
}

void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
{
	/* Unconditionally drop the VNCR mapping if we have one */
	if (host_data_test_flag(L1_VNCR_MAPPED)) {
		BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
		BUG_ON(is_hyp_ctxt(vcpu));

		clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
		vcpu->arch.vncr_tlb->cpu = -1;
		host_data_clear_flag(L1_VNCR_MAPPED);
	}

	/*
	 * Keep a reference on the associated stage-2 MMU if the vCPU is
	 * scheduling out and not in WFI emulation, suggesting it is likely to
@@ -1042,6 +1055,70 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
	return 1;
}

static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
{
	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
	pgprot_t prot;

	guard(preempt)();
	guard(read_lock)(&vcpu->kvm->mmu_lock);

	/*
	 * The request to map VNCR may have raced against some other
	 * event, such as an interrupt, and may not be valid anymore.
	 */
	if (is_hyp_ctxt(vcpu))
		return;

	/*
	 * Check that the pseudo-TLB is valid and that VNCR_EL2 still
	 * contains the expected value. If it doesn't, we simply bail out
	 * without a mapping -- a transformed MSR/MRS will generate the
	 * fault and allows us to populate the pseudo-TLB.
	 */
	if (!vt->valid)
		return;

	if (read_vncr_el2(vcpu) != vt->gva)
		return;

	if (vt->wr.nG) {
		u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		u64 ttbr = ((tcr & TCR_A1) ?
			    vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			    vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		u16 asid;

		asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			asid &= GENMASK(7, 0);

		if (asid != vt->wr.asid)
			return;
	}

	vt->cpu = smp_processor_id();

	if (vt->wr.pw && vt->wr.pr)
		prot = PAGE_KERNEL;
	else if (vt->wr.pr)
		prot = PAGE_KERNEL_RO;
	else
		prot = PAGE_NONE;

	/*
	 * We can't map write-only (or no permission at all) in the kernel,
	 * but the guest can do it if using POE, so we'll have to turn a
	 * translation fault into a permission fault at runtime.
	 * FIXME: WO doesn't work at all, need POE support in the kernel.
	 */
	if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) {
		__set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot);
		host_data_set_flag(L1_VNCR_MAPPED);
	}
}

/*
 * Our emulated CPU doesn't support all the possible features. For the
 * sake of simplicity (and probably mental sanity), wipe out a number
@@ -1582,6 +1659,9 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
		write_unlock(&vcpu->kvm->mmu_lock);
	}

	if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu))
		kvm_map_l1_vncr(vcpu);

	/* Must be last, as may switch context! */
	if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
		kvm_inject_nested_irq(vcpu);