Commit 4f128f8e authored by Marc Zyngier's avatar Marc Zyngier Committed by Oliver Upton
Browse files

KVM: arm64: nv: Support multiple nested Stage-2 mmu structures



Add Stage-2 mmu data structures for virtual EL2 and for nested guests.
We don't yet populate shadow Stage-2 page tables, but we now have a
framework for getting to a shadow Stage-2 pgd.

We allocate twice the number of vcpus as Stage-2 mmu structures because
that's sufficient for each vcpu running two translation regimes without
having to flush the Stage-2 page tables.

Co-developed-by: default avatarChristoffer Dall <christoffer.dall@arm.com>
Signed-off-by: default avatarChristoffer Dall <christoffer.dall@arm.com>
Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240614144552.2773592-2-maz@kernel.org


Signed-off-by: default avatarOliver Upton <oliver.upton@linux.dev>
parent 83a7eefe
Loading
Loading
Loading
Loading
+36 −0
Original line number Diff line number Diff line
@@ -189,6 +189,33 @@ struct kvm_s2_mmu {
	uint64_t split_page_chunk_size;

	struct kvm_arch *arch;

	/*
	 * For a shadow stage-2 MMU, the virtual vttbr used by the
	 * host to parse the guest S2.
	 * This either contains:
	 * - the virtual VTTBR programmed by the guest hypervisor with
         *   CnP cleared
	 * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
	 *
	 * We also cache the full VTCR which gets used for TLB invalidation,
	 * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
	 * to be cached in a TLB" to the letter.
	 */
	u64	tlb_vttbr;
	u64	tlb_vtcr;

	/*
	 * true when this represents a nested context where virtual
	 * HCR_EL2.VM == 1
	 */
	bool	nested_stage2_enabled;

	/*
	 *  0: Nobody is currently using this, check vttbr for validity
	 * >0: Somebody is actively using this.
	 */
	atomic_t refcnt;
};

struct kvm_arch_memory_slot {
@@ -256,6 +283,14 @@ struct kvm_arch {
	 */
	u64 fgu[__NR_FGT_GROUP_IDS__];

	/*
	 * Stage 2 paging state for VMs with nested S2 using a virtual
	 * VMID.
	 */
	struct kvm_s2_mmu *nested_mmus;
	size_t nested_mmus_size;
	int nested_mmus_next;

	/* Interrupt controller */
	struct vgic_dist	vgic;

@@ -1306,6 +1341,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);

int __init kvm_set_ipa_limit(void);
u32 kvm_get_pa_bits(struct kvm *kvm);

#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
+24 −0
Original line number Diff line number Diff line
@@ -98,6 +98,7 @@ alternative_cb_end
#include <asm/mmu_context.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_nested.h>

void kvm_update_va_mask(struct alt_instr *alt,
			__le32 *origptr, __le32 *updptr, int nr_inst);
@@ -165,6 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);

void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);

void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_uninit_stage2_mmu(struct kvm *kvm);
@@ -326,5 +329,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
	return container_of(mmu->arch, struct kvm, arch);
}

static inline u64 get_vmid(u64 vttbr)
{
	return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
		VTTBR_VMID_SHIFT;
}

static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
{
	return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
}

static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	/*
	 * Be careful, mmu may not be fully initialised so do look at
	 * *any* of its fields.
	 */
	return &kvm->arch.mmu != mmu;
}

#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
+6 −0
Original line number Diff line number Diff line
@@ -61,6 +61,12 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
}

extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);

int kvm_init_nv_sysregs(struct kvm *kvm);

+11 −0
Original line number Diff line number Diff line
@@ -170,6 +170,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	mutex_unlock(&kvm->lock);
#endif

	kvm_init_nested(kvm);

	ret = kvm_share_hyp(kvm, kvm + 1);
	if (ret)
		return ret;
@@ -551,6 +553,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
	struct kvm_s2_mmu *mmu;
	int *last_ran;

	if (vcpu_has_nv(vcpu))
		kvm_vcpu_load_hw_mmu(vcpu);

	mmu = vcpu->arch.hw_mmu;
	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);

@@ -601,6 +606,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);
	if (vcpu_has_nv(vcpu))
		kvm_vcpu_put_hw_mmu(vcpu);
	kvm_arm_vmid_clear_active();

	vcpu_clear_on_unsupported_cpu(vcpu);
@@ -1459,6 +1466,10 @@ static int kvm_setup_vcpu(struct kvm_vcpu *vcpu)
	if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu)
		ret = kvm_arm_set_default_pmu(kvm);

	/* Prepare for nested if required */
	if (!ret && vcpu_has_nv(vcpu))
		ret = kvm_vcpu_init_nested(vcpu);

	return ret;
}

+48 −21
Original line number Diff line number Diff line
@@ -328,7 +328,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}
@@ -855,21 +855,9 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.icache_inval_pou	= invalidate_icache_guest_page,
};

/**
 * kvm_init_stage2_mmu - Initialise a S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 * @type:	The machine type of the virtual machine
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
{
	u32 kvm_ipa_limit = get_kvm_ipa_limit();
	int cpu, err;
	struct kvm_pgtable *pgt;
	u64 mmfr0, mmfr1;
	u32 phys_shift;

@@ -896,11 +884,51 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);

	return 0;
}

/**
 * kvm_init_stage2_mmu - Initialise a S2 MMU structure
 * @kvm:	The pointer to the KVM structure
 * @mmu:	The pointer to the s2 MMU structure
 * @type:	The machine type of the virtual machine
 *
 * Allocates only the stage-2 HW PGD level table(s).
 * Note we don't need locking here as this is only called in two cases:
 *
 * - when the VM is created, which can't race against anything
 *
 * - when secondary kvm_s2_mmu structures are initialised for NV
 *   guests, and the caller must hold kvm->lock as this is called on a
 *   per-vcpu basis.
 */
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
{
	int cpu, err;
	struct kvm_pgtable *pgt;

	/*
	 * If we already have our page tables in place, and that the
	 * MMU context is the canonical one, we have a bug somewhere,
	 * as this is only supposed to ever happen once per VM.
	 *
	 * Otherwise, we're building nested page tables, and that's
	 * probably because userspace called KVM_ARM_VCPU_INIT more
	 * than once on the same vcpu. Since that's actually legal,
	 * don't kick a fuss and leave gracefully.
	 */
	if (mmu->pgt != NULL) {
		if (kvm_is_nested_s2_mmu(kvm, mmu))
			return 0;

		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	err = kvm_init_ipa_range(mmu, type);
	if (err)
		return err;

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
	if (!pgt)
		return -ENOMEM;
@@ -925,6 +953,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t

	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);

	if (kvm_is_nested_s2_mmu(kvm, mmu))
		kvm_init_nested_s2_mmu(mmu);

	return 0;

out_destroy_pgtable:
@@ -976,7 +1008,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
			kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
@@ -2022,11 +2054,6 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_uninit_stage2_mmu(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
@@ -2034,7 +2061,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	write_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
	kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
	write_unlock(&kvm->mmu_lock);
}

Loading