Commit c353fde1 authored by Vincent Donnefort, committed by Marc Zyngier
Browse files

KVM: arm64: np-guest CMOs with PMD_SIZE fixmap



With the introduction of stage-2 huge mappings in the pKVM hypervisor,
guest page CMOs are needed at PMD_SIZE granularity. The fixmap only supports
PAGE_SIZE, and iterating over the huge page is time-consuming (mostly due
to the TLBI in hyp_fixmap_unmap), which is a problem for EL2 latency.

Introduce a shared PMD_SIZE fixmap (hyp_fixblock_map/hyp_fixblock_unmap)
to improve guest page CMOs when stage-2 huge mappings are installed.

On a Pixel6, the iterative solution resulted in a latency of ~700us,
while the PMD_SIZE fixmap reduces it to ~100us.

Because of the horrendous private range allocation that would be
necessary, this is disabled on systems with 64KiB pages.

Suggested-by: Quentin Perret <qperret@google.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20250521124834.1070650-11-vdonnefort@google.com


Signed-off-by: Marc Zyngier <maz@kernel.org>
parent db14091d
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -59,6 +59,11 @@ typedef u64 kvm_pte_t;

#define KVM_PHYS_INVALID		(-1ULL)

/*
 * Descriptor type field, held in bit 1 of a PTE: 0 denotes a block entry,
 * 1 denotes a page or a table entry (both share the same encoding).
 */
#define KVM_PTE_TYPE			BIT(1)
#define KVM_PTE_TYPE_BLOCK		0
#define KVM_PTE_TYPE_PAGE		1
#define KVM_PTE_TYPE_TABLE		1

#define KVM_PTE_LEAF_ATTR_LO		GENMASK(11, 2)

#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX	GENMASK(4, 2)
+3 −1
Original line number Diff line number Diff line
@@ -13,9 +13,11 @@
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;

int hyp_create_pcpu_fixmap(void);
int hyp_create_fixmap(void);
void *hyp_fixmap_map(phys_addr_t phys);
void hyp_fixmap_unmap(void);
void *hyp_fixblock_map(phys_addr_t phys, size_t *size);
void hyp_fixblock_unmap(void);

int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
+25 −17
Original line number Diff line number Diff line
@@ -216,34 +216,42 @@ static void guest_s2_put_page(void *addr)
	hyp_put_page(&current_vm->pool, addr);
}

static void clean_dcache_guest_page(void *va, size_t size)
static void __apply_guest_page(void *va, size_t size,
			       void (*func)(void *addr, size_t size))
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
					  PAGE_SIZE);
		size_t map_size = PAGE_SIZE;
		void *map;

		if (IS_ALIGNED((unsigned long)va, PMD_SIZE) && size >= PMD_SIZE)
			map = hyp_fixblock_map(__hyp_pa(va), &map_size);
		else
			map = hyp_fixmap_map(__hyp_pa(va));

		func(map, map_size);

		if (map_size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();
		va += PAGE_SIZE;
		size -= PAGE_SIZE;

		size -= map_size;
		va += map_size;
	}
}

static void invalidate_icache_guest_page(void *va, size_t size)
static void clean_dcache_guest_page(void *va, size_t size)
{
	size += va - PTR_ALIGN_DOWN(va, PAGE_SIZE);
	va = PTR_ALIGN_DOWN(va, PAGE_SIZE);
	size = PAGE_ALIGN(size);

	while (size) {
		__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
					       PAGE_SIZE);
		hyp_fixmap_unmap();
		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	__apply_guest_page(va, size, __clean_dcache_guest_page);
}

/*
 * Run __invalidate_icache_guest_page() over the guest range described by
 * @va and @size, delegating the mapping/unmapping of each chunk to
 * __apply_guest_page().
 */
static void invalidate_icache_guest_page(void *va, size_t size)
{
	__apply_guest_page(va, size, __invalidate_icache_guest_page);
}

int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
+89 −8
Original line number Diff line number Diff line
@@ -229,9 +229,8 @@ int hyp_map_vectors(void)
	return 0;
}

void *hyp_fixmap_map(phys_addr_t phys)
static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
{
	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
	kvm_pte_t pte, *ptep = slot->ptep;

	pte = *ptep;
@@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys)
	return (void *)slot->addr;
}

/*
 * Map @phys through the calling CPU's entry in fixmap_slots and return the
 * virtual address handed back by fixmap_map_slot().
 */
void *hyp_fixmap_map(phys_addr_t phys)
{
	return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
}

static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
{
	kvm_pte_t *ptep = slot->ptep;
	u64 addr = slot->addr;
	u32 level;

	if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
		level = KVM_PGTABLE_LAST_LEVEL;
	else
		level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */

	WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);

@@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
	 * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
	 */
	dsb(ishst);
	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
	dsb(ish);
	isb();
}
@@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void)
static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
				   enum kvm_pgtable_walk_flags visit)
{
	struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
	struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;

	if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
	if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
		return -EINVAL;

	slot->addr = ctx->addr;
@@ -296,13 +306,84 @@ static int create_fixmap_slot(u64 addr, u64 cpu)
	struct kvm_pgtable_walker walker = {
		.cb	= __create_fixmap_slot_cb,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg = (void *)cpu,
		.arg	= per_cpu_ptr(&fixmap_slots, cpu),
	};

	return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}

int hyp_create_pcpu_fixmap(void)
/*
 * The PMD_SIZE fixmap ("fixblock") is only built when pages are smaller than
 * 64KiB (PAGE_SHIFT < 16). When it is available, a single slot is shared by
 * all callers and hyp_fixblock_lock serialises its use.
 */
#if PAGE_SHIFT < 16
#define HAS_FIXBLOCK
static struct hyp_fixmap_slot hyp_fixblock_slot;
static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
#endif

/*
 * Set up the shared PMD_SIZE fixmap slot used by hyp_fixblock_map() and
 * hyp_fixblock_unmap().
 *
 * A PMD_SIZE-aligned private VA range is allocated and mapped onto a
 * PMD-aligned RAM physical address, then the page-table is walked with
 * __create_fixmap_slot_cb() so that the resulting leaf entry is recorded in
 * hyp_fixblock_slot.
 *
 * Return: 0 on success (always 0 when HAS_FIXBLOCK is not defined), -EINVAL
 * if no memory region can hold a PMD-aligned block, or the error reported by
 * the VA allocation, the mapping or the walk.
 */
static int create_fixblock(void)
{
#ifdef HAS_FIXBLOCK
	struct kvm_pgtable_walker walker = {
		.cb	= __create_fixmap_slot_cb,
		.flags	= KVM_PGTABLE_WALK_LEAF,
		.arg	= &hyp_fixblock_slot,
	};
	unsigned long addr;
	phys_addr_t phys;
	int ret, i;

	/* Find a RAM phys address, PMD aligned */
	for (i = 0; i < hyp_memblock_nr; i++) {
		phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
		/*
		 * The block [phys, phys + PMD_SIZE) fits as long as it does
		 * not run past the end of the region; ending exactly on the
		 * region's end is still a fit, hence '<=' rather than '<'.
		 */
		if (phys + PMD_SIZE <= (hyp_memory[i].base + hyp_memory[i].size))
			break;
	}

	if (i >= hyp_memblock_nr)
		return -EINVAL;

	/* pkvm_pgd_lock covers the VA allocation, the mapping and the walk. */
	hyp_spin_lock(&pkvm_pgd_lock);
	addr = ALIGN(__io_map_base, PMD_SIZE);
	ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
	if (ret)
		goto unlock;

	ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);

unlock:
	hyp_spin_unlock(&pkvm_pgd_lock);

	return ret;
#else
	return 0;
#endif
}

/*
 * Map @phys for hypervisor access, using the PMD_SIZE fixmap when available.
 *
 * @phys: physical address to map.
 * @size: output; set to the size actually mapped (PMD_SIZE with HAS_FIXBLOCK,
 *        PAGE_SIZE otherwise).
 *
 * With HAS_FIXBLOCK, hyp_fixblock_lock is taken here and is still held when
 * the function returns; it is released by hyp_fixblock_unmap(). Without
 * HAS_FIXBLOCK this falls back to hyp_fixmap_map().
 *
 * Return: the virtual address of the mapping.
 */
void *hyp_fixblock_map(phys_addr_t phys, size_t *size)
{
#ifdef HAS_FIXBLOCK
	*size = PMD_SIZE;
	hyp_spin_lock(&hyp_fixblock_lock);
	return fixmap_map_slot(&hyp_fixblock_slot, phys);
#else
	*size = PAGE_SIZE;
	return hyp_fixmap_map(phys);
#endif
}

/*
 * Tear down the mapping installed by hyp_fixblock_map().
 *
 * With HAS_FIXBLOCK, the shared slot is cleared and hyp_fixblock_lock (taken
 * in hyp_fixblock_map()) is released. Without HAS_FIXBLOCK this falls back to
 * hyp_fixmap_unmap().
 */
void hyp_fixblock_unmap(void)
{
#ifdef HAS_FIXBLOCK
	fixmap_clear_slot(&hyp_fixblock_slot);
	hyp_spin_unlock(&hyp_fixblock_lock);
#else
	hyp_fixmap_unmap();
#endif
}

int hyp_create_fixmap(void)
{
	unsigned long addr, i;
	int ret;
@@ -322,7 +403,7 @@ int hyp_create_pcpu_fixmap(void)
			return ret;
	}

	return 0;
	return create_fixblock();
}

int hyp_create_idmap(u32 hyp_va_bits)
+1 −1
Original line number Diff line number Diff line
@@ -312,7 +312,7 @@ void __noreturn __pkvm_init_finalise(void)
	if (ret)
		goto out;

	ret = hyp_create_pcpu_fixmap();
	ret = hyp_create_fixmap();
	if (ret)
		goto out;

Loading