Commit e912efed authored by Quentin Perret's avatar Quentin Perret Committed by Marc Zyngier
Browse files

KVM: arm64: Introduce the EL1 pKVM MMU



Introduce a set of helper functions allowing to manipulate the pKVM
guest stage-2 page-tables from EL1 using pKVM's HVC interface.

Each helper has an exact one-to-one correspondance with the traditional
kvm_pgtable_stage2_*() functions from pgtable.c, with a strictly
matching prototype. This will ease plumbing later on in mmu.c.

These callbacks track the gfn->pfn mappings in a simple rb_tree indexed
by IPA in lieu of a page-table. This rb-tree is kept in sync with pKVM's
state and is protected by the mmu_lock like a traditional stage-2
page-table.

Signed-off-by: default avatarQuentin Perret <qperret@google.com>
Tested-by: default avatarFuad Tabba <tabba@google.com>
Reviewed-by: default avatarFuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20241218194059.3670226-18-qperret@google.com


Signed-off-by: default avatarMarc Zyngier <maz@kernel.org>
parent 0adce4d4
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
	phys_addr_t head;
	unsigned long nr_pages;
	struct pkvm_mapping *mapping; /* only used from EL1 */
};

static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+14 −9
Original line number Diff line number Diff line
@@ -412,16 +412,21 @@ static inline bool kvm_pgtable_walk_lock_held(void)
 *			be used instead of block mappings.
 */
struct kvm_pgtable {
	union {
		struct rb_root					pkvm_mappings;
		struct {
			u32					ia_bits;
			s8					start_level;
			kvm_pteref_t				pgd;
			struct kvm_pgtable_mm_ops		*mm_ops;

			/* Stage-2 only */
	struct kvm_s2_mmu			*mmu;
			enum kvm_pgtable_stage2_flags		flags;
			kvm_pgtable_force_pte_cb_t		force_pte_cb;
		};
	};
	struct kvm_s2_mmu					*mmu;
};

/**
 * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
+26 −0
Original line number Diff line number Diff line
@@ -137,4 +137,30 @@ static inline size_t pkvm_host_sve_state_size(void)
			SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
}

struct pkvm_mapping {
	struct rb_node node;
	u64 gfn;
	u64 pfn;
};

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			    enum kvm_pgtable_prot prot, void *mc,
			    enum kvm_pgtable_walk_flags flags);
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags);
void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags);
int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc);
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc,
					       bool force_pte);
#endif	/* __ARM64_KVM_PKVM_H__ */
+201 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>
@@ -268,3 +269,203 @@ static int __init finalize_pkvm(void)
	return ret;
}
device_initcall_sync(finalize_pkvm);

static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
 * of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)				\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,		\
							     ((__start) >> PAGE_SHIFT));	\
	     __tmp && ({									\
				__map = rb_entry(__tmp, struct pkvm_mapping, node);		\
				__tmp = rb_next(__tmp);						\
				true;								\
		       });									\
	    )											\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))					\
			continue;								\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))					\
			break;									\
		else

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings	= RB_ROOT;
	pgt->mmu		= mmu;

	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			   u64 phys, enum kvm_pgtable_prot prot,
			   void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}