Commit 0e89ca13 authored by Raghavendra Rao Ananta, committed by Oliver Upton
Browse files

KVM: arm64: Split kvm_pgtable_stage2_destroy()



Split kvm_pgtable_stage2_destroy() into two:
  - kvm_pgtable_stage2_destroy_range(), that performs the
    page-table walk and frees the entries over a range of addresses.
  - kvm_pgtable_stage2_destroy_pgd(), that frees the PGD.

This refactoring enables subsequent patches to free large page-tables
in chunks, calling cond_resched() between each chunk, to yield the
CPU as necessary.

Existing callers of kvm_pgtable_stage2_destroy(), that probably cannot
take advantage of this (such as nVHE), will continue to function as is.

Signed-off-by: Raghavendra Rao Ananta <rananta@google.com>
Suggested-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250820162242.2624752-2-rananta@google.com


Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parent d19c541d
Loading
Loading
Loading
Loading
+30 −0
Original line number Diff line number Diff line
@@ -355,6 +355,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
	return pteref;
}

/*
 * Convert a PTE reference into a plain PTE pointer without consulting any
 * walker state. In this variant the reference is handed back unchanged.
 */
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
	kvm_pte_t *ptep = pteref;

	return ptep;
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	/*
@@ -384,6 +389,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}

/*
 * Convert a PTE reference into a plain PTE pointer without consulting any
 * walker state. This variant goes through rcu_dereference_raw(), so no
 * walker flags are checked, unlike kvm_dereference_pteref().
 */
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
	kvm_pte_t *ptep = rcu_dereference_raw(pteref);

	return ptep;
}

static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -551,6 +561,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address at which to start destroying.
 * @size:	Size of the address range to destroy.
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 *
 * The PGD itself is not freed here; see kvm_pgtable_stage2_destroy_pgd().
 */
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
					u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 *
 * It is assumed that the rest of the page-table is freed before this operation,
 * e.g. by kvm_pgtable_stage2_destroy_range().
 */
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
 * @mm_ops:	Memory management callbacks.
+3 −1
Original line number Diff line number Diff line
@@ -179,7 +179,9 @@ struct pkvm_mapping {

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
					u64 addr, u64 size);
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
			    enum kvm_pgtable_prot prot, void *mc,
			    enum kvm_pgtable_walk_flags flags);
+21 −4
Original line number Diff line number Diff line
@@ -1551,21 +1551,38 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
	return 0;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
				       u64 addr, u64 size)
{
	size_t pgd_sz;
	/*
	 * stage2_free_walker is registered for leaf visits and for the
	 * TABLE_POST visit of table entries.
	 */
	struct kvm_pgtable_walker walker = {
		.cb	= stage2_free_walker,
		.flags	= KVM_PGTABLE_WALK_LEAF |
			  KVM_PGTABLE_WALK_TABLE_POST,
	};

	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
	/*
	 * Walk only the caller-supplied (addr, size) range. A non-zero walk
	 * result is reported through WARN_ON() and otherwise not propagated.
	 */
	WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
}

void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
	size_t pgd_sz;

	/* Size of the PGD in bytes: number of PGD pages times PAGE_SIZE. */
	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);

	/*
	 * Since the pgtable is unlinked at this point, and not shared with
	 * other walkers, safely dereference pgd with
	 * kvm_dereference_pteref_raw().
	 */
	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
	/* Drop the now-stale reference to the freed PGD. */
	pgt->pgd = NULL;
}

void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
	kvm_pgtable_stage2_destroy_pgd(pgt);
}

void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
+10 −2
Original line number Diff line number Diff line
@@ -904,6 +904,14 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
	return 0;
}

static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
{
	unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);

	KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, 0, BIT(ia_bits));
	KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
}

/**
 * kvm_init_stage2_mmu - Initialise a S2 MMU structure
 * @kvm:	The pointer to the KVM structure
@@ -980,7 +988,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
	return 0;

out_destroy_pgtable:
	KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
	kvm_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
@@ -1077,7 +1085,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
	write_unlock(&kvm->mmu_lock);

	if (pgt) {
		KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
		kvm_stage2_destroy(pgt);
		kfree(pgt);
	}
}
+9 −2
Original line number Diff line number Diff line
@@ -316,9 +316,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
	return 0;
}

void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
					u64 addr, u64 size)
{
	__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
	/*
	 * The unmap helper takes a start address and (presumably) an end
	 * address, so the (addr, size) pair is converted here.
	 * NOTE(review): the helper's int return value is not checked.
	 */
	__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}

/*
 * pKVM counterpart of kvm_pgtable_stage2_destroy_pgd(). This function frees
 * nothing itself; it only warns (once) if the pkvm_mappings tree of @pgt is
 * still populated when it is called.
 */
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
	/* Expected to be called after all pKVM mappings have been released. */
	WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}

int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,