Commit 9f073ac2 authored by Sean Christopherson
Browse files

KVM: selftests: Add "struct kvm_mmu" to track a given MMU instance



Add a "struct kvm_mmu" to track a given MMU instance, e.g. a VM's stage-1
MMU versus a VM's stage-2 MMU, so that x86 can share MMU functionality for
both stage-1 and stage-2 MMUs, without creating the potential for subtle
bugs, e.g. due to consuming vm->pgtable_levels when operating on a stage-2
MMU.

Encapsulate the existing de facto MMU in "struct kvm_vm", e.g. instead of
burying the MMU details in "struct kvm_vm_arch", to avoid more #ifdefs in
____vm_create(), and in the hopes that other architectures can utilize the
formalized MMU structure if/when they too support stage-2 page tables.

No functional change intended.

Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251230230150.4150236-7-seanjc@google.com


Signed-off-by: Sean Christopherson <seanjc@google.com>
parent 3cd50028
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -88,12 +88,17 @@ enum kvm_mem_region_type {
	NR_MEM_REGIONS,
};

/*
 * State for a single MMU instance, e.g. a VM's stage-1 MMU versus a VM's
 * stage-2 MMU.  Embedded in "struct kvm_vm" as the "mmu" member so that page
 * table code reads these values from the MMU being operated on.
 */
struct kvm_mmu {
	/*
	 * Set to true once the root page table has been allocated; checked by
	 * virt_arch_pgd_alloc() to avoid allocating it a second time.
	 */
	bool pgd_created;
	/*
	 * Guest physical address of the root page table, as returned by the
	 * physical page allocator and later translated with addr_gpa2hva().
	 */
	uint64_t pgd;
	/*
	 * Number of page table levels, chosen per guest mode in
	 * ____vm_create().
	 * NOTE(review): the "struct kvm_vm" field this replaces was declared
	 * "unsigned int", and at least one TEST_FAIL() caller prints the value
	 * with %u -- confirm the switch to a signed type is intended.
	 */
	int pgtable_levels;
};

struct kvm_vm {
	int mode;
	unsigned long type;
	int kvm_fd;
	int fd;
	unsigned int pgtable_levels;
	unsigned int page_size;
	unsigned int page_shift;
	unsigned int pa_bits;
@@ -104,13 +109,13 @@ struct kvm_vm {
	struct sparsebit *vpages_valid;
	struct sparsebit *vpages_mapped;
	bool has_irqchip;
	bool pgd_created;
	vm_paddr_t ucall_mmio_addr;
	vm_paddr_t pgd;
	vm_vaddr_t handlers;
	uint32_t dirty_ring_size;
	uint64_t gpa_tag_mask;

	struct kvm_mmu mmu;

	struct kvm_vm_arch arch;

	struct kvm_binary_stats stats;
+19 −19
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@ static uint64_t page_align(struct kvm_vm *vm, uint64_t v)

static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
{
	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
	unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
	uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;

	return (gva >> shift) & mask;
@@ -39,7 +39,7 @@ static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
	unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;

	TEST_ASSERT(vm->pgtable_levels == 4,
	TEST_ASSERT(vm->mmu.pgtable_levels == 4,
		"Mode %d does not have 4 page table levels", vm->mode);

	return (gva >> shift) & mask;
@@ -50,7 +50,7 @@ static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
	unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
	uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;

	TEST_ASSERT(vm->pgtable_levels >= 3,
	TEST_ASSERT(vm->mmu.pgtable_levels >= 3,
		"Mode %d does not have >= 3 page table levels", vm->mode);

	return (gva >> shift) & mask;
@@ -104,7 +104,7 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)

static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
{
	unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
	unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
	return 1 << (vm->va_bits - shift);
}

@@ -117,13 +117,13 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
	size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;

	if (vm->pgd_created)
	if (vm->mmu.pgd_created)
		return;

	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
	vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
					 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
					 vm->memslots[MEM_REGION_PT]);
	vm->pgd_created = true;
	vm->mmu.pgd_created = true;
}

static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -147,12 +147,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
	ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8;
	if (!*ptep)
		*ptep = addr_pte(vm, vm_alloc_page_table(vm),
				 PGD_TYPE_TABLE | PTE_VALID);

	switch (vm->pgtable_levels) {
	switch (vm->mmu.pgtable_levels) {
	case 4:
		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
		if (!*ptep)
@@ -190,16 +190,16 @@ uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level
{
	uint64_t *ptep;

	if (!vm->pgd_created)
	if (!vm->mmu.pgd_created)
		goto unmapped_gva;

	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
	ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8;
	if (!ptep)
		goto unmapped_gva;
	if (level == 0)
		return ptep;

	switch (vm->pgtable_levels) {
	switch (vm->mmu.pgtable_levels) {
	case 4:
		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
		if (!ptep)
@@ -263,13 +263,13 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p

void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int level = 4 - (vm->pgtable_levels - 1);
	int level = 4 - (vm->mmu.pgtable_levels - 1);
	uint64_t pgd, *ptep;

	if (!vm->pgd_created)
	if (!vm->mmu.pgd_created)
		return;

	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
	for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
		ptep = addr_gpa2hva(vm, pgd);
		if (!*ptep)
			continue;
@@ -350,7 +350,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
	}

	ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
	ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift);

	/* Configure output size */
	switch (vm->mode) {
@@ -358,7 +358,7 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
	case VM_MODE_P52V48_16K:
	case VM_MODE_P52V48_64K:
		tcr_el1 |= TCR_IPS_52_BITS;
		ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
		ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2;
		break;
	case VM_MODE_P48V48_4K:
	case VM_MODE_P48V48_16K:
+14 −14
Original line number Diff line number Diff line
@@ -281,34 +281,34 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
	/* Setup mode specific traits. */
	switch (vm->mode) {
	case VM_MODE_P52V48_4K:
		vm->pgtable_levels = 4;
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P52V48_64K:
		vm->pgtable_levels = 3;
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P48V48_4K:
		vm->pgtable_levels = 4;
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P48V48_64K:
		vm->pgtable_levels = 3;
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P40V48_4K:
	case VM_MODE_P36V48_4K:
		vm->pgtable_levels = 4;
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P40V48_64K:
	case VM_MODE_P36V48_64K:
		vm->pgtable_levels = 3;
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_P52V48_16K:
	case VM_MODE_P48V48_16K:
	case VM_MODE_P40V48_16K:
	case VM_MODE_P36V48_16K:
		vm->pgtable_levels = 4;
		vm->mmu.pgtable_levels = 4;
		break;
	case VM_MODE_P47V47_16K:
	case VM_MODE_P36V47_16K:
		vm->pgtable_levels = 3;
		vm->mmu.pgtable_levels = 3;
		break;
	case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
@@ -321,22 +321,22 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
			 vm->va_bits);

		if (vm->va_bits == 57) {
			vm->pgtable_levels = 5;
			vm->mmu.pgtable_levels = 5;
		} else {
			TEST_ASSERT(vm->va_bits == 48,
				    "Unexpected guest virtual address width: %d",
				    vm->va_bits);
			vm->pgtable_levels = 4;
			vm->mmu.pgtable_levels = 4;
		}
#else
		TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
		break;
	case VM_MODE_P47V64_4K:
		vm->pgtable_levels = 5;
		vm->mmu.pgtable_levels = 5;
		break;
	case VM_MODE_P44V64_4K:
		vm->pgtable_levels = 5;
		vm->mmu.pgtable_levels = 5;
		break;
	default:
		TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
@@ -1956,8 +1956,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
	fprintf(stream, "%*spgd_created: %u\n", indent, "",
		vm->pgd_created);
	if (vm->pgd_created) {
		vm->mmu.pgd_created);
	if (vm->mmu.pgd_created) {
		fprintf(stream, "%*sVirtual Translation Tables:\n",
			indent + 2, "");
		virt_dump(stream, vm, indent + 4);
+14 −14
Original line number Diff line number Diff line
@@ -50,11 +50,11 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
	int i;
	vm_paddr_t child, table;

	if (vm->pgd_created)
	if (vm->mmu.pgd_created)
		return;

	child = table = 0;
	for (i = 0; i < vm->pgtable_levels; i++) {
	for (i = 0; i < vm->mmu.pgtable_levels; i++) {
		invalid_pgtable[i] = child;
		table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
				vm->memslots[MEM_REGION_PT]);
@@ -62,8 +62,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
		virt_set_pgtable(vm, table, child);
		child = table;
	}
	vm->pgd = table;
	vm->pgd_created = true;
	vm->mmu.pgd = table;
	vm->mmu.pgd_created = true;
}

static int virt_pte_none(uint64_t *ptep, int level)
@@ -77,11 +77,11 @@ static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
	uint64_t *ptep;
	vm_paddr_t child;

	if (!vm->pgd_created)
	if (!vm->mmu.pgd_created)
		goto unmapped_gva;

	child = vm->pgd;
	level = vm->pgtable_levels - 1;
	child = vm->mmu.pgd;
	level = vm->mmu.pgtable_levels - 1;
	while (level > 0) {
		ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
		if (virt_pte_none(ptep, level)) {
@@ -161,11 +161,11 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int level;

	if (!vm->pgd_created)
	if (!vm->mmu.pgd_created)
		return;

	level = vm->pgtable_levels - 1;
	pte_dump(stream, vm, indent, vm->pgd, level);
	level = vm->mmu.pgtable_levels - 1;
	pte_dump(stream, vm, indent, vm->mmu.pgd, level);
}

void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
@@ -297,7 +297,7 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)

	width = vm->page_shift - 3;

	switch (vm->pgtable_levels) {
	switch (vm->mmu.pgtable_levels) {
	case 4:
		/* pud page shift and width */
		val = (vm->page_shift + width * 2) << 20 | (width << 25);
@@ -309,15 +309,15 @@ static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
		val |= vm->page_shift | width << 5;
		break;
	default:
		TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
		TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels);
	}

	loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);

	/* PGD page shift and width */
	val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
	val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6;
	loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
	loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
	loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd);

	/*
	 * Refill exception runs on real mode
+16 −15
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
	TEST_ASSERT(level > -1,
		"Negative page table level (%d) not possible", level);
	TEST_ASSERT(level < vm->pgtable_levels,
	TEST_ASSERT(level < vm->mmu.pgtable_levels,
		"Invalid page table level (%d)", level);

	return (gva & pte_index_mask[level]) >> pte_index_shift[level];
@@ -70,19 +70,19 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
	size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;

	if (vm->pgd_created)
	if (vm->mmu.pgd_created)
		return;

	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
	vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
					 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
					 vm->memslots[MEM_REGION_PT]);
	vm->pgd_created = true;
	vm->mmu.pgd_created = true;
}

void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
	uint64_t *ptep, next_ppn;
	int level = vm->pgtable_levels - 1;
	int level = vm->mmu.pgtable_levels - 1;

	TEST_ASSERT((vaddr % vm->page_size) == 0,
		"Virtual address not on page boundary,\n"
@@ -98,7 +98,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		paddr, vm->max_gfn, vm->page_size);

	ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
	ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8;
	if (!*ptep) {
		next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
		*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
@@ -126,12 +126,12 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint64_t *ptep;
	int level = vm->pgtable_levels - 1;
	int level = vm->mmu.pgtable_levels - 1;

	if (!vm->pgd_created)
	if (!vm->mmu.pgd_created)
		goto unmapped_gva;

	ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
	ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8;
	if (!ptep)
		goto unmapped_gva;
	level--;
@@ -176,13 +176,14 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,

void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
	int level = vm->pgtable_levels - 1;
	struct kvm_mmu *mmu = &vm->mmu;
	int level = mmu->pgtable_levels - 1;
	uint64_t pgd, *ptep;

	if (!vm->pgd_created)
	if (!mmu->pgd_created)
		return;

	for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
	for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
		ptep = addr_gpa2hva(vm, pgd);
		if (!*ptep)
			continue;
@@ -211,7 +212,7 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
		TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
	}

	satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
	satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
	satp |= SATP_MODE_48;

	vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
Loading