Commit 42bf3f0a authored by Avi Kivity
Browse files

KVM: MMU: Simplify page table walker



Simplify the walker level loop so that it does not carry so much state from
one iteration to the next.  In addition to being complex, this made
kmap_atomic() critical sections difficult to manage.

As a result of this change, kmap_atomic() sections are limited to actually
touching the guest pte, which allows the other functions called from the
walker to perform operations that may sleep.  This will happen when we
enable swapping.

Signed-off-by: Avi Kivity <avi@qumranet.com>
parent d77a2507
Loading
Loading
Loading
Loading
+48 −76
Original line number Diff line number Diff line
@@ -59,32 +59,12 @@
/*
 * State of a guest page table walk, filled in by FNAME(walk_addr) while it
 * descends the guest paging structures for one guest virtual address.
 */
struct guest_walker {
	int level;	/* current paging level; starts at the mmu root level and is decremented per step */
	gfn_t table_gfn[PT_MAX_FULL_LEVELS];	/* gfn of the guest table visited at each level, indexed by level - 1 */
	pt_element_t *table;	/* kmap_atomic() mapping of the guest table page being walked, or NULL */
	pt_element_t pte;	/* guest pte value the walk ended on */
	pt_element_t *ptep;	/* pointer to the guest entry being examined, or NULL */
	struct page *page;	/* page backing the guest table being walked, or NULL */
	int index;	/* index of the examined entry within the current table */
	pt_element_t inherited_ar;	/* starts as PT_USER_MASK | PT_WRITABLE_MASK, ANDed with each non-leaf pte */
	gfn_t gfn;	/* guest frame number taken from the final pte's base address */
	u32 error_code;	/* PFERR_* bits accumulated when the walk fails */
};

/*
 * Set the dirty bit in a guest pte in response to a write fault.
 *
 * Does nothing unless @write_fault is non-zero and the entry at @ptep is
 * not already dirty.  Otherwise the guest table page @table_gfn is marked
 * dirty, PT_DIRTY_MASK is set in *@ptep, and the modified entry is passed
 * to kvm_mmu_pte_write() together with its guest physical address: the
 * base of page @table_gfn plus the offset of @ptep within its page.
 */
static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
				    int write_fault,
				    pt_element_t *ptep,
				    gfn_t table_gfn)
{
	gpa_t gpa;

	/* Nothing to do for non-write faults or already-dirty entries. */
	if (!write_fault || is_dirty_pte(*ptep))
		return;

	mark_page_dirty(vcpu->kvm, table_gfn);
	*ptep |= PT_DIRTY_MASK;

	/* Widen before shifting so the page base is computed as a gpa_t. */
	gpa = (gpa_t)table_gfn << PAGE_SHIFT;
	gpa += offset_in_page(ptep);
	kvm_mmu_pte_write(vcpu, gpa, (u8 *)ptep, sizeof(*ptep));
}

/*
 * Fetch a guest pte for a guest virtual address
 */
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
{
	hpa_t hpa;
	struct kvm_memory_slot *slot;
	pt_element_t *ptep;
	pt_element_t root;
	struct page *page;
	pt_element_t *table;
	pt_element_t pte;
	gfn_t table_gfn;
	unsigned index;
	gpa_t pte_gpa;

	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
	walker->level = vcpu->mmu.root_level;
	walker->table = NULL;
	walker->page = NULL;
	walker->ptep = NULL;
	root = vcpu->cr3;
	pte = vcpu->cr3;
#if PTTYPE == 64
	if (!is_long_mode(vcpu)) {
		walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
		root = *walker->ptep;
		walker->pte = root;
		if (!(root & PT_PRESENT_MASK))
		pte = vcpu->pdptrs[(addr >> 30) & 3];
		if (!is_present_pte(pte))
			goto not_present;
		--walker->level;
	}
#endif
	table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
	walker->table_gfn[walker->level - 1] = table_gfn;
	pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
		 walker->level - 1, table_gfn);
	slot = gfn_to_memslot(vcpu->kvm, table_gfn);
	hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
	walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
	walker->table = kmap_atomic(walker->page, KM_USER0);

	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
	       (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);

	walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;

	for (;;) {
		int index = PT_INDEX(addr, walker->level);
		hpa_t paddr;
		index = PT_INDEX(addr, walker->level);

		table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
		walker->table_gfn[walker->level - 1] = table_gfn;
		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
			 walker->level - 1, table_gfn);

		slot = gfn_to_memslot(vcpu->kvm, table_gfn);
		hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
		page = pfn_to_page(hpa >> PAGE_SHIFT);

		ptep = &walker->table[index];
		walker->index = index;
		ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
		       ((unsigned long)ptep & PAGE_MASK));
		table = kmap_atomic(page, KM_USER0);
		pte = table[index];
		kunmap_atomic(table, KM_USER0);

		if (!is_present_pte(*ptep))
		if (!is_present_pte(pte))
			goto not_present;

		if (write_fault && !is_writeble_pte(*ptep))
		if (write_fault && !is_writeble_pte(pte))
			if (user_fault || is_write_protection(vcpu))
				goto access_error;

		if (user_fault && !(*ptep & PT_USER_MASK))
		if (user_fault && !(pte & PT_USER_MASK))
			goto access_error;

#if PTTYPE == 64
		if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
		if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK))
			goto access_error;
#endif

		if (!(*ptep & PT_ACCESSED_MASK)) {
		if (!(pte & PT_ACCESSED_MASK)) {
			mark_page_dirty(vcpu->kvm, table_gfn);
			*ptep |= PT_ACCESSED_MASK;
			pte |= PT_ACCESSED_MASK;
			table = kmap_atomic(page, KM_USER0);
			table[index] = pte;
			kunmap_atomic(table, KM_USER0);
		}

		if (walker->level == PT_PAGE_TABLE_LEVEL) {
			walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
				>> PAGE_SHIFT;
			FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
						table_gfn);
			walker->gfn = (pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
			break;
		}

		if (walker->level == PT_DIRECTORY_LEVEL
		    && (*ptep & PT_PAGE_SIZE_MASK)
		    && (pte & PT_PAGE_SIZE_MASK)
		    && (PTTYPE == 64 || is_pse(vcpu))) {
			walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
			walker->gfn = (pte & PT_DIR_BASE_ADDR_MASK)
				>> PAGE_SHIFT;
			walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
			FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
						table_gfn);
			break;
		}

		walker->inherited_ar &= walker->table[index];
		table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
		kunmap_atomic(walker->table, KM_USER0);
		paddr = safe_gpa_to_hpa(vcpu->kvm, table_gfn << PAGE_SHIFT);
		walker->page = pfn_to_page(paddr >> PAGE_SHIFT);
		walker->table = kmap_atomic(walker->page, KM_USER0);
		walker->inherited_ar &= pte;
		--walker->level;
		walker->table_gfn[walker->level - 1] = table_gfn;
		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
			 walker->level - 1, table_gfn);
	}
	walker->pte = *ptep;
	if (walker->page)
		walker->ptep = NULL;
	if (walker->table)
		kunmap_atomic(walker->table, KM_USER0);
	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);

	if (write_fault && !is_dirty_pte(pte)) {
		mark_page_dirty(vcpu->kvm, table_gfn);
		pte |= PT_DIRTY_MASK;
		table = kmap_atomic(page, KM_USER0);
		table[index] = pte;
		kunmap_atomic(table, KM_USER0);
		pte_gpa = table_gfn << PAGE_SHIFT;
		pte_gpa += index * sizeof(pt_element_t);
		kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
	}

	walker->pte = pte;
	pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)pte);
	return 1;

not_present:
@@ -209,8 +183,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
		walker->error_code |= PFERR_USER_MASK;
	if (fetch_fault)
		walker->error_code |= PFERR_FETCH_MASK;
	if (walker->table)
		kunmap_atomic(walker->table, KM_USER0);
	return 0;
}