Commit 5d55a052 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-mmu-6.12' of https://github.com/kvm-x86/linux into HEAD

KVM x86 MMU changes for 6.12:

 - Overhaul the "unprotect and retry" logic to more precisely identify cases
   where retrying is actually helpful, and to harden all retry paths against
   putting the guest into an infinite retry loop.

 - Add support for yielding, e.g. to honor NEED_RESCHED, when zapping rmaps in
   the shadow MMU.

 - Refactor pieces of the shadow MMU related to aging SPTEs in prepartion for
   adding MGLRU support in KVM.

 - Misc cleanups
parents c345344e 9a5bff7f
Loading
Loading
Loading
Loading
+9 −5
Original line number Diff line number Diff line
@@ -282,10 +282,6 @@ enum x86_intercept_stage;
#define PFERR_PRIVATE_ACCESS   BIT_ULL(49)
#define PFERR_SYNTHETIC_MASK   (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
				 PFERR_WRITE_MASK |		\
				 PFERR_PRESENT_MASK)

/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC	0
/*
@@ -2142,7 +2138,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);

void kvm_update_dr7(struct kvm_vcpu *vcpu);

int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
				       bool always_retry);

static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
						   gpa_t cr2_or_gpa)
{
	return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
}

void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
			ulong roots_to_free);
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
+287 −245

File changed.

Preview size limit exceeded, changes collapsed.

+3 −0
Original line number Diff line number Diff line
@@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
 * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
 * RET_PF_RETRY: let CPU fault again on the address.
 * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
 * RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotected the
 *                         gfn and retry, or emulate the instruction directly.
 * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
 * RET_PF_FIXED: The faulting entry has been fixed.
 * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
@@ -274,6 +276,7 @@ enum {
	RET_PF_CONTINUE = 0,
	RET_PF_RETRY,
	RET_PF_EMULATE,
	RET_PF_WRITE_PROTECTED,
	RET_PF_INVALID,
	RET_PF_FIXED,
	RET_PF_SPURIOUS,
+1 −0
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@
TRACE_DEFINE_ENUM(RET_PF_CONTINUE);
TRACE_DEFINE_ENUM(RET_PF_RETRY);
TRACE_DEFINE_ENUM(RET_PF_EMULATE);
TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED);
TRACE_DEFINE_ENUM(RET_PF_INVALID);
TRACE_DEFINE_ENUM(RET_PF_FIXED);
TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);
+32 −31
Original line number Diff line number Diff line
@@ -646,10 +646,10 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
	 * really care if it changes underneath us after this point).
	 */
	if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;
		return RET_PF_RETRY;

	if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
		goto out_gpte_changed;
		return RET_PF_RETRY;

	/*
	 * Load a new root and retry the faulting instruction in the extremely
@@ -659,7 +659,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
	 */
	if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
		kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
		goto out_gpte_changed;
		return RET_PF_RETRY;
	}

	for_each_shadow_entry(vcpu, fault->addr, it) {
@@ -674,34 +674,38 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
		sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn,
					  false, access);

		if (sp != ERR_PTR(-EEXIST)) {
		/*
			 * We must synchronize the pagetable before linking it
			 * because the guest doesn't need to flush tlb when
			 * the gpte is changed from non-present to present.
			 * Otherwise, the guest may use the wrong mapping.
		 * Synchronize the new page before linking it, as the CPU (KVM)
		 * is architecturally disallowed from inserting non-present
		 * entries into the TLB, i.e. the guest isn't required to flush
		 * the TLB when changing the gPTE from non-present to present.
		 *
			 * For PG_LEVEL_4K, kvm_mmu_get_page() has already
			 * synchronized it transiently via kvm_sync_page().
		 * For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already
		 * synchronized the page via kvm_sync_page().
		 *
			 * For higher level pagetable, we synchronize it via
			 * the slower mmu_sync_children().  If it needs to
			 * break, some progress has been made; return
			 * RET_PF_RETRY and retry on the next #PF.
			 * KVM_REQ_MMU_SYNC is not necessary but it
			 * expedites the process.
			 */
			if (sp->unsync_children &&
		 * For higher level pages, which cannot be unsync themselves
		 * but can have unsync children, synchronize via the slower
		 * mmu_sync_children().  If KVM needs to drop mmu_lock due to
		 * contention or to reschedule, instruct the caller to retry
		 * the #PF (mmu_sync_children() ensures forward progress will
		 * be made).
		 */
		if (sp != ERR_PTR(-EEXIST) && sp->unsync_children &&
		    mmu_sync_children(vcpu, sp, false))
			return RET_PF_RETRY;
		}

		/*
		 * Verify that the gpte in the page we've just write
		 * protected is still there.
		 * Verify that the gpte in the page, which is now either
		 * write-protected or unsync, wasn't modified between the fault
		 * and acquiring mmu_lock.  This needs to be done even when
		 * reusing an existing shadow page to ensure the information
		 * gathered by the walker matches the information stored in the
		 * shadow page (which could have been modified by a different
		 * vCPU even if the page was already linked).  Holding mmu_lock
		 * prevents the shadow page from changing after this point.
		 */
		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
			goto out_gpte_changed;
			return RET_PF_RETRY;

		if (sp != ERR_PTR(-EEXIST))
			link_shadow_page(vcpu, it.sptep, sp);
@@ -755,9 +759,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,

	FNAME(pte_prefetch)(vcpu, gw, it.sptep);
	return ret;

out_gpte_changed:
	return RET_PF_RETRY;
}

/*
@@ -805,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault

	if (page_fault_handle_page_track(vcpu, fault)) {
		shadow_page_table_clear_flood(vcpu, fault->addr);
		return RET_PF_EMULATE;
		return RET_PF_WRITE_PROTECTED;
	}

	r = mmu_topup_memory_caches(vcpu, true);
Loading