Merge tag 'kvm-x86-mmu-6.12' of https://github.com/kvm-x86/linux into HEAD (5d55a052) · Commits · git / linux-net

arch/x86/include/asm/kvm_host.h

+9 −5

Original line number	Diff line number	Diff line
		@@ -282,10 +282,6 @@ enum x86_intercept_stage;
		#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
		#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

		#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
		PFERR_WRITE_MASK | \
		PFERR_PRESENT_MASK)

		/* apic attention bits */
		#define KVM_APIC_CHECK_VAPIC 0
		/*
		@@ -2142,7 +2138,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);

		void kvm_update_dr7(struct kvm_vcpu *vcpu);

		int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
		bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
		bool always_retry);

		/*
		 * Convenience wrapper around __kvm_mmu_unprotect_gfn_and_retry() that
		 * always passes always_retry = false and returns the helper's result
		 * unchanged.
		 */
		static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
		gpa_t cr2_or_gpa)
		{
		return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
		}

		void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
		ulong roots_to_free);
		void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);

arch/x86/kvm/mmu/mmu.c

+287 −245

File changed.

Preview size limit exceeded, changes collapsed.

arch/x86/kvm/mmu/mmu_internal.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu vcpu, struct kvm_page_fault fault);
		* RET_PF_CONTINUE: So far, so good, keep handling the page fault.
		* RET_PF_RETRY: let CPU fault again on the address.
		* RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
		* RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotect the
		* gfn and retry, or emulate the instruction directly.
		* RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
		* RET_PF_FIXED: The faulting entry has been fixed.
		* RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
		@@ -274,6 +276,7 @@ enum {
		RET_PF_CONTINUE = 0,
		RET_PF_RETRY,
		RET_PF_EMULATE,
		RET_PF_WRITE_PROTECTED,
		RET_PF_INVALID,
		RET_PF_FIXED,
		RET_PF_SPURIOUS,

arch/x86/kvm/mmu/mmutrace.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -57,6 +57,7 @@
		TRACE_DEFINE_ENUM(RET_PF_CONTINUE);
		TRACE_DEFINE_ENUM(RET_PF_RETRY);
		TRACE_DEFINE_ENUM(RET_PF_EMULATE);
		TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED);
		TRACE_DEFINE_ENUM(RET_PF_INVALID);
		TRACE_DEFINE_ENUM(RET_PF_FIXED);
		TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);

arch/x86/kvm/mmu/paging_tmpl.h

+32 −31

Original line number	Diff line number	Diff line
		@@ -646,10 +646,10 @@ static int FNAME(fetch)(struct kvm_vcpu vcpu, struct kvm_page_fault fault,
		* really care if it changes underneath us after this point).
		*/
		if (FNAME(gpte_changed)(vcpu, gw, top_level))
		goto out_gpte_changed;
		return RET_PF_RETRY;

		if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
		goto out_gpte_changed;
		return RET_PF_RETRY;

		/*
		* Load a new root and retry the faulting instruction in the extremely
		@@ -659,7 +659,7 @@ static int FNAME(fetch)(struct kvm_vcpu vcpu, struct kvm_page_fault fault,
		*/
		if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
		kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
		goto out_gpte_changed;
		return RET_PF_RETRY;
		}

		for_each_shadow_entry(vcpu, fault->addr, it) {
		@@ -674,34 +674,38 @@ static int FNAME(fetch)(struct kvm_vcpu vcpu, struct kvm_page_fault fault,
		sp = kvm_mmu_get_child_sp(vcpu, it.sptep, table_gfn,
		false, access);

		if (sp != ERR_PTR(-EEXIST)) {
		/*
		* We must synchronize the pagetable before linking it
		* because the guest doesn't need to flush tlb when
		* the gpte is changed from non-present to present.
		* Otherwise, the guest may use the wrong mapping.
		* Synchronize the new page before linking it, as the CPU (KVM)
		* is architecturally disallowed from inserting non-present
		* entries into the TLB, i.e. the guest isn't required to flush
		* the TLB when changing the gPTE from non-present to present.
		*
		* For PG_LEVEL_4K, kvm_mmu_get_page() has already
		* synchronized it transiently via kvm_sync_page().
		* For PG_LEVEL_4K, kvm_mmu_find_shadow_page() has already
		* synchronized the page via kvm_sync_page().
		*
		* For higher level pagetable, we synchronize it via
		* the slower mmu_sync_children(). If it needs to
		* break, some progress has been made; return
		* RET_PF_RETRY and retry on the next #PF.
		* KVM_REQ_MMU_SYNC is not necessary but it
		* expedites the process.
		*/
		if (sp->unsync_children &&
		* For higher level pages, which cannot be unsync themselves
		* but can have unsync children, synchronize via the slower
		* mmu_sync_children(). If KVM needs to drop mmu_lock due to
		* contention or to reschedule, instruct the caller to retry
		* the #PF (mmu_sync_children() ensures forward progress will
		* be made).
		*/
		if (sp != ERR_PTR(-EEXIST) && sp->unsync_children &&
		mmu_sync_children(vcpu, sp, false))
		return RET_PF_RETRY;
		}

		/*
		* Verify that the gpte in the page we've just write
		* protected is still there.
		* Verify that the gpte in the page, which is now either
		* write-protected or unsync, wasn't modified between the fault
		* and acquiring mmu_lock. This needs to be done even when
		* reusing an existing shadow page to ensure the information
		* gathered by the walker matches the information stored in the
		* shadow page (which could have been modified by a different
		* vCPU even if the page was already linked). Holding mmu_lock
		* prevents the shadow page from changing after this point.
		*/
		if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
		goto out_gpte_changed;
		return RET_PF_RETRY;

		if (sp != ERR_PTR(-EEXIST))
		link_shadow_page(vcpu, it.sptep, sp);
		@@ -755,9 +759,6 @@ static int FNAME(fetch)(struct kvm_vcpu vcpu, struct kvm_page_fault fault,

		FNAME(pte_prefetch)(vcpu, gw, it.sptep);
		return ret;

		out_gpte_changed:
		return RET_PF_RETRY;
		}

		/*
		@@ -805,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu vcpu, struct kvm_page_fault fault

		if (page_fault_handle_page_track(vcpu, fault)) {
		shadow_page_table_clear_flood(vcpu, fault->addr);
		return RET_PF_EMULATE;
		return RET_PF_WRITE_PROTECTED;
		}

		r = mmu_topup_memory_caches(vcpu, true);