Commit 8569992d, authored by Chao Peng, committed by Paolo Bonzini
Browse files

KVM: Use gfn instead of hva for mmu_notifier_retry



Currently, in the mmu_notifier invalidate path, the hva range is recorded and
then checked by mmu_invalidate_retry_hva() in the page fault handling path.
However, for the soon-to-be-introduced private memory, a page fault may not
have an hva associated with it, so checking the gfn (gpa) makes more sense.

For existing hva-based shared memory, gfn is expected to work as well. The
only downside is that when multiple gfns alias a single hva, the current
algorithm of checking multiple ranges could result in a much larger range
being rejected. Such aliasing should be uncommon, so the impact is expected
to be small.

Suggested-by: Sean Christopherson <seanjc@google.com>
Cc: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
[sean: convert vmx_set_apic_access_page_addr() to gfn-based API]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Xu Yilun <yilun.xu@linux.intel.com>
Message-Id: <20231027182217.3615211-4-seanjc@google.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent c0db1923
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -3056,7 +3056,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
 *
 * There are several ways to safely use this helper:
 *
 * - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
 * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
 *   consuming it.  In this case, mmu_lock doesn't need to be held during the
 *   lookup, but it does need to be held while checking the MMU notifier.
 *
@@ -4366,7 +4366,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
		return true;

	return fault->slot &&
	       mmu_invalidate_retry_hva(vcpu->kvm, fault->mmu_seq, fault->hva);
	       mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
}

static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -6260,7 +6260,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)

	write_lock(&kvm->mmu_lock);

	kvm_mmu_invalidate_begin(kvm, 0, -1ul);
	kvm_mmu_invalidate_begin(kvm);

	kvm_mmu_invalidate_range_add(kvm, gfn_start, gfn_end);

	flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);

@@ -6270,7 +6272,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
	if (flush)
		kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);

	kvm_mmu_invalidate_end(kvm, 0, -1ul);
	kvm_mmu_invalidate_end(kvm);

	write_unlock(&kvm->mmu_lock);
}
+5 −6
Original line number Diff line number Diff line
@@ -6757,10 +6757,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
		return;

	/*
	 * Grab the memslot so that the hva lookup for the mmu_notifier retry
	 * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
	 * on the pfn lookup's validation of the memslot to ensure a valid hva
	 * is used for the retry check.
	 * Explicitly grab the memslot using KVM's internal slot ID to ensure
	 * KVM doesn't unintentionally grab a userspace memslot.  It _should_
	 * be impossible for userspace to create a memslot for the APIC when
	 * APICv is enabled, but paranoia won't hurt in this case.
	 */
	slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
@@ -6785,8 +6785,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
		return;

	read_lock(&vcpu->kvm->mmu_lock);
	if (mmu_invalidate_retry_hva(kvm, mmu_seq,
				     gfn_to_hva_memslot(slot, gfn))) {
	if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
		kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
		read_unlock(&vcpu->kvm->mmu_lock);
		goto out;
+21 −12
Original line number Diff line number Diff line
@@ -787,8 +787,8 @@ struct kvm {
	struct mmu_notifier mmu_notifier;
	unsigned long mmu_invalidate_seq;
	long mmu_invalidate_in_progress;
	unsigned long mmu_invalidate_range_start;
	unsigned long mmu_invalidate_range_end;
	gfn_t mmu_invalidate_range_start;
	gfn_t mmu_invalidate_range_end;
#endif
	struct list_head devices;
	u64 manual_dirty_log_protect;
@@ -1392,10 +1392,9 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif

void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
			      unsigned long end);
void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
			    unsigned long end);
void kvm_mmu_invalidate_begin(struct kvm *kvm);
void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
void kvm_mmu_invalidate_end(struct kvm *kvm);

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
@@ -1970,9 +1969,9 @@ static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
	return 0;
}

static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
static inline int mmu_invalidate_retry_gfn(struct kvm *kvm,
					   unsigned long mmu_seq,
					   unsigned long hva)
					   gfn_t gfn)
{
	lockdep_assert_held(&kvm->mmu_lock);
	/*
@@ -1981,10 +1980,20 @@ static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
	 * that might be being invalidated. Note that it may include some false
	 * positives, due to shortcuts when handing concurrent invalidations.
	 */
	if (unlikely(kvm->mmu_invalidate_in_progress) &&
	    hva >= kvm->mmu_invalidate_range_start &&
	    hva < kvm->mmu_invalidate_range_end)
	if (unlikely(kvm->mmu_invalidate_in_progress)) {
		/*
		 * Dropping mmu_lock after bumping mmu_invalidate_in_progress
		 * but before updating the range is a KVM bug.
		 */
		if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA ||
				 kvm->mmu_invalidate_range_end == INVALID_GPA))
			return 1;

		if (gfn >= kvm->mmu_invalidate_range_start &&
		    gfn < kvm->mmu_invalidate_range_end)
			return 1;
	}

	if (kvm->mmu_invalidate_seq != mmu_seq)
		return 1;
	return 0;
+34 −9
Original line number Diff line number Diff line
@@ -543,9 +543,7 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)

typedef bool (*gfn_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);

typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
			     unsigned long end);

typedef void (*on_lock_fn_t)(struct kvm *kvm);
typedef void (*on_unlock_fn_t)(struct kvm *kvm);

struct kvm_mmu_notifier_range {
@@ -637,7 +635,8 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
				locked = true;
				KVM_MMU_LOCK(kvm);
				if (!IS_KVM_NULL_FN(range->on_lock))
					range->on_lock(kvm, range->start, range->end);
					range->on_lock(kvm);

				if (IS_KVM_NULL_FN(range->handler))
					break;
			}
@@ -742,16 +741,29 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
	kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
}

void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
			      unsigned long end)
void kvm_mmu_invalidate_begin(struct kvm *kvm)
{
	/* Callers must hold mmu_lock for write. */
	lockdep_assert_held_write(&kvm->mmu_lock);
	/*
	 * The count increase must become visible at unlock time as no
	 * spte can be established without taking the mmu_lock and
	 * count is also read inside the mmu_lock critical section.
	 */
	kvm->mmu_invalidate_in_progress++;

	/*
	 * If this is the only invalidation in flight (the count just became
	 * 1), reset the tracked range to INVALID_GPA.  No range is recorded
	 * here; kvm_mmu_invalidate_range_add() fills it in afterwards.
	 */
	if (likely(kvm->mmu_invalidate_in_progress == 1)) {
		kvm->mmu_invalidate_range_start = INVALID_GPA;
		kvm->mmu_invalidate_range_end = INVALID_GPA;
	}
}

void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	WARN_ON_ONCE(!kvm->mmu_invalidate_in_progress);

	if (likely(kvm->mmu_invalidate_range_start == INVALID_GPA)) {
		kvm->mmu_invalidate_range_start = start;
		kvm->mmu_invalidate_range_end = end;
	} else {
@@ -771,6 +783,12 @@ void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
	}
}

/*
 * gfn-range handler used for mmu_notifier invalidate_range_start: add the
 * gfn range [range->start, range->end) to the in-progress invalidation via
 * kvm_mmu_invalidate_range_add(), then unmap it with kvm_unmap_gfn_range()
 * and return that function's result unchanged.
 */
static bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	/* Record the range before unmapping it. */
	kvm_mmu_invalidate_range_add(kvm, range->start, range->end);
	return kvm_unmap_gfn_range(kvm, range);
}

static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
					const struct mmu_notifier_range *range)
{
@@ -778,7 +796,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
	const struct kvm_mmu_notifier_range hva_range = {
		.start		= range->start,
		.end		= range->end,
		.handler	= kvm_unmap_gfn_range,
		.handler	= kvm_mmu_unmap_gfn_range,
		.on_lock	= kvm_mmu_invalidate_begin,
		.on_unlock	= kvm_arch_guest_memory_reclaimed,
		.flush_on_ret	= true,
@@ -817,9 +835,10 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
	return 0;
}

void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
			    unsigned long end)
void kvm_mmu_invalidate_end(struct kvm *kvm)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	/*
	 * This sequence increase will notify the kvm page fault that
	 * the page that is going to be mapped in the spte could have
@@ -834,6 +853,12 @@ void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
	 */
	kvm->mmu_invalidate_in_progress--;
	KVM_BUG_ON(kvm->mmu_invalidate_in_progress < 0, kvm);

	/*
	 * Assert that at least one range was added between start() and end().
	 * Not adding a range isn't fatal, but it is a KVM bug.
	 */
	WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA);
}

static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,