Commit cec29eef authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini
Browse files

KVM: Add a dedicated mmu_notifier flag for reclaiming freed memory



Handle AMD SEV's kvm_arch_guest_memory_reclaimed() hook by having
__kvm_handle_hva_range() return whether or not an overlapping memslot
was found, i.e. mmu_lock was acquired.  Using the .on_unlock() hook
works, but kvm_arch_guest_memory_reclaimed() needs to run after dropping
mmu_lock, which makes .on_lock() and .on_unlock() asymmetrical.

Use a small struct to return the tuple of the notifier-specific return,
plus whether or not overlap was found.  Because the iteration helpers are
__always_inlined, practically speaking, the struct will never actually be
returned from a function call (not to mention the size of the struct will
be two bytes in practice).

Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Reviewed-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Reviewed-by: default avatarFuad Tabba <tabba@google.com>
Tested-by: default avatarFuad Tabba <tabba@google.com>
Message-Id: <20231027182217.3615211-11-seanjc@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 16f95f3b
Loading
Loading
Loading
Loading
+37 −16
Original line number Diff line number Diff line
@@ -561,6 +561,19 @@ struct kvm_mmu_notifier_range {
	bool may_block;
};

/*
 * The inner-most helper returns a tuple containing the return value from the
 * arch- and action-specific handler, plus a flag indicating whether or not at
 * least one memslot was found, i.e. if the handler found guest memory.
 *
 * Note, most notifiers are averse to booleans, so even though KVM tracks the
 * return from arch code as a bool, outer helpers will cast it to an int. :-(
 */
typedef struct kvm_mmu_notifier_return {
	bool ret;
	bool found_memslot;
} kvm_mn_ret_t;

/*
 * Use a dedicated stub instead of NULL to indicate that there is no callback
 * function/handler.  The compiler technically can't guarantee that a real
@@ -582,22 +595,25 @@ static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
	     node;							     \
	     node = interval_tree_iter_next(node, start, last))	     \

static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
							   const struct kvm_mmu_notifier_range *range)
{
	bool ret = false, locked = false;
	struct kvm_mmu_notifier_return r = {
		.ret = false,
		.found_memslot = false,
	};
	struct kvm_gfn_range gfn_range;
	struct kvm_memory_slot *slot;
	struct kvm_memslots *slots;
	int i, idx;

	if (WARN_ON_ONCE(range->end <= range->start))
		return 0;
		return r;

	/* A null handler is allowed if and only if on_lock() is provided. */
	if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
			 IS_KVM_NULL_FN(range->handler)))
		return 0;
		return r;

	idx = srcu_read_lock(&kvm->srcu);

@@ -631,8 +647,8 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
			gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
			gfn_range.slot = slot;

			if (!locked) {
				locked = true;
			if (!r.found_memslot) {
				r.found_memslot = true;
				KVM_MMU_LOCK(kvm);
				if (!IS_KVM_NULL_FN(range->on_lock))
					range->on_lock(kvm);
@@ -640,14 +656,14 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
				if (IS_KVM_NULL_FN(range->handler))
					break;
			}
			ret |= range->handler(kvm, &gfn_range);
			r.ret |= range->handler(kvm, &gfn_range);
		}
	}

	if (range->flush_on_ret && ret)
	if (range->flush_on_ret && r.ret)
		kvm_flush_remote_tlbs(kvm);

	if (locked) {
	if (r.found_memslot) {
		KVM_MMU_UNLOCK(kvm);
		if (!IS_KVM_NULL_FN(range->on_unlock))
			range->on_unlock(kvm);
@@ -655,8 +671,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,

	srcu_read_unlock(&kvm->srcu, idx);

	/* The notifiers are averse to booleans. :-( */
	return (int)ret;
	return r;
}

static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
@@ -677,7 +692,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
		.may_block	= false,
	};

	return __kvm_handle_hva_range(kvm, &range);
	return __kvm_handle_hva_range(kvm, &range).ret;
}

static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
@@ -696,7 +711,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
		.may_block	= false,
	};

	return __kvm_handle_hva_range(kvm, &range);
	return __kvm_handle_hva_range(kvm, &range).ret;
}

static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -798,7 +813,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
		.end		= range->end,
		.handler	= kvm_mmu_unmap_gfn_range,
		.on_lock	= kvm_mmu_invalidate_begin,
		.on_unlock	= kvm_arch_guest_memory_reclaimed,
		.on_unlock	= (void *)kvm_null_fn,
		.flush_on_ret	= true,
		.may_block	= mmu_notifier_range_blockable(range),
	};
@@ -830,7 +845,13 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
					  hva_range.may_block);

	__kvm_handle_hva_range(kvm, &hva_range);
	/*
	 * If one or more memslots were found and thus zapped, notify arch code
	 * that guest memory has been reclaimed.  This needs to be done *after*
	 * dropping mmu_lock, as x86's reclaim path is slooooow.
	 */
	if (__kvm_handle_hva_range(kvm, &hva_range).found_memslot)
		kvm_arch_guest_memory_reclaimed(kvm);

	return 0;
}