Commit 8b15c376 authored by Sean Christopherson's avatar Sean Christopherson Committed by Paolo Bonzini
Browse files

KVM: Don't grab reference on VM_MIXEDMAP pfns that have a "struct page"



Now that KVM no longer relies on an ugly heuristic to find its struct page
references, i.e. now that KVM can't get false positives on VM_MIXEDMAP
pfns, remove KVM's hack to elevate the refcount for pfns that happen to
have a valid struct page.  In addition to removing a long-standing wart
in KVM, this allows KVM to map non-refcounted struct page memory into the
guest, e.g. for exposing GPU TTM buffers to KVM guests.

Tested-by: default avatarAlex Bennée <alex.bennee@linaro.org>
Signed-off-by: default avatarSean Christopherson <seanjc@google.com>
Tested-by: default avatarDmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Message-ID: <20241010182427.1434605-86-seanjc@google.com>
parent 93b7da40
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -1730,9 +1730,6 @@ void kvm_arch_sync_events(struct kvm *kvm);

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);

struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn);
bool kvm_is_zone_device_page(struct page *page);

struct kvm_irq_ack_notifier {
	struct hlist_node link;
	unsigned gsi;
+2 −73
Original line number Diff line number Diff line
@@ -160,52 +160,6 @@ __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
}

bool kvm_is_zone_device_page(struct page *page)
{
	/*
	 * The metadata used by is_zone_device_page() to determine whether or
	 * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
	 * the device has been pinned, e.g. by get_user_pages().  WARN if the
	 * page_count() is zero to help detect bad usage of this helper.
	 */
	if (WARN_ON_ONCE(!page_count(page)))
		return false;

	return is_zone_device_page(page);
}

/*
 * Returns a 'struct page' if the pfn is "valid" and backed by a refcounted
 * page, NULL otherwise.  Note, the list of refcounted PG_reserved page types
 * is likely incomplete, it has been compiled purely through people wanting to
 * back guest with a certain type of memory and encountering issues.
 */
struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_page(pfn);
	if (!PageReserved(page))
		return page;

	/* The ZERO_PAGE(s) is marked PG_reserved, but is refcounted. */
	if (is_zero_pfn(pfn))
		return page;

	/*
	 * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
	 * perspective they are "normal" pages, albeit with slightly different
	 * usage rules.
	 */
	if (kvm_is_zone_device_page(page))
		return page;

	return NULL;
}

/*
 * Switches to specified vcpu, until a matching vcpu_put()
 */
@@ -2804,35 +2758,10 @@ static kvm_pfn_t kvm_resolve_pfn(struct kvm_follow_pfn *kfp, struct page *page,
	if (kfp->map_writable)
		*kfp->map_writable = writable;

	/*
	 * FIXME: Remove this once KVM no longer blindly calls put_page() on
	 *	  every pfn that points at a struct page.
	 *
	 * Get a reference for follow_pte() pfns if they happen to point at a
	 * struct page, as KVM will ultimately call kvm_release_pfn_clean() on
	 * the returned pfn, i.e. KVM expects to have a reference.
	 *
	 * Certain IO or PFNMAP mappings can be backed with valid struct pages,
	 * but be allocated without refcounting, e.g. tail pages of
	 * non-compound higher order allocations.  Grabbing and putting a
	 * reference to such pages would cause KVM to prematurely free a page
	 * it doesn't own (KVM gets and puts the one and only reference).
	 * Don't allow those pages until the FIXME is resolved.
	 *
	 * Don't grab a reference for pins, callers that pin pages are required
	 * to check refcounted_page, i.e. must not blindly release the pfn.
	 */
	if (map) {
	if (map)
		pfn = map->pfn;

		if (!kfp->pin) {
			page = kvm_pfn_to_refcounted_page(pfn);
			if (page && !get_page_unless_zero(page))
				return KVM_PFN_ERR_FAULT;
		}
	} else {
	else
		pfn = page_to_pfn(page);
	}

	*kfp->refcounted_page = page;