Commit 5cbe2435 authored by Claudio Imbrenda's avatar Claudio Imbrenda
Browse files

KVM: s390: move pv gmap functions into kvm



Move gmap related functions from kernel/uv into kvm.

Create a new file to collect gmap-related functions.

Reviewed-by: default avatarJanosch Frank <frankja@linux.ibm.com>
Reviewed-by: default avatarChristoph Schlameuss <schlameuss@linux.ibm.com>
[fixed unpack_one(), thanks mhartmay@linux.ibm.com]
Link: https://lore.kernel.org/r/20250123144627.312456-6-imbrenda@linux.ibm.com


Signed-off-by: default avatarClaudio Imbrenda <imbrenda@linux.ibm.com>
Message-ID: <20250123144627.312456-6-imbrenda@linux.ibm.com>
parent 63e71519
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -149,6 +149,7 @@ int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool interruptible);
int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split);

/**
 * s390_uv_destroy_range - Destroy a range of pages in the given mm.
+3 −3
Original line number Diff line number Diff line
@@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void)
}

int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
int uv_convert_from_secure(unsigned long paddr);
int uv_convert_from_secure_folio(struct folio *folio);

void setup_uv(void);

+29 −263
Original line number Diff line number Diff line
@@ -19,19 +19,6 @@
#include <asm/sections.h>
#include <asm/uv.h>

#if !IS_ENABLED(CONFIG_KVM)
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	return 0;
}

int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	return 0;
}
#endif

/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio)
	folio_put(folio);
	return rc;
}
EXPORT_SYMBOL(uv_destroy_folio);

/*
 * The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte)
 *
 * @paddr: Absolute host address of page to be exported
 */
static int uv_convert_from_secure(unsigned long paddr)
int uv_convert_from_secure(unsigned long paddr)
{
	struct uv_cb_cfs uvcb = {
		.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr)
		return -EINVAL;
	return 0;
}
EXPORT_SYMBOL_GPL(uv_convert_from_secure);

/*
 * The caller must already hold a reference to the folio.
 */
static int uv_convert_from_secure_folio(struct folio *folio)
int uv_convert_from_secure_folio(struct folio *folio)
{
	int rc;

@@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
	folio_put(folio);
	return rc;
}
EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);

/*
 * The present PTE still indirectly holds a folio reference through the mapping.
@@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio)
	return res;
}

static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
/**
 * make_folio_secure() - make a folio secure
 * @folio: the folio to make secure
 * @uvcb: the uvcb that describes the UVC to be used
 *
 * The folio @folio will be made secure if possible, @uvcb will be passed
 * as-is to the UVC.
 *
 * Return: 0 on success;
 *         -EBUSY if the folio is in writeback or has too many references;
 *         -E2BIG if the folio is large;
 *         -EAGAIN if the UVC needs to be attempted again;
 *         -ENXIO if the address is not mapped;
 *         -EINVAL if the UVC failed for other reasons.
 *
 * Context: The caller must hold exactly one extra reference on the folio
 *          (it's the same logic as split_folio())
 */
int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
	int expected, cc = 0;

	if (folio_test_large(folio))
		return -E2BIG;
	if (folio_test_writeback(folio))
		return -EAGAIN;
	expected = expected_folio_refs(folio);
		return -EBUSY;
	expected = expected_folio_refs(folio) + 1;
	if (!folio_ref_freeze(folio, expected))
		return -EBUSY;
	set_bit(PG_arch_1, &folio->flags);
@@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
		return -EAGAIN;
	return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed also when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

/*
 * Drain LRU caches: the local one on first invocation and the ones of all
 * CPUs on successive invocations. Returns "true" on the first invocation.
 */
static bool drain_lru(bool *drain_lru_called)
{
	/*
	 * If we have tried a local drain and the folio refcount
	 * still does not match our expected safe value, try with a
	 * system wide drain. This is needed if the pagevecs holding
	 * the page are on a different CPU.
	 */
	if (*drain_lru_called) {
		lru_add_drain_all();
		/* We give up here, don't retry immediately. */
		return false;
	}
	/*
	 * We are here if the folio refcount does not match the
	 * expected safe value. The main culprits are usually
	 * pagevecs. With lru_add_drain() we drain the pagevecs
	 * on the local CPU so that hopefully the refcount will
	 * reach the expected safe value.
	 */
	lru_add_drain();
	*drain_lru_called = true;
	/* The caller should try again immediately */
	return true;
}

/*
 * Requests the Ultravisor to make a page accessible to a guest.
 * If it's brought in the first time, it will be cleared. If
 * it has been exported before, it will be decrypted and integrity
 * checked.
 */
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	struct vm_area_struct *vma;
	bool drain_lru_called = false;
	spinlock_t *ptelock;
	unsigned long uaddr;
	struct folio *folio;
	pte_t *ptep;
	int rc;

again:
	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Secure pages cannot be huge and userspace should not combine both.
	 * In case userspace does it anyway this will result in an -EFAULT for
	 * the unpack. The guest is thus never reaching secure mode. If
	 * userspace is playing dirty tricky with mapping huge pages later
	 * on this will result in a segmentation fault.
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = -ENXIO;
	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
	if (!ptep)
		goto out;
	if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
		folio = page_folio(pte_page(*ptep));
		rc = -EAGAIN;
		if (folio_test_large(folio)) {
			rc = -E2BIG;
		} else if (folio_trylock(folio)) {
			if (should_export_before_import(uvcb, gmap->mm))
				uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
			rc = make_folio_secure(folio, uvcb);
			folio_unlock(folio);
		}

		/*
		 * Once we drop the PTL, the folio may get unmapped and
		 * freed immediately. We need a temporary reference.
		 */
		if (rc == -EAGAIN || rc == -E2BIG)
			folio_get(folio);
	}
	pte_unmap_unlock(ptep, ptelock);
out:
	mmap_read_unlock(gmap->mm);

	switch (rc) {
	case -E2BIG:
		folio_lock(folio);
		rc = split_folio(folio);
		folio_unlock(folio);
		folio_put(folio);

		switch (rc) {
		case 0:
			/* Splitting succeeded, try again immediately. */
			goto again;
		case -EAGAIN:
			/* Additional folio references. */
			if (drain_lru(&drain_lru_called))
				goto again;
			return -EAGAIN;
		case -EBUSY:
			/* Unexpected race. */
			return -EAGAIN;
		}
		WARN_ON_ONCE(1);
		return -ENXIO;
	case -EAGAIN:
		/*
		 * If we are here because the UVC returned busy or partial
		 * completion, this is just a useless check, but it is safe.
		 */
		folio_wait_writeback(folio);
		folio_put(folio);
		return -EAGAIN;
	case -EBUSY:
		/* Additional folio references. */
		if (drain_lru(&drain_lru_called))
			goto again;
		return -EAGAIN;
	case -ENXIO:
		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
			return -EFAULT;
		return -EAGAIN;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_make_secure);

int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = gmap->guest_handle,
		.gaddr = gaddr,
	};

	return gmap_make_secure(gmap, gaddr, &uvcb);
}
EXPORT_SYMBOL_GPL(gmap_convert_to_secure);

/**
 * gmap_destroy_page - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
	struct vm_area_struct *vma;
	struct folio_walk fw;
	unsigned long uaddr;
	struct folio *folio;
	int rc;

	rc = -EFAULT;
	mmap_read_lock(gmap->mm);

	uaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(uaddr))
		goto out;
	vma = vma_lookup(gmap->mm, uaddr);
	if (!vma)
		goto out;
	/*
	 * Huge pages should not be able to become secure
	 */
	if (is_vm_hugetlb_page(vma))
		goto out;

	rc = 0;
	folio = folio_walk_start(&fw, vma, uaddr, 0);
	if (!folio)
		goto out;
	/*
	 * See gmap_make_secure(): large folios cannot be secure. Small
	 * folio implies FW_LEVEL_PTE.
	 */
	if (folio_test_large(folio) || !pte_write(fw.pte))
		goto out_walk_end;
	rc = uv_destroy_folio(folio);
	/*
	 * Fault handlers can race; it is possible that two CPUs will fault
	 * on the same secure page. One CPU can destroy the page, reboot,
	 * re-enter secure mode and import it, while the second CPU was
	 * stuck at the beginning of the handler. At some point the second
	 * CPU will be able to progress, and it will not be able to destroy
	 * the page. In that case we do not want to terminate the process,
	 * we instead try to export the page.
	 */
	if (rc)
		rc = uv_convert_from_secure_folio(folio);
out_walk_end:
	folio_walk_end(&fw, vma);
out:
	mmap_read_unlock(gmap->mm);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_destroy_page);
EXPORT_SYMBOL_GPL(make_folio_secure);

/*
 * To be called with the folio locked or with an extra reference! This will
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm

kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o

kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o

arch/s390/kvm/gmap.c

0 → 100644
+212 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0
/*
 * Guest memory management for KVM/s390
 *
 * Copyright IBM Corp. 2008, 2020, 2024
 *
 *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
 *               Martin Schwidefsky <schwidefsky@de.ibm.com>
 *               David Hildenbrand <david@redhat.com>
 *               Janosch Frank <frankja@linux.vnet.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/pgtable.h>
#include <linux/pagemap.h>

#include <asm/lowcore.h>
#include <asm/gmap.h>
#include <asm/uv.h>

#include "gmap.h"

/**
 * should_export_before_import - Determine whether an export is needed
 * before an import-like operation
 * @uvcb: the Ultravisor control block of the UVC to be performed
 * @mm: the mm of the process
 *
 * Returns whether an export is needed before every import-like operation.
 * This is needed for shared pages, which don't trigger a secure storage
 * exception when accessed from a different guest.
 *
 * Although considered as one, the Unpin Page UVC is not an actual import,
 * so it is not affected.
 *
 * No export is needed also when there is only one protected VM, because the
 * page cannot belong to the wrong VM in that case (there is no "other VM"
 * it can belong to).
 *
 * Return: true if an export is needed before every import, otherwise false.
 */
static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
{
	/*
	 * The misc feature indicates, among other things, that importing a
	 * shared page from a different protected VM will automatically also
	 * transfer its ownership.
	 */
	if (uv_has_feature(BIT_UV_FEAT_MISC))
		return false;
	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
		return false;
	return atomic_read(&mm->context.protected_count) > 1;
}

static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
{
	struct folio *folio = page_folio(page);
	int rc;

	/*
	 * Secure pages cannot be huge and userspace should not combine both.
	 * In case userspace does it anyway this will result in an -EFAULT for
	 * the unpack. The guest is thus never reaching secure mode.
	 * If userspace plays dirty tricks and decides to map huge pages at a
	 * later point in time, it will receive a segmentation fault or
	 * KVM_RUN will return -EFAULT.
	 */
	if (folio_test_hugetlb(folio))
		return -EFAULT;
	if (folio_test_large(folio)) {
		mmap_read_unlock(gmap->mm);
		rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true);
		mmap_read_lock(gmap->mm);
		if (rc)
			return rc;
		folio = page_folio(page);
	}

	if (!folio_trylock(folio))
		return -EAGAIN;
	if (should_export_before_import(uvcb, gmap->mm))
		uv_convert_from_secure(folio_to_phys(folio));
	rc = make_folio_secure(folio, uvcb);
	folio_unlock(folio);

	/*
	 * In theory a race is possible and the folio might have become
	 * large again before the folio_trylock() above. In that case, no
	 * action is performed and -EAGAIN is returned; the callers will
	 * have to try again later.
	 * In most cases this implies running the VM again, getting the same
	 * exception again, and make another attempt in this function.
	 * This is expected to happen extremely rarely.
	 */
	if (rc == -E2BIG)
		return -EAGAIN;
	/* The folio has too many references, try to shake some off */
	if (rc == -EBUSY) {
		mmap_read_unlock(gmap->mm);
		kvm_s390_wiggle_split_folio(gmap->mm, folio, false);
		mmap_read_lock(gmap->mm);
		return -EAGAIN;
	}

	return rc;
}

/**
 * gmap_make_secure() - make one guest page secure
 * @gmap: the guest gmap
 * @gaddr: the guest address that needs to be made secure
 * @uvcb: the UVCB specifying which operation needs to be performed
 *
 * Context: needs to be called with kvm->srcu held.
 * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()).
 */
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
	struct kvm *kvm = gmap->private;
	struct page *page;
	int rc = 0;

	lockdep_assert_held(&kvm->srcu);

	page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
	mmap_read_lock(gmap->mm);
	if (page)
		rc = __gmap_make_secure(gmap, page, uvcb);
	kvm_release_page_clean(page);
	mmap_read_unlock(gmap->mm);

	return rc;
}

int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
{
	struct uv_cb_cts uvcb = {
		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
		.header.len = sizeof(uvcb),
		.guest_handle = gmap->guest_handle,
		.gaddr = gaddr,
	};

	return gmap_make_secure(gmap, gaddr, &uvcb);
}

/**
 * __gmap_destroy_page() - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @page: the page to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: must be called holding the mm lock for gmap->mm
 */
static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
{
	struct folio *folio = page_folio(page);
	int rc;

	/*
	 * See gmap_make_secure(): large folios cannot be secure. Small
	 * folio implies FW_LEVEL_PTE.
	 */
	if (folio_test_large(folio))
		return -EFAULT;

	rc = uv_destroy_folio(folio);
	/*
	 * Fault handlers can race; it is possible that two CPUs will fault
	 * on the same secure page. One CPU can destroy the page, reboot,
	 * re-enter secure mode and import it, while the second CPU was
	 * stuck at the beginning of the handler. At some point the second
	 * CPU will be able to progress, and it will not be able to destroy
	 * the page. In that case we do not want to terminate the process,
	 * we instead try to export the page.
	 */
	if (rc)
		rc = uv_convert_from_secure_folio(folio);

	return rc;
}

/**
 * gmap_destroy_page() - Destroy a guest page.
 * @gmap: the gmap of the guest
 * @gaddr: the guest address to destroy
 *
 * An attempt will be made to destroy the given guest page. If the attempt
 * fails, an attempt is made to export the page. If both attempts fail, an
 * appropriate error is returned.
 *
 * Context: may sleep.
 */
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
	struct page *page;
	int rc = 0;

	mmap_read_lock(gmap->mm);
	page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
	if (page)
		rc = __gmap_destroy_page(gmap, page);
	kvm_release_page_clean(page);
	mmap_read_unlock(gmap->mm);
	return rc;
}
Loading