Commit 9123c5f9 authored by Paolo Bonzini
Browse files

Merge tag 'kvm-x86-gmem-6.20' of https://github.com/kvm-x86/linux into HEAD

KVM guest_memfd changes for 6.20

 - Remove kvm_gmem_populate()'s preparation tracking and half-baked hugepage
   handling, and instead rely on SNP (the only user of the tracking) to do its
   own tracking via the RMP.

 - Retroactively document and enforce (for SNP) that KVM_SEV_SNP_LAUNCH_UPDATE
   and KVM_TDX_INIT_MEM_REGION require the source page to be 4KiB aligned, to
   avoid non-trivial complexity for a non-existent use case (and because
   in-place conversion simply can't support unaligned sources).

 - When populating guest_memfd memory, GUP the source page in common code and
   pass the refcounted page to the vendor callback, instead of letting vendor
   code do the heavy lifting.  Doing so avoids a looming deadlock bug with
   in-place conversion due to an AB-BA conflict between mmap_lock and
   guest_memfd's filemap invalidate lock.
parents 54f15ebf 2a62345b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -523,7 +523,7 @@ Returns: 0 on success, < 0 on error, -EAGAIN if caller should retry

        struct kvm_sev_snp_launch_update {
                __u64 gfn_start;        /* Guest page number to load/encrypt data into. */
                __u64 uaddr;            /* Userspace address of data to be loaded/encrypted. */
                __u64 uaddr;            /* 4k-aligned address of data to be loaded/encrypted. */
                __u64 len;              /* 4k-aligned length in bytes to copy into guest memory.*/
                __u8 type;              /* The type of the guest pages being initialized. */
                __u8 pad0;
+1 −1
Original line number Diff line number Diff line
@@ -156,7 +156,7 @@ KVM_TDX_INIT_MEM_REGION
:Returns: 0 on success, <0 on error

Initialize @nr_pages TDX guest private memory starting from @gpa with userspace
provided data from @source_addr.
provided data from @source_addr. @source_addr must be PAGE_SIZE-aligned.

Note, before calling this sub command, memory attribute of the range
[gpa, gpa + nr_pages] needs to be private.  Userspace can use
+47 −61
Original line number Diff line number Diff line
@@ -2277,66 +2277,52 @@ struct sev_gmem_populate_args {
	int fw_error;
};

static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pfn,
				  void __user *src, int order, void *opaque)
static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				  struct page *src_page, void *opaque)
{
	struct sev_gmem_populate_args *sev_populate_args = opaque;
	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
	int n_private = 0, ret, i;
	int npages = (1 << order);
	gfn_t gfn;

	if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src))
		return -EINVAL;

	for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) {
	struct sev_data_snp_launch_update fw_args = {0};
	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
	bool assigned = false;
	int level;
	int ret;

	if (WARN_ON_ONCE(sev_populate_args->type != KVM_SEV_SNP_PAGE_TYPE_ZERO && !src_page))
		return -EINVAL;

		ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
	ret = snp_lookup_rmpentry((u64)pfn, &assigned, &level);
	if (ret || assigned) {
		pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
			 __func__, gfn, ret, assigned);
		ret = ret ? -EINVAL : -EEXIST;
			goto err;
		goto out;
	}

		if (src) {
			void *vaddr = kmap_local_pfn(pfn + i);
	if (src_page) {
		void *src_vaddr = kmap_local_page(src_page);
		void *dst_vaddr = kmap_local_pfn(pfn);

			if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) {
				ret = -EFAULT;
				goto err;
			}
			kunmap_local(vaddr);
		memcpy(dst_vaddr, src_vaddr, PAGE_SIZE);

		kunmap_local(src_vaddr);
		kunmap_local(dst_vaddr);
	}

		ret = rmp_make_private(pfn + i, gfn << PAGE_SHIFT, PG_LEVEL_4K,
	ret = rmp_make_private(pfn, gfn << PAGE_SHIFT, PG_LEVEL_4K,
			       sev_get_asid(kvm), true);
	if (ret)
			goto err;

		n_private++;
		goto out;

	fw_args.gctx_paddr = __psp_pa(sev->snp_context);
		fw_args.address = __sme_set(pfn_to_hpa(pfn + i));
	fw_args.address = __sme_set(pfn_to_hpa(pfn));
	fw_args.page_size = PG_LEVEL_TO_RMP(PG_LEVEL_4K);
	fw_args.page_type = sev_populate_args->type;

	ret = __sev_issue_cmd(sev_populate_args->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE,
			      &fw_args, &sev_populate_args->fw_error);
		if (ret)
			goto fw_err;
	}

	return 0;

fw_err:
	/*
	 * If the firmware command failed handle the reclaim and cleanup of that
	 * PFN specially vs. prior pages which can be cleaned up below without
	 * needing to reclaim in advance.
	 * PFN before reporting an error.
	 *
	 * Additionally, when invalid CPUID function entries are detected,
	 * firmware writes the expected values into the page and leaves it
@@ -2346,26 +2332,22 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
	 * information to provide information on which CPUID leaves/fields
	 * failed CPUID validation.
	 */
	if (!snp_page_reclaim(kvm, pfn + i) &&
	if (ret && !snp_page_reclaim(kvm, pfn) &&
	    sev_populate_args->type == KVM_SEV_SNP_PAGE_TYPE_CPUID &&
	    sev_populate_args->fw_error == SEV_RET_INVALID_PARAM) {
		void *vaddr = kmap_local_pfn(pfn + i);
		void *src_vaddr = kmap_local_page(src_page);
		void *dst_vaddr = kmap_local_pfn(pfn);

		if (copy_to_user(src + i * PAGE_SIZE, vaddr, PAGE_SIZE))
			pr_debug("Failed to write CPUID page back to userspace\n");
		memcpy(src_vaddr, dst_vaddr, PAGE_SIZE);

		kunmap_local(vaddr);
		kunmap_local(src_vaddr);
		kunmap_local(dst_vaddr);
	}

	/* pfn + i is hypervisor-owned now, so skip below cleanup for it. */
	n_private--;

err:
	pr_debug("%s: exiting with error ret %d (fw_error %d), restoring %d gmem PFNs to shared.\n",
		 __func__, ret, sev_populate_args->fw_error, n_private);
	for (i = 0; i < n_private; i++)
		kvm_rmp_make_shared(kvm, pfn + i, PG_LEVEL_4K);

out:
	if (ret)
		pr_debug("%s: error updating GFN %llx, return code %d (fw_error %d)\n",
			 __func__, gfn, ret, sev_populate_args->fw_error);
	return ret;
}

@@ -2396,6 +2378,11 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)
	     params.type != KVM_SEV_SNP_PAGE_TYPE_CPUID))
		return -EINVAL;

	src = params.type == KVM_SEV_SNP_PAGE_TYPE_ZERO ? NULL : u64_to_user_ptr(params.uaddr);

	if (!PAGE_ALIGNED(src))
		return -EINVAL;

	npages = params.len / PAGE_SIZE;

	/*
@@ -2427,7 +2414,6 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp)

	sev_populate_args.sev_fd = argp->sev_fd;
	sev_populate_args.type = params.type;
	src = params.type == KVM_SEV_SNP_PAGE_TYPE_ZERO ? NULL : u64_to_user_ptr(params.uaddr);

	count = kvm_gmem_populate(kvm, params.gfn_start, src, npages,
				  sev_gmem_post_populate, &sev_populate_args);
+3 −13
Original line number Diff line number Diff line
@@ -3118,34 +3118,24 @@ struct tdx_gmem_post_populate_arg {
};

static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				  void __user *src, int order, void *_arg)
				  struct page *src_page, void *_arg)
{
	struct tdx_gmem_post_populate_arg *arg = _arg;
	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
	u64 err, entry, level_state;
	gpa_t gpa = gfn_to_gpa(gfn);
	struct page *src_page;
	int ret, i;

	if (KVM_BUG_ON(kvm_tdx->page_add_src, kvm))
		return -EIO;

	/*
	 * Get the source page if it has been faulted in. Return failure if the
	 * source page has been swapped out or unmapped in primary memory.
	 */
	ret = get_user_pages_fast((unsigned long)src, 1, 0, &src_page);
	if (ret < 0)
		return ret;
	if (ret != 1)
		return -ENOMEM;
	if (!src_page)
		return -EOPNOTSUPP;

	kvm_tdx->page_add_src = src_page;
	ret = kvm_tdp_mmu_map_private_pfn(arg->vcpu, gfn, pfn);
	kvm_tdx->page_add_src = NULL;

	put_page(src_page);

	if (ret || !(arg->flags & KVM_TDX_MEASURE_MEMORY_REGION))
		return ret;

+2 −2
Original line number Diff line number Diff line
@@ -2566,7 +2566,7 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
 * @gfn: starting GFN to be populated
 * @src: userspace-provided buffer containing data to copy into GFN range
 *       (passed to @post_populate, and incremented on each iteration
 *       if not NULL)
 *       if not NULL). Must be page-aligned.
 * @npages: number of pages to copy from userspace-buffer
 * @post_populate: callback to issue for each gmem page that backs the GPA
 *                 range
@@ -2581,7 +2581,7 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord
 * Returns the number of pages that were populated.
 */
typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
				    void __user *src, int order, void *opaque);
				    struct page *page, void *opaque);

long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
		       kvm_gmem_populate_cb post_populate, void *opaque);
Loading