Commit beafd7ec authored by Paolo Bonzini
Browse files

Merge tag 'kvm-x86-sev-6.17' of https://github.com/kvm-x86/linux into HEAD

KVM SEV cache maintenance changes for 6.17

 - Drop a superfluous WBINVD (on all CPUs!) when destroying a VM.

 - Use WBNOINVD instead of WBINVD when possible, for SEV cache maintenance,
   e.g. to minimize collateral damage when reclaiming memory from an SEV guest.

 - When reclaiming memory from an SEV guest, only do cache flushes on CPUs that
   have ever run a vCPU for the guest, i.e. don't flush the caches for CPUs
   that can't possibly have cache lines with dirty, encrypted data.
parents a10accae 6f38f8c5
Loading
Loading
Loading
Loading
+82 −28
Original line number Diff line number Diff line
@@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
	 */
	down_write(&sev_deactivate_lock);

	/* SNP firmware requires use of WBINVD for ASID recycling. */
	wbinvd_on_all_cpus();

	if (sev_snp_enabled)
@@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
	init_args.probe = false;
	ret = sev_platform_init(&init_args);
	if (ret)
		goto e_free;
		goto e_free_asid;

	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_free_asid;
	}

	/* This needs to happen after SEV/SNP firmware initialization. */
	if (vm_type == KVM_X86_SNP_VM) {
@@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
	return 0;

e_free:
	free_cpumask_var(sev->have_run_cpus);
e_free_asid:
	argp->error = init_args.error;
	sev_asid_free(sev);
	sev->asid = 0;
@@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
	}
}

static void sev_writeback_caches(struct kvm *kvm)
{
	/*
	 * Write back dirty, guest-tagged cache lines before the VM's pages are
	 * handed back to the system.  CLFLUSH doesn't do this without
	 * SME_COHERENT, and flushing lines one at a time is slower than WBINVD
	 * for large VMs, so issue WBNOINVD (or WBINVD when the "no invalidate"
	 * variant is unsupported), but only on CPUs that have done VMRUN for
	 * this VM, i.e. that may have dirtied data using the VM's ASID.  If no
	 * CPU has been recorded in the mask, there is nothing to write back.
	 *
	 * Note, the caller is responsible for ensuring correctness if the mask
	 * can change concurrently, e.g. if a CPU could be doing VMRUN.
	 *
	 * CPUs are deliberately never removed from the mask.  Clearing it when
	 * flushing would require serializing concurrent callers and having the
	 * CPUs that respond to the IPI re-mark themselves if they are running,
	 * or are about to run, a vCPU for the VM.
	 */
	if (!cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
		wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
}

static unsigned long get_num_contig_pages(unsigned long idx,
				struct page **inpages, unsigned long npages)
{
@@ -2037,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
	if (ret)
		goto out_source_vcpu;

	/*
	 * Allocate a new have_run_cpus for the destination, i.e. don't copy
	 * the set of CPUs from the source.  If a CPU was used to run a vCPU in
	 * the source VM but is never used for the destination VM, then the CPU
	 * can only have cached memory that was accessible to the source VM.
	 */
	if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto out_source_vcpu;
	}

	sev_migrate_from(kvm, source_kvm);
	kvm_vm_dead(source_kvm);
	cg_cleanup_sev = src_sev;
@@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();
	sev_writeback_caches(kvm);

	__unregister_enc_region_locked(kvm, region);

@@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
		goto e_unlock;
	}

	mirror_sev = to_kvm_sev_info(kvm);
	if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_unlock;
	}

	/*
	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
	 */
	source_sev = to_kvm_sev_info(source_kvm);
	kvm_get_kvm(source_kvm);
	mirror_sev = to_kvm_sev_info(kvm);
	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);

	/* Set enc_context_owner and copy its encryption context over */
@@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)

	WARN_ON(!list_empty(&sev->mirror_vms));

	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
	free_cpumask_var(sev->have_run_cpus);

	/*
	 * If this is a mirror VM, remove it from the owner's list of mirrors
	 * and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
	 * Note, mirror VMs don't support registering encrypted regions.
	 */
	if (is_mirroring_enc_context(kvm)) {
		struct kvm *owner_kvm = sev->enc_context_owner;

@@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
		return;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * if userspace was terminated before unregistering the memory regions
@@ -3095,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)

	/*
	 * VM Page Flush takes a host virtual address and a guest ASID.  Fall
	 * back to WBINVD if this faults so as not to make any problems worse
	 * by leaving stale encrypted data in the cache.
	 * back to full writeback of caches if this faults so as not to make
	 * any problems worse by leaving stale encrypted data in the cache.
	 */
	if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
		goto do_wbinvd;
		goto do_sev_writeback_caches;

	return;

do_wbinvd:
	wbinvd_on_all_cpus();
do_sev_writeback_caches:
	sev_writeback_caches(vcpu->kvm);
}

void sev_guest_memory_reclaimed(struct kvm *kvm)
{
	/*
	 * With SNP+gmem, private/encrypted memory is unreachable via the
	 * hva-based mmu notifiers, so these events are only actually
	 * pertaining to shared pages where there is no need to perform
	 * the WBINVD to flush associated caches.
	 * hva-based mmu notifiers, i.e. these events are explicitly scoped to
	 * shared pages, where there's no need to flush caches.
	 */
	if (!sev_guest(kvm) || sev_snp_guest(kvm))
		return;

	wbinvd_on_all_cpus();
	sev_writeback_caches(kvm);
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3450,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
	if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
		return -EINVAL;

	/*
	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
	 * track physical CPUs that enter the guest for SEV VMs and thus can
	 * have encrypted, dirty data in the cache, and flush caches only for
	 * CPUs that have entered the guest.
	 */
	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);

	/* Assign the asid allocated with this SEV guest */
	svm->asid = asid;

@@ -3882,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
	 * From this point forward, the VMSA will always be a guest-mapped page
	 * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
	 * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
	 * that involves cleanups like wbinvd_on_all_cpus() which would ideally
	 * be handled during teardown rather than guest boot.  Deferring that
	 * also allows the existing logic for SEV-ES VMSAs to be re-used with
	 * that involves cleanups like flushing caches, which would ideally be
	 * handled during teardown rather than guest boot.  Deferring that also
	 * allows the existing logic for SEV-ES VMSAs to be re-used with
	 * minimal SNP-specific changes.
	 */
	svm->sev_es.snp_has_guest_vmsa = true;
@@ -4875,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)

		/*
		 * SEV-ES avoids host/guest cache coherency issues through
		 * WBINVD hooks issued via MMU notifiers during run-time, and
		 * WBNOINVD hooks issued via MMU notifiers during run-time, and
		 * KVM's VM destroy path at shutdown. Those MMU notifier events
		 * don't cover gmem since there is no requirement to map pages
		 * to a HVA in order to use them for a running guest. While the
+1 −0
Original line number Diff line number Diff line
@@ -110,6 +110,7 @@ struct kvm_sev_info {
	void *guest_req_buf;    /* Bounce buffer for SNP Guest Request input */
	void *guest_resp_buf;   /* Bounce buffer for SNP Guest Request output */
	struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
	cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
};

#define SEV_POLICY_NODBG	BIT_ULL(0)
+1 −7
Original line number Diff line number Diff line
@@ -4994,11 +4994,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
	return r;
}

/*
 * Callback passed to smp_call_function_single(): executes WBINVD on the CPU
 * it runs on.  The argument is unused.
 */
static void wbinvd_ipi(void *garbage)
{
	wbinvd();
}

static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
	return kvm_arch_has_noncoherent_dma(vcpu->kvm);
@@ -5022,8 +5017,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		if (kvm_x86_call(has_wbinvd_exit)())
			cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
			smp_call_function_single(vcpu->cpu,
					wbinvd_ipi, NULL, 1);
			wbinvd_on_cpu(vcpu->cpu);
	}

	kvm_x86_call(vcpu_load)(vcpu, cpu);