Merge tag 'kvm-x86-sev-6.17' of https://github.com/kvm-x86/linux into HEAD (beafd7ec) · Commits · git / linux-net

arch/x86/kvm/svm/sev.c

+82 −28

Original line number	Diff line number	Diff line
		@@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
		*/
		down_write(&sev_deactivate_lock);

		/* SNP firmware requires use of WBINVD for ASID recycling. */
		wbinvd_on_all_cpus();

		if (sev_snp_enabled)
		@@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
		init_args.probe = false;
		ret = sev_platform_init(&init_args);
		if (ret)
		goto e_free;
		goto e_free_asid;

		if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_free_asid;
		}

		/* This needs to happen after SEV/SNP firmware initialization. */
		if (vm_type == KVM_X86_SNP_VM) {
		@@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
		return 0;

		e_free:
		free_cpumask_var(sev->have_run_cpus);
		e_free_asid:
		argp->error = init_args.error;
		sev_asid_free(sev);
		sev->asid = 0;
		@@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
		}
		}

		static void sev_writeback_caches(struct kvm *kvm)
		{
			/*
			 * Dirty cache lines tagged with the guest's ASID must be written
			 * back before the VM's pages are handed back to the system.
			 * CLFLUSH does not achieve that without SME_COHERENT, and for
			 * large VMs flushing line by line is slower than a full
			 * writeback, so WBNOINVD (or WBINVD when the "no invalidate"
			 * variant is unsupported) is issued instead. Only CPUs that have
			 * done VMRUN for this VM can hold data dirtied under its ASID, so
			 * only CPUs set in have_run_cpus are targeted, and an empty mask
			 * means there is nothing to do.
			 *
			 * CPUs are never cleared from the mask, purely for simplicity.
			 * Clearing it on flush would require serializing concurrent
			 * callers and having the CPUs that respond to the IPI re-mark
			 * themselves if they are running (or are about to run) a vCPU for
			 * this VM.
			 *
			 * Note, if the mask can change while this runs, e.g. because a
			 * CPU could be doing VMRUN, ensuring correctness is the caller's
			 * responsibility.
			 */
			if (!cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
				wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
		}

		static unsigned long get_num_contig_pages(unsigned long idx,
		struct page **inpages, unsigned long npages)
		{
		@@ -2037,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
		if (ret)
		goto out_source_vcpu;

		/*
		* Allocate a new have_run_cpus for the destination, i.e. don't copy
		* the set of CPUs from the source. If a CPU was used to run a vCPU in
		* the source VM but is never used for the destination VM, then the CPU
		* can only have cached memory that was accessible to the source VM.
		*/
		if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto out_source_vcpu;
		}

		sev_migrate_from(kvm, source_kvm);
		kvm_vm_dead(source_kvm);
		cg_cleanup_sev = src_sev;
		@@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
		goto failed;
		}

		/*
		* Ensure that all guest tagged cache entries are flushed before
		* releasing the pages back to the system for use. CLFLUSH will
		* not do this, so issue a WBINVD.
		*/
		wbinvd_on_all_cpus();
		sev_writeback_caches(kvm);

		__unregister_enc_region_locked(kvm, region);

		@@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
		goto e_unlock;
		}

		mirror_sev = to_kvm_sev_info(kvm);
		if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
		ret = -ENOMEM;
		goto e_unlock;
		}

		/*
		* The mirror kvm holds an enc_context_owner ref so its asid can't
		* disappear until we're done with it
		*/
		source_sev = to_kvm_sev_info(source_kvm);
		kvm_get_kvm(source_kvm);
		mirror_sev = to_kvm_sev_info(kvm);
		list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);

		/* Set enc_context_owner and copy its encryption context over */
		@@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)

		WARN_ON(!list_empty(&sev->mirror_vms));

		/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
		free_cpumask_var(sev->have_run_cpus);

		/*
		* If this is a mirror VM, remove it from the owner's list of mirrors
		* and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
		* Note, mirror VMs don't support registering encrypted regions.
		*/
		if (is_mirroring_enc_context(kvm)) {
		struct kvm *owner_kvm = sev->enc_context_owner;

		@@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
		return;
		}

		/*
		* Ensure that all guest tagged cache entries are flushed before
		* releasing the pages back to the system for use. CLFLUSH will
		* not do this, so issue a WBINVD.
		*/
		wbinvd_on_all_cpus();

		/*
		* if userspace was terminated before unregistering the memory regions
		@@ -3095,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)

		/*
		* VM Page Flush takes a host virtual address and a guest ASID. Fall
		* back to WBINVD if this faults so as not to make any problems worse
		* by leaving stale encrypted data in the cache.
		* back to full writeback of caches if this faults so as not to make
		* any problems worse by leaving stale encrypted data in the cache.
		*/
		if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
		goto do_wbinvd;
		goto do_sev_writeback_caches;

		return;

		do_wbinvd:
		wbinvd_on_all_cpus();
		do_sev_writeback_caches:
		sev_writeback_caches(vcpu->kvm);
		}

		void sev_guest_memory_reclaimed(struct kvm *kvm)
		{
		/*
		* With SNP+gmem, private/encrypted memory is unreachable via the
		* hva-based mmu notifiers, so these events are only actually
		* pertaining to shared pages where there is no need to perform
		* the WBINVD to flush associated caches.
		* hva-based mmu notifiers, i.e. these events are explicitly scoped to
		* shared pages, where there's no need to flush caches.
		*/
		if (!sev_guest(kvm) || sev_snp_guest(kvm))
		return;

		wbinvd_on_all_cpus();
		sev_writeback_caches(kvm);
		}

		void sev_free_vcpu(struct kvm_vcpu *vcpu)
		@@ -3450,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
		if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
		return -EINVAL;

		/*
		* To optimize cache flushes when memory is reclaimed from an SEV VM,
		* track physical CPUs that enter the guest for SEV VMs and thus can
		* have encrypted, dirty data in the cache, and flush caches only for
		* CPUs that have entered the guest.
		*/
		if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);

		/* Assign the asid allocated with this SEV guest */
		svm->asid = asid;

		@@ -3882,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
		* From this point forward, the VMSA will always be a guest-mapped page
		* rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
		* theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
		* that involves cleanups like wbinvd_on_all_cpus() which would ideally
		* be handled during teardown rather than guest boot. Deferring that
		* also allows the existing logic for SEV-ES VMSAs to be re-used with
		* that involves cleanups like flushing caches, which would ideally be
		* handled during teardown rather than guest boot. Deferring that also
		* allows the existing logic for SEV-ES VMSAs to be re-used with
		* minimal SNP-specific changes.
		*/
		svm->sev_es.snp_has_guest_vmsa = true;
		@@ -4875,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)

		/*
		* SEV-ES avoids host/guest cache coherency issues through
		* WBINVD hooks issued via MMU notifiers during run-time, and
		* WBNOINVD hooks issued via MMU notifiers during run-time, and
		* KVM's VM destroy path at shutdown. Those MMU notifier events
		* don't cover gmem since there is no requirement to map pages
		* to a HVA in order to use them for a running guest. While the

arch/x86/kvm/svm/svm.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -110,6 +110,7 @@ struct kvm_sev_info {
		void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
		void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
		struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
		cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
		};

		#define SEV_POLICY_NODBG BIT_ULL(0)

arch/x86/kvm/x86.c

+1 −7

Original line number	Diff line number	Diff line
		@@ -4994,11 +4994,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
		return r;
		}

		/*
		 * Callback handed to smp_call_function_single(): calls wbinvd() on the
		 * CPU it runs on. The argument is unused; the visible caller passes NULL.
		 */
		static void wbinvd_ipi(void *garbage)
		{
		wbinvd();
		}

		static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
		{
		return kvm_arch_has_noncoherent_dma(vcpu->kvm);
		@@ -5022,8 +5017,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
		if (kvm_x86_call(has_wbinvd_exit)())
		cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
		smp_call_function_single(vcpu->cpu,
		wbinvd_ipi, NULL, 1);
		wbinvd_on_cpu(vcpu->cpu);
		}

		kvm_x86_call(vcpu_load)(vcpu, cpu);