Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm (137e0ec0) · Commits · git / linux-nf

Documentation/virt/kvm/api.rst

+5 −0

Original line number	Diff line number	Diff line
		@@ -8791,6 +8791,11 @@ means the VM type with value @n is supported. Possible values of @n are::
		#define KVM_X86_DEFAULT_VM 0
		#define KVM_X86_SW_PROTECTED_VM 1

		Note, KVM_X86_SW_PROTECTED_VM is currently only for development and testing.
		Do not use KVM_X86_SW_PROTECTED_VM for "real" VMs, and especially not in
		production. The behavior and effective ABI for software-protected VMs is
		unstable.

		9. Known KVM API problems
		=========================

arch/x86/kvm/Kconfig

+4 −3

Original line number	Diff line number	Diff line
		@@ -80,9 +80,10 @@ config KVM_SW_PROTECTED_VM
		depends on KVM && X86_64
		select KVM_GENERIC_PRIVATE_MEM
		help
		Enable support for KVM software-protected VMs. Currently "protected"
		means the VM can be backed with memory provided by
		KVM_CREATE_GUEST_MEMFD.
		Enable support for KVM software-protected VMs. Currently, software-
		protected VMs are purely a development and testing vehicle for
		KVM_CREATE_GUEST_MEMFD. Attempting to run a "real" VM workload as a
		software-protected VM will fail miserably.

		If unsure, say "N".

arch/x86/kvm/mmu/mmu.c

+42 −0

Original line number	Diff line number	Diff line
		@@ -4405,6 +4405,31 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
		fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
		smp_rmb();

		/*
		* Check for a relevant mmu_notifier invalidation event before getting
		* the pfn from the primary MMU, and before acquiring mmu_lock.
		*
		* For mmu_lock, if there is an in-progress invalidation and the kernel
		* allows preemption, the invalidation task may drop mmu_lock and yield
		* in response to mmu_lock being contended, which is very counter-
		* productive as this vCPU can't actually make forward progress until
		* the invalidation completes.
		*
		* Retrying now can also avoid unnecessary lock contention in the primary
		* MMU, as the primary MMU doesn't necessarily hold a single lock for
		* the duration of the invalidation, i.e. faulting in a conflicting pfn
		* can cause the invalidation to take longer by holding locks that are
		* needed to complete the invalidation.
		*
		* Do the pre-check even for non-preemptible kernels, i.e. even if KVM
		* will never yield mmu_lock in response to contention, as this vCPU is
		* guaranteed to need to retry, i.e. waiting until mmu_lock is held
		* to detect retry guarantees the worst case latency for the vCPU.
		*/
		if (fault->slot &&
		mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
		return RET_PF_RETRY;

		ret = __kvm_faultin_pfn(vcpu, fault);
		if (ret != RET_PF_CONTINUE)
		return ret;
		@@ -4415,6 +4440,18 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
		if (unlikely(!fault->slot))
		return kvm_handle_noslot_fault(vcpu, fault, access);

		/*
		* Check again for a relevant mmu_notifier invalidation event purely to
		* avoid contending mmu_lock. Most invalidations will be detected by
		* the previous check, but checking is extremely cheap relative to the
		* overall cost of failing to detect the invalidation until after
		* mmu_lock is acquired.
		*/
		if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
		kvm_release_pfn_clean(fault->pfn);
		return RET_PF_RETRY;
		}

		return RET_PF_CONTINUE;
		}

		@@ -4442,6 +4479,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
		if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
		return true;

		/*
		* Check for a relevant mmu_notifier invalidation event one last time
		* now that mmu_lock is held, as the "unsafe" checks performed without
		* holding mmu_lock can get false negatives.
		*/
		return fault->slot &&
		mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
		}

arch/x86/kvm/svm/sev.c

+14 −9

Original line number	Diff line number	Diff line
		@@ -57,7 +57,7 @@ static bool sev_es_enabled = true;
		module_param_named(sev_es, sev_es_enabled, bool, 0444);

		/* enable/disable SEV-ES DebugSwap support */
		static bool sev_es_debug_swap_enabled = true;
		static bool sev_es_debug_swap_enabled = false;
		module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
		#else
		#define sev_enabled false
		@@ -612,8 +612,11 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
		save->xss = svm->vcpu.arch.ia32_xss;
		save->dr6 = svm->vcpu.arch.dr6;

		if (sev_es_debug_swap_enabled)
		if (sev_es_debug_swap_enabled) {
		save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
		pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. "
		"This will not work starting with Linux 6.10\n");
		}

		pr_debug("Virtual Machine Save Area (VMSA):\n");
		print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
		@@ -1975,20 +1978,22 @@ int sev_mem_enc_register_region(struct kvm *kvm,
		goto e_free;
		}

		region->uaddr = range->addr;
		region->size = range->size;

		list_add_tail(&region->list, &sev->regions_list);
		mutex_unlock(&kvm->lock);

		/*
		* The guest may change the memory encryption attribute from C=0 -> C=1
		* or vice versa for this memory range. Let's make sure caches are
		* flushed to ensure that guest data gets written into memory with
		* correct C-bit.
		* correct C-bit. Note, this must be done before dropping kvm->lock,
		* as region and its array of pages can be freed by a different task
		* once kvm->lock is released.
		*/
		sev_clflush_pages(region->pages, region->npages);

		region->uaddr = range->addr;
		region->size = range->size;

		list_add_tail(&region->list, &sev->regions_list);
		mutex_unlock(&kvm->lock);

		return ret;

		e_free:

arch/x86/kvm/x86.c

+11 −1

Original line number	Diff line number	Diff line
		@@ -4580,7 +4580,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
		{
		return type == KVM_X86_DEFAULT_VM ||
		(type == KVM_X86_SW_PROTECTED_VM &&
		IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
		IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
		}

		int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
		@@ -8007,6 +8007,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,

		if (r < 0)
		return X86EMUL_UNHANDLEABLE;

		/*
		* Mark the page dirty _before_ checking whether or not the CMPXCHG was
		* successful, as the old value is written back on failure. Note, for
		* live migration, this is unnecessarily conservative as CMPXCHG writes
		* back the original value and the access is atomic, but KVM's ABI is
		* that all writes are dirty logged, regardless of the value written.
		*/
		kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa));

		if (r)
		return X86EMUL_CMPXCHG_FAILED;