Merge branch 'kvm-mirror-page-tables' into HEAD (86eb1aef) · Commits · git / linux-nf

arch/x86/include/asm/kvm-x86-ops.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -93,6 +93,10 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
		KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
		KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
		KVM_X86_OP(load_mmu_pgd)
		KVM_X86_OP_OPTIONAL(link_external_spt)
		KVM_X86_OP_OPTIONAL(set_external_spte)
		KVM_X86_OP_OPTIONAL(free_external_spt)
		KVM_X86_OP_OPTIONAL(remove_external_spte)
		KVM_X86_OP(has_wbinvd_exit)
		KVM_X86_OP(get_l2_tsc_offset)
		KVM_X86_OP(get_l2_tsc_multiplier)

arch/x86/include/asm/kvm_host.h

+28 −3

Original line number	Diff line number	Diff line
		@@ -313,10 +313,11 @@ struct kvm_kernel_irq_routing_entry;
		* the number of unique SPs that can theoretically be created is 2^n, where n
		* is the number of bits that are used to compute the role.
		*
		* But, even though there are 19 bits in the mask below, not all combinations
		* But, even though there are 20 bits in the mask below, not all combinations
		* of modes and flags are possible:
		*
		* - invalid shadow pages are not accounted, so the bits are effectively 18
		* - invalid shadow pages are not accounted, mirror pages are not shadowed,
		* so the bits are effectively 18.
		*
		* - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
		* execonly and ad_disabled are only used for nested EPT which has
		@@ -349,7 +350,8 @@ union kvm_mmu_page_role {
		unsigned ad_disabled:1;
		unsigned guest_mode:1;
		unsigned passthrough:1;
		unsigned :5;
		unsigned is_mirror:1;
		unsigned :4;

		/*
		* This is left at the top of the word so that
		@@ -457,6 +459,7 @@ struct kvm_mmu {
		int (sync_spte)(struct kvm_vcpu vcpu,
		struct kvm_mmu_page *sp, int i);
		struct kvm_mmu_root_info root;
		hpa_t mirror_root_hpa;
		union kvm_cpu_role cpu_role;
		union kvm_mmu_page_role root_role;

		@@ -830,6 +833,11 @@ struct kvm_vcpu_arch {
		struct kvm_mmu_memory_cache mmu_shadow_page_cache;
		struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
		struct kvm_mmu_memory_cache mmu_page_header_cache;
		/*
		* This cache is used to allocate external page tables, e.g. the private
		* EPT used by the TDX module.
		*/
		struct kvm_mmu_memory_cache mmu_external_spt_cache;

		/*
		* QEMU userspace and the guest each have their own FPU state.
		@@ -1549,6 +1557,8 @@ struct kvm_arch {
		*/
		#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
		struct kvm_mmu_memory_cache split_desc_cache;

		gfn_t gfn_direct_bits;
		};

		struct kvm_vm_stat {
		@@ -1761,6 +1771,21 @@ struct kvm_x86_ops {
		void (load_mmu_pgd)(struct kvm_vcpu vcpu, hpa_t root_hpa,
		int root_level);

		/* Update external mapping with page table link. */
		int (link_external_spt)(struct kvm kvm, gfn_t gfn, enum pg_level level,
		void *external_spt);
		/* Update the external page table from spte getting set. */
		int (set_external_spte)(struct kvm kvm, gfn_t gfn, enum pg_level level,
		kvm_pfn_t pfn_for_gfn);

		/* Update external page tables for page table about to be freed. */
		int (free_external_spt)(struct kvm kvm, gfn_t gfn, enum pg_level level,
		void *external_spt);

		/* Update external page table from spte getting removed, and flush TLB. */
		int (remove_external_spte)(struct kvm kvm, gfn_t gfn, enum pg_level level,
		kvm_pfn_t pfn_for_gfn);

		bool (*has_wbinvd_exit)(void);

		u64 (get_l2_tsc_offset)(struct kvm_vcpu vcpu);

arch/x86/include/uapi/asm/kvm.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -925,5 +925,6 @@ struct kvm_hyperv_eventfd {
		#define KVM_X86_SEV_VM 2
		#define KVM_X86_SEV_ES_VM 3
		#define KVM_X86_SNP_VM 4
		#define KVM_X86_TDX_VM 5

		#endif /* _ASM_X86_KVM_H */

arch/x86/kvm/mmu.h

+31 −0

Original line number	Diff line number	Diff line
		@@ -104,6 +104,15 @@ void kvm_mmu_track_write(struct kvm_vcpu vcpu, gpa_t gpa, const u8 new,

		static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
		{
		/*
		* Checking root.hpa is sufficient even when KVM has mirror root.
		* We can have either:
		* (1) mirror_root_hpa = INVALID_PAGE, root.hpa = INVALID_PAGE
		* (2) mirror_root_hpa = root, root.hpa = INVALID_PAGE
		* (3) mirror_root_hpa = root1, root.hpa = root2
		* We don't ever have:
		* mirror_root_hpa = INVALID_PAGE, root.hpa = root
		*/
		if (likely(vcpu->arch.mmu->root.hpa != INVALID_PAGE))
		return 0;

		@@ -287,4 +296,26 @@ static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
		return gpa;
		return translate_nested_gpa(vcpu, gpa, access, exception);
		}

		/*
		 * Report whether @kvm has a mirrored TDP; here that is true exactly when
		 * the VM type recorded in kvm->arch is KVM_X86_TDX_VM.
		 */
		static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm)
		{
			const bool is_tdx_vm = (kvm->arch.vm_type == KVM_X86_TDX_VM);

			return is_tdx_vm;
		}

		/* Fetch the gfn_direct_bits value stored in @kvm's arch state. */
		static inline gfn_t kvm_gfn_direct_bits(const struct kvm *kvm)
		{
			const gfn_t direct_bits = kvm->arch.gfn_direct_bits;

			return direct_bits;
		}

		/*
		 * Check whether @gpa counts as a direct address for @kvm.
		 *
		 * The VM's direct-bits GFN mask is converted to a GPA mask first.
		 * Returns true when that mask is zero (no direct bits configured for
		 * the VM), or when @gpa has at least one of the direct bits set.
		 */
		static inline bool kvm_is_addr_direct(struct kvm *kvm, gpa_t gpa)
		{
			gpa_t gpa_direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(kvm));

			return !gpa_direct_bits || (gpa & gpa_direct_bits);
		}

		/* Report whether @gfn has any of @kvm's direct bits set. */
		static inline bool kvm_is_gfn_alias(struct kvm *kvm, gfn_t gfn)
		{
			const gfn_t direct_bits = kvm_gfn_direct_bits(kvm);

			return (gfn & direct_bits) != 0;
		}
		#endif

arch/x86/kvm/mmu/mmu.c

+51 −9

Original line number	Diff line number	Diff line
		@@ -599,6 +599,12 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
		1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
		if (r)
		return r;
		if (kvm_has_mirrored_tdp(vcpu->kvm)) {
		r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_external_spt_cache,
		PT64_ROOT_MAX_LEVEL);
		if (r)
		return r;
		}
		r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
		PT64_ROOT_MAX_LEVEL);
		if (r)
		@@ -618,6 +624,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_external_spt_cache);
		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
		}

		@@ -3656,8 +3663,13 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
		unsigned i;
		int r;

		if (tdp_mmu_enabled)
		return kvm_tdp_mmu_alloc_root(vcpu);
		if (tdp_mmu_enabled) {
		if (kvm_has_mirrored_tdp(vcpu->kvm) &&
		!VALID_PAGE(mmu->mirror_root_hpa))
		kvm_tdp_mmu_alloc_root(vcpu, true);
		kvm_tdp_mmu_alloc_root(vcpu, false);
		return 0;
		}

		write_lock(&vcpu->kvm->mmu_lock);
		r = make_mmu_pages_available(vcpu);
		@@ -4379,8 +4391,12 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
		struct kvm_page_fault *fault, unsigned int access)
		{
		struct kvm_memory_slot *slot = fault->slot;
		struct kvm *kvm = vcpu->kvm;
		int ret;

		if (KVM_BUG_ON(kvm_is_gfn_alias(kvm, fault->gfn), kvm))
		return -EFAULT;

		/*
		* Note that the mmu_invalidate_seq also serves to detect a concurrent
		* change in attributes. is_page_fault_stale() will detect an
		@@ -4394,7 +4410,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
		* Now that we have a snapshot of mmu_invalidate_seq we can check for a
		* private vs. shared mismatch.
		*/
		if (fault->is_private != kvm_mem_is_private(vcpu->kvm, fault->gfn)) {
		if (fault->is_private != kvm_mem_is_private(kvm, fault->gfn)) {
		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
		return -EFAULT;
		}
		@@ -4456,7 +4472,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
		* guaranteed to need to retry, i.e. waiting until mmu_lock is held
		* to detect retry guarantees the worst case latency for the vCPU.
		*/
		if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
		if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn))
		return RET_PF_RETRY;

		ret = __kvm_mmu_faultin_pfn(vcpu, fault);
		@@ -4476,7 +4492,7 @@ static int kvm_mmu_faultin_pfn(struct kvm_vcpu *vcpu,
		* overall cost of failing to detect the invalidation until after
		* mmu_lock is acquired.
		*/
		if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
		if (mmu_invalidate_retry_gfn_unsafe(kvm, fault->mmu_seq, fault->gfn)) {
		kvm_mmu_finish_page_fault(vcpu, fault, RET_PF_RETRY);
		return RET_PF_RETRY;
		}
		@@ -6095,8 +6111,16 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
		else if (r == RET_PF_SPURIOUS)
		vcpu->stat.pf_spurious++;

		/*
		* None of handle_mmio_page_fault(), kvm_mmu_do_page_fault(), or
		* kvm_mmu_write_protect_fault() return RET_PF_CONTINUE.
		* kvm_mmu_do_page_fault() only uses RET_PF_CONTINUE internally to
		* indicate continuing the page fault handling until the final
		* page table mapping phase.
		*/
		WARN_ON_ONCE(r == RET_PF_CONTINUE);
		if (r != RET_PF_EMULATE)
		return 1;
		return r;

		emulate:
		return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
		@@ -6272,6 +6296,7 @@ static int __kvm_mmu_create(struct kvm_vcpu vcpu, struct kvm_mmu mmu)

		mmu->root.hpa = INVALID_PAGE;
		mmu->root.pgd = 0;
		mmu->mirror_root_hpa = INVALID_PAGE;
		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
		mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;

		@@ -6441,8 +6466,13 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
		* write and in the same critical section as making the reload request,
		* e.g. before kvm_zap_obsolete_pages() could drop mmu_lock and yield.
		*/
		if (tdp_mmu_enabled)
		kvm_tdp_mmu_invalidate_all_roots(kvm);
		if (tdp_mmu_enabled) {
		/*
		* External page tables don't support fast zapping, therefore
		* their mirrors must be invalidated separately by the caller.
		*/
		kvm_tdp_mmu_invalidate_roots(kvm, KVM_DIRECT_ROOTS);
		}

		/*
		* Notify all vcpus to reload its shadow page table and flush TLB.
		@@ -6467,7 +6497,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
		* lead to use-after-free.
		*/
		if (tdp_mmu_enabled)
		kvm_tdp_mmu_zap_invalidated_roots(kvm);
		kvm_tdp_mmu_zap_invalidated_roots(kvm, true);
		}

		void kvm_mmu_init_vm(struct kvm *kvm)
		@@ -7220,6 +7250,12 @@ int kvm_mmu_vendor_module_init(void)
		void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
		{
		kvm_mmu_unload(vcpu);
		if (tdp_mmu_enabled) {
		read_lock(&vcpu->kvm->mmu_lock);
		mmu_free_root_page(vcpu->kvm, &vcpu->arch.mmu->mirror_root_hpa,
		NULL);
		read_unlock(&vcpu->kvm->mmu_lock);
		}
		free_mmu_pages(&vcpu->arch.root_mmu);
		free_mmu_pages(&vcpu->arch.guest_mmu);
		mmu_free_memory_caches(vcpu);
		@@ -7452,6 +7488,12 @@ bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
		if (WARN_ON_ONCE(!kvm_arch_has_private_mem(kvm)))
		return false;

		/* Unmap the old attribute page. */
		if (range->arg.attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE)
		range->attr_filter = KVM_FILTER_SHARED;
		else
		range->attr_filter = KVM_FILTER_PRIVATE;

		return kvm_unmap_gfn_range(kvm, range);
		}