Commit 5c5ddf71 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-mtrrs-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM x86 MTRR virtualization removal

Remove support for virtualizing MTRRs on Intel CPUs, along with a nasty CR0.CD
hack, and instead always honor guest PAT on CPUs that support self-snoop.
parents 34b69ede 377b2f35
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -8025,7 +8025,11 @@ The valid bits in cap.args[0] are:
                                    When this quirk is disabled, the reset value
                                    is 0x10000 (APIC_LVT_MASKED).

 KVM_X86_QUIRK_CD_NW_CLEARED        By default, KVM clears CR0.CD and CR0.NW.
 KVM_X86_QUIRK_CD_NW_CLEARED        By default, KVM clears CR0.CD and CR0.NW on
                                    AMD CPUs to work around buggy guest firmware
                                    that runs in perpetuity with CR0.CD, i.e.
                                    with caches in "no fill" mode.

                                    When this quirk is disabled, KVM does not
                                    change the value of CR0.CD and CR0.NW.

+18 −0
Original line number Diff line number Diff line
@@ -48,3 +48,21 @@ have the same physical APIC ID, KVM will deliver events targeting that APIC ID
only to the vCPU with the lowest vCPU ID.  If KVM_X2APIC_API_USE_32BIT_IDS is
not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
matching the target APIC ID receive the interrupt).

MTRRs
-----
KVM does not virtualize guest MTRR memory types.  KVM emulates accesses to MTRR
MSRs, i.e. {RD,WR}MSR in the guest will behave as expected, but KVM does not
honor guest MTRRs when determining the effective memory type, and instead
treats all of guest memory as having Writeback (WB) MTRRs.

CR0.CD
------
KVM does not virtualize CR0.CD on Intel CPUs.  Similar to MTRR MSRs, KVM
emulates CR0.CD accesses so that loads and stores from/to CR0 behave as
expected, but setting CR0.CD=1 has no impact on the cacheability of guest
memory.

Note, this erratum does not affect AMD CPUs, which fully virtualize CR0.CD in
hardware, i.e. put the CPU caches into "no fill" mode when CR0.CD=1, even when
running in the guest.
 No newline at end of file
+4 −11
Original line number Diff line number Diff line
@@ -160,7 +160,6 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8

#define ASYNC_PF_PER_VCPU 64
@@ -605,18 +604,12 @@ enum {
	KVM_DEBUGREG_WONT_EXIT = 2,
};

struct kvm_mtrr_range {
	u64 base;
	u64 mask;
	struct list_head node;
};

struct kvm_mtrr {
	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
	u64 var[KVM_NR_VAR_MTRR * 2];
	u64 fixed_64k;
	u64 fixed_16k[2];
	u64 fixed_4k[8];
	u64 deftype;

	struct list_head head;
};

/* Hyper-V SynIC timer */
+1 −6
Original line number Diff line number Diff line
@@ -221,12 +221,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
	return -(u32)fault & errcode;
}

bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma);

static inline bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
{
	return __kvm_mmu_honors_guest_mtrrs(kvm_arch_has_noncoherent_dma(kvm));
}
bool kvm_mmu_may_ignore_guest_pat(void);

int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);

+10 −25
Original line number Diff line number Diff line
@@ -4671,38 +4671,23 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
}
#endif

bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma)
bool kvm_mmu_may_ignore_guest_pat(void)
{
	/*
	 * If host MTRRs are ignored (shadow_memtype_mask is non-zero), and the
	 * VM has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is
	 * to honor the memtype from the guest's MTRRs so that guest accesses
	 * to memory that is DMA'd aren't cached against the guest's wishes.
	 *
	 * Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
	 * e.g. KVM will force UC memtype for host MMIO.
	 * When EPT is enabled (shadow_memtype_mask is non-zero), the CPU does
	 * not support self-snoop (or is affected by an erratum), and the VM
	 * has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
	 * honor the memtype from the guest's PAT so that guest accesses to
	 * memory that is DMA'd aren't cached against the guest's wishes.  As a
	 * result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
	 * KVM _always_ ignores or honors guest PAT, i.e. doesn't toggle SPTE
	 * bits in response to non-coherent device (un)registration.
	 */
	return vm_has_noncoherent_dma && shadow_memtype_mask;
	return !static_cpu_has(X86_FEATURE_SELFSNOOP) && shadow_memtype_mask;
}

int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
	/*
	 * If the guest's MTRRs may be used to compute the "real" memtype,
	 * restrict the mapping level to ensure KVM uses a consistent memtype
	 * across the entire mapping.
	 */
	if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
		for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
			int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
			gfn_t base = gfn_round_for_level(fault->gfn,
							 fault->max_level);

			if (kvm_mtrr_check_gfn_range_consistency(vcpu, base, page_num))
				break;
		}
	}

#ifdef CONFIG_X86_64
	if (tdp_mmu_enabled)
		return kvm_tdp_mmu_page_fault(vcpu, fault);
Loading