Commit 1c3bed80 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-fixes-6.9-rcN' of https://github.com/kvm-x86/linux into HEAD

- Fix a mostly benign bug in the gfn_to_pfn_cache infrastructure where KVM
  would allow userspace to refresh the cache with a bogus GPA.  The bug has
  existed for quite some time, but was exposed by a new sanity check added in
  6.9 (to ensure a cache is either GPA-based or HVA-based).

- Drop an unused param from gfn_to_pfn_cache_invalidate_start() that got left
  behind during a 6.9 cleanup.

- Disable support for virtualizing adaptive PEBS, as KVM's implementation is
  architecturally broken and can leak host LBRs to the guest.

- Fix a bug where KVM neglects to set the enable bits for general purpose
  counters in PERF_GLOBAL_CTRL when initializing the virtual PMU.  Both Intel
  and AMD architectures require the bits to be set at RESET in order for v2
  PMUs to be backwards compatible with software that was written for v1 PMUs,
  i.e. for software that will never manually set the global enables.

- Disable LBR virtualization on CPUs that don't support LBR callstacks, as
  KVM unconditionally uses PERF_SAMPLE_BRANCH_CALL_STACK when creating the
  virtual LBR perf event, i.e. KVM will always fail to create LBR events on
  such CPUs.

- Fix a math goof in x86's hugepage logic for KVM_SET_MEMORY_ATTRIBUTES that
  results in an array overflow (detected by KASAN).

- Fix a flaw in the max_guest_memory selftest that results in it exhausting
  the supply of ucall structures when run with more than 256 vCPUs.

- Mark KVM_MEM_READONLY as supported for RISC-V in set_memory_region_test.

- Fix a bug where KVM incorrectly thinks a TDP MMU root is an indirect shadow
  root due KVM unnecessarily clobbering root_role.direct when userspace sets
  guest CPUID.

- Fix a dirty logging bug in the where KVM fails to write-protect TDP MMU
  SPTEs used for L2 if Page-Modification Logging is enabled for L1 and the L1
  hypervisor is NOT using EPT (if nEPT is enabled, KVM doesn't use the TDP MMU
  to run L2).  For simplicity, KVM always disables PML when running L2, but
  the TDP MMU wasn't accounting for root-specific conditions that force write-
  protect based dirty logging.
parents 49ff3b4a eefb85b3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
	lbr->from = x86_pmu.lbr_from;
	lbr->to = x86_pmu.lbr_to;
	lbr->info = x86_pmu.lbr_info;
	lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

+1 −0
Original line number Diff line number Diff line
@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
	unsigned int	from;
	unsigned int	to;
	unsigned int	info;
	bool		has_callstack;
};

extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
+5 −4
Original line number Diff line number Diff line
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
	 * that problem is swept under the rug; KVM's CPUID API is horrific and
	 * it's all but impossible to solve it without introducing a new API.
	 */
	vcpu->arch.root_mmu.root_role.word = 0;
	vcpu->arch.guest_mmu.root_role.word = 0;
	vcpu->arch.nested_mmu.root_role.word = 0;
	vcpu->arch.root_mmu.root_role.invalid = 1;
	vcpu->arch.guest_mmu.root_role.invalid = 1;
	vcpu->arch.nested_mmu.root_role.invalid = 1;
	vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
	vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
	vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
			 * by the memslot, KVM can't use a hugepage due to the
			 * misaligned address regardless of memory attributes.
			 */
			if (gfn >= slot->base_gfn) {
			if (gfn >= slot->base_gfn &&
			    gfn + nr_pages <= slot->base_gfn + slot->npages) {
				if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
					hugepage_clear_mixed(slot, gfn, level);
				else
+22 −29
Original line number Diff line number Diff line
@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
	}
}

static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
{
	/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
	 * All TDP MMU shadow pages share the same role as their root, aside
	 * from level, so it is valid to key off any shadow page to determine if
	 * write protection is needed for an entire tree.
	 */
	return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
}

static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
			   gfn_t start, gfn_t end)
{
	u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
	const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
							    shadow_dirty_mask;
	struct tdp_iter iter;
	bool spte_set = false;

@@ -1573,7 +1577,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
			continue;

		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
				spte_ad_need_write_protect(iter.old_spte));

		if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
}

/*
 * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
 * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
 * If AD bits are not enabled, this will require clearing the writable bit on
 * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
 * be flushed.
 * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
 * memslot. Returns true if an SPTE has been changed and the TLBs need to be
 * flushed.
 */
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
				  const struct kvm_memory_slot *slot)
@@ -1610,17 +1612,10 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
	return spte_set;
}

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 */
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
				  gfn_t gfn, unsigned long mask, bool wrprot)
{
	u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
	const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
									shadow_dirty_mask;
	struct tdp_iter iter;

@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
		if (!mask)
			break;

		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
				spte_ad_need_write_protect(iter.old_spte));

		if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
}

/*
 * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
 * set in mask, starting at gfn. The given memslot is expected to contain all
 * the GFNs represented by set bits in the mask. If AD bits are enabled,
 * clearing the dirty status will involve clearing the dirty bit on each SPTE
 * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
 * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
 * which a bit is set in mask, starting at gfn. The given memslot is expected to
 * contain all the GFNs represented by set bits in the mask.
 */
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
				       struct kvm_memory_slot *slot,
+14 −2
Original line number Diff line number Diff line
@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
	pmu->pebs_data_cfg_mask = ~0ull;
	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);

	if (vcpu->kvm->arch.enable_pmu)
	if (!vcpu->kvm->arch.enable_pmu)
		return;

	static_call(kvm_x86_pmu_refresh)(vcpu);

	/*
	 * At RESET, both Intel and AMD CPUs set all enable bits for general
	 * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
	 * was written for v1 PMUs don't unknowingly leave GP counters disabled
	 * in the global controls).  Emulate that behavior when refreshing the
	 * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
	 */
	if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
		pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
Loading