Commit 34b69ede authored by Paolo Bonzini
Browse files

Merge tag 'kvm-x86-mmu-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM x86 MMU changes for 6.11

 - Don't allocate kvm_mmu_page.shadowed_translation for shadow pages that can't
   hold leaf SPTEs.

 - Unconditionally drop mmu_lock when allocating TDP MMU page tables for eager
   page splitting to avoid stalling vCPUs when splitting huge pages.

 - Misc cleanups
parents 5dcc1e76 0089c055
Loading
Loading
Loading
Loading
+9 −8
Original line number Diff line number Diff line
@@ -722,7 +722,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
	if (sp->role.passthrough)
		return sp->gfn;

	if (!sp->role.direct)
	if (sp->shadowed_translation)
		return sp->shadowed_translation[index] >> PAGE_SHIFT;

	return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
@@ -736,7 +736,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 */
static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
{
	if (sp_has_gptes(sp))
	if (sp->shadowed_translation)
		return sp->shadowed_translation[index] & ACC_ALL;

	/*
@@ -757,7 +757,7 @@ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
					 gfn_t gfn, unsigned int access)
{
	if (sp_has_gptes(sp)) {
	if (sp->shadowed_translation) {
		sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
		return;
	}
@@ -1700,7 +1700,6 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
	hlist_del(&sp->hash_link);
	list_del(&sp->link);
	free_page((unsigned long)sp->spt);
	if (!sp->role.direct)
	free_page((unsigned long)sp->shadowed_translation);
	kmem_cache_free(mmu_page_header_cache, sp);
}
@@ -2203,7 +2202,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,

	sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
	sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
	if (!role.direct)
	if (!role.direct && role.level <= KVM_MAX_HUGEPAGE_LEVEL)
		sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);

	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -4609,7 +4608,10 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
	if (WARN_ON_ONCE(error_code >> 32))
		error_code = lower_32_bits(error_code);

	/* Ensure the above sanity check also covers KVM-defined flags. */
	/*
	 * Restrict KVM-defined flags to bits 63:32 so that it's impossible for
	 * them to conflict with #PF error codes, which are limited to 32 bits.
	 */
	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));

	vcpu->arch.l1tf_flush_l1d = true;
@@ -7049,7 +7051,6 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,

	list_for_each_entry(kvm, &vm_list, vm_list) {
		int idx;
		LIST_HEAD(invalid_list);

		/*
		 * Never scan more than sc->nr_to_scan VM instances.
+2 −1
Original line number Diff line number Diff line
@@ -911,7 +911,8 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
	gpa_t pte_gpa;
	gfn_t gfn;

	if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
	if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE ||
			 !sp->shadowed_translation))
		return 0;

	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
+24 −54
Original line number Diff line number Diff line
@@ -1340,17 +1340,15 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
	return spte_set;
}

static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(void)
{
	struct kvm_mmu_page *sp;

	gfp |= __GFP_ZERO;

	sp = kmem_cache_alloc(mmu_page_header_cache, gfp);
	sp = kmem_cache_zalloc(mmu_page_header_cache, GFP_KERNEL_ACCOUNT);
	if (!sp)
		return NULL;

	sp->spt = (void *)__get_free_page(gfp);
	sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sp->spt) {
		kmem_cache_free(mmu_page_header_cache, sp);
		return NULL;
@@ -1359,47 +1357,6 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
	return sp;
}

/*
 * Allocate a kvm_mmu_page for splitting a huge page while mmu_lock is held.
 *
 * @kvm:    VM whose mmu_lock the caller holds.
 * @iter:   TDP iterator of the in-progress walk; iter->yielded is set if
 *          mmu_lock had to be dropped.
 * @shared: true if mmu_lock is held for read, false if held for write.
 *
 * A non-blocking allocation (GFP_NOWAIT | __GFP_ACCOUNT) is attempted first
 * with the lock still held.  If that fails, the RCU read lock and mmu_lock
 * are released, the allocation is retried with GFP_KERNEL_ACCOUNT, and
 * mmu_lock and the RCU read lock are re-acquired before returning.
 *
 * Returns the allocated page, or NULL if the retry also fails.
 */
static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
						       struct tdp_iter *iter,
						       bool shared)
{
	struct kvm_mmu_page *sp;

	kvm_lockdep_assert_mmu_lock_held(kvm, shared);

	/*
	 * Since we are allocating while under the MMU lock we have to be
	 * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
	 * reclaim and to avoid making any filesystem callbacks (which can end
	 * up invoking KVM MMU notifiers, resulting in a deadlock).
	 *
	 * If this allocation fails we drop the lock and retry with reclaim
	 * allowed.
	 */
	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
	if (sp)
		return sp;

	rcu_read_unlock();

	if (shared)
		read_unlock(&kvm->mmu_lock);
	else
		write_unlock(&kvm->mmu_lock);

	/* mmu_lock is released around the retry, so flag the iterator as yielded. */
	iter->yielded = true;
	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);

	if (shared)
		read_lock(&kvm->mmu_lock);
	else
		write_lock(&kvm->mmu_lock);

	rcu_read_lock();

	return sp;
}

/* Note, the caller is responsible for initializing @sp. */
static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
				   struct kvm_mmu_page *sp, bool shared)
@@ -1446,7 +1403,6 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
{
	struct kvm_mmu_page *sp = NULL;
	struct tdp_iter iter;
	int ret = 0;

	rcu_read_lock();

@@ -1470,16 +1426,30 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
			continue;

		if (!sp) {
			sp = tdp_mmu_alloc_sp_for_split(kvm, &iter, shared);
			rcu_read_unlock();

			if (shared)
				read_unlock(&kvm->mmu_lock);
			else
				write_unlock(&kvm->mmu_lock);

			sp = tdp_mmu_alloc_sp_for_split();

			if (shared)
				read_lock(&kvm->mmu_lock);
			else
				write_lock(&kvm->mmu_lock);

			if (!sp) {
				ret = -ENOMEM;
				trace_kvm_mmu_split_huge_page(iter.gfn,
							      iter.old_spte,
							      iter.level, ret);
				break;
							      iter.level, -ENOMEM);
				return -ENOMEM;
			}

			if (iter.yielded)
			rcu_read_lock();

			iter.yielded = true;
			continue;
		}

@@ -1501,7 +1471,7 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
	if (sp)
		tdp_mmu_free_sp(sp);

	return ret;
	return 0;
}