Commit 931656b9 authored by Keith Busch, committed by Paolo Bonzini
Browse files

kvm: defer huge page recovery vhost task to later



Some libraries want to ensure they are single threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first KVM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once for the kvm instance, so introduce some general-purpose
infrastructure for that, too.  It's similar in concept to pthread_once;
except it is actually usable, because the callback takes a parameter.

Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Alyssa Ross <hi@alyssa.is>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Message-ID: <20250123153543.2769928-1-kbusch@meta.com>
[Move call_once API to include/linux. - Paolo]
Cc: stable@vger.kernel.org
Fixes: d96c77bd ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 86eb1aef
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <linux/hyperv.h>
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
#include <linux/call_once.h>

#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -1466,6 +1467,7 @@ struct kvm_arch {
	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
	struct vhost_task *nx_huge_page_recovery_thread;
	u64 nx_huge_page_last;
	struct once nx_once;

#ifdef CONFIG_X86_64
	/* The number of TDP MMU pages across all roots. */
+13 −5
Original line number Diff line number Diff line
@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
	return true;
}

int kvm_mmu_post_init_vm(struct kvm *kvm)
static void kvm_mmu_start_lpage_recovery(struct once *once)
{
	if (nx_hugepage_mitigation_hard_disabled)
		return 0;
	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
	struct kvm *kvm = container_of(ka, struct kvm, arch);

	kvm->arch.nx_huge_page_last = get_jiffies_64();
	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
		kvm, "kvm-nx-lpage-recovery");

	if (kvm->arch.nx_huge_page_recovery_thread)
		vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
}

int kvm_mmu_post_init_vm(struct kvm *kvm)
{
	if (nx_hugepage_mitigation_hard_disabled)
		return 0;

	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
	if (!kvm->arch.nx_huge_page_recovery_thread)
		return -ENOMEM;

	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
	return 0;
}

+6 −1
Original line number Diff line number Diff line
@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
	struct kvm_run *kvm_run = vcpu->run;
	int r;

	r = kvm_mmu_post_init_vm(vcpu->kvm);
	if (r)
		return r;

	vcpu_load(vcpu);
	kvm_sigset_activate(vcpu);
	kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

int kvm_arch_post_init_vm(struct kvm *kvm)
{
	return kvm_mmu_post_init_vm(kvm);
	once_init(&kvm->arch.nx_once);
	return 0;
}

static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
+45 −0
Original line number Diff line number Diff line
#ifndef _LINUX_CALL_ONCE_H
#define _LINUX_CALL_ONCE_H

#include <linux/types.h>
#include <linux/mutex.h>

/*
 * Life-cycle states stored in struct once::state.  call_once() moves the
 * state from NOT_STARTED to RUNNING just before invoking the callback and
 * to COMPLETED once the callback has returned.
 */
#define ONCE_NOT_STARTED 0
#define ONCE_RUNNING     1
#define ONCE_COMPLETED   2

/*
 * struct once - control block for call_once()
 * @state: one of the ONCE_* values above; set to ONCE_NOT_STARTED by
 *         __once_init()
 * @lock:  taken by call_once() on its slow path before it re-checks @state
 *         and invokes the callback
 */
struct once {
        atomic_t state;
        struct mutex lock;
};

/**
 * __once_init - put a struct once into its initial, not-yet-run state
 * @once: object to initialise
 * @name: name passed through to __mutex_init() for the embedded lock
 * @key:  lock class key passed through to __mutex_init() for the embedded lock
 *
 * Normally reached through the once_init() wrapper, which supplies @name
 * and @key automatically.
 */
static inline void __once_init(struct once *once, const char *name,
			       struct lock_class_key *key)
{
	__mutex_init(&once->lock, name, key);
	atomic_set(&once->state, ONCE_NOT_STARTED);
}

/**
 * once_init - initialise a struct once before its first use with call_once()
 * @once_ptr: pointer to the struct once to initialise
 *
 * Every expansion site defines its own static lock_class_key and hands it,
 * together with the stringified argument as the lock name, to __once_init().
 */
#define once_init(once_ptr)						\
do {									\
	static struct lock_class_key __once_key;			\
	__once_init((once_ptr), #once_ptr, &__once_key);		\
} while (0)

/**
 * call_once - invoke a callback for a struct once unless it already ran
 * @once: control block, previously set up with once_init()
 * @cb:   function to invoke; it receives @once as its only argument
 *
 * If @once is already in the ONCE_COMPLETED state the function returns
 * immediately without taking the lock.  Otherwise it takes @once->lock,
 * and if the state is still ONCE_NOT_STARTED it marks the state
 * ONCE_RUNNING, calls @cb, and then marks the state ONCE_COMPLETED.
 * Seeing ONCE_RUNNING while holding the lock triggers a WARN_ON() and
 * the callback is not invoked.
 */
static inline void call_once(struct once *once, void (*cb)(struct once *))
{
	int cur;

	/* Lockless fast path; pairs with the atomic_set_release() below. */
	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
		return;

	/* Slow path: the lock is held by the guard until the function exits. */
	guard(mutex)(&once->lock);

	/* Every state change after init happens under the lock, so one read suffices. */
	cur = atomic_read(&once->state);
	WARN_ON(cur == ONCE_RUNNING);

	if (cur == ONCE_NOT_STARTED) {
		atomic_set(&once->state, ONCE_RUNNING);
		cb(once);
		/* Publish everything the callback wrote before COMPLETED is visible. */
		atomic_set_release(&once->state, ONCE_COMPLETED);
	}
}

#endif /* _LINUX_CALL_ONCE_H */