Commit 683b783c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull KVM fixes from Paolo Bonzini:
 "ARM:

   - Avoid dropping the page refcount twice when freeing an unlinked
     page-table subtree.

   - Don't source the VFIO Kconfig twice

   - Fix protected-mode locking order between kvm and vcpus

  RISC-V:

   - Fix steal-time related sparse warnings

  x86:

   - Cleanup gtod_is_based_on_tsc() to return "bool" instead of an "int"

   - Make a KVM_REQ_NMI request while handling KVM_SET_VCPU_EVENTS if
     and only if the incoming events->nmi.pending is non-zero. If the
     target vCPU is in the UNITIALIZED state, the spurious request will
     result in KVM exiting to userspace, which in turn causes QEMU to
     constantly acquire and release QEMU's global mutex, to the point
     where the BSP is unable to make forward progress.

   - Fix a type (u8 versus u64) goof that results in pmu->fixed_ctr_ctrl
     being incorrectly truncated, and ultimately causes KVM to think a
     fixed counter has already been disabled (KVM thinks the old value
     is '0').

   - Fix a stack leak in KVM_GET_MSRS where a failed MSR read from
     userspace that is ultimately ignored due to ignore_msrs=true
     doesn't zero the output as intended.

  Selftests cleanups and fixes:

   - Remove redundant newlines from error messages.

   - Delete an unused variable in the AMX test (which causes build
     failures when compiling with -Werror).

   - Fail instead of skipping tests if open(), e.g. of /dev/kvm, fails
     with an error code other than ENOENT (a Hyper-V selftest bug
     resulted in an EMFILE, and the test eventually got skipped).

   - Fix TSC related bugs in several Hyper-V selftests.

   - Fix a bug in the dirty ring logging test where a sem_post() could
     be left pending across multiple runs, resulting in incorrect
     synchronization between the main thread and the vCPU worker thread.

   - Relax the dirty log split test's assertions on 4KiB mappings to fix
     false positives due to the number of mappings for memslot 0 (used
     for code and data that is NOT being dirty logged) changing, e.g.
     due to NUMA balancing"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: arm64: Fix double-free following kvm_pgtable_stage2_free_unlinked()
  RISC-V: KVM: Use correct restricted types
  RISC-V: paravirt: Use correct restricted types
  RISC-V: paravirt: steal_time should be static
  KVM: selftests: Don't assert on exact number of 4KiB in dirty log split test
  KVM: selftests: Fix a semaphore imbalance in the dirty ring logging test
  KVM: x86: Fix KVM_GET_MSRS stack info leak
  KVM: arm64: Do not source virt/lib/Kconfig twice
  KVM: x86/pmu: Fix type length error when reading pmu->fixed_ctr_ctrl
  KVM: x86: Make gtod_is_based_on_tsc() return 'bool'
  KVM: selftests: Make hyperv_clock require TSC based system clocksource
  KVM: selftests: Run clocksource dependent tests with hyperv_clocksource_tsc_page too
  KVM: selftests: Use generic sys_clocksource_is_tsc() in vmx_nested_tsc_scaling_test
  KVM: selftests: Generalize check_clocksource() from kvm_clock_test
  KVM: x86: make KVM_REQ_NMI request iff NMI pending for vcpu
  KVM: arm64: Fix circular locking dependency
  KVM: selftests: Fail tests when open() fails with !ENOENT
  KVM: selftests: Avoid infinite loop in hyperv_features when invtsc is missing
  KVM: selftests: Delete superfluous, unused "stage" variable in AMX test
  KVM: selftests: x86_64: Remove redundant newlines
  ...
parents 4b6f7c62 9895ceeb
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -3,7 +3,6 @@
# KVM configuration
#

source "virt/lib/Kconfig"
source "virt/kvm/Kconfig"

menuconfig VIRTUALIZATION
+0 −2
Original line number Diff line number Diff line
@@ -1419,7 +1419,6 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
				 level + 1);
	if (ret) {
		kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level);
		mm_ops->put_page(pgtable);
		return ERR_PTR(ret);
	}

@@ -1502,7 +1501,6 @@ static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,

	if (!stage2_try_break_pte(ctx, mmu)) {
		kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
		mm_ops->put_page(childp);
		return -EAGAIN;
	}

+17 −10
Original line number Diff line number Diff line
@@ -101,6 +101,17 @@ void __init kvm_hyp_reserve(void)
		 hyp_mem_base);
}

static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
@@ -181,7 +192,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
	return 0;

destroy_vm:
	pkvm_destroy_hyp_vm(host_kvm);
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
@@ -194,23 +205,19 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->lock);
	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->lock);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
+3 −3
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@ static int __init parse_no_stealacc(char *arg)

early_param("no-steal-acc", parse_no_stealacc);

DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);
static DEFINE_PER_CPU(struct sbi_sta_struct, steal_time) __aligned(64);

static bool __init has_pv_steal_clock(void)
{
@@ -91,8 +91,8 @@ static int pv_time_cpu_down_prepare(unsigned int cpu)
static u64 pv_time_steal_clock(int cpu)
{
	struct sbi_sta_struct *st = per_cpu_ptr(&steal_time, cpu);
	u32 sequence;
	u64 steal;
	__le32 sequence;
	__le64 steal;

	/*
	 * Check the sequence field before and after reading the steal
+12 −8
Original line number Diff line number Diff line
@@ -26,8 +26,12 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
{
	gpa_t shmem = vcpu->arch.sta.shmem;
	u64 last_steal = vcpu->arch.sta.last_steal;
	u32 *sequence_ptr, sequence;
	u64 *steal_ptr, steal;
	__le32 __user *sequence_ptr;
	__le64 __user *steal_ptr;
	__le32 sequence_le;
	__le64 steal_le;
	u32 sequence;
	u64 steal;
	unsigned long hva;
	gfn_t gfn;

@@ -47,22 +51,22 @@ void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu)
		return;
	}

	sequence_ptr = (u32 *)(hva + offset_in_page(shmem) +
	sequence_ptr = (__le32 __user *)(hva + offset_in_page(shmem) +
			       offsetof(struct sbi_sta_struct, sequence));
	steal_ptr = (u64 *)(hva + offset_in_page(shmem) +
	steal_ptr = (__le64 __user *)(hva + offset_in_page(shmem) +
			    offsetof(struct sbi_sta_struct, steal));

	if (WARN_ON(get_user(sequence, sequence_ptr)))
	if (WARN_ON(get_user(sequence_le, sequence_ptr)))
		return;

	sequence = le32_to_cpu(sequence);
	sequence = le32_to_cpu(sequence_le);
	sequence += 1;

	if (WARN_ON(put_user(cpu_to_le32(sequence), sequence_ptr)))
		return;

	if (!WARN_ON(get_user(steal, steal_ptr))) {
		steal = le64_to_cpu(steal);
	if (!WARN_ON(get_user(steal_le, steal_ptr))) {
		steal = le64_to_cpu(steal_le);
		vcpu->arch.sta.last_steal = READ_ONCE(current->sched_info.run_delay);
		steal += vcpu->arch.sta.last_steal - last_steal;
		WARN_ON(put_user(cpu_to_le64(steal), steal_ptr));
Loading