Commit 41786cc5 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-misc-6.12' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.12

 - Advertise AVX10.1 to userspace (effectively prep work for the "real" AVX10
   functionality that is on the horizon).

 - Rework common MSR handling code to suppress errors on userspace accesses to
   unsupported-but-advertised MSRs.  This will allow removing (almost?) all of
   KVM's exemptions for userspace access to MSRs that shouldn't exist based on
   the vCPU model (the actual cleanup is non-trivial future work).

 - Rework KVM's handling of x2APIC ICR, again, because AMD (x2AVIC) splits the
   64-bit value into the legacy ICR and ICR2 storage, whereas Intel (APICv)
   stores the entire 64-bit value at the ICR offset.

 - Fix a bug where KVM would fail to exit to userspace if one was triggered by
   a fastpath exit handler.

 - Add fastpath handling of HLT VM-Exit to expedite re-entering the guest when
   there's already a pending wake event at the time of the exit.

 - Finally fix the RSM vs. nested VM-Enter WARN by forcing the vCPU out of
   guest mode prior to signalling SHUTDOWN (architecturally, the SHUTDOWN is
   supposed to hit L1, not L2).
parents 7056c4e2 4ca077f2
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -179,6 +179,7 @@ static __always_inline bool cpuid_function_is_indexed(u32 function)
	case 0x1d:
	case 0x1e:
	case 0x1f:
	case 0x24:
	case 0x8000001d:
		return true;
	}
+1 −1
Original line number Diff line number Diff line
@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
KVM_X86_OP(get_msr_feature)
KVM_X86_OP(get_feature_msr)
KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
+4 −1
Original line number Diff line number Diff line
@@ -212,6 +212,7 @@ enum exit_fastpath_completion {
	EXIT_FASTPATH_NONE,
	EXIT_FASTPATH_REENTER_GUEST,
	EXIT_FASTPATH_EXIT_HANDLED,
	EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;

@@ -1730,6 +1731,8 @@ struct kvm_x86_ops {
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);

	const bool x2apic_icr_is_split;
	const unsigned long required_apicv_inhibits;
	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
@@ -1809,7 +1812,7 @@ struct kvm_x86_ops {
	int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
	void (*guest_memory_reclaimed)(struct kvm *kvm);

	int (*get_msr_feature)(struct kvm_msr_entry *entry);
	int (*get_feature_msr)(u32 msr, u64 *data);

	int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
					 void *insn, int insn_len);
+28 −2
Original line number Diff line number Diff line
@@ -705,7 +705,7 @@ void kvm_set_cpu_caps(void)

	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
		F(AMX_COMPLEX)
		F(AMX_COMPLEX) | F(AVX10)
	);

	kvm_cpu_cap_init_kvm_defined(CPUID_7_2_EDX,
@@ -721,6 +721,10 @@ void kvm_set_cpu_caps(void)
		SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA)
	);

	kvm_cpu_cap_init_kvm_defined(CPUID_24_0_EBX,
		F(AVX10_128) | F(AVX10_256) | F(AVX10_512)
	);

	kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
@@ -949,7 +953,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
	switch (function) {
	case 0:
		/* Limited to the highest leaf implemented in KVM. */
		entry->eax = min(entry->eax, 0x1fU);
		entry->eax = min(entry->eax, 0x24U);
		break;
	case 1:
		cpuid_entry_override(entry, CPUID_1_EDX);
@@ -1174,6 +1178,28 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
			break;
		}
		break;
	case 0x24: {
		u8 avx10_version;

		if (!kvm_cpu_cap_has(X86_FEATURE_AVX10)) {
			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
			break;
		}

		/*
		 * The AVX10 version is encoded in EBX[7:0].  Note, the version
		 * is guaranteed to be >=1 if AVX10 is supported.  Note #2, the
		 * version needs to be captured before overriding EBX features!
		 */
		avx10_version = min_t(u8, entry->ebx & 0xff, 1);
		cpuid_entry_override(entry, CPUID_24_0_EBX);
		entry->ebx |= avx10_version;

		entry->eax = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
	case KVM_CPUID_SIGNATURE: {
		const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
		entry->eax = KVM_CPUID_FEATURES;
+53 −22
Original line number Diff line number Diff line
@@ -1944,7 +1944,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
	u64 ns = 0;
	ktime_t expire;
	struct kvm_vcpu *vcpu = apic->vcpu;
	unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
	u32 this_tsc_khz = vcpu->arch.virtual_tsc_khz;
	unsigned long flags;
	ktime_t now;

@@ -2453,6 +2453,43 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);

/* ICR bits 31:20, 17:16 and 13: a write with any of these set is refused. */
#define X2APIC_ICR_RESERVED_BITS (GENMASK_ULL(31, 20) | GENMASK_ULL(17, 16) | BIT(13))

/*
 * Process a write of @data to the x2APIC ICR of @apic.
 *
 * Returns 1, without doing anything else, when @data has a reserved bit
 * set.  Otherwise the BUSY bit is dropped, the IPI is sent using the low
 * and high 32-bit halves of @data, the value is stored through
 * kvm_lapic_set_reg{,64}(), the write is traced, and 0 is returned.
 */
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
	u32 icr_low, icr_high;

	if (data & X2APIC_ICR_RESERVED_BITS)
		return 1;

	/*
	 * In x2APIC mode BUSY is a reserved bit on Intel and AMD alike, yet
	 * only AMD insists that it be zero; Intel effectively ignores it.
	 * With IPI virtualization (Intel) or x2AVIC (AMD) enabled, the CPU
	 * does the reserved-bit checking itself, so the behavior of the
	 * underlying hardware "wins" anyway.  Since there is no sane way to
	 * be consistent with hardware, arbitrarily strip the BUSY bit.
	 */
	data &= ~APIC_ICR_BUSY;

	icr_low = (u32)data;
	icr_high = (u32)(data >> 32);

	kvm_apic_send_ipi(apic, icr_low, icr_high);

	if (!kvm_x86_ops.x2apic_icr_is_split) {
		/* Unsplit layout: one 64-bit store at the ICR offset. */
		kvm_lapic_set_reg64(apic, APIC_ICR, data);
	} else {
		/* Split layout: low half in ICR, high half in ICR2. */
		kvm_lapic_set_reg(apic, APIC_ICR, data);
		kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
	}

	trace_kvm_apic_write(APIC_ICR, data);

	return 0;
}

static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
{
	if (kvm_x86_ops.x2apic_icr_is_split)
		return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
		       (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;

	return kvm_lapic_get_reg64(apic, APIC_ICR);
}

/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
@@ -2470,7 +2507,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
	 * maybe-unnecessary write, and both are in the noise anyways.
	 */
	if (apic_x2apic_mode(apic) && offset == APIC_ICR)
		kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR));
		WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
	else
		kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
@@ -2990,12 +3027,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,

		/*
		 * In x2APIC mode, the LDR is fixed and based on the id.  And
		 * ICR is internally a single 64-bit register, but needs to be
		 * split to ICR+ICR2 in userspace for backwards compatibility.
		 * if the ICR is _not_ split, ICR is internally a single 64-bit
		 * register, but needs to be split to ICR+ICR2 in userspace for
		 * backwards compatibility.
		 */
		if (set) {
		if (set)
			*ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);

		if (!kvm_x86_ops.x2apic_icr_is_split) {
			if (set) {
				icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
				      (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
				__kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
@@ -3004,6 +3044,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
				__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
			}
		}
	}

	return 0;
}
@@ -3194,22 +3235,12 @@ int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
	return 0;
}

/*
 * Process a write of @data to the x2APIC ICR of @apic: drop the BUSY bit,
 * send the IPI using the low and high 32-bit halves of @data, store the
 * value as one 64-bit register at APIC_ICR, and trace the write.
 * Always returns 0.
 */
int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
{
	u32 icr_low, icr_high;

	data &= ~APIC_ICR_BUSY;

	icr_low = (u32)data;
	icr_high = (u32)(data >> 32);

	kvm_apic_send_ipi(apic, icr_low, icr_high);
	kvm_lapic_set_reg64(apic, APIC_ICR, data);
	trace_kvm_apic_write(APIC_ICR, data);

	return 0;
}

static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
{
	u32 low;

	if (reg == APIC_ICR) {
		*data = kvm_lapic_get_reg64(apic, APIC_ICR);
		*data = kvm_x2apic_icr_read(apic);
		return 0;
	}

Loading