Commit 4f7ff70c authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-misc-6.14' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.14:

 - Overhaul KVM's CPUID feature infrastructure to track all vCPU capabilities
   instead of just those where KVM needs to manage state and/or explicitly
   enable the feature in hardware.  Along the way, refactor the code to make
   it easier to add features, and to make it more self-documenting how KVM
   is handling each feature.

 - Rework KVM's handling of VM-Exits during event vectoring; this plugs holes
   where KVM unintentionally puts the vCPU into infinite loops in some scenarios
   (e.g. if emulation is triggered by the exit), and brings parity between VMX
   and SVM.

 - Add pending request and interrupt injection information to the kvm_exit and
   kvm_entry tracepoints respectively.

 - Fix a relatively benign flaw where KVM would end up redoing RDPKRU when
   loading guest/host PKRU, due to a refactoring of the kernel helpers that
   didn't account for KVM's pre-checking of the need to do WRPKRU.
parents 892e7b8c 4c20cd4c
Loading
Loading
Loading
Loading
+7 −3
Original line number Diff line number Diff line
@@ -1825,15 +1825,18 @@ emulate them efficiently. The fields in each entry are defined as follows:
         the values returned by the cpuid instruction for
         this function/index combination

The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
support.  Instead it is reported via::
x2APIC (CPUID leaf 1, ecx[21]) and TSC deadline timer (CPUID leaf 1, ecx[24])
may be returned as true, but they depend on KVM_CREATE_IRQCHIP for in-kernel
emulation of the local APIC.  TSC deadline timer support is also reported via::

  ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)

if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
feature in userspace, then you can enable the feature for KVM_SET_CPUID2.

Enabling x2APIC in KVM_SET_CPUID2 requires KVM_CREATE_IRQCHIP as KVM doesn't
support forwarding x2APIC MSR accesses to userspace, i.e. KVM does not support
emulating x2APIC in userspace.

4.47 KVM_PPC_GET_PVINFO
-----------------------
@@ -7673,6 +7676,7 @@ branch to guests' 0x200 interrupt vector.
:Architectures: x86
:Parameters: args[0] defines which exits are disabled
:Returns: 0 on success, -EINVAL when args[0] contains invalid exits
          or if any vCPUs have already been created

Valid bits in args[0] are::

+1 −0
Original line number Diff line number Diff line
@@ -99,6 +99,7 @@ KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
+44 −21
Original line number Diff line number Diff line
@@ -739,6 +739,23 @@ struct kvm_queued_exception {
	bool has_payload;
};

/*
 * Hardware-defined CPUID leafs that are either scattered by the kernel or are
 * unknown to the kernel, but need to be directly used by KVM.  Note, these
 * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
 */
enum kvm_only_cpuid_leafs {
	/* KVM-only words are numbered starting at NCAPINTS. */
	CPUID_12_EAX	 = NCAPINTS,
	CPUID_7_1_EDX,
	CPUID_8000_0007_EDX,
	CPUID_8000_0022_EAX,
	CPUID_7_2_EDX,
	CPUID_24_0_EBX,
	/*
	 * One past the last KVM-only word; used as the length of the
	 * kvm_cpu_caps[] and vcpu->arch.cpu_caps[] arrays.
	 */
	NR_KVM_CPU_CAPS,

	/* Number of KVM-only words, i.e. those at index NCAPINTS and above. */
	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};

struct kvm_vcpu_arch {
	/*
	 * rip and regs accesses must go through
@@ -854,27 +871,23 @@ struct kvm_vcpu_arch {

	int cpuid_nent;
	struct kvm_cpuid_entry2 *cpuid_entries;
	struct kvm_hypervisor_cpuid kvm_cpuid;
	bool is_amd_compatible;

	/*
	 * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
	 * when "struct kvm_vcpu_arch" is no longer defined in an
	 * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
	 * can be increased as necessary.
	 */
#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG

	/*
	 * Track whether or not the guest is allowed to use features that are
	 * governed by KVM, where "governed" means KVM needs to manage state
	 * and/or explicitly enable the feature in hardware.  Typically, but
	 * not always, governed features can be used by the guest if and only
	 * if both KVM and userspace want to expose the feature to the guest.
	 * cpu_caps holds the effective guest capabilities, i.e. the features
	 * the vCPU is allowed to use.  Typically, but not always, features can
	 * be used by the guest if and only if both KVM and userspace want to
	 * expose the feature to the guest.
	 *
	 * A common exception is for virtualization holes, i.e. when KVM can't
	 * prevent the guest from using a feature, in which case the vCPU "has"
	 * the feature regardless of what KVM or userspace desires.
	 *
	 * Note, features that don't require KVM involvement in any way are
	 * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
	 * guest CPUID provided by userspace.
	 */
	struct {
		DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
	} governed_features;
	u32 cpu_caps[NR_KVM_CPU_CAPS];

	u64 reserved_gpa_bits;
	int maxphyaddr;
@@ -1756,12 +1769,15 @@ struct kvm_x86_ops {
	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);

	/*
	 * Retrieve somewhat arbitrary exit information.  Intended to
	 * Retrieve somewhat arbitrary exit/entry information.  Intended to
	 * be used only from within tracepoints or error paths.
	 */
	void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
			      u64 *info1, u64 *info2,
			      u32 *exit_int_info, u32 *exit_int_info_err_code);
			      u32 *intr_info, u32 *error_code);

	void (*get_entry_info)(struct kvm_vcpu *vcpu,
			       u32 *intr_info, u32 *error_code);

	int (*check_intercept)(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
@@ -2018,8 +2034,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
 *			VMware backdoor emulation handles select instructions
 *			and reinjects the #GP for all other cases.
 *
 * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
 *		 case the CR2/GPA value pass on the stack is valid.
 * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
 *		 the CR2/GPA value passed on the stack is valid.
 *
 * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
 *				 state and inject single-step #DBs after skipping
@@ -2054,6 +2070,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP	    (1 << 8)

/*
 * Returns true when @emul_type does not have EMULTYPE_PF set, false when it
 * does.  NOTE(review): presumably emulation kicked off by an intercepted #PF
 * is what cannot be handled while an event is being vectored -- confirm
 * against the callers.
 */
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
	return (emul_type & EMULTYPE_PF) == 0;
}

int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
					void *insn, int insn_len);
@@ -2061,6 +2082,8 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
					  u64 *data, u8 ndata);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);

void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);

void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+662 −309

File changed.

Preview size limit exceeded, changes collapsed.

+56 −72
Original line number Diff line number Diff line
@@ -10,8 +10,8 @@
extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
void kvm_set_cpu_caps(void);

void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
						    u32 function, u32 index);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
@@ -67,41 +67,40 @@ static __always_inline void cpuid_entry_override(struct kvm_cpuid_entry2 *entry,
	*reg = kvm_cpu_caps[leaf];
}

static __always_inline u32 *guest_cpuid_get_register(struct kvm_vcpu *vcpu,
static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
					    unsigned int x86_feature)
{
	const struct cpuid_reg cpuid = x86_feature_cpuid(x86_feature);
	struct kvm_cpuid_entry2 *entry;
	u32 *reg;

	/*
	 * XSAVES is a special snowflake.  Due to lack of a dedicated intercept
	 * on SVM, KVM must assume that XSAVES (and thus XRSTORS) is usable by
	 * the guest if the host supports XSAVES and *XSAVE* is exposed to the
	 * guest.  Because the guest can execute XSAVES and XRSTORS, i.e. can
	 * indirectly consume XSS, KVM must ensure XSS is zeroed when running
	 * the guest, i.e. must set XSAVES in vCPU capabilities.  But to reject
	 * direct XSS reads and writes (to minimize the virtualization hole and
	 * honor userspace's CPUID), KVM needs to check the raw guest CPUID,
	 * not KVM's view of guest capabilities.
	 *
	 * For all other features, guest capabilities are accurate.  Expand
	 * this allowlist with extreme vigilance.
	 */
	BUILD_BUG_ON(x86_feature != X86_FEATURE_XSAVES);

	entry = kvm_find_cpuid_entry_index(vcpu, cpuid.function, cpuid.index);
	if (!entry)
		return NULL;

	return __cpuid_entry_get_reg(entry, cpuid.reg);
}

static __always_inline bool guest_cpuid_has(struct kvm_vcpu *vcpu,
					    unsigned int x86_feature)
{
	u32 *reg;

	reg = guest_cpuid_get_register(vcpu, x86_feature);
	reg = __cpuid_entry_get_reg(entry, cpuid.reg);
	if (!reg)
		return false;

	return *reg & __feature_bit(x86_feature);
}

static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
					      unsigned int x86_feature)
{
	u32 *reg;

	reg = guest_cpuid_get_register(vcpu, x86_feature);
	if (reg)
		*reg &= ~__feature_bit(x86_feature);
}

static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.is_amd_compatible;
@@ -150,21 +149,6 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
	return x86_stepping(best->eax);
}

static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
{
	return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
		guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) ||
		guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) ||
		guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD));
}

static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
{
	return (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) ||
		guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB) ||
		guest_cpuid_has(vcpu, X86_FEATURE_SBPB));
}

static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
@@ -180,7 +164,6 @@ static __always_inline void kvm_cpu_cap_clear(unsigned int x86_feature)
{
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	reverse_cpuid_check(x86_leaf);
	kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}

@@ -188,7 +171,6 @@ static __always_inline void kvm_cpu_cap_set(unsigned int x86_feature)
{
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	reverse_cpuid_check(x86_leaf);
	kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}

@@ -196,7 +178,6 @@ static __always_inline u32 kvm_cpu_cap_get(unsigned int x86_feature)
{
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	reverse_cpuid_check(x86_leaf);
	return kvm_cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}

@@ -220,58 +201,61 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
	return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}

enum kvm_governed_features {
#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
#include "governed_features.h"
	KVM_NR_GOVERNED_FEATURES
};

static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
					      unsigned int x86_feature)
{
	switch (x86_feature) {
#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
#include "governed_features.h"
	default:
		return -1;
	}
}
	unsigned int x86_leaf = __feature_leaf(x86_feature);

static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
{
	return kvm_governed_feature_index(x86_feature) >= 0;
	vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
}

static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
						unsigned int x86_feature)
{
	BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	__set_bit(kvm_governed_feature_index(x86_feature),
		  vcpu->arch.governed_features.enabled);
	vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
}

static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
							       unsigned int x86_feature)
static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
						 unsigned int x86_feature,
						 bool guest_has_cap)
{
	if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
		kvm_governed_feature_set(vcpu, x86_feature);
	if (guest_has_cap)
		guest_cpu_cap_set(vcpu, x86_feature);
	else
		guest_cpu_cap_clear(vcpu, x86_feature);
}

static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
					      unsigned int x86_feature)
{
	BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
	unsigned int x86_leaf = __feature_leaf(x86_feature);

	return test_bit(kvm_governed_feature_index(x86_feature),
			vcpu->arch.governed_features.enabled);
	return vcpu->arch.cpu_caps[x86_leaf] & __feature_bit(x86_feature);
}

static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (guest_can_use(vcpu, X86_FEATURE_LAM))
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_LAM))
		cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);

	return kvm_vcpu_is_legal_gpa(vcpu, cr3);
}

/*
 * Returns true if the vCPU's capabilities include at least one of SPEC_CTRL,
 * AMD_STIBP, AMD_IBRS or AMD_SSBD.  The checks are made in that order and
 * stop at the first capability that is present.
 */
static inline bool guest_has_spec_ctrl_msr(struct kvm_vcpu *vcpu)
{
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
		return true;

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_STIBP))
		return true;

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBRS))
		return true;

	return guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_SSBD);
}

/*
 * Returns true if the vCPU's capabilities include at least one of SPEC_CTRL,
 * AMD_IBPB or SBPB.  The checks are made in that order and stop at the first
 * capability that is present.
 */
static inline bool guest_has_pred_cmd_msr(struct kvm_vcpu *vcpu)
{
	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SPEC_CTRL))
		return true;

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_AMD_IBPB))
		return true;

	return guest_cpu_cap_has(vcpu, X86_FEATURE_SBPB);
}

#endif
Loading