Commit 5dcc1e76 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM x86 misc changes for 6.11

 - Add a global struct to consolidate tracking of host values, e.g. EFER, and
   move "shadow_phys_bits" into the structure as "maxphyaddr".

 - Add KVM_CAP_X86_APIC_BUS_CYCLES_NS to allow configuring the effective APIC
   bus frequency, because TDX.

 - Print the name of the APICv/AVIC inhibits in the relevant tracepoint.

 - Clean up KVM's handling of vendor specific emulation to consistently act on
   "compatible with Intel/AMD", versus checking for a specific vendor.

 - Misc cleanups
parents 86014c1e 82222ee7
Loading
Loading
Loading
Loading
+57 −21
Original line number Diff line number Diff line
@@ -6485,7 +6485,10 @@ affect the device's behavior. Current defined flags::
  /* x86, set if the VCPU is in system management mode */
  #define KVM_RUN_X86_SMM          (1 << 0)
  /* x86, set if bus lock detected in VM */
  #define KVM_RUN_BUS_LOCK    (1 << 1)
  #define KVM_RUN_X86_BUS_LOCK     (1 << 1)
  /* x86, set if the VCPU is executing a nested (L2) guest */
  #define KVM_RUN_X86_GUEST_MODE   (1 << 2)

  /* arm64, set for KVM_EXIT_DEBUG */
  #define KVM_DEBUG_ARCH_HSR_HIGH_VALID  (1 << 0)

@@ -7831,29 +7834,31 @@ Valid bits in args[0] are::
  #define KVM_BUS_LOCK_DETECTION_OFF      (1 << 0)
  #define KVM_BUS_LOCK_DETECTION_EXIT     (1 << 1)

Enabling this capability on a VM provides userspace with a way to select
a policy to handle the bus locks detected in guest. Userspace can obtain
the supported modes from the result of KVM_CHECK_EXTENSION and define it
through the KVM_ENABLE_CAP.
Enabling this capability on a VM provides userspace with a way to select a
policy to handle the bus locks detected in guest. Userspace can obtain the
supported modes from the result of KVM_CHECK_EXTENSION and define it through
the KVM_ENABLE_CAP. The supported modes are mutually-exclusive.

KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are supported
currently and mutually exclusive with each other. More bits can be added in
the future.
This capability allows userspace to force VM exits on bus locks detected in the
guest, irrespective of whether or not the host has enabled split-lock detection
(which triggers an #AC exception that KVM intercepts). This capability is
intended to mitigate attacks where a malicious/buggy guest can exploit bus
locks to degrade the performance of the whole system.

With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in guest will not cause vm exits
so that no additional actions are needed. This is the default mode.
If KVM_BUS_LOCK_DETECTION_OFF is set, KVM doesn't force guest bus locks to VM
exit, although the host kernel's split-lock #AC detection still applies, if
enabled.

With KVM_BUS_LOCK_DETECTION_EXIT set, vm exits happen when bus lock detected
in VM. KVM just exits to userspace when handling them. Userspace can enforce
its own throttling or other policy based mitigations.
If KVM_BUS_LOCK_DETECTION_EXIT is set, KVM enables a CPU feature that ensures
bus locks in the guest trigger a VM exit, and KVM exits to userspace for all
such VM exits, e.g. to allow userspace to throttle the offending guest and/or
apply some other policy-based mitigation. When exiting to userspace, KVM sets
KVM_RUN_X86_BUS_LOCK in vcpu-run->flags, and conditionally sets the exit_reason
to KVM_EXIT_X86_BUS_LOCK.

This capability is aimed to address the thread that VM can exploit bus locks to
degree the performance of the whole system. Once the userspace enable this
capability and select the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set the
KVM_RUN_BUS_LOCK flag in vcpu-run->flags field and exit to userspace. Concerning
the bus lock vm exit can be preempted by a higher priority VM exit, the exit
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
Note! Detected bus locks may be coincident with other exits to userspace, i.e.
KVM_RUN_X86_BUS_LOCK should be checked regardless of the primary exit reason if
userspace wants to take action on all detected bus locks.

7.23 KVM_CAP_PPC_DAWR1
----------------------
@@ -8137,6 +8142,37 @@ error/annotated fault.

See KVM_EXIT_MEMORY_FAULT for more information.

7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS
-----------------------------------

:Architectures: x86
:Target: VM
:Parameters: args[0] is the desired APIC bus clock rate, in nanoseconds per bus cycle
:Returns: 0 on success, -EINVAL if args[0] contains an invalid value for the
          frequency or if any vCPUs have been created, -ENXIO if a virtual
          local APIC has not been created using KVM_CREATE_IRQCHIP.

This capability sets the VM's APIC bus clock frequency, used by KVM's in-kernel
virtual APIC when emulating APIC timers.  KVM's default value can be retrieved
by KVM_CHECK_EXTENSION.

Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the
core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest.

7.36 KVM_CAP_X86_GUEST_MODE
------------------------------

:Architectures: x86
:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP.

The presence of this capability indicates that KVM_RUN will update the
KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the
vCPU was executing nested guest code when it exited.

KVM exits with the register state of either the L1 or L2 guest
depending on which executed at the time of an exit. Userspace must
take care to differentiate between these cases.

8. Other capabilities.
======================

+21 −3
Original line number Diff line number Diff line
@@ -1208,7 +1208,7 @@ enum kvm_apicv_inhibit {
	 * APIC acceleration is disabled by a module parameter
	 * and/or not supported in hardware.
	 */
	APICV_INHIBIT_REASON_DISABLE,
	APICV_INHIBIT_REASON_DISABLED,

	/*
	 * APIC acceleration is inhibited because AutoEOI feature is
@@ -1278,8 +1278,27 @@ enum kvm_apicv_inhibit {
	 * mapping between logical ID and vCPU.
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,

	NR_APICV_INHIBIT_REASONS,
};

/*
 * Build one { mask, name } initializer pair for an APICv inhibit reason:
 * the mask is the single bit at position APICV_INHIBIT_REASON_<reason>,
 * and the name is the stringified <reason> token (without the
 * APICV_INHIBIT_REASON_ prefix).
 */
#define __APICV_INHIBIT_REASON(reason)			\
	{ BIT(APICV_INHIBIT_REASON_##reason), #reason }

/*
 * Comma-separated list of { mask, name } pairs, one per inhibit reason
 * listed below.  The list is maintained by hand and is not derived from
 * enum kvm_apicv_inhibit.
 * NOTE(review): presumably consumed by the APICv inhibit tracepoint to print
 * reason names instead of a raw bitmask -- confirm against the user.
 */
#define APICV_INHIBIT_REASONS				\
	__APICV_INHIBIT_REASON(DISABLED),		\
	__APICV_INHIBIT_REASON(HYPERV),			\
	__APICV_INHIBIT_REASON(ABSENT),			\
	__APICV_INHIBIT_REASON(BLOCKIRQ),		\
	__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED),	\
	__APICV_INHIBIT_REASON(APIC_ID_MODIFIED),	\
	__APICV_INHIBIT_REASON(APIC_BASE_MODIFIED),	\
	__APICV_INHIBIT_REASON(NESTED),			\
	__APICV_INHIBIT_REASON(IRQWIN),			\
	__APICV_INHIBIT_REASON(PIT_REINJ),		\
	__APICV_INHIBIT_REASON(SEV),			\
	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)

struct kvm_arch {
	unsigned long n_used_mmu_pages;
	unsigned long n_requested_mmu_pages;
@@ -1365,6 +1384,7 @@ struct kvm_arch {

	u32 default_tsc_khz;
	bool user_set_tsc;
	u64 apic_bus_cycle_ns;

	seqcount_raw_spinlock_t pvclock_sc;
	bool use_master_clock;
@@ -1709,7 +1729,6 @@ struct kvm_x86_ops {
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
	bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
	const unsigned long required_apicv_inhibits;
	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
@@ -1855,7 +1874,6 @@ struct kvm_arch_async_pf {
};

extern u32 __read_mostly kvm_nr_uret_msrs;
extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
extern struct kvm_x86_ops kvm_x86_ops;
+1 −0
Original line number Diff line number Diff line
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {

#define KVM_RUN_X86_SMM		 (1 << 0)
#define KVM_RUN_X86_BUS_LOCK     (1 << 1)
#define KVM_RUN_X86_GUEST_MODE   (1 << 2)

/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
+12 −0
Original line number Diff line number Diff line
@@ -335,6 +335,18 @@ static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
#endif
}

/*
 * Look up the vCPU's CPUID entry for function 0 and run its EBX/ECX/EDX
 * values through the AMD and Hygon vendor checks.
 *
 * Returns true if either check matches, false if neither matches or the
 * vCPU has no entry for function 0.
 */
static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *leaf0 = kvm_find_cpuid_entry(vcpu, 0);

	if (leaf0 && is_guest_vendor_amd(leaf0->ebx, leaf0->ecx, leaf0->edx))
		return true;

	return leaf0 &&
	       is_guest_vendor_hygon(leaf0->ebx, leaf0->ecx, leaf0->edx);
}

static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
+0 −18
Original line number Diff line number Diff line
@@ -102,24 +102,6 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
		*reg &= ~__feature_bit(x86_feature);
}

/*
 * Test whether the vCPU's CPUID function 0 entry passes the AMD or the
 * Hygon vendor check on its EBX/ECX/EDX values.  A vCPU without a
 * function 0 entry is reported as not matching.
 */
static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *leaf0 = kvm_find_cpuid_entry(vcpu, 0);

	/* No function 0 entry: nothing to compare against. */
	if (!leaf0)
		return false;

	if (is_guest_vendor_amd(leaf0->ebx, leaf0->ecx, leaf0->edx))
		return true;

	return is_guest_vendor_hygon(leaf0->ebx, leaf0->ecx, leaf0->edx);
}

/*
 * Test whether the vCPU's CPUID function 0 entry passes the Intel vendor
 * check on its EBX/ECX/EDX values.  A vCPU without a function 0 entry is
 * reported as not matching.
 */
static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *leaf0 = kvm_find_cpuid_entry(vcpu, 0);

	/* No function 0 entry: nothing to compare against. */
	if (!leaf0)
		return false;

	return is_guest_vendor_intel(leaf0->ebx, leaf0->ecx, leaf0->edx);
}

static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.is_amd_compatible;
Loading