Commit a104e0a3 authored by Paolo Bonzini's avatar Paolo Bonzini
Browse files

Merge tag 'kvm-x86-svm-6.18' of https://github.com/kvm-x86/linux into HEAD

KVM SVM changes for 6.18

 - Require a minimum GHCB version of 2 when starting SEV-SNP guests via
   KVM_SEV_INIT2 so that invalid GHCB versions result in immediate errors
   instead of latent guest failures.

 - Add support for Secure TSC for SEV-SNP guests, which prevents the untrusted
   host from tampering with the guest's TSC frequency, while still allowing the
   the VMM to configure the guest's TSC frequency prior to launch.

 - Mitigate the potential for TOCTOU bugs when accessing GHCB fields by
   wrapping all accesses via READ_ONCE().

 - Validate the XCR0 provided by the guest (via the GHCB) to avoid tracking a
   bogous XCR0 value in KVM's software model.

 - Save an SEV guest's policy if and only if LAUNCH_START fully succeeds to
   avoid leaving behind stale state (thankfully not consumed in KVM).

 - Explicitly reject non-positive effective lengths during SNP's LAUNCH_UPDATE
   instead of subtly relying on guest_memfd to do the "heavy" lifting.

 - Reload the pre-VMRUN TSC_AUX on #VMEXIT for SEV-ES guests, not the host's
   desired TSC_AUX, to fix a bug where KVM could clobber a different vCPU's
   TSC_AUX due to hardware not matching the value cached in the user-return MSR
   infrastructure.

 - Enable AVIC by default for Zen4+ if x2AVIC (and other prereqs) is supported,
   and clean up the AVIC initialization code along the way.
parents 0f68fe44 ca2967de
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -444,6 +444,7 @@
#define X86_FEATURE_VM_PAGE_FLUSH	(19*32+ 2) /* VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES		(19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SEV_SNP		(19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_SNP_SECURE_TSC	(19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX		(19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT	(19*32+10) /* hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP		(19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+2 −0
Original line number Diff line number Diff line
@@ -2200,6 +2200,7 @@ int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr);
unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);

int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -2367,6 +2368,7 @@ int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
void kvm_user_return_msr_update_cache(unsigned int index, u64 val);
u64 kvm_get_user_return_msr(unsigned int slot);

static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
+1 −0
Original line number Diff line number Diff line
@@ -299,6 +299,7 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define SVM_SEV_FEAT_RESTRICTED_INJECTION		BIT(3)
#define SVM_SEV_FEAT_ALTERNATE_INJECTION		BIT(4)
#define SVM_SEV_FEAT_DEBUG_SWAP				BIT(5)
#define SVM_SEV_FEAT_SECURE_TSC				BIT(9)

#define VMCB_ALLOWED_SEV_FEATURES_VALID			BIT_ULL(63)

+125 −26
Original line number Diff line number Diff line
@@ -64,6 +64,34 @@

static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_IDX_MASK) == -1u);

#define AVIC_AUTO_MODE -1

static int avic_param_set(const char *val, const struct kernel_param *kp)
{
	if (val && sysfs_streq(val, "auto")) {
		*(int *)kp->arg = AVIC_AUTO_MODE;
		return 0;
	}

	return param_set_bint(val, kp);
}

static const struct kernel_param_ops avic_ops = {
	.flags = KERNEL_PARAM_OPS_FL_NOARG,
	.set = avic_param_set,
	.get = param_get_bool,
};

/*
 * Enable / disable AVIC.  In "auto" mode (default behavior), AVIC is enabled
 * for Zen4+ CPUs with x2AVIC (and all other criteria for enablement are met).
 */
static int avic = AVIC_AUTO_MODE;
module_param_cb(avic, &avic_ops, &avic, 0444);
__MODULE_PARM_TYPE(avic, "bool");

module_param(enable_ipiv, bool, 0444);

static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);

@@ -77,7 +105,58 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;
static bool x2avic_enabled;


static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
					     bool intercept)
{
	static const u32 x2avic_passthrough_msrs[] = {
		X2APIC_MSR(APIC_ID),
		X2APIC_MSR(APIC_LVR),
		X2APIC_MSR(APIC_TASKPRI),
		X2APIC_MSR(APIC_ARBPRI),
		X2APIC_MSR(APIC_PROCPRI),
		X2APIC_MSR(APIC_EOI),
		X2APIC_MSR(APIC_RRR),
		X2APIC_MSR(APIC_LDR),
		X2APIC_MSR(APIC_DFR),
		X2APIC_MSR(APIC_SPIV),
		X2APIC_MSR(APIC_ISR),
		X2APIC_MSR(APIC_TMR),
		X2APIC_MSR(APIC_IRR),
		X2APIC_MSR(APIC_ESR),
		X2APIC_MSR(APIC_ICR),
		X2APIC_MSR(APIC_ICR2),

		/*
		 * Note!  Always intercept LVTT, as TSC-deadline timer mode
		 * isn't virtualized by hardware, and the CPU will generate a
		 * #GP instead of a #VMEXIT.
		 */
		X2APIC_MSR(APIC_LVTTHMR),
		X2APIC_MSR(APIC_LVTPC),
		X2APIC_MSR(APIC_LVT0),
		X2APIC_MSR(APIC_LVT1),
		X2APIC_MSR(APIC_LVTERR),
		X2APIC_MSR(APIC_TMICT),
		X2APIC_MSR(APIC_TMCCT),
		X2APIC_MSR(APIC_TDCR),
	};
	int i;

	if (intercept == svm->x2avic_msrs_intercepted)
		return;

	if (!x2avic_enabled)
		return;

	for (i = 0; i < ARRAY_SIZE(x2avic_passthrough_msrs); i++)
		svm_set_intercept_for_msr(&svm->vcpu, x2avic_passthrough_msrs[i],
					  MSR_TYPE_RW, intercept);

	svm->x2avic_msrs_intercepted = intercept;
}

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
@@ -99,7 +178,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
		avic_set_x2apic_msr_interception(svm, false);
	} else {
		/*
		 * Flush the TLB, the guest may have inserted a non-APIC
@@ -110,7 +189,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
		/* For xAVIC and hybrid-xAVIC modes */
		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
		avic_set_x2apic_msr_interception(svm, true);
	}
}

@@ -130,7 +209,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
		return;

	/* Enabling MSR intercept for x2APIC registers */
	svm_set_x2apic_msr_interception(svm, true);
	avic_set_x2apic_msr_interception(svm, true);
}

/* Note:
@@ -1090,23 +1169,27 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
	avic_vcpu_load(vcpu, vcpu->cpu);
}

static bool __init avic_want_avic_enabled(void)
{
	/*
 * Note:
 * - The module param avic enable both xAPIC and x2APIC mode.
 * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
 * - The mode can be switched at run-time.
	 * In "auto" mode, enable AVIC by default for Zen4+ if x2AVIC is
	 * supported (to avoid enabling partial support by default, and because
	 * x2AVIC should be supported by all Zen4+ CPUs).  Explicitly check for
	 * family 0x19 and later (Zen5+), as the kernel's synthetic ZenX flags
	 * aren't inclusive of previous generations, i.e. the kernel will set
	 * at most one ZenX feature flag.
	 */
bool avic_hardware_setup(void)
{
	if (!npt_enabled)
	if (avic == AVIC_AUTO_MODE)
		avic = boot_cpu_has(X86_FEATURE_X2AVIC) &&
		       (boot_cpu_data.x86 > 0x19 || cpu_feature_enabled(X86_FEATURE_ZEN4));

	if (!avic || !npt_enabled)
		return false;

	/* AVIC is a prerequisite for x2AVIC. */
	if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
		if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
			pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
			pr_warn(FW_BUG "Try enable AVIC using force_avic option");
		}
		if (boot_cpu_has(X86_FEATURE_X2AVIC))
			pr_warn(FW_BUG "Cannot enable x2AVIC, AVIC is unsupported\n");
		return false;
	}

@@ -1116,21 +1199,37 @@ bool avic_hardware_setup(void)
		return false;
	}

	if (boot_cpu_has(X86_FEATURE_AVIC)) {
		pr_info("AVIC enabled\n");
	} else if (force_avic) {
	/*
		 * Some older systems does not advertise AVIC support.
		 * See Revision Guide for specific AMD processor for more detail.
	 * Print a scary message if AVIC is force enabled to make it abundantly
	 * clear that ignoring CPUID could have repercussions.  See Revision
	 * Guide for specific AMD processor for more details.
	 */
		pr_warn("AVIC is not supported in CPUID but force enabled");
		pr_warn("Your system might crash and burn");
	if (!boot_cpu_has(X86_FEATURE_AVIC))
		pr_warn("AVIC unsupported in CPUID but force enabled, your system might crash and burn\n");

	return true;
}

/*
 * Note:
 * - The module param avic enable both xAPIC and x2APIC mode.
 * - Hypervisor can support both xAVIC and x2AVIC in the same guest.
 * - The mode can be switched at run-time.
 */
bool __init avic_hardware_setup(void)
{
	avic = avic_want_avic_enabled();
	if (!avic)
		return false;

	pr_info("AVIC enabled\n");

	/* AVIC is a prerequisite for x2AVIC. */
	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
	if (x2avic_enabled)
		pr_info("x2AVIC enabled\n");
	else
		svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;

	/*
	 * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
+9 −11
Original line number Diff line number Diff line
@@ -1798,17 +1798,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
		return -EINVAL;

	ret  = -ENOMEM;
	ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL);
	save = kzalloc(sizeof(*save), GFP_KERNEL);
	if (!ctl || !save)
		goto out_free;
	ctl = memdup_user(&user_vmcb->control, sizeof(*ctl));
	if (IS_ERR(ctl))
		return PTR_ERR(ctl);

	ret = -EFAULT;
	if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl)))
		goto out_free;
	if (copy_from_user(save, &user_vmcb->save, sizeof(*save)))
		goto out_free;
	save = memdup_user(&user_vmcb->save, sizeof(*save));
	if (IS_ERR(save)) {
		kfree(ctl);
		return PTR_ERR(save);
	}

	ret = -EINVAL;
	__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
Loading