Commit 13f64f6d authored by Oliver Upton
Browse files

Merge branch 'kvm-arm64/nv-idregs' into kvmarm/next



* kvm-arm64/nv-idregs:
  : Changes to exposure of NV features, courtesy of Marc Zyngier
  :
  : Apply NV-specific feature restrictions at reset rather than at the point
  : of KVM_RUN. This makes the true feature set visible to userspace, a
  : necessary step towards save/restore support for NV VMs.
  :
  : Add an additional vCPU feature flag for selecting the E2H0 flavor of NV,
  : such that the VHE-ness of the VM can be applied to the feature set.
  KVM: arm64: selftests: Test that TGRAN*_2 fields are writable
  KVM: arm64: Allow userspace to write ID_AA64MMFR0_EL1.TGRAN*_2
  KVM: arm64: Advertise FEAT_ECV when possible
  KVM: arm64: Make ID_AA64MMFR4_EL1.NV_frac writable
  KVM: arm64: Allow userspace to limit NV support to nVHE
  KVM: arm64: Move NV-specific capping to idreg sanitisation
  KVM: arm64: Enforce NV limits on a per-idregs basis
  KVM: arm64: Make ID_REG_LIMIT_FIELD_ENUM() more widely available
  KVM: arm64: Consolidate idreg callbacks
  KVM: arm64: Advertise NV2 in the boot messages
  KVM: arm64: Mark HCR.EL2.{NV*,AT} RES0 when ID_AA64MMFR4_EL1.NV_frac is 0
  KVM: arm64: Mark HCR.EL2.E2H RES0 when ID_AA64MMFR1_EL1.VH is zero
  KVM: arm64: Hide ID_AA64MMFR2_EL1.NV from guest and userspace
  arm64: cpufeature: Handle NV_frac as a synonym of NV2

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
parents 56e3e5c8 edfd826b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -188,6 +188,7 @@ static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
}

int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val);

#ifdef CONFIG_ARM64_PTR_AUTH
bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+1 −0
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS	5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2		7 /* Support nested virtualization */
#define KVM_ARM_VCPU_HAS_EL2_E2H0	8 /* Limit NV support to E2H RES0 */

struct kvm_vcpu_init {
	__u32 target;
+13 −2
Original line number Diff line number Diff line
@@ -497,6 +497,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {

/*
 * Feature-bit descriptors for ID_AA64MMFR4_EL1, terminated by ARM64_FTR_END.
 *
 * Two fields are described: E2H0 and NV_frac. Both entries carry the
 * FTR_HIDDEN, FTR_STRICT and FTR_LOWER_SAFE attributes.
 *
 * NOTE(review): E2H0 is declared with the S_ARM64_FTR_BITS variant while
 * NV_frac uses ARM64_FTR_BITS -- presumably signed vs. unsigned field
 * handling; confirm against the macro definitions.
 */
static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_NV_frac_SHIFT, 4, 0),
	ARM64_FTR_END,
};

@@ -2162,7 +2163,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
	if (kvm_get_mode() != KVM_MODE_NV)
		return false;

	if (!has_cpuid_feature(cap, scope)) {
	if (!cpucap_multi_entry_cap_matches(cap, scope)) {
		pr_warn("unavailable: %s\n", cap->desc);
		return false;
	}
@@ -2519,8 +2520,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
		.capability = ARM64_HAS_NESTED_VIRT,
		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
		.matches = has_nested_virt_support,
		.match_list = (const struct arm64_cpu_capabilities []){
			{
				.matches = has_cpuid_feature,
				ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
			},
			{
				.matches = has_cpuid_feature,
				ARM64_CPUID_FIELDS(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)
			},
			{ /* Sentinel */ }
		},
	},
	{
		.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+3 −2
Original line number Diff line number Diff line
@@ -2836,11 +2836,12 @@ static __init int kvm_arm_init(void)
	if (err)
		goto out_hyp;

	kvm_info("%s%sVHE mode initialized successfully\n",
	kvm_info("%s%sVHE%s mode initialized successfully\n",
		 in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
				     "Protected " : "Hyp "),
		 in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
				     "h" : "n"));
				     "h" : "n"),
		 cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": "");

	/*
	 * FIXME: Do something reasonable if kvm_init() fails after pKVM
+152 −133
Original line number Diff line number Diff line
@@ -16,9 +16,6 @@

#include "sys_regs.h"

/* Protection against the sysreg repainting madness... */
#define NV_FTR(r, f)		ID_AA64##r##_EL1_##f

/*
 * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
 * memory usage and potential number of different sets of S2 PTs in
@@ -54,6 +51,10 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
	struct kvm_s2_mmu *tmp;
	int num_mmus, ret = 0;

	if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features) &&
	    !cpus_have_final_cap(ARM64_HAS_HCR_NV1))
		return -EINVAL;

	/*
	 * Let's treat memory allocation failures as benign: If we fail to
	 * allocate anything, return an error and keep the allocated array
@@ -807,134 +808,151 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 * This list should get updated as new features get added to the NV
 * support, and new extension to the architecture.
 */
static void limit_nv_id_regs(struct kvm *kvm)
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
	u64 val, tmp;

	switch (reg) {
	case SYS_ID_AA64ISAR0_EL1:
		/* Support everything but TME */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1);
	val &= ~NV_FTR(ISAR0, TME);
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val);

	/* Support everything but Spec Invalidation and LS64 */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1);
	val &= ~(NV_FTR(ISAR1, LS64)	|
		 NV_FTR(ISAR1, SPECRES));
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val);

	/* No AMU, MPAM, S-EL2, or RAS */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1);
	val &= ~(GENMASK_ULL(55, 52)	|
		 NV_FTR(PFR0, AMU)	|
		 NV_FTR(PFR0, MPAM)	|
		 NV_FTR(PFR0, SEL2)	|
		 NV_FTR(PFR0, RAS)	|
		 NV_FTR(PFR0, EL3)	|
		 NV_FTR(PFR0, EL2)	|
		 NV_FTR(PFR0, EL1)	|
		 NV_FTR(PFR0, EL0));
		val &= ~ID_AA64ISAR0_EL1_TME;
		break;

	case SYS_ID_AA64ISAR1_EL1:
		/* Support everything but LS64 and Spec Invalidation */
		val &= ~(ID_AA64ISAR1_EL1_LS64	|
			 ID_AA64ISAR1_EL1_SPECRES);
		break;

	case SYS_ID_AA64PFR0_EL1:
		/* No RME, AMU, MPAM, S-EL2, or RAS */
		val &= ~(ID_AA64PFR0_EL1_RME	|
			 ID_AA64PFR0_EL1_AMU	|
			 ID_AA64PFR0_EL1_MPAM	|
			 ID_AA64PFR0_EL1_SEL2	|
			 ID_AA64PFR0_EL1_RAS	|
			 ID_AA64PFR0_EL1_EL3	|
			 ID_AA64PFR0_EL1_EL2	|
			 ID_AA64PFR0_EL1_EL1	|
			 ID_AA64PFR0_EL1_EL0);
		/* 64bit only at any EL */
	val |= FIELD_PREP(NV_FTR(PFR0, EL0), 0b0001);
	val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
	val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
	val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);
		val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP);
		val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP);
		val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP);
		val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP);
		break;

	case SYS_ID_AA64PFR1_EL1:
		/* Only support BTI, SSBS, CSV2_frac */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1);
	val &= (NV_FTR(PFR1, BT)	|
		NV_FTR(PFR1, SSBS)	|
		NV_FTR(PFR1, CSV2_frac));
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val);

	/* Hide ECV, ExS, Secure Memory */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
	val &= ~(NV_FTR(MMFR0, ECV)		|
		 NV_FTR(MMFR0, EXS)		|
		 NV_FTR(MMFR0, TGRAN4_2)	|
		 NV_FTR(MMFR0, TGRAN16_2)	|
		 NV_FTR(MMFR0, TGRAN64_2)	|
		 NV_FTR(MMFR0, SNSMEM));
		val &= (ID_AA64PFR1_EL1_BT	|
			ID_AA64PFR1_EL1_SSBS	|
			ID_AA64PFR1_EL1_CSV2_frac);
		break;

	case SYS_ID_AA64MMFR0_EL1:
		/* Hide ExS, Secure Memory */
		val &= ~(ID_AA64MMFR0_EL1_EXS		|
			 ID_AA64MMFR0_EL1_TGRAN4_2	|
			 ID_AA64MMFR0_EL1_TGRAN16_2	|
			 ID_AA64MMFR0_EL1_TGRAN64_2	|
			 ID_AA64MMFR0_EL1_SNSMEM);

		/* Hide CNTPOFF if present */
		val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, ECV, IMP);

		/* Disallow unsupported S2 page sizes */
		switch (PAGE_SIZE) {
		case SZ_64K:
		val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, NI);
			fallthrough;
		case SZ_16K:
		val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, NI);
			fallthrough;
		case SZ_4K:
			/* Support everything */
			break;
		}

		/*
	 * Since we can't support a guest S2 page size smaller than
	 * the host's own page size (due to KVM only populating its
	 * own S2 using the kernel's page size), advertise the
	 * limitation using FEAT_GTG.
		 * Since we can't support a guest S2 page size smaller
		 * than the host's own page size (due to KVM only
		 * populating its own S2 using the kernel's page
		 * size), advertise the limitation using FEAT_GTG.
		 */
		switch (PAGE_SIZE) {
		case SZ_4K:
		val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
			fallthrough;
		case SZ_16K:
		val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
			fallthrough;
		case SZ_64K:
		val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
			break;
		}

		/* Cap PARange to 48bits */
	tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
	if (tmp > 0b0101) {
		val &= ~NV_FTR(MMFR0, PARANGE);
		val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
	}
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val);

	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1);
	val &= (NV_FTR(MMFR1, HCX)	|
		NV_FTR(MMFR1, PAN)	|
		NV_FTR(MMFR1, LO)	|
		NV_FTR(MMFR1, HPDS)	|
		NV_FTR(MMFR1, VH)	|
		NV_FTR(MMFR1, VMIDBits));
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val);

	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1);
	val &= ~(NV_FTR(MMFR2, BBM)	|
		 NV_FTR(MMFR2, TTL)	|
		val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, PARANGE, 48);
		break;

	case SYS_ID_AA64MMFR1_EL1:
		val &= (ID_AA64MMFR1_EL1_HCX	|
			ID_AA64MMFR1_EL1_PAN	|
			ID_AA64MMFR1_EL1_LO	|
			ID_AA64MMFR1_EL1_HPDS	|
			ID_AA64MMFR1_EL1_VH	|
			ID_AA64MMFR1_EL1_VMIDBits);
		/* FEAT_E2H0 implies no VHE */
		if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
			val &= ~ID_AA64MMFR1_EL1_VH;
		break;

	case SYS_ID_AA64MMFR2_EL1:
		val &= ~(ID_AA64MMFR2_EL1_BBM	|
			 ID_AA64MMFR2_EL1_TTL	|
			 GENMASK_ULL(47, 44)	|
		 NV_FTR(MMFR2, ST)	|
		 NV_FTR(MMFR2, CCIDX)	|
		 NV_FTR(MMFR2, VARange));
			 ID_AA64MMFR2_EL1_ST	|
			 ID_AA64MMFR2_EL1_CCIDX	|
			 ID_AA64MMFR2_EL1_VARange);

		/* Force TTL support */
	val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val);
		val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR2_EL1, TTL, IMP);
		break;

	case SYS_ID_AA64MMFR4_EL1:
		/*
		 * You get EITHER
		 *
		 * - FEAT_VHE without FEAT_E2H0
		 * - FEAT_NV limited to FEAT_NV2
		 * - HCR_EL2.NV1 being RES0
		 *
		 * OR
		 *
		 * - FEAT_E2H0 without FEAT_VHE nor FEAT_NV
		 *
		 * Life is too short for anything else.
		 */
		if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) {
			val = 0;
	if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
		val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
				  ID_AA64MMFR4_EL1_E2H0_NI_NV1);
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);
		} else {
			val = SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY);
			val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI_NV1);
		}
		break;

	case SYS_ID_AA64DFR0_EL1:
		/* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
	val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
	val &= (NV_FTR(DFR0, PMUVer)	|
		NV_FTR(DFR0, WRPs)	|
		NV_FTR(DFR0, BRPs)	|
		NV_FTR(DFR0, DebugVer)	|
		NV_FTR(DFR0, HPMN0));
		val &= (ID_AA64DFR0_EL1_PMUVer	|
			ID_AA64DFR0_EL1_WRPs	|
			ID_AA64DFR0_EL1_BRPs	|
			ID_AA64DFR0_EL1_DebugVer|
			ID_AA64DFR0_EL1_HPMN0);

		/* Cap Debug to ARMv8.1 */
	tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
	if (tmp > 0b0111) {
		val &= ~NV_FTR(DFR0, DebugVer);
		val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
		val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE);
		break;
	}
	kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);

	return val;
}

u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu,
@@ -981,8 +999,6 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
	if (!kvm->arch.sysreg_masks)
		return -ENOMEM;

	limit_nv_id_regs(kvm);

	/* VTTBR_EL2 */
	res0 = res1 = 0;
	if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16))
@@ -1021,10 +1037,11 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
		res0 |= HCR_FIEN;
	if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP))
		res0 |= HCR_FWB;
	if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2))
		res0 |= HCR_NV2;
	if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
		res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
	/* Implementation choice: NV2 is the only supported config */
	if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
		res0 |= (HCR_NV2 | HCR_NV | HCR_AT);
	if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, NI))
		res0 |= HCR_NV1;
	if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
	      kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
		res0 |= (HCR_API | HCR_APK);
@@ -1034,6 +1051,8 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
		res0 |= (HCR_TEA | HCR_TERR);
	if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
		res0 |= HCR_TLOR;
	if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
		res0 |= HCR_E2H;
	if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP))
		res1 |= HCR_E2H;
	set_sysreg_masks(kvm, HCR_EL2, res0, res1);
Loading