Commit 968e9bc4 authored by Eric Biggers
Browse files

x86: move ZMM exclusion list into CPU feature flag



Lift zmm_exclusion_list in aesni-intel_glue.c into the x86 CPU setup
code, and add a new x86 CPU feature flag X86_FEATURE_PREFER_YMM that is
set when the CPU is on this list.

This allows other code in arch/x86/, such as the CRC library code, to
apply the same exclusion list when deciding whether to execute 256-bit
or 512-bit optimized functions.

Note that full AVX512 support including ZMM registers is still exposed
to userspace and is still supported for in-kernel use.  This flag just
indicates whether in-kernel code should prefer to use YMM registers.

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: "Martin K. Petersen" <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20250210174540.161705-2-ebiggers@kernel.org


Signed-off-by: Eric Biggers <ebiggers@google.com>
parent 0645b245
Loading
Loading
Loading
Loading
+1 −21
Original line number Diff line number Diff line
@@ -1536,26 +1536,6 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
		AES_GCM_KEY_AVX10_SIZE, 800);
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */

/*
 * This is a list of CPU models that are known to suffer from downclocking when
 * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
 * implementations with zmm registers won't be used by default.  Implementations
 * with ymm registers (256-bit vectors) will be used by default instead.
 *
 * register_avx_algs() tests the running CPU against this table with
 * x86_match_cpu(); on a match it lowers the priority of the 512-bit
 * algorithms (e.g. aes_xts_alg_vaes_avx10_512) rather than leaving them
 * unregistered.
 */
static const struct x86_cpu_id zmm_exclusion_list[] = {
	/* Skylake server parts. */
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
	/* Ice Lake family (server, client, and NNPI variants). */
	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
	X86_MATCH_VFM(INTEL_ICELAKE,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
	/* Tiger Lake family. */
	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
	/* Empty entry marks the end of the table. */
	{},
};

static int __init register_avx_algs(void)
{
	int err;
@@ -1600,7 +1580,7 @@ static int __init register_avx_algs(void)
	if (err)
		return err;

	if (x86_match_cpu(zmm_exclusion_list)) {
	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
		int i;

		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
+1 −0
Original line number Diff line number Diff line
@@ -483,6 +483,7 @@
#define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM		(21*32 + 8) /* Avoid ZMM registers due to downclocking */

/*
 * BUG word(s)
+22 −0
Original line number Diff line number Diff line
@@ -521,6 +521,25 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}

/*
 * This is a list of Intel CPUs that are known to suffer from downclocking when
 * ZMM registers (512-bit vectors) are used.  On these CPUs, when the kernel
 * executes SIMD-optimized code such as cryptography functions or CRCs, it
 * should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
 *
 * init_intel() checks the running CPU against this table with x86_match_cpu()
 * and sets X86_FEATURE_PREFER_YMM on a match.  The flag is only a preference
 * hint for in-kernel code: AVX-512 including ZMM registers remains exposed to
 * userspace and usable in the kernel.
 */
static const struct x86_cpu_id zmm_exclusion_list[] = {
	/* Skylake server parts. */
	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
	/* Ice Lake family (server, client, and NNPI variants). */
	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
	X86_MATCH_VFM(INTEL_ICELAKE,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
	/* Tiger Lake family. */
	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
	/* Empty entry marks the end of the table. */
	{},
};

static void init_intel(struct cpuinfo_x86 *c)
{
	early_init_intel(c);
@@ -601,6 +620,9 @@ static void init_intel(struct cpuinfo_x86 *c)
	}
#endif

	if (x86_match_cpu(zmm_exclusion_list))
		set_cpu_cap(c, X86_FEATURE_PREFER_YMM);

	/* Work around errata */
	srat_detect_node(c);