Commit aa2197f5 authored by Eric Biggers's avatar Eric Biggers Committed by Herbert Xu
Browse files

crypto: x86/aes-xts - wire up VAES + AVX10/512 implementation



Add an AES-XTS implementation "xts-aes-vaes-avx10_512" for x86_64 CPUs
with the VAES, VPCLMULQDQ, and either AVX10/512 or AVX512BW + AVX512VL
extensions.  This implementation uses zmm registers to operate on four
AES blocks at a time.  The assembly code is instantiated using a macro
so that most of the source code is shared with other implementations.

To avoid downclocking on older Intel CPU models, an exclusion list is
used to prevent this 512-bit implementation from being used by default
on some CPU models.  They will use xts-aes-vaes-avx10_256 instead.  For
now, this exclusion list is simply coded into aesni-intel_glue.c.  It
may make sense to eventually move it into a more central location.

xts-aes-vaes-avx10_512 is slightly faster than xts-aes-vaes-avx10_256 on
some current CPUs.  E.g., on AMD Zen 4, AES-256-XTS decryption
throughput increases by 13% with 4096-byte inputs, or 14% with 512-byte
inputs.  On Intel Sapphire Rapids, AES-256-XTS decryption throughput
increases by 2% with 4096-byte inputs, or 3% with 512-byte inputs.

Future CPUs may provide stronger 512-bit support, in which case a larger
benefit should be seen.

Signed-off-by: default avatarEric Biggers <ebiggers@google.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent ee63fea0
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -826,4 +826,13 @@ SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
	_aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)

.set	VL, 64
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
	_aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
	_aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
+32 −0
Original line number Diff line number Diff line
@@ -1298,8 +1298,29 @@ DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
#endif

/*
 * This is a list of CPU models that are known to suffer from downclocking when
 * zmm registers (512-bit vectors) are used.  On these CPUs, the AES-XTS
 * implementation with zmm registers won't be used by default.  An
 * implementation with ymm registers (256-bit vectors) will be used instead.
 */
static const struct x86_cpu_id zmm_exclusion_list[] = {
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_SKYLAKE_X },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_X },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_D },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_L },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_ICELAKE_NNPI },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE_L },
	{ .vendor = X86_VENDOR_INTEL, .family = 6, .model = INTEL_FAM6_TIGERLAKE },
	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
	{},
};

static int __init register_xts_algs(void)
{
	int err;
@@ -1333,6 +1354,14 @@ static int __init register_xts_algs(void)
					     &aes_xts_simdalg_vaes_avx10_256);
	if (err)
		return err;

	if (x86_match_cpu(zmm_exclusion_list))
		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;

	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
					     &aes_xts_simdalg_vaes_avx10_512);
	if (err)
		return err;
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
	return 0;
}
@@ -1349,6 +1378,9 @@ static void unregister_xts_algs(void)
	if (aes_xts_simdalg_vaes_avx10_256)
		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
					  &aes_xts_simdalg_vaes_avx10_256);
	if (aes_xts_simdalg_vaes_avx10_512)
		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
					  &aes_xts_simdalg_vaes_avx10_512);
#endif
}
#else /* CONFIG_X86_64 */