Commit 7d14fbc5 authored by Eric Biggers, committed by Herbert Xu
Browse files

crypto: x86/aes - drop the avx10_256 AES-XTS and AES-CTR code



Intel made a late change to the AVX10 specification that removes support
for a 256-bit maximum vector length and enumeration of the maximum
vector length.  AVX10 will imply a maximum vector length of 512 bits.
I.e. there won't be any such thing as AVX10/256 or AVX10/512; there will
just be AVX10, and it will essentially just consolidate AVX512 features.

As a result of this new development, my strategy of providing both
*_avx10_256 and *_avx10_512 functions didn't turn out to be that useful.
The only remaining motivation for the 256-bit AVX512 / AVX10 functions
is to avoid downclocking on older Intel CPUs.  But in the case of
AES-XTS and AES-CTR, I already wrote *_avx2 code too (primarily to
support CPUs without AVX512), which performs almost as well as
*_avx10_256.  So we should just use that.

Therefore, remove the *_avx10_256 AES-XTS and AES-CTR functions and
algorithms, and rename the *_avx10_512 AES-XTS and AES-CTR functions and
algorithms to *_avx512.  Make Ice Lake and Tiger Lake use *_avx2 instead
of *_avx10_256 which they previously used.

I've left AES-GCM unchanged for now.  There is no VAES+AVX2 optimized
AES-GCM in the kernel yet, so the path forward for that is not as clear.
However, I did write a VAES+AVX2 optimized AES-GCM for BoringSSL.  So
one option is to port that to the kernel and then do the same cleanup.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 5ebc052d
Loading
Loading
Loading
Loading
+14 −33
Original line number Diff line number Diff line
@@ -48,8 +48,7 @@
// using the following sets of CPU features:
//	- AES-NI && AVX
//	- VAES && AVX2
//	- VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
//	- VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
//	- VAES && AVX512BW && AVX512VL && BMI2
//
// See the function definitions at the bottom of the file for more information.

@@ -76,7 +75,6 @@
.text

// Move a vector between memory and a register.
// The register operand must be in the first 16 vector registers.
.macro	_vmovdqu	src, dst
.if VL < 64
	vmovdqu		\src, \dst
@@ -86,7 +84,6 @@
.endm

// Move a vector between registers.
// The registers must be in the first 16 vector registers.
.macro	_vmovdqa	src, dst
.if VL < 64
	vmovdqa		\src, \dst
@@ -96,7 +93,7 @@
.endm

// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
// register.  The register operand must be in the first 16 vector registers.
// register.
.macro	_vbroadcast128	src, dst
.if VL == 16
	vmovdqu		\src, \dst
@@ -108,7 +105,6 @@
.endm

// XOR two vectors together.
// Any register operands must be in the first 16 vector registers.
.macro	_vpxor	src1, src2, dst
.if VL < 64
	vpxor		\src1, \src2, \dst
@@ -199,8 +195,8 @@
// XOR each with the zero-th round key.  Also update LE_CTR if !\final.
.macro	_prepare_2_ctr_vecs	is_xctr, i0, i1, final=0
.if \is_xctr
  .if USE_AVX10
	_vmovdqa	LE_CTR, AESDATA\i0
  .if USE_AVX512
	vmovdqa64	LE_CTR, AESDATA\i0
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i0
  .else
	vpxor		XCTR_IV, LE_CTR, AESDATA\i0
@@ -208,7 +204,7 @@
  .endif
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1

  .if USE_AVX10
  .if USE_AVX512
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i1
  .else
	vpxor		XCTR_IV, AESDATA\i1, AESDATA\i1
@@ -481,18 +477,12 @@
.Lxor_tail_partial_vec_0\@:
	// XOR the remaining 1 <= LEN < VL bytes.  It's easy if masked
	// loads/stores are available; otherwise it's a bit harder...
.if USE_AVX10
  .if VL <= 32
	mov		$-1, %eax
	bzhi		LEN, %eax, %eax
	kmovd		%eax, %k1
  .else
.if USE_AVX512
	mov		$-1, %rax
	bzhi		LEN64, %rax, %rax
	kmovq		%rax, %k1
  .endif
	vmovdqu8	(SRC), AESDATA1{%k1}{z}
	_vpxor		AESDATA1, AESDATA0, AESDATA0
	vpxord		AESDATA1, AESDATA0, AESDATA0
	vmovdqu8	AESDATA0, (DST){%k1}
.else
  .if VL == 32
@@ -554,7 +544,7 @@
// eliminates carries.  |ctr| is the per-message block counter starting at 1.

.set	VL, 16
.set	USE_AVX10, 0
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
@@ -564,7 +554,7 @@ SYM_FUNC_END(aes_xctr_crypt_aesni_avx)

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set	VL, 32
.set	USE_AVX10, 0
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
@@ -572,21 +562,12 @@ SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)

.set	VL, 32
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)

.set	VL, 64
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
.set	USE_AVX512, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx512)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx512)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
SYM_FUNC_END(aes_xctr_crypt_vaes_avx512)
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ
+50 −68
Original line number Diff line number Diff line
@@ -52,32 +52,25 @@
 * different code, it uses a macro to generate several implementations that
 * share similar source code but are targeted at different CPUs, listed below:
 *
 * AES-NI + AVX
 * AES-NI && AVX
 *    - 128-bit vectors (1 AES block per vector)
 *    - VEX-coded instructions
 *    - xmm0-xmm15
 *    - This is for older CPUs that lack VAES but do have AVX.
 *
 * VAES + VPCLMULQDQ + AVX2
 * VAES && VPCLMULQDQ && AVX2
 *    - 256-bit vectors (2 AES blocks per vector)
 *    - VEX-coded instructions
 *    - ymm0-ymm15
 *    - This is for CPUs that have VAES but lack AVX512 or AVX10,
 *      e.g. Intel's Alder Lake and AMD's Zen 3.
 *    - This is for CPUs that have VAES but either lack AVX512 (e.g. Intel's
 *      Alder Lake and AMD's Zen 3) or downclock too eagerly when using zmm
 *      registers (e.g. Intel's Ice Lake).
 *
 * VAES + VPCLMULQDQ + AVX10/256 + BMI2
 *    - 256-bit vectors (2 AES blocks per vector)
 * VAES && VPCLMULQDQ && AVX512BW && AVX512VL && BMI2
 *    - 512-bit vectors (4 AES blocks per vector)
 *    - EVEX-coded instructions
 *    - ymm0-ymm31
 *    - This is for CPUs that have AVX512 but where using zmm registers causes
 *      downclocking, and for CPUs that have AVX10/256 but not AVX10/512.
 *    - By "AVX10/256" we really mean (AVX512BW + AVX512VL) || AVX10/256.
 *      To avoid confusion with 512-bit, we just write AVX10/256.
 *
 * VAES + VPCLMULQDQ + AVX10/512 + BMI2
 *    - Same as the previous one, but upgrades to 512-bit vectors
 *      (4 AES blocks per vector) in zmm0-zmm31.
 *    - This is for CPUs that have good AVX512 or AVX10/512 support.
 *    - zmm0-zmm31
 *    - This is for CPUs that have good AVX512 support.
 *
 * This file doesn't have an implementation for AES-NI alone (without AVX), as
 * the lack of VEX would make all the assembly code different.
@@ -109,7 +102,7 @@

	// This table contains constants for vpshufb and vpblendvb, used to
	// handle variable byte shifts and blending during ciphertext stealing
	// on CPUs that don't support AVX10-style masking.
	// on CPUs that don't support AVX512-style masking.
.Lcts_permute_table:
	.byte	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
	.byte	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
@@ -138,7 +131,7 @@
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	_define_Vi	\i
.endr
.if USE_AVX10
.if USE_AVX512
.irp i, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	_define_Vi	\i
.endr
@@ -193,7 +186,7 @@
	// keys to the *end* of this register range.  I.e., AES-128 uses
	// KEY5-KEY14, AES-192 uses KEY3-KEY14, and AES-256 uses KEY1-KEY14.
	// (All also use KEY0 for the XOR-only "round" at the beginning.)
.if USE_AVX10
.if USE_AVX512
	.set	KEY1_XMM,	%xmm16
	.set	KEY1,		V16
	.set	KEY2_XMM,	%xmm17
@@ -227,7 +220,6 @@
.endm

// Move a vector between memory and a register.
// The register operand must be in the first 16 vector registers.
.macro	_vmovdqu	src, dst
.if VL < 64
	vmovdqu		\src, \dst
@@ -238,9 +230,9 @@

// Broadcast a 128-bit value into a vector.
.macro	_vbroadcast128	src, dst
.if VL == 16 && !USE_AVX10
.if VL == 16
	vmovdqu		\src, \dst
.elseif VL == 32 && !USE_AVX10
.elseif VL == 32
	vbroadcasti128	\src, \dst
.else
	vbroadcasti32x4	\src, \dst
@@ -248,7 +240,6 @@
.endm

// XOR two vectors together.
// Any register operands must be in the first 16 vector registers.
.macro	_vpxor	src1, src2, dst
.if VL < 64
	vpxor		\src1, \src2, \dst
@@ -259,7 +250,7 @@

// XOR three vectors together.
.macro	_xor3	src1, src2, src3_and_dst
.if USE_AVX10
.if USE_AVX512
	// vpternlogd with immediate 0x96 is a three-argument XOR.
	vpternlogd	$0x96, \src1, \src2, \src3_and_dst
.else
@@ -274,7 +265,7 @@
	vpshufd		$0x13, \src, \tmp
	vpaddq		\src, \src, \dst
	vpsrad		$31, \tmp, \tmp
.if USE_AVX10
.if USE_AVX512
	vpternlogd	$0x78, GF_POLY_XMM, \tmp, \dst
.else
	vpand		GF_POLY_XMM, \tmp, \tmp
@@ -337,7 +328,7 @@
	vpsllq		$1*VL/16, TWEAK0, TWEAK1
	vpsllq		$2*VL/16, TWEAK0, TWEAK2
	vpsllq		$3*VL/16, TWEAK0, TWEAK3
.if USE_AVX10
.if USE_AVX512
	vpternlogd	$0x96, V0, V1, TWEAK1
	vpternlogd	$0x96, V2, V3, TWEAK2
	vpternlogd	$0x96, V4, V5, TWEAK3
@@ -474,26 +465,26 @@
	lea		OFFS-16(KEY, KEYLEN64, 4), KEY

	// If all 32 SIMD registers are available, cache all the round keys.
.if USE_AVX10
.if USE_AVX512
	cmp		$24, KEYLEN
	jl		.Laes128\@
	je		.Laes192\@
	_vbroadcast128	-6*16(KEY), KEY1
	_vbroadcast128	-5*16(KEY), KEY2
	vbroadcasti32x4	-6*16(KEY), KEY1
	vbroadcasti32x4	-5*16(KEY), KEY2
.Laes192\@:
	_vbroadcast128	-4*16(KEY), KEY3
	_vbroadcast128	-3*16(KEY), KEY4
	vbroadcasti32x4	-4*16(KEY), KEY3
	vbroadcasti32x4	-3*16(KEY), KEY4
.Laes128\@:
	_vbroadcast128	-2*16(KEY), KEY5
	_vbroadcast128	-1*16(KEY), KEY6
	_vbroadcast128	0*16(KEY), KEY7
	_vbroadcast128	1*16(KEY), KEY8
	_vbroadcast128	2*16(KEY), KEY9
	_vbroadcast128	3*16(KEY), KEY10
	_vbroadcast128	4*16(KEY), KEY11
	_vbroadcast128	5*16(KEY), KEY12
	_vbroadcast128	6*16(KEY), KEY13
	_vbroadcast128	7*16(KEY), KEY14
	vbroadcasti32x4	-2*16(KEY), KEY5
	vbroadcasti32x4	-1*16(KEY), KEY6
	vbroadcasti32x4	0*16(KEY), KEY7
	vbroadcasti32x4	1*16(KEY), KEY8
	vbroadcasti32x4	2*16(KEY), KEY9
	vbroadcasti32x4	3*16(KEY), KEY10
	vbroadcasti32x4	4*16(KEY), KEY11
	vbroadcasti32x4	5*16(KEY), KEY12
	vbroadcasti32x4	6*16(KEY), KEY13
	vbroadcasti32x4	7*16(KEY), KEY14
.endif
.endm

@@ -521,7 +512,7 @@
// using the same key for all block(s).  The round key is loaded from the
// appropriate register or memory location for round \i.  May clobber \tmp.
.macro _vaes_1x		enc, i, xmm_suffix, data, tmp
.if USE_AVX10
.if USE_AVX512
	_vaes		\enc, KEY\i\xmm_suffix, \data
.else
.ifnb \xmm_suffix
@@ -538,7 +529,7 @@
// appropriate register or memory location for round \i.  In addition, does two
// steps of the computation of the next set of tweaks.  May clobber V4 and V5.
.macro	_vaes_4x	enc, i
.if USE_AVX10
.if USE_AVX512
	_tweak_step	(2*(\i-5))
	_vaes		\enc, KEY\i, V0
	_vaes		\enc, KEY\i, V1
@@ -574,7 +565,7 @@
.irp i, 5,6,7,8,9,10,11,12,13
	_vaes_1x	\enc, \i, \xmm_suffix, \data, tmp=\tmp
.endr
.if USE_AVX10
.if USE_AVX512
	vpxord		KEY14\xmm_suffix, \tweak, \tmp
.else
.ifnb \xmm_suffix
@@ -617,11 +608,11 @@
	// This is the main loop, en/decrypting 4*VL bytes per iteration.

	// XOR each source block with its tweak and the zero-th round key.
.if USE_AVX10
	_vmovdqu	0*VL(SRC), V0
	_vmovdqu	1*VL(SRC), V1
	_vmovdqu	2*VL(SRC), V2
	_vmovdqu	3*VL(SRC), V3
.if USE_AVX512
	vmovdqu8	0*VL(SRC), V0
	vmovdqu8	1*VL(SRC), V1
	vmovdqu8	2*VL(SRC), V2
	vmovdqu8	3*VL(SRC), V3
	vpternlogd	$0x96, TWEAK0, KEY0, V0
	vpternlogd	$0x96, TWEAK1, KEY0, V1
	vpternlogd	$0x96, TWEAK2, KEY0, V2
@@ -654,7 +645,7 @@
	// Reduce latency by doing the XOR before the vaesenclast, utilizing the
	// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a)
	// (and likewise for vaesdeclast).
.if USE_AVX10
.if USE_AVX512
	_tweak_step	18
	_tweak_step	19
	vpxord		TWEAK0, KEY14, V4
@@ -762,7 +753,7 @@
	_aes_crypt	\enc, _XMM, TWEAK1_XMM, %xmm0, tmp=%xmm1
.endif

.if USE_AVX10
.if USE_AVX512
	// Create a mask that has the first LEN bits set.
	mov		$-1, %r9d
	bzhi		LEN, %r9d, %r9d
@@ -811,7 +802,7 @@
//			   u8 iv[AES_BLOCK_SIZE]);
//
// Encrypt |iv| using the AES key |tweak_key| to get the first tweak.  Assumes
// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX10.
// that the CPU supports AES-NI and AVX, but not necessarily VAES or AVX512.
SYM_TYPED_FUNC_START(aes_xts_encrypt_iv)
	.set	TWEAK_KEY,	%rdi
	.set	IV,		%rsi
@@ -853,7 +844,7 @@ SYM_FUNC_END(aes_xts_encrypt_iv)
// multiple of 16, then this function updates |tweak| to contain the next tweak.

.set	VL, 16
.set	USE_AVX10, 0
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_xts_encrypt_aesni_avx)
	_aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_aesni_avx)
@@ -863,7 +854,7 @@ SYM_FUNC_END(aes_xts_decrypt_aesni_avx)

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set	VL, 32
.set	USE_AVX10, 0
.set	USE_AVX512, 0
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
	_aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx2)
@@ -871,21 +862,12 @@ SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2)
	_aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx2)

.set	VL, 32
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256)
	_aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
	_aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)

.set	VL, 64
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_512)
.set	USE_AVX512, 1
SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx512)
	_aes_xts_crypt	1
SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_512)
SYM_FUNC_END(aes_xts_encrypt_vaes_avx512)
SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx512)
	_aes_xts_crypt	0
SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_512)
SYM_FUNC_END(aes_xts_decrypt_vaes_avx512)
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
+10 −20
Original line number Diff line number Diff line
@@ -844,8 +844,7 @@ simd_skcipher_algs_##suffix[ARRAY_SIZE(skcipher_algs_##suffix)]
DEFINE_AVX_SKCIPHER_ALGS(aesni_avx, "aesni-avx", 500);
#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx2, "vaes-avx2", 600);
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_256, "vaes-avx10_256", 700);
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx10_512, "vaes-avx10_512", 800);
DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
#endif

/* The common part of the x86_64 AES-GCM key struct */
@@ -1592,11 +1591,6 @@ static int __init register_avx_algs(void)
			       XFEATURE_MASK_AVX512, NULL))
		return 0;

	err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_256,
					     ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
					     simd_skcipher_algs_vaes_avx10_256);
	if (err)
		return err;
	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256,
					 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
					 aes_gcm_simdalgs_vaes_avx10_256);
@@ -1606,15 +1600,15 @@ static int __init register_avx_algs(void)
	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
		int i;

		for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
			skcipher_algs_vaes_avx10_512[i].base.cra_priority = 1;
		for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx512); i++)
			skcipher_algs_vaes_avx512[i].base.cra_priority = 1;
		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
	}

	err = simd_register_skciphers_compat(skcipher_algs_vaes_avx10_512,
					     ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
					     simd_skcipher_algs_vaes_avx10_512);
	err = simd_register_skciphers_compat(skcipher_algs_vaes_avx512,
					     ARRAY_SIZE(skcipher_algs_vaes_avx512),
					     simd_skcipher_algs_vaes_avx512);
	if (err)
		return err;
	err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512,
@@ -1641,18 +1635,14 @@ static void unregister_avx_algs(void)
		simd_unregister_skciphers(skcipher_algs_vaes_avx2,
					  ARRAY_SIZE(skcipher_algs_vaes_avx2),
					  simd_skcipher_algs_vaes_avx2);
	if (simd_skcipher_algs_vaes_avx10_256[0])
		simd_unregister_skciphers(skcipher_algs_vaes_avx10_256,
					  ARRAY_SIZE(skcipher_algs_vaes_avx10_256),
					  simd_skcipher_algs_vaes_avx10_256);
	if (aes_gcm_simdalgs_vaes_avx10_256[0])
		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256,
				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
				      aes_gcm_simdalgs_vaes_avx10_256);
	if (simd_skcipher_algs_vaes_avx10_512[0])
		simd_unregister_skciphers(skcipher_algs_vaes_avx10_512,
					  ARRAY_SIZE(skcipher_algs_vaes_avx10_512),
					  simd_skcipher_algs_vaes_avx10_512);
	if (simd_skcipher_algs_vaes_avx512[0])
		simd_unregister_skciphers(skcipher_algs_vaes_avx512,
					  ARRAY_SIZE(skcipher_algs_vaes_avx512),
					  simd_skcipher_algs_vaes_avx512);
	if (aes_gcm_simdalgs_vaes_avx10_512[0])
		simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512,
				      ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512),