Commit 571e557c authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu
Browse files

crypto: arm64/aes-ce - Simplify round key load sequence



Tweak the round key logic so that they can be loaded using a single
branchless sequence using overlapping loads. This is shorter and
simpler, and puts the conditional branches based on the key size further
apart, which might benefit microarchitectures that cannot record taken
branches at every instruction. For these branches, use test-bit-branch
instructions that don't clobber the condition flags.

Note that none of this has any impact on performance, positive or
otherwise (and the branch prediction benefit would only benefit AES-192
which nobody uses). It does make for nicer code, though.

While at it, use \@ to generate the labels inside the macros, which is
more robust than using fixed numbers, which could clash inadvertently.
Also, bring aes-neon.S in line with these changes, including the switch
to test-and-branch instructions, to avoid surprises in the future when
we might start relying on the condition flags being preserved in the
chaining mode wrappers in aes-modes.S

Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
Reviewed-by: default avatarEric Biggers <ebiggers@google.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 3f4d1482
Loading
Loading
Loading
Loading
+14 −20
Original line number Diff line number Diff line
@@ -25,33 +25,28 @@
	.endm

	/* preload all round keys */
	.macro		load_round_keys, rounds, rk
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	ld1		{v17.4s-v18.4s}, [\rk], #32
1111:	ld1		{v19.4s-v20.4s}, [\rk], #32
2222:	ld1		{v21.4s-v24.4s}, [\rk], #64
	ld1		{v25.4s-v28.4s}, [\rk], #64
	ld1		{v29.4s-v31.4s}, [\rk]
	.macro		load_round_keys, rk, nr, tmp
	add		\tmp, \rk, \nr, sxtw #4
	sub		\tmp, \tmp, #160
	ld1		{v17.4s-v20.4s}, [\rk]
	ld1		{v21.4s-v24.4s}, [\tmp], #64
	ld1		{v25.4s-v28.4s}, [\tmp], #64
	ld1		{v29.4s-v31.4s}, [\tmp]
	.endm

	/* prepare for encryption with key in rk[] */
	.macro		enc_prepare, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
	load_round_keys	\rk, \rounds, \temp
	.endm

	/* prepare for encryption (again) but with new key in rk[] */
	.macro		enc_switch_key, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
	load_round_keys	\rk, \rounds, \temp
	.endm

	/* prepare for decryption with key in rk[] */
	.macro		dec_prepare, rounds, rk, temp
	mov		\temp, \rk
	load_round_keys	\rounds, \temp
	load_round_keys	\rk, \rounds, \temp
	.endm

	.macro		do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
@@ -110,14 +105,13 @@

	/* up to 5 interleaved blocks */
	.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
	cmp		\rounds, #12
	blo		2222f		/* 128 bits */
	beq		1111f		/* 192 bits */
	tbz		\rounds, #2, .L\@	/* 128 bits */
	round_Nx	\enc, v17, \i0, \i1, \i2, \i3, \i4
	round_Nx	\enc, v18, \i0, \i1, \i2, \i3, \i4
1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3, \i4
	tbz		\rounds, #1, .L\@	/* 192 bits */
	round_Nx	\enc, v19, \i0, \i1, \i2, \i3, \i4
	round_Nx	\enc, v20, \i0, \i1, \i2, \i3, \i4
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
.L\@:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
	round_Nx	\enc, \key, \i0, \i1, \i2, \i3, \i4
	.endr
	fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3, \i4
+10 −10
Original line number Diff line number Diff line
@@ -99,16 +99,16 @@
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
.La\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	cbz		\i, .Lb\@
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	b		.La\@
.Lb\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm

	.macro		encrypt_block, in, rounds, rk, rkp, i
@@ -206,7 +206,7 @@
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
.La\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
@@ -216,13 +216,13 @@
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	cbz		\i, .Lb\@
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	b		.La\@
.Lb\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */