Commit ea9459ef authored by Eric Biggers, committed by Herbert Xu
Browse files

crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()



Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0.  This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 1d27e1f5
Loading
Loading
Loading
Loading
+79 −191
Original line number Diff line number Diff line
@@ -2825,28 +2825,24 @@ SYM_FUNC_END(aesni_ctr_enc)
.previous

/*
 * _aesni_gf128mul_x_ble:		internal ABI
 *	Multiply in GF(2^128) for XTS IVs
 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
 * input:
 *	IV:	current IV
 *	GF128MUL_MASK == mask with 0x87 and 0x01
 * output:
 *	IV:	next IV
 * changed:
 *	CTR:	== temporary value
 *	KEY:	== temporary value
 */
/*
 * C-preprocessor form of the helper; call sites write it with
 * parentheses: _aesni_gf128mul_x_ble().  It expands to the same five
 * instructions as the assembler macro of the same name below.
 * NOTE(review): this span shows two definitions of one helper; going by
 * the commit description this #define is presumably the form being
 * replaced by the .macro -- confirm against the real diff.
 */
#define _aesni_gf128mul_x_ble() \
	pshufd $0x13, IV, KEY; \
	paddq IV, IV; \
	psrad $31, KEY; \
	pand GF128MUL_MASK, KEY; \
	pxor KEY, IV;
/*
 * Assembler-macro form; call sites write it without parentheses.
 * Advances IV to the next XTS tweak in place, using KEY as scratch.
 */
.macro _aesni_gf128mul_x_ble
	/* KEY dwords (low to high) = IV dwords { 3, 0, 1, 0 } */
	pshufd $0x13, IV, KEY
	/* double each 64-bit half of IV; the top bit of each half falls off */
	paddq IV, IV
	/* turn each KEY dword into all-ones if its top bit was set, else 0 */
	psrad $31, KEY
	/*
	 * keep only the bits selected by the mask
	 * NOTE(review): presumably 0x87 lines up with KEY dword 0 (old IV
	 * bit 127) and 0x01 with KEY dword 2 (old IV bit 63); the mask
	 * definition is not visible here -- confirm.
	 */
	pand GF128MUL_MASK, KEY
	/* fold the carry / reduction bits back into the doubled IV */
	pxor KEY, IV
.endm

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
SYM_FUNC_START(aesni_xts_enc)
.macro	_aesni_xts_crypt	enc
	FRAME_BEGIN
#ifndef __x86_64__
	pushl IVP
@@ -2865,35 +2861,46 @@ SYM_FUNC_START(aesni_xts_enc)
	movups (IVP), IV

	mov 480(KEYP), KLEN
.if !\enc
	add $240, KEYP

	test $15, LEN
	jz .Lxts_loop4\@
	sub $16, LEN
.endif

.Lxts_enc_loop4:
.Lxts_loop4\@:
	sub $64, LEN
	jl .Lxts_enc_1x
	jl .Lxts_1x\@

	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble()
	_aesni_gf128mul_x_ble
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	_aesni_gf128mul_x_ble
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	_aesni_gf128mul_x_ble
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

.if \enc
	call _aesni_enc4
.else
	call _aesni_dec4
.endif

	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
@@ -2911,17 +2918,17 @@ SYM_FUNC_START(aesni_xts_enc)
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	_aesni_gf128mul_x_ble()
	_aesni_gf128mul_x_ble

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_enc_loop4
	jnz .Lxts_loop4\@

.Lxts_enc_ret_iv:
.Lxts_ret_iv\@:
	movups IV, (IVP)

.Lxts_enc_ret:
.Lxts_ret\@:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
@@ -2931,39 +2938,60 @@ SYM_FUNC_START(aesni_xts_enc)
	FRAME_END
	RET

.Lxts_enc_1x:
.Lxts_1x\@:
	add $64, LEN
	jz .Lxts_enc_ret_iv
	jz .Lxts_ret_iv\@
.if \enc
	sub $16, LEN
	jl .Lxts_enc_cts4
	jl .Lxts_cts4\@
.endif

.Lxts_enc_loop1:
.Lxts_loop1\@:
	movdqu (INP), STATE
.if \enc
	pxor IV, STATE
	call _aesni_enc1
.else
	add $16, INP
	sub $16, LEN
	jl .Lxts_cts1\@
	pxor IV, STATE
	_aesni_gf128mul_x_ble()
	call _aesni_dec1
.endif
	pxor IV, STATE
	_aesni_gf128mul_x_ble

	test LEN, LEN
	jz .Lxts_enc_out
	jz .Lxts_out\@

.if \enc
	add $16, INP
	sub $16, LEN
	jl .Lxts_enc_cts1
	jl .Lxts_cts1\@
.endif

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_enc_loop1
	jmp .Lxts_loop1\@

.Lxts_enc_out:
.Lxts_out\@:
	movdqu STATE, (OUTP)
	jmp .Lxts_enc_ret_iv
	jmp .Lxts_ret_iv\@

.Lxts_enc_cts4:
.if \enc
.Lxts_cts4\@:
	movdqa STATE4, STATE
	sub $16, OUTP
.Lxts_cts1\@:
.else
.Lxts_cts1\@:
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble

.Lxts_enc_cts1:
	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
.endif
#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
@@ -2989,12 +3017,26 @@ SYM_FUNC_START(aesni_xts_enc)
	pblendvb IN2, IN1
	movaps IN1, STATE

.if \enc
	pxor IV, STATE
	call _aesni_enc1
	pxor IV, STATE
.else
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE
.endif

	movups STATE, (OUTP)
	jmp .Lxts_enc_ret
	jmp .Lxts_ret\@
.endm

/*
 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
 *		      const u8 *src, unsigned int len, le128 *iv)
 *
 * XTS encryption entry point.  The entire body is the expansion of
 * _aesni_xts_crypt with enc=1, which selects the _aesni_enc4 /
 * _aesni_enc1 calls in that macro's ".if \enc" branches.
 */
SYM_FUNC_START(aesni_xts_enc)
	_aesni_xts_crypt	1
SYM_FUNC_END(aesni_xts_enc)

/*
@@ -3002,159 +3044,5 @@ SYM_FUNC_END(aesni_xts_enc)
 *		      const u8 *src, unsigned int len, le128 *iv)
 */
/*
 * NOTE(review): this span appears to show the open-coded decryption body
 * followed (at the very end) by the _aesni_xts_crypt invocation that the
 * commit description says replaces it -- confirm which lines survive.
 */
SYM_FUNC_START(aesni_xts_dec)
	FRAME_BEGIN
#ifndef __x86_64__
	/*
	 * 32-bit: save IVP, LEN, KEYP and KLEN (restored at .Lxts_dec_ret),
	 * then fetch the five arguments from the stack.
	 */
	pushl IVP
	pushl LEN
	pushl KEYP
	pushl KLEN
	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
	movl (FRAME_OFFSET+28)(%esp), INP	# src
	movl (FRAME_OFFSET+32)(%esp), LEN	# len
	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
#else
	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
	/* IV = caller's current tweak */
	movups (IVP), IV

	/*
	 * NOTE(review): offset 480 is presumably the key-length field of
	 * struct crypto_aes_ctx and +240 presumably moves KEYP to the
	 * decryption round keys; the struct layout is not visible here.
	 */
	mov 480(KEYP), KLEN
	add $240, KEYP

	/*
	 * If len is not a multiple of 16, hold back one full block so that
	 * it is handled together with the partial tail in .Lxts_dec_cts1.
	 */
	test $15, LEN
	jz .Lxts_dec_loop4
	sub $16, LEN

	/* Main loop: four blocks (64 bytes) per iteration. */
.Lxts_dec_loop4:
	sub $64, LEN
	jl .Lxts_dec_1x

	/*
	 * For each of the four blocks: STATEn = input block XOR tweak.
	 * The tweak itself is parked in the matching output slot so it can
	 * be XORed back in after _aesni_dec4.
	 */
	movdqa IV, STATE1
	movdqu 0x00(INP), IN
	pxor IN, STATE1
	movdqu IV, 0x00(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE2
	movdqu 0x10(INP), IN
	pxor IN, STATE2
	movdqu IV, 0x10(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE3
	movdqu 0x20(INP), IN
	pxor IN, STATE3
	movdqu IV, 0x20(OUTP)

	_aesni_gf128mul_x_ble()
	movdqa IV, STATE4
	movdqu 0x30(INP), IN
	pxor IN, STATE4
	movdqu IV, 0x30(OUTP)

	call _aesni_dec4

	/* Reload each parked tweak, XOR it in, and overwrite it with the result. */
	movdqu 0x00(OUTP), IN
	pxor IN, STATE1
	movdqu STATE1, 0x00(OUTP)

	movdqu 0x10(OUTP), IN
	pxor IN, STATE2
	movdqu STATE2, 0x10(OUTP)

	movdqu 0x20(OUTP), IN
	pxor IN, STATE3
	movdqu STATE3, 0x20(OUTP)

	movdqu 0x30(OUTP), IN
	pxor IN, STATE4
	movdqu STATE4, 0x30(OUTP)

	/* IV = tweak for the block after these four */
	_aesni_gf128mul_x_ble()

	add $64, INP
	add $64, OUTP
	test LEN, LEN
	jnz .Lxts_dec_loop4

	/* Done with whole blocks: hand the next tweak back through *iv. */
.Lxts_dec_ret_iv:
	movups IV, (IVP)

.Lxts_dec_ret:
#ifndef __x86_64__
	popl KLEN
	popl KEYP
	popl LEN
	popl IVP
#endif
	FRAME_END
	RET

	/* Fewer than 64 bytes were left: undo the subtraction made in loop4. */
.Lxts_dec_1x:
	add $64, LEN
	jz .Lxts_dec_ret_iv

	/* Tail loop: one block per iteration. */
.Lxts_dec_loop1:
	movdqu (INP), STATE

	/*
	 * LEN going negative here means only the held-back block plus a
	 * partial tail remain, so switch to ciphertext stealing.
	 */
	add $16, INP
	sub $16, LEN
	jl .Lxts_dec_cts1

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE
	_aesni_gf128mul_x_ble()

	test LEN, LEN
	jz .Lxts_dec_out

	movdqu STATE, (OUTP)
	add $16, OUTP
	jmp .Lxts_dec_loop1

	/* Last block and no partial tail: store it and write the tweak back. */
.Lxts_dec_out:
	movdqu STATE, (OUTP)
	jmp .Lxts_dec_ret_iv

	/*
	 * Ciphertext stealing.  The current tweak is kept in STATE4 and IV is
	 * advanced once: the full block already in STATE is decrypted with
	 * the advanced tweak, the reassembled final block with the saved one.
	 */
.Lxts_dec_cts1:
	movdqa IV, STATE4
	_aesni_gf128mul_x_ble()

	pxor IV, STATE
	call _aesni_dec1
	pxor IV, STATE

#ifndef __x86_64__
	lea .Lcts_permute_table, T1
#else
	lea .Lcts_permute_table(%rip), T1
#endif
	add LEN, INP		/* rewind input pointer */
	add $16, LEN		/* # bytes in final block */
	movups (INP), IN1

	/*
	 * IVP is reused as a scratch pointer from here on:
	 * T1 = table + LEN, IVP = table + 32 - LEN, LEN = OUTP + LEN.
	 */
	mov T1, IVP
	add $32, IVP
	add LEN, T1
	sub LEN, IVP
	add OUTP, LEN

	/* Permute the decrypted block and store 16 bytes at the address now in LEN. */
	movups (T1), %xmm4
	movaps STATE, IN2
	pshufb %xmm4, STATE
	movups STATE, (LEN)

	/*
	 * Merge the input bytes loaded into IN1 with the decrypted block
	 * saved in IN2; pblendvb takes its byte selector implicitly from
	 * %xmm0.
	 */
	movups (IVP), %xmm0
	pshufb %xmm0, IN1
	pblendvb IN2, IN1
	movaps IN1, STATE

	/* Decrypt the merged block with the tweak saved in STATE4. */
	pxor STATE4, STATE
	call _aesni_dec1
	pxor STATE4, STATE

	/*
	 * Exit through .Lxts_dec_ret, which skips the tweak write-back
	 * (IVP no longer holds the caller's iv pointer on this path).
	 */
	movups STATE, (OUTP)
	jmp .Lxts_dec_ret
	/* Body generated from the shared _aesni_xts_crypt macro with enc=0. */
	_aesni_xts_crypt	0
SYM_FUNC_END(aesni_xts_dec)