Commit 0dcc7782 authored by Ard Biesheuvel, committed by Herbert Xu
Browse files

crypto: x86/cast5 - Use RIP-relative addressing



Prefer RIP-relative addressing where possible, which removes the need
for boot time relocation fixups.

Co-developed-by: Thomas Garnier <thgarnie@chromium.org>
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 24ff1e9d
Loading
Loading
Loading
Loading
+21 −17
Original line number Diff line number Diff line
@@ -84,15 +84,19 @@

 #define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
 	movzbl		src ## bh,     RID1d;    \
+	leaq		s1(%rip),      RID2;     \
+	movl		(RID2,RID1,4), dst ## d; \
 	movzbl		src ## bl,     RID2d;    \
+	leaq		s2(%rip),      RID1;     \
+	op1		(RID1,RID2,4), dst ## d; \
 	shrq $16,	src;                     \
-	movl		s1(, RID1, 4), dst ## d; \
-	op1		s2(, RID2, 4), dst ## d; \
 	movzbl		src ## bh,     RID1d;    \
+	leaq		s3(%rip),      RID2;     \
+	op2		(RID2,RID1,4), dst ## d; \
 	movzbl		src ## bl,     RID2d;    \
 	interleave_op(il_reg);			 \
-	op2		s3(, RID1, 4), dst ## d; \
-	op3		s4(, RID2, 4), dst ## d;
+	leaq		s4(%rip),      RID1;     \
+	op3		(RID1,RID2,4), dst ## d;

 #define dummy(d) /* do nothing */

@@ -151,15 +155,15 @@
 	subround(l ## 3, r ## 3, l ## 4, r ## 4, f);

 #define enc_preload_rkr() \
-	vbroadcastss	.L16_mask,                RKR;      \
+	vbroadcastss	.L16_mask(%rip),          RKR;      \
 	/* add 16-bit rotation to key rotations (mod 32) */ \
 	vpxor		kr(CTX),                  RKR, RKR;

 #define dec_preload_rkr() \
-	vbroadcastss	.L16_mask,                RKR;      \
+	vbroadcastss	.L16_mask(%rip),          RKR;      \
 	/* add 16-bit rotation to key rotations (mod 32) */ \
 	vpxor		kr(CTX),                  RKR, RKR; \
-	vpshufb		.Lbswap128_mask,          RKR, RKR;
+	vpshufb		.Lbswap128_mask(%rip),    RKR, RKR;

 #define transpose_2x4(x0, x1, t0, t1) \
 	vpunpckldq		x1, x0, t0; \
@@ -235,9 +239,9 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)

 	movq %rdi, CTX;

-	vmovdqa .Lbswap_mask, RKM;
-	vmovd .Lfirst_mask, R1ST;
-	vmovd .L32_mask, R32;
+	vmovdqa .Lbswap_mask(%rip), RKM;
+	vmovd .Lfirst_mask(%rip), R1ST;
+	vmovd .L32_mask(%rip), R32;
 	enc_preload_rkr();

 	inpack_blocks(RL1, RR1, RTMP, RX, RKM);
@@ -271,7 +275,7 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	popq %rbx;
 	popq %r15;

-	vmovdqa .Lbswap_mask, RKM;
+	vmovdqa .Lbswap_mask(%rip), RKM;

 	outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
 	outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -308,9 +312,9 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)

 	movq %rdi, CTX;

-	vmovdqa .Lbswap_mask, RKM;
-	vmovd .Lfirst_mask, R1ST;
-	vmovd .L32_mask, R32;
+	vmovdqa .Lbswap_mask(%rip), RKM;
+	vmovd .Lfirst_mask(%rip), R1ST;
+	vmovd .L32_mask(%rip), R32;
 	dec_preload_rkr();

 	inpack_blocks(RL1, RR1, RTMP, RX, RKM);
@@ -341,7 +345,7 @@ SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
 	round(RL, RR, 1, 2);
 	round(RR, RL, 0, 1);

-	vmovdqa .Lbswap_mask, RKM;
+	vmovdqa .Lbswap_mask(%rip), RKM;
 	popq %rbx;
 	popq %r15;

@@ -504,8 +508,8 @@ SYM_FUNC_START(cast5_ctr_16way)

 	vpcmpeqd RKR, RKR, RKR;
 	vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
-	vmovdqa .Lbswap_iv_mask, R1ST;
-	vmovdqa .Lbswap128_mask, RKM;
+	vmovdqa .Lbswap_iv_mask(%rip), R1ST;
+	vmovdqa .Lbswap128_mask(%rip), RKM;

 	/* load IV and byteswap */
 	vmovq (%rcx), RX;