Commit 7f8ec316, authored by Ard Biesheuvel, committed by Herbert Xu
Browse files

crypto: x86/cast6 - Use RIP-relative addressing



Prefer RIP-relative addressing where possible, which removes the need
for boot time relocation fixups.

Co-developed-by: Thomas Garnier <thgarnie@chromium.org>
Signed-off-by: Thomas Garnier <thgarnie@chromium.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 0dcc7782
Loading
Loading
Loading
Loading
+18 −14
Original line number Diff line number Diff line
@@ -84,15 +84,19 @@

/*
 * lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg)
 *
 * Combine four table entries, each selected by one byte of src, into dst ## d:
 *
 *   dst ## d  =  s1[src ## bh]
 *   dst ## d op1= s2[src ## bl]
 *   src >>= 16
 *   dst ## d op2= s3[src ## bh]
 *   dst ## d op3= s4[src ## bl]
 *
 * Entries are indexed with a scale of 4. interleave_op(il_reg) is emitted
 * between the third and fourth lookup.
 *
 * A RIP-relative operand cannot carry an index register, so every table base
 * is first materialised with leaq and the entry is then fetched through
 * (base,index,4). RID1 and RID2 swap the "index" and "base" roles on each
 * step: the next movzbl may only overwrite a register after the lookup that
 * used it as a base has been issued, so the statement order matters.
 *
 * Clobbers: RID1, RID2; src is left shifted right by 16 bits.
 */
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
	movzbl		src ## bh,     RID1d;    \
	leaq		s1(%rip),      RID2;     \
	movl		(RID2,RID1,4), dst ## d; \
	movzbl		src ## bl,     RID2d;    \
	leaq		s2(%rip),      RID1;     \
	op1		(RID1,RID2,4), dst ## d; \
	shrq $16,	src;                     \
	movzbl		src ## bh,     RID1d;    \
	leaq		s3(%rip),      RID2;     \
	op2		(RID2,RID1,4), dst ## d; \
	movzbl		src ## bl,     RID2d;    \
	interleave_op(il_reg);			 \
	leaq		s4(%rip),      RID1;     \
	op3		(RID1,RID2,4), dst ## d;

#define dummy(d) /* do nothing */

@@ -175,10 +179,10 @@
	qop(RD, RC, 1);

/*
 * shuffle(mask): permute the bytes of RKR in place with vpshufb, using the
 * control vector stored at the symbol `mask`. The operand is addressed
 * RIP-relatively so no absolute relocation is emitted for it.
 */
#define shuffle(mask) \
	vpshufb		mask(%rip),            RKR, RKR;

/*
 * preload_rkr(n, do_mask, mask)
 *
 * Load RKR for key-rotation slot n:
 *   1. broadcast the 32-bit constant at .L16_mask into every lane of RKR
 *      (RIP-relative, so no absolute relocation is needed);
 *   2. XOR in the 16 bytes at offset kr + n*16 from CTX;
 *   3. expand do_mask(mask), a caller-supplied macro applied afterwards
 *      (presumably shuffle or dummy; confirm against the call sites).
 */
#define preload_rkr(n, do_mask, mask) \
	vbroadcastss	.L16_mask(%rip),          RKR;      \
	/* add 16-bit rotation to key rotations (mod 32) */ \
	vpxor		(kr+n*16)(CTX),           RKR, RKR; \
	do_mask(mask);
@@ -258,9 +262,9 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)

	movq %rdi, CTX;

	vmovdqa .Lbswap_mask, RKM;
	vmovd .Lfirst_mask, R1ST;
	vmovd .L32_mask, R32;
	vmovdqa .Lbswap_mask(%rip), RKM;
	vmovd .Lfirst_mask(%rip), R1ST;
	vmovd .L32_mask(%rip), R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -284,7 +288,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
	popq %rbx;
	popq %r15;

	vmovdqa .Lbswap_mask, RKM;
	vmovdqa .Lbswap_mask(%rip), RKM;

	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -306,9 +310,9 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)

	movq %rdi, CTX;

	vmovdqa .Lbswap_mask, RKM;
	vmovd .Lfirst_mask, R1ST;
	vmovd .L32_mask, R32;
	vmovdqa .Lbswap_mask(%rip), RKM;
	vmovd .Lfirst_mask(%rip), R1ST;
	vmovd .L32_mask(%rip), R32;

	inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -332,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
	popq %rbx;
	popq %r15;

	vmovdqa .Lbswap_mask, RKM;
	vmovdqa .Lbswap_mask(%rip), RKM;
	outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
	outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);