crypto: x86/aria - Use RIP-relative addressing (52fc482a) · Commits · git / linux-nf

arch/x86/crypto/aria-aesni-avx-asm_64.S

+14 −14

Original line number	Diff line number	Diff line
		@@ -80,7 +80,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vmovdqu .Lshufb_16x16b, a0; \
		+vmovdqu .Lshufb_16x16b(%rip), a0; \
		vmovdqu st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -132,7 +132,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vmovdqu .Lshufb_16x16b, a0; \
		+vmovdqu .Lshufb_16x16b(%rip), a0; \
		vmovdqu st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -300,11 +300,11 @@
		x4, x5, x6, x7, \
		t0, t1, t2, t3, \
		t4, t5, t6, t7) \
		-vmovdqa .Ltf_s2_bitmatrix, t0; \
		-vmovdqa .Ltf_inv_bitmatrix, t1; \
		-vmovdqa .Ltf_id_bitmatrix, t2; \
		-vmovdqa .Ltf_aff_bitmatrix, t3; \
		-vmovdqa .Ltf_x2_bitmatrix, t4; \
		+vmovdqa .Ltf_s2_bitmatrix(%rip), t0; \
		+vmovdqa .Ltf_inv_bitmatrix(%rip), t1; \
		+vmovdqa .Ltf_id_bitmatrix(%rip), t2; \
		+vmovdqa .Ltf_aff_bitmatrix(%rip), t3; \
		+vmovdqa .Ltf_x2_bitmatrix(%rip), t4; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
		vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
		@@ -324,13 +324,13 @@
		x4, x5, x6, x7, \
		t0, t1, t2, t3, \
		t4, t5, t6, t7) \
		-vmovdqa .Linv_shift_row, t0; \
		-vmovdqa .Lshift_row, t1; \
		-vbroadcastss .L0f0f0f0f, t6; \
		-vmovdqa .Ltf_lo__inv_aff__and__s2, t2; \
		-vmovdqa .Ltf_hi__inv_aff__and__s2, t3; \
		-vmovdqa .Ltf_lo__x2__and__fwd_aff, t4; \
		-vmovdqa .Ltf_hi__x2__and__fwd_aff, t5; \
		+vmovdqa .Linv_shift_row(%rip), t0; \
		+vmovdqa .Lshift_row(%rip), t1; \
		+vbroadcastss .L0f0f0f0f(%rip), t6; \
		+vmovdqa .Ltf_lo__inv_aff__and__s2(%rip), t2; \
		+vmovdqa .Ltf_hi__inv_aff__and__s2(%rip), t3; \
		+vmovdqa .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
		+vmovdqa .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
		\
		vaesenclast t7, x0, x0; \
		vaesenclast t7, x4, x4; \

arch/x86/crypto/aria-aesni-avx2-asm_64.S

+14 −14

Original line number	Diff line number	Diff line
		@@ -96,7 +96,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vbroadcasti128 .Lshufb_16x16b, a0; \
		+vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
		vmovdqu st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -148,7 +148,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vbroadcasti128 .Lshufb_16x16b, a0; \
		+vbroadcasti128 .Lshufb_16x16b(%rip), a0; \
		vmovdqu st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -307,11 +307,11 @@
		x4, x5, x6, x7, \
		t0, t1, t2, t3, \
		t4, t5, t6, t7) \
		-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
		-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
		-vpbroadcastq .Ltf_id_bitmatrix, t2; \
		-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
		-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
		+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
		+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
		+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
		+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
		+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
		vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
		@@ -332,12 +332,12 @@
		t4, t5, t6, t7) \
		vpxor t7, t7, t7; \
		vpxor t6, t6, t6; \
		-vbroadcasti128 .Linv_shift_row, t0; \
		-vbroadcasti128 .Lshift_row, t1; \
		-vbroadcasti128 .Ltf_lo__inv_aff__and__s2, t2; \
		-vbroadcasti128 .Ltf_hi__inv_aff__and__s2, t3; \
		-vbroadcasti128 .Ltf_lo__x2__and__fwd_aff, t4; \
		-vbroadcasti128 .Ltf_hi__x2__and__fwd_aff, t5; \
		+vbroadcasti128 .Linv_shift_row(%rip), t0; \
		+vbroadcasti128 .Lshift_row(%rip), t1; \
		+vbroadcasti128 .Ltf_lo__inv_aff__and__s2(%rip), t2; \
		+vbroadcasti128 .Ltf_hi__inv_aff__and__s2(%rip), t3; \
		+vbroadcasti128 .Ltf_lo__x2__and__fwd_aff(%rip), t4; \
		+vbroadcasti128 .Ltf_hi__x2__and__fwd_aff(%rip), t5; \
		\
		vextracti128 $1, x0, t6##_x; \
		vaesenclast t7##_x, x0##_x, x0##_x; \
		@@ -369,7 +369,7 @@
		vaesdeclast t7##_x, t6##_x, t6##_x; \
		vinserti128 $1, t6##_x, x6, x6; \
		\
		-vpbroadcastd .L0f0f0f0f, t6; \
		+vpbroadcastd .L0f0f0f0f(%rip), t6; \
		\
		/* AES inverse shift rows */ \
		vpshufb t0, x0, x0; \

arch/x86/crypto/aria-gfni-avx512-asm_64.S

+12 −12

Original line number	Diff line number	Diff line
		@@ -80,7 +80,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vbroadcasti64x2 .Lshufb_16x16b, a0; \
		+vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
		vmovdqu64 st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -132,7 +132,7 @@
		transpose_4x4(c0, c1, c2, c3, a0, a1); \
		transpose_4x4(d0, d1, d2, d3, a0, a1); \
		\
		-vbroadcasti64x2 .Lshufb_16x16b, a0; \
		+vbroadcasti64x2 .Lshufb_16x16b(%rip), a0; \
		vmovdqu64 st1, a1; \
		vpshufb a0, a2, a2; \
		vpshufb a0, a3, a3; \
		@@ -308,11 +308,11 @@
		x4, x5, x6, x7, \
		t0, t1, t2, t3, \
		t4, t5, t6, t7) \
		-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
		-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
		-vpbroadcastq .Ltf_id_bitmatrix, t2; \
		-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
		-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
		+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
		+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
		+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
		+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
		+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
		vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \
		@@ -332,11 +332,11 @@
		y4, y5, y6, y7, \
		t0, t1, t2, t3, \
		t4, t5, t6, t7) \
		-vpbroadcastq .Ltf_s2_bitmatrix, t0; \
		-vpbroadcastq .Ltf_inv_bitmatrix, t1; \
		-vpbroadcastq .Ltf_id_bitmatrix, t2; \
		-vpbroadcastq .Ltf_aff_bitmatrix, t3; \
		-vpbroadcastq .Ltf_x2_bitmatrix, t4; \
		+vpbroadcastq .Ltf_s2_bitmatrix(%rip), t0; \
		+vpbroadcastq .Ltf_inv_bitmatrix(%rip), t1; \
		+vpbroadcastq .Ltf_id_bitmatrix(%rip), t2; \
		+vpbroadcastq .Ltf_aff_bitmatrix(%rip), t3; \
		+vpbroadcastq .Ltf_x2_bitmatrix(%rip), t4; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1; \
		vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5; \
		vgf2p8affineqb $(tf_inv_const), t1, x2, x2; \