mirror of git://gcc.gnu.org/git/gcc.git
[Aarch64][SVE] Add copysign and xorsign support
This patch adds support for the copysign and xorsign builtins to SVE. With the new expanders, they can be vectorized using bitwise logical operations.

I tested this patch on an aarch64 machine by bootstrapping the compiler and running the checks.

2019-01-09 Alejandro Martinez <alejandro.martinezvicente@arm.com>

* config/aarch64/aarch64-sve.md (copysign<mode>3): New define_expand.
(xorsign<mode>3): Likewise.

2019-01-09 Alejandro Martinez <alejandro.martinezvicente@arm.com>

* gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized copysign.
* gcc.target/aarch64/sve/copysign_1_run.c: Likewise.
* gcc.target/aarch64/sve/xorsign_1.c: New test for SVE vectorized xorsign.
* gcc.target/aarch64/sve/xorsign_1_run.c: Likewise.

From-SVN: r267764
This commit is contained in:
parent
d3c8a7cfdb
commit
6c9c7b735c
|
|
@ -1,3 +1,8 @@
|
||||||
|
2019-01-09 Alejandro Martinez <alejandro.martinezvicente@arm.com>
|
||||||
|
|
||||||
|
* config/aarch64/aarch64-sve.md (copysign<mode>3): New define_expand.
|
||||||
|
(xorsign<mode>3): Likewise.
|
||||||
|
|
||||||
2019-01-09 Jakub Jelinek <jakub@redhat.com>
|
2019-01-09 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
PR middle-end/88758
|
PR middle-end/88758
|
||||||
|
|
|
||||||
|
|
@ -3074,3 +3074,57 @@
|
||||||
insr\t%0.<Vetype>, %<vwcore>2
|
insr\t%0.<Vetype>, %<vwcore>2
|
||||||
insr\t%0.<Vetype>, %<Vetype>2"
|
insr\t%0.<Vetype>, %<Vetype>2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
;; Expand copysign for SVE floating-point vector modes.  Operand 0 receives
;; operand 1 with its per-element top bit replaced by the top bit of
;; operand 2.  The computation is done in the integer-equivalent vector mode
;; using two ANDs with constant masks followed by an IOR.
(define_expand "copysign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Shift count that leaves only the top bit of each element set in
       the mask below.  */
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* Reinterpret both floating-point inputs as integer vectors.  */
    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    /* sign = arg2 & sign-bit mask.  */
    emit_insn (gen_and<v_int_equiv>3
               (sign, arg2,
                aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                   HOST_WIDE_INT_M1U
                                                   << bits)));
    /* mant = arg1 & ~sign-bit mask, i.e. everything except the sign.  */
    emit_insn (gen_and<v_int_equiv>3
               (mant, arg1,
                aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                   ~(HOST_WIDE_INT_M1U
                                                     << bits))));
    /* Merge the two halves and move the result back in the FP mode.  */
    emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)
|
||||||
|
|
||||||
|
;; Expand xorsign for SVE floating-point vector modes.  Operand 0 receives
;; operand 1 with its per-element top bit XORed with the top bit of
;; operand 2.  The computation is done in the integer-equivalent vector mode
;; using one AND with a constant mask followed by an XOR.
(define_expand "xorsign<mode>3"
  [(match_operand:SVE_F 0 "register_operand")
   (match_operand:SVE_F 1 "register_operand")
   (match_operand:SVE_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Shift count that leaves only the top bit of each element set in
       the mask below.  */
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* Reinterpret both floating-point inputs as integer vectors.  */
    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    /* sign = arg2 & sign-bit mask.  */
    emit_insn (gen_and<v_int_equiv>3
               (sign, arg2,
                aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                   HOST_WIDE_INT_M1U
                                                   << bits)));
    /* Flip arg1's sign wherever arg2's sign bit is set, then move the
       result back in the FP mode.  */
    emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
|
2019-01-09 Alejandro Martinez <alejandro.martinezvicente@arm.com>
|
||||||
|
|
||||||
|
* gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized
|
||||||
|
copysign.
|
||||||
|
* gcc.target/aarch64/sve/copysign_1_run.c: Likewise.
|
||||||
|
* gcc.target/aarch64/sve/xorsign_1.c: New test for SVE vectorized
|
||||||
|
xorsign.
|
||||||
|
* gcc.target/aarch64/sve/xorsign_1_run.c: Likewise.
|
||||||
|
|
||||||
2019-01-09 Jakub Jelinek <jakub@redhat.com>
|
2019-01-09 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
PR rtl-optimization/88331
|
PR rtl-optimization/88331
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details --save-temps" } */
|
||||||
|
|
||||||
|
void
|
||||||
|
copysign_half (_Float16 * restrict a, _Float16 * restrict b,
|
||||||
|
_Float16 * restrict r, int n)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
r[i] = __builtin_copysignf16 (a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store in r[i], for each i in [0, n), the value a[i] carrying the sign
   of b[i], using the float copysign builtin.  */
void
copysign_float (float *restrict a, float *restrict b, float *restrict r,
                int n)
{
  for (int i = 0; i < n; i++)
    {
      r[i] = __builtin_copysignf (a[i], b[i]);
    }
}
|
||||||
|
|
||||||
|
/* Store in r[i], for each i in [0, n), the value a[i] carrying the sign
   of b[i], using the double copysign builtin.  */
void
copysign_double (double *restrict a, double *restrict b, double *restrict r,
                 int n)
{
  for (int i = 0; i < n; i++)
    {
      r[i] = __builtin_copysign (a[i], b[i]);
    }
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x8000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x7fff\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8000000000000000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x7fffffffffffffff\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
/* { dg-do run { target { aarch64_sve_hw } } } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||||
|
|
||||||
|
#include "copysign_1.c"
|
||||||
|
|
||||||
|
extern void abort ();
|
||||||
|
|
||||||
|
#define N 16
|
||||||
|
_Float16 ah[N] = { -0.1f16, -3.2f16, -6.3f16, -9.4f16,
|
||||||
|
-12.5f16, -15.6f16, -18.7f16, -21.8f16,
|
||||||
|
24.9f16, 27.1f16, 30.2f16, 33.3f16,
|
||||||
|
36.4f16, 39.5f16, 42.6f16, 45.7f
|
||||||
|
};
|
||||||
|
|
||||||
|
_Float16 bh[N] = { -1.2f16, 3.4f16, -5.6f16, 7.8f16,
|
||||||
|
-9.0f16, 1.0f16, -2.0f16, 3.0f16,
|
||||||
|
-4.0f16, -5.0f16, 6.0f16, 7.0f16,
|
||||||
|
-8.0f16, -9.0f16, 10.0f16, 11.0f16
|
||||||
|
};
|
||||||
|
|
||||||
|
_Float16 rh[N];
|
||||||
|
|
||||||
|
float a[N] = { -0.1f, -3.2f, -6.3f, -9.4f,
|
||||||
|
-12.5f, -15.6f, -18.7f, -21.8f,
|
||||||
|
24.9f, 27.1f, 30.2f, 33.3f,
|
||||||
|
36.4f, 39.5f, 42.6f, 45.7f
|
||||||
|
};
|
||||||
|
|
||||||
|
float b[N] = { -1.2f, 3.4f, -5.6f, 7.8f,
|
||||||
|
-9.0f, 1.0f, -2.0f, 3.0f,
|
||||||
|
-4.0f, -5.0f, 6.0f, 7.0f,
|
||||||
|
-8.0f, -9.0f, 10.0f, 11.0f
|
||||||
|
};
|
||||||
|
|
||||||
|
float r[N];
|
||||||
|
|
||||||
|
double ad[N] = { -0.1d, -3.2d, -6.3d, -9.4d,
|
||||||
|
-12.5d, -15.6d, -18.7d, -21.8d,
|
||||||
|
24.9d, 27.1d, 30.2d, 33.3d,
|
||||||
|
36.4d, 39.5d, 42.6d, 45.7d
|
||||||
|
};
|
||||||
|
|
||||||
|
double bd[N] = { -1.2d, 3.4d, -5.6d, 7.8d,
|
||||||
|
-9.0d, 1.0d, -2.0d, 3.0d,
|
||||||
|
-4.0d, -5.0d, 6.0d, 7.0d,
|
||||||
|
-8.0d, -9.0d, 10.0d, 11.0d
|
||||||
|
};
|
||||||
|
|
||||||
|
double rd[N];
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
copysign_half (ah, bh, rh, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (rh[i] != __builtin_copysignf16 (ah[i], bh[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
copysign_float (a, b, r, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (r[i] != __builtin_copysignf (a[i], b[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
copysign_double (ad, bd, rd, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (rd[i] != __builtin_copysign (ad[i], bd[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details --save-temps" } */
|
||||||
|
|
||||||
|
void
|
||||||
|
xorsign_half (_Float16 * restrict a, _Float16 * restrict b,
|
||||||
|
_Float16 * restrict r, int n)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
r[i] = a[i] * __builtin_copysignf16 (1.0f16, b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Store in r[i], for each i in [0, n), the product of a[i] and a unit
   value carrying the sign of b[i] (float variant).  */
void
xorsign_float (float *restrict a, float *restrict b, float *restrict r, int n)
{
  for (int i = 0; i < n; i++)
    {
      r[i] = a[i] * __builtin_copysignf (1.0f, b[i]);
    }
}
|
||||||
|
|
||||||
|
/* Store in r[i], for each i in [0, n), the product of a[i] and a unit
   value carrying the sign of b[i] (double variant).  */
void
xorsign_double (double *restrict a, double *restrict b, double *restrict r,
                int n)
{
  for (int i = 0; i < n; i++)
    {
      /* An unsuffixed constant is already a double; the 'd' suffix is a
         GNU extension and is not needed here.  */
      r[i] = a[i] * __builtin_copysign (1.0, b[i]);
    }
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x8000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8000000000000000\n} 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
/* { dg-do run { target { aarch64_sve_hw } } } */
|
||||||
|
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||||
|
|
||||||
|
#include "xorsign_1.c"
|
||||||
|
|
||||||
|
extern void abort ();
|
||||||
|
|
||||||
|
#define N 16
|
||||||
|
_Float16 ah[N] = { -0.1f16, -3.2f16, -6.3f16, -9.4f16,
|
||||||
|
-12.5f16, -15.6f16, -18.7f16, -21.8f16,
|
||||||
|
24.9f16, 27.1f16, 30.2f16, 33.3f16,
|
||||||
|
36.4f16, 39.5f16, 42.6f16, 45.7f
|
||||||
|
};
|
||||||
|
|
||||||
|
_Float16 bh[N] = { -1.2f16, 3.4f16, -5.6f16, 7.8f16,
|
||||||
|
-9.0f16, 1.0f16, -2.0f16, 3.0f16,
|
||||||
|
-4.0f16, -5.0f16, 6.0f16, 7.0f16,
|
||||||
|
-8.0f16, -9.0f16, 10.0f16, 11.0f16
|
||||||
|
};
|
||||||
|
|
||||||
|
_Float16 rh[N];
|
||||||
|
|
||||||
|
float a[N] = { -0.1f, -3.2f, -6.3f, -9.4f,
|
||||||
|
-12.5f, -15.6f, -18.7f, -21.8f,
|
||||||
|
24.9f, 27.1f, 30.2f, 33.3f,
|
||||||
|
36.4f, 39.5f, 42.6f, 45.7f
|
||||||
|
};
|
||||||
|
|
||||||
|
float b[N] = { -1.2f, 3.4f, -5.6f, 7.8f,
|
||||||
|
-9.0f, 1.0f, -2.0f, 3.0f,
|
||||||
|
-4.0f, -5.0f, 6.0f, 7.0f,
|
||||||
|
-8.0f, -9.0f, 10.0f, 11.0f
|
||||||
|
};
|
||||||
|
|
||||||
|
float r[N];
|
||||||
|
|
||||||
|
double ad[N] = { -0.1d, -3.2d, -6.3d, -9.4d,
|
||||||
|
-12.5d, -15.6d, -18.7d, -21.8d,
|
||||||
|
24.9d, 27.1d, 30.2d, 33.3d,
|
||||||
|
36.4d, 39.5d, 42.6d, 45.7d
|
||||||
|
};
|
||||||
|
|
||||||
|
double bd[N] = { -1.2d, 3.4d, -5.6d, 7.8d,
|
||||||
|
-9.0d, 1.0d, -2.0d, 3.0d,
|
||||||
|
-4.0d, -5.0d, 6.0d, 7.0d,
|
||||||
|
-8.0d, -9.0d, 10.0d, 11.0d
|
||||||
|
};
|
||||||
|
|
||||||
|
double rd[N];
|
||||||
|
|
||||||
|
int
|
||||||
|
main (void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
xorsign_half (ah, bh, rh, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (rh[i] != ah[i] * __builtin_copysignf16 (1.0f16, bh[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
xorsign_float (a, b, r, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
xorsign_double (ad, bd, rd, N);
|
||||||
|
for (i = 0; i < N; i++)
|
||||||
|
if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
|
||||||
|
abort ();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue