mirror of git://gcc.gnu.org/git/gcc.git
AArch64: convert widen_sum optabs to conversion optabs [PR122069]
This patch is a mechanical rewrite of the widen_[us]sum optabs from direct optabs to conversion optabs. As a result, the output mode has to be added to the names of the existing patterns. No change in functionality is expected. gcc/ChangeLog: PR middle-end/122069 * config/aarch64/aarch64-simd.md (widen_ssum<mode>3): Change into ... (widen_ssum<Vdblw><mode>3, widen_ssum<Vwide><mode>3): ... these. (widen_usum<mode>3): Change into ... (widen_usum<Vdblw><mode>3, widen_usum<Vwide><mode>3): ... these. * config/aarch64/iterators.md (Vdblw): New. (Vwide): Extend to match VWIDE. gcc/testsuite/ChangeLog: PR middle-end/122069 * gcc.target/aarch64/pr122069_1.c: New test. * gcc.target/aarch64/pr122069_2.c: New test.
This commit is contained in:
parent
2bb6a8c4f9
commit
b394181afd
|
@ -4647,7 +4647,7 @@
|
|||
|
||||
;; <su><addsub>w<q>.
|
||||
|
||||
(define_expand "widen_ssum<mode>3"
|
||||
(define_expand "widen_ssum<Vdblw><mode>3"
|
||||
[(set (match_operand:<VDBLW> 0 "register_operand")
|
||||
(plus:<VDBLW> (sign_extend:<VDBLW>
|
||||
(match_operand:VQW 1 "register_operand"))
|
||||
|
@ -4664,7 +4664,7 @@
|
|||
}
|
||||
)
|
||||
|
||||
(define_expand "widen_ssum<mode>3"
|
||||
(define_expand "widen_ssum<Vwide><mode>3"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(plus:<VWIDE> (sign_extend:<VWIDE>
|
||||
(match_operand:VD_BHSI 1 "register_operand"))
|
||||
|
@ -4675,7 +4675,7 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "widen_usum<mode>3"
|
||||
(define_expand "widen_usum<Vdblw><mode>3"
|
||||
[(set (match_operand:<VDBLW> 0 "register_operand")
|
||||
(plus:<VDBLW> (zero_extend:<VDBLW>
|
||||
(match_operand:VQW 1 "register_operand"))
|
||||
|
@ -4692,7 +4692,7 @@
|
|||
}
|
||||
)
|
||||
|
||||
(define_expand "widen_usum<mode>3"
|
||||
(define_expand "widen_usum<Vwide><mode>3"
|
||||
[(set (match_operand:<VWIDE> 0 "register_operand")
|
||||
(plus:<VWIDE> (zero_extend:<VWIDE>
|
||||
(match_operand:VD_BHSI 1 "register_operand"))
|
||||
|
|
|
@ -1901,6 +1901,11 @@
|
|||
(V4HI "V2SI") (V8HI "V4SI")
|
||||
(V2SI "DI") (V4SI "V2DI")])
|
||||
|
||||
;; Modes with double-width elements.
|
||||
(define_mode_attr Vdblw [(V8QI "v4hi") (V16QI "v8hi")
|
||||
(V4HI "v2si") (V8HI "v4si")
|
||||
(V2SI "di") (V4SI "v2di")])
|
||||
|
||||
(define_mode_attr VQUADW [(V8QI "V4SI") (V16QI "V8SI")
|
||||
(V4HI "V2DI") (V8HI "V4DI")])
|
||||
|
||||
|
@ -2003,7 +2008,9 @@
|
|||
(define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")])
|
||||
|
||||
;; Widened modes of vector modes, lowercase
|
||||
(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf")
|
||||
(define_mode_attr Vwide [(V2SI "v2di") (V4HI "v4si")
|
||||
(V2SF "v2df") (V4HF "v4sf")
|
||||
(V8QI "v8hi")
|
||||
(VNx16QI "vnx8hi") (VNx8HI "vnx4si")
|
||||
(VNx4SI "vnx2di")
|
||||
(VNx8HF "vnx4sf") (VNx4SF "vnx2df")
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
inline char char_abs(char i) {
|
||||
return (i < 0 ? -i : i);
|
||||
}
|
||||
|
||||
/*
|
||||
** foo_int:
|
||||
** ...
|
||||
** sub v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
|
||||
** zip1 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
|
||||
** zip2 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b
|
||||
** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
|
||||
** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
|
||||
** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
|
||||
** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
|
||||
** ...
|
||||
*/
|
||||
int foo_int(unsigned char *x, unsigned char * restrict y) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 8000; i++)
|
||||
sum += char_abs(x[i] - y[i]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
/*
|
||||
** foo2_int:
|
||||
** ...
|
||||
** add v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h
|
||||
** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h
|
||||
** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h
|
||||
** ...
|
||||
*/
|
||||
int foo2_int(unsigned short *x, unsigned short * restrict y) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 8000; i++)
|
||||
{
|
||||
x[i] = x[i] + y[i];
|
||||
sum += x[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
|
|
@ -0,0 +1,80 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/
|
||||
|
||||
inline char char_abs(char i) {
|
||||
return (i < 0 ? -i : i);
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
int foo_int(unsigned char *x, unsigned char * restrict y) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 100; i++)
|
||||
sum += char_abs(x[i] - y[i]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
int foo2_int(unsigned short *x, unsigned short * restrict y,
|
||||
unsigned short * restrict z) {
|
||||
int sum = 0;
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
z[i] = x[i] + y[i];
|
||||
sum += z[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
int foo_int2(unsigned char *x, unsigned char * restrict y) {
|
||||
int sum = 0;
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < 100; i++)
|
||||
sum += char_abs(x[i] - y[i]);
|
||||
return sum;
|
||||
}
|
||||
|
||||
__attribute__((noipa))
|
||||
int foo2_int2(unsigned short *x, unsigned short * restrict y,
|
||||
unsigned short * restrict z) {
|
||||
int sum = 0;
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
z[i] = x[i] + y[i];
|
||||
sum += z[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
unsigned short a[100];
|
||||
unsigned short b[100];
|
||||
unsigned short r1[100];
|
||||
unsigned short r2[100];
|
||||
unsigned char c[100];
|
||||
unsigned char d[100];
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < 100; i++)
|
||||
{
|
||||
a[i] = c[i] = i;
|
||||
b[i] = d[i] = 100 - i;
|
||||
}
|
||||
|
||||
if (foo_int (c, d) != foo_int2 (c, d))
|
||||
__builtin_abort();
|
||||
|
||||
|
||||
if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2))
|
||||
__builtin_abort();
|
||||
|
||||
#pragma GCC novector
|
||||
for (int i = 0; i < 100; i++)
|
||||
if (r1[i] != r2[i])
|
||||
__builtin_abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
|
Loading…
Reference in New Issue