/*
 * Patch overview (free-form preamble; patch tools skip text that precedes
 * the first "diff --git" header).
 *
 * Files touched:
 *   gcc/config/aarch64/aarch64-simd.md
 *     Four single-line hunks, each replacing the define_expand header of a
 *     widening-sum expander: widen_ssum...3 (sign_extend) and widen_usum...3
 *     (zero_extend), once for VQW operands and once for VD_BHSI operands.
 *     NOTE(review): in this copy every removed/added pair is textually
 *     identical, and the destination "match_operand: 0", "plus:" and
 *     "sign_extend:" / "zero_extend:" all have empty modes.  Presumably
 *     angle-bracket iterator substitutions were stripped when the patch was
 *     extracted -- confirm against the upstream patch before applying.
 *   gcc/config/aarch64/iterators.md
 *     Adds the mode attribute Vdblw ("Modes with double-width elements")
 *     with lowercase values: V8QI->v4hi, V16QI->v8hi, V4HI->v2si,
 *     V8HI->v4si, V2SI->di, V4SI->v2di.  Extends Vwide with V2SI->v2di,
 *     V4HI->v4si and V8QI->v8hi.
 *   gcc/testsuite/gcc.target/aarch64/pr122069_1.c (new, dg-do compile)
 *     Uses check-function-bodies to match the instruction sequences listed
 *     for foo_int (sub, zip1, zip2, then two uaddw/uaddw2 pairs) and
 *     foo2_int (add, then uaddw/uaddw2), and expects "LOOP VECTORIZED"
 *     exactly twice in the vect dump.
 *   gcc/testsuite/gcc.target/aarch64/pr122069_2.c (new, dg-do run)
 *     Calls foo_int and foo2_int and compares their results with foo_int2
 *     and foo2_int2, whose loops carry "#pragma GCC novector"; any mismatch
 *     in the returned sums or in the r1/r2 output arrays calls
 *     __builtin_abort.  Also expects "LOOP VECTORIZED" exactly twice.
 *
 * NOTE(review): the patch body below has had its line breaks collapsed, so
 * the line counts in the @@ hunk headers no longer match the physical
 * lines; presumably it must be re-flowed before patch tools can consume it.
 * NOTE(review): both tests define char_abs as plain "inline" with no static
 * or extern declaration; under C99 inline rules that provides no external
 * definition, so linking the run test presumably relies on every call being
 * inlined at -O3 -- confirm this is intended.
 * NOTE(review): pr122069_2.c is recorded without a trailing newline.
 */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 0d5b02a739fa..6488119a1402 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4647,7 +4647,7 @@ ;; w. -(define_expand "widen_ssum3" +(define_expand "widen_ssum3" [(set (match_operand: 0 "register_operand") (plus: (sign_extend: (match_operand:VQW 1 "register_operand")) @@ -4664,7 +4664,7 @@ } ) -(define_expand "widen_ssum3" +(define_expand "widen_ssum3" [(set (match_operand: 0 "register_operand") (plus: (sign_extend: (match_operand:VD_BHSI 1 "register_operand")) @@ -4675,7 +4675,7 @@ DONE; }) -(define_expand "widen_usum3" +(define_expand "widen_usum3" [(set (match_operand: 0 "register_operand") (plus: (zero_extend: (match_operand:VQW 1 "register_operand")) @@ -4692,7 +4692,7 @@ } ) -(define_expand "widen_usum3" +(define_expand "widen_usum3" [(set (match_operand: 0 "register_operand") (plus: (zero_extend: (match_operand:VD_BHSI 1 "register_operand")) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 332e7ffd2eaf..61ca4990b941 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1901,6 +1901,11 @@ (V4HI "V2SI") (V8HI "V4SI") (V2SI "DI") (V4SI "V2DI")]) +;; Modes with double-width elements. 
+(define_mode_attr Vdblw [(V8QI "v4hi") (V16QI "v8hi") + (V4HI "v2si") (V8HI "v4si") + (V2SI "di") (V4SI "v2di")]) + (define_mode_attr VQUADW [(V8QI "V4SI") (V16QI "V8SI") (V4HI "V2DI") (V8HI "V4DI")]) @@ -2003,7 +2008,9 @@ (define_mode_attr VWIDE_PRED [(VNx8HF "VNx4BI") (VNx4SF "VNx2BI")]) ;; Widened modes of vector modes, lowercase -(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf") +(define_mode_attr Vwide [(V2SI "v2di") (V4HI "v4si") + (V2SF "v2df") (V4HF "v4sf") + (V8QI "v8hi") (VNx16QI "vnx8hi") (VNx8HI "vnx4si") (VNx4SI "vnx2di") (VNx8HF "vnx4sf") (VNx4SF "vnx2df") diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_1.c b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c new file mode 100644 index 000000000000..b2f973261ea0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_1.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/ +/* { dg-final { check-function-bodies "**" "" } } */ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +/* +** foo_int: +** ... +** sub v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** zip1 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** zip2 v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** ... +*/ +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +/* +** foo2_int: +** ... +** add v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h +** uaddw v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4h +** uaddw2 v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.8h +** ... 
+*/ +int foo2_int(unsigned short *x, unsigned short * restrict y) { + int sum = 0; + for (int i = 0; i < 8000; i++) + { + x[i] = x[i] + y[i]; + sum += x[i]; + } + return sum; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr122069_2.c b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c new file mode 100644 index 000000000000..8019bf9e150a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr122069_2.c @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -march=armv8-a -mautovec-preference=asimd-only -fdump-tree-vect-details" }*/ + +inline char char_abs(char i) { + return (i < 0 ? -i : i); +} + +__attribute__((noipa)) +int foo_int(unsigned char *x, unsigned char * restrict y) { + int sum = 0; + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +__attribute__((noipa)) +int foo_int2(unsigned char *x, unsigned char * restrict y) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + sum += char_abs(x[i] - y[i]); + return sum; +} + +__attribute__((noipa)) +int foo2_int2(unsigned short *x, unsigned short * restrict y, + unsigned short * restrict z) { + int sum = 0; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + z[i] = x[i] + y[i]; + sum += z[i]; + } + return sum; +} + +int main () +{ + unsigned short a[100]; + unsigned short b[100]; + unsigned short r1[100]; + unsigned short r2[100]; + unsigned char c[100]; + unsigned char d[100]; +#pragma GCC novector + for (int i = 0; i < 100; i++) + { + a[i] = c[i] = i; + b[i] = d[i] = 100 - i; + } + + if (foo_int (c, d) != foo_int2 (c, d)) + __builtin_abort(); + + + if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2)) + __builtin_abort(); + +#pragma GCC novector + for 
(int i = 0; i < 100; i++) + if (r1[i] != r2[i]) + __builtin_abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ \ No newline at end of file