diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8c47d441c3fd..550ff0a3cde6 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -7722,6 +7722,22 @@
   [(set_attr "sve_type" "sve_int_dot")]
 )
 
+;; Define double-widening widen_[su]sum in terms of the dot product.
+;; A double-widening sum reduction "acc += a" is computed as
+;; "acc += (a * 1)", i.e. we seed the multiplication with a vector of ones.
+(define_expand "widen_<sur>sum<vsi2qi>3"
+  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
+        (plus:SVE_FULL_SDI
+          (unspec:SVE_FULL_SDI
+            [(match_operand:<VSI2QI> 1 "register_operand")
+             (match_dup 3)]
+            DOTPROD)
+          (match_operand:SVE_FULL_SDI 2 "register_operand")))]
+  "TARGET_SVE"
+{
+  operands[3] = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
+})
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT] Sum of absolute differences
 ;; -------------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c
new file mode 100644
index 000000000000..5d1f61f4a6a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only --param vect-epilogues-nomask=0 -fno-schedule-insns -fno-reorder-blocks -fno-schedule-insns2 -fdump-tree-vect-details" }*/
+/* { dg-final { check-function-bodies "**" "" } } */
+
+inline char char_abs(char i) {
+  return (i < 0 ? -i : i);
+}
+
+/*
+** foo_int:
+**	...
+**	sub\tz[0-9]+.b, z[0-9]+.b, z[0-9]+.b
+**	udot\tz[0-9]+.s, z[0-9]+.b, z[0-9]+.b
+**	...
+*/
+int foo_int(unsigned char *x, unsigned char * restrict y) {
+  int sum = 0;
+  for (int i = 0; i < 8000; i++)
+    sum += char_abs(x[i] - y[i]);
+  return sum;
+}
+
+/*
+** foo2_int:
+**	...
+**	add\tz[0-9]+.h, z[0-9]+.h, z[0-9]+.h
+**	punpklo\tp[0-9]+.h, p[0-9]+.b
+**	uunpklo\tz[0-9]+.s, z[0-9]+.h
+**	add\tz[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+**	punpkhi\tp[0-9]+.h, p[0-9]+.b
+**	uunpkhi\tz[0-9]+.s, z[0-9]+.h
+**	add\tz[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s
+**	...
+*/
+int foo2_int(unsigned short *x, unsigned short * restrict y) {
+  int sum = 0;
+  for (int i = 0; i < 8000; i++)
+    {
+      x[i] = x[i] + y[i];
+      sum += x[i];
+    }
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c
new file mode 100644
index 000000000000..b9e0010114f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122069_2.c
@@ -0,0 +1,81 @@
+/* { dg-do run } */
+/* { dg-require-effective-target aarch64_sve_hw } */
+/* { dg-options "-O3 -march=armv8-a+sve -mautovec-preference=sve-only -fdump-tree-vect-details" }*/
+
+inline char char_abs(char i) {
+  return (i < 0 ? -i : i);
+}
+
+__attribute__((noipa))
+int foo_int(unsigned char *x, unsigned char * restrict y) {
+  int sum = 0;
+  for (int i = 0; i < 100; i++)
+    sum += char_abs(x[i] - y[i]);
+  return sum;
+}
+
+__attribute__((noipa))
+int foo2_int(unsigned short *x, unsigned short * restrict y,
+             unsigned short * restrict z) {
+  int sum = 0;
+  for (int i = 0; i < 100; i++)
+    {
+      z[i] = x[i] + y[i];
+      sum += z[i];
+    }
+  return sum;
+}
+
+__attribute__((noipa))
+int foo_int2(unsigned char *x, unsigned char * restrict y) {
+  int sum = 0;
+#pragma GCC novector
+  for (int i = 0; i < 100; i++)
+    sum += char_abs(x[i] - y[i]);
+  return sum;
+}
+
+__attribute__((noipa))
+int foo2_int2(unsigned short *x, unsigned short * restrict y,
+              unsigned short * restrict z) {
+  int sum = 0;
+#pragma GCC novector
+  for (int i = 0; i < 100; i++)
+    {
+      z[i] = x[i] + y[i];
+      sum += z[i];
+    }
+  return sum;
+}
+
+int main ()
+{
+  unsigned short a[100];
+  unsigned short b[100];
+  unsigned short r1[100];
+  unsigned short r2[100];
+  unsigned char c[100];
+  unsigned char d[100];
+#pragma GCC novector
+  for (int i = 0; i < 100; i++)
+    {
+      a[i] = c[i] = i;
+      b[i] = d[i] = 100 - i;
+    }
+
+  if (foo_int (c, d) != foo_int2 (c, d))
+    __builtin_abort();
+
+
+  if (foo2_int (a, b, r1) != foo2_int2 (a, b, r2))
+    __builtin_abort();
+
+#pragma GCC novector
+  for (int i = 0; i < 100; i++)
+    if (r1[i] != r2[i])
+      __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
\ No newline at end of file