mirror of git://gcc.gnu.org/git/gcc.git
vect: Add pattern recognition for vectorizing {FLOOR,CEIL,ROUND}_{MOD, DIV}_EXPR
Added a new helper function "add_code_for_floorceilround_divmod" in tree-vect-patterns.cc for adding compensating code for each of the op {FLOOR,ROUND,CEIL}_{DIV,MOD}_EXPR. This function checks if target supports all required operations required to implement these operation and generates vectorized code for the respective operations. Based on the following logic FLOOR_{DIV,MOD} r = x %[fl] y; r = x % y; if (r && (x ^ y) < 0) r += y; r = x/[fl] y; r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; CEIL_{DIV,MOD} (unsigned) r = x %[cl] y; r = x % y; if (r) r -= y; r = x/[cl] y; r = x % y; d = x/y; if (r) d++; CEIL_{DIV,MOD} (signed) r = x %[cl] y; r = x % y; if (r && (x ^ y) >= 0) r -= y; r = x/[cl] y; r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++; ROUND_{DIV,MOD} (unsigned) r = x %[rd] y; r = x % y; if (r > ((y-1)/2)) r -= y; r = x/[rd] y; r = x % y; d = x/y; if (r > ((y-1)/2)) d++; ROUND_{DIV,MOD} (signed) r = x %[rd] y; r = x % y; if (r > ((y-1)/2)) {if ((x ^ y) >= 0) r -= y; else r += y;} r = x/[rd] y; r = x % y; d = x/y; if ((r > ((y-1)/2)) && (x ^ y) >= 0) {if ((x ^ y) >= 0) d++; else d--;} each of the case is implemented in a vectorized form. This function is then called in each of the path in vect_recog_divmod_pattern, which there are 3, based on value of constant operand1, 1. == 2 2. == power of 2 3. otherwise 2025-10-17 Avinash Jayakar <avinashd@linux.ibm.com> gcc/ChangeLog: PR tree-optimization/104116 * tree-vect-patterns.cc (add_code_for_floorceilround_divmod): patt recog for {FLOOR,ROUND,CEIL}_{DIV,MOD}_EXPR. (vect_recog_divmod_pattern): Call add_code_for_floorceilround_divmod after computing div/mod for each control path. gcc/testsuite/ChangeLog: PR tree-optimization/104116 * gcc.dg/vect/pr104116-ceil-div-2.c: New test. * gcc.dg/vect/pr104116-ceil-div-pow2.c: New test. * gcc.dg/vect/pr104116-ceil-div.c: New test. * gcc.dg/vect/pr104116-ceil-mod-2.c: New test. * gcc.dg/vect/pr104116-ceil-mod-pow2.c: New test. * gcc.dg/vect/pr104116-ceil-mod.c: New test. * gcc.dg/vect/pr104116-ceil-udiv-2.c: New test. * gcc.dg/vect/pr104116-ceil-udiv-pow2.c: New test. * gcc.dg/vect/pr104116-ceil-udiv.c: New test. * gcc.dg/vect/pr104116-ceil-umod-2.c: New test. * gcc.dg/vect/pr104116-ceil-umod-pow2.c: New test. * gcc.dg/vect/pr104116-ceil-umod.c: New test. * gcc.dg/vect/pr104116-floor-div-2.c: New test. * gcc.dg/vect/pr104116-floor-div-pow2.c: New test. * gcc.dg/vect/pr104116-floor-div.c: New test. * gcc.dg/vect/pr104116-floor-mod-2.c: New test. * gcc.dg/vect/pr104116-floor-mod-pow2.c: New test. * gcc.dg/vect/pr104116-floor-mod.c: New test. * gcc.dg/vect/pr104116-round-div-2.c: New test. * gcc.dg/vect/pr104116-round-div-pow2.c: New test. * gcc.dg/vect/pr104116-round-div.c: New test. * gcc.dg/vect/pr104116-round-mod-2.c: New test. * gcc.dg/vect/pr104116-round-mod-pow2.c: New test. * gcc.dg/vect/pr104116-round-mod.c: New test. * gcc.dg/vect/pr104116-round-udiv-2.c: New test. * gcc.dg/vect/pr104116-round-udiv-pow2.c: New test. * gcc.dg/vect/pr104116-round-udiv.c: New test. * gcc.dg/vect/pr104116-round-umod-2.c: New test. * gcc.dg/vect/pr104116-round-umod-pow2.c: New test. * gcc.dg/vect/pr104116-round-umod.c: New test. * gcc.dg/vect/pr104116.h: New test.
This commit is contained in:
parent
eb717a8f4e
commit
6883d51304
|
@ -0,0 +1,29 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__CEIL_DIV, 2, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int *a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = cl_div (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN(__CEIL_DIV, 8, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int *a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = cl_div (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN(__CEIL_DIV, 19, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int *a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = cl_div (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN(__CEIL_MOD, 2, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int *a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = cl_mod (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN(__CEIL_MOD, 8, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *a = (unsigned int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_mod (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN(__CEIL_MOD, 19, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int *a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = cl_mod (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__CEIL_DIV, 2u, udiv)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *ua = (unsigned int*)↑
|
||||
init_uarr(ua, N);
|
||||
udiv(ua);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_udiv (0xf0000000 + i, 2);
|
||||
if (expected != ua[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__CEIL_DIV, 8u, udiv)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *ua = (unsigned int*)↑
|
||||
init_uarr(ua, N);
|
||||
udiv(ua);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_udiv (0xf0000000 + i, 8);
|
||||
if (expected != ua[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,29 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__CEIL_DIV, 19u, udiv)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *ua = (unsigned int*)↑
|
||||
init_uarr(ua, N);
|
||||
udiv(ua);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_udiv (0xf0000000 + i, 19);
|
||||
if (expected != ua[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_umod (0xf0000000 + i, 2);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN_UNSIGNED (__CEIL_MOD, 2u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_umod (0xf0000000 + i, 2);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
|
||||
TEST_FN_UNSIGNED (__CEIL_MOD, 19u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int *a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = cl_umod (0xf0000000 + i, 19);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_DIV, 2, div_2)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div_2(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_div (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_DIV, 8, div_2)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div_2(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_div (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_DIV, 19, div_2)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div_2(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_div (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_MOD, 2, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_mod (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_MOD, 8, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_mod (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__FLOOR_MOD, 19, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = fl_mod (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_DIV, 2, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_div (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_DIV, 8, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_div (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_DIV, 19, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
div(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_div (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_MOD, 2, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_mod (i - N/2, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_MOD, 8, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_mod (i - N/2, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN(__ROUND_MOD, 19, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
int * a = (int*)&arr;
|
||||
init_arr(a, N);
|
||||
mod(a);
|
||||
for (int i=0; i<N; i++)
|
||||
{
|
||||
int expected = rd_mod (i - N/2, 19);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_DIV, 2u, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
div(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_udiv (0xf0000000 + i, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_DIV, 8u, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
div(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_udiv (0xf0000000 + i, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_DIV, 19u, div)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
div(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_udiv (0xf0000000 + i, 19);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_MOD, 2u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_umod (0xf0000000 + i, 2);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_MOD, 8u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_umod (0xf0000000 + i, 8);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
/* { dg-additional-options "-fgimple -fdump-tree-optimized" } */
|
||||
/* { dg-require-effective-target vect_int} */
|
||||
/* { dg-require-effective-target vect_condition} */
|
||||
/* { dg-require-effective-target vect_shift} */
|
||||
|
||||
|
||||
|
||||
#include "pr104116.h"
|
||||
#include "tree-vect.h"
|
||||
|
||||
TEST_FN_UNSIGNED(__ROUND_MOD, 19u, mod)
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
unsigned int * a = (unsigned int*)↑
|
||||
init_uarr(a, N);
|
||||
mod(a);
|
||||
for (unsigned int i=0; i<N; i++)
|
||||
{
|
||||
unsigned int expected = rd_umod (0xf0000000 + i, 19);
|
||||
if (expected != a[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "optimized: loop vectorized" 1 "vect" } } */
|
||||
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
#define TEST_FN(OP, CONST, NAME) \
|
||||
__attribute__((noinline)) \
|
||||
void __GIMPLE (ssa,guessed_local(10737416)) \
|
||||
NAME (int * a) \
|
||||
{ \
|
||||
int i; \
|
||||
long unsigned int _1; \
|
||||
long unsigned int _2; \
|
||||
int * _3; \
|
||||
int _4; \
|
||||
int _5; \
|
||||
unsigned int _12; \
|
||||
unsigned int _13; \
|
||||
\
|
||||
__BB(2,guessed_local(10737416)): \
|
||||
goto __BB3(precise(134217728)); \
|
||||
\
|
||||
__BB(3,loop_header(1),guessed_local(1063004408)): \
|
||||
i_14 = __PHI (__BB5: i_11, __BB2: 0); \
|
||||
_13 = __PHI (__BB5: _12, __BB2: 1024u); \
|
||||
_1 = (long unsigned int) i_14; \
|
||||
_2 = _1 * 4ul; \
|
||||
_3 = a_9(D) + _2; \
|
||||
_4 = __MEM <int> (_3); \
|
||||
_5 = _4 OP CONST; \
|
||||
__MEM <int> (_3) = _5; \
|
||||
i_11 = i_14 + 1; \
|
||||
_12 = _13 - 1u; \
|
||||
if (_12 != 0u) \
|
||||
goto __BB5(guessed(132861994)); \
|
||||
else \
|
||||
goto __BB4(guessed(1355734)); \
|
||||
\
|
||||
__BB(5,guessed_local(1052266995)): \
|
||||
goto __BB3(precise(134217728)); \
|
||||
\
|
||||
__BB(4,guessed_local(10737416)): \
|
||||
return; \
|
||||
\
|
||||
} \
|
||||
|
||||
#define TEST_FN_UNSIGNED(OP, CONST, NAME) \
|
||||
__attribute__((noinline)) \
|
||||
void __GIMPLE (ssa,guessed_local(10737416)) \
|
||||
NAME (unsigned int * a) \
|
||||
{ \
|
||||
int i; \
|
||||
long unsigned int _1; \
|
||||
long unsigned int _2; \
|
||||
unsigned int * _3; \
|
||||
unsigned int _4; \
|
||||
unsigned int _5; \
|
||||
unsigned int _12; \
|
||||
unsigned int _13; \
|
||||
\
|
||||
__BB(2,guessed_local(10737416)): \
|
||||
goto __BB3(precise(134217728)); \
|
||||
\
|
||||
__BB(3,loop_header(1),guessed_local(1063004408)): \
|
||||
i_14 = __PHI (__BB5: i_11, __BB2: 0); \
|
||||
_13 = __PHI (__BB5: _12, __BB2: 1024u); \
|
||||
_1 = (long unsigned int) i_14; \
|
||||
_2 = _1 * 4ul; \
|
||||
_3 = a_9(D) + _2; \
|
||||
_4 = __MEM <unsigned int> (_3); \
|
||||
_5 = _4 OP CONST; \
|
||||
__MEM <unsigned int> (_3) = _5; \
|
||||
i_11 = i_14 + 1; \
|
||||
_12 = _13 - 1u; \
|
||||
if (_12 != 0u) \
|
||||
goto __BB5(guessed(132861994)); \
|
||||
else \
|
||||
goto __BB4(guessed(1355734)); \
|
||||
\
|
||||
__BB(5,guessed_local(1052266995)): \
|
||||
goto __BB3(precise(134217728)); \
|
||||
\
|
||||
__BB(4,guessed_local(10737416)): \
|
||||
return; \
|
||||
} \
|
||||
|
||||
|
||||
#define N 1024
|
||||
int arr[N];
|
||||
__attribute__((optimize("O0")))
|
||||
void init_arr (int *a, int n)
|
||||
{
|
||||
for (int i=0; i<n; i++)
|
||||
a[i] = i - n/2;
|
||||
}
|
||||
|
||||
unsigned int uarr[N];
|
||||
__attribute__((optimize("O0")))
|
||||
void init_uarr (unsigned int *a, int n)
|
||||
{
|
||||
for (unsigned int i=0; i<n; i++)
|
||||
a[i] = 0xf0000000 + i;
|
||||
}
|
||||
|
||||
int cl_div (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
int q = x / y;
|
||||
if (r != 0 && (x ^ y) >= 0)
|
||||
q++;
|
||||
return q;
|
||||
}
|
||||
|
||||
unsigned int cl_udiv (unsigned int x, unsigned int y)
|
||||
{
|
||||
unsigned int r = x % y;
|
||||
unsigned int q = x / y;
|
||||
if (r > 0)
|
||||
q++;
|
||||
return q;
|
||||
}
|
||||
|
||||
int cl_mod (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
if (r != 0 && (x ^ y) >= 0)
|
||||
r -= y;
|
||||
return r;
|
||||
}
|
||||
|
||||
unsigned int cl_umod (unsigned int x, unsigned int y)
|
||||
{
|
||||
unsigned int r = x % y;
|
||||
unsigned int q = x / y;
|
||||
if (r > 0)
|
||||
r-=y;
|
||||
return r;
|
||||
}
|
||||
|
||||
int fl_div (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
int q = x / y;
|
||||
if (r != 0 && (x ^ y) < 0)
|
||||
q--;
|
||||
return q;
|
||||
}
|
||||
|
||||
|
||||
int fl_mod (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
if (r != 0 && (x ^ y) < 0)
|
||||
r += y;
|
||||
return r;
|
||||
}
|
||||
|
||||
int abs(int x)
|
||||
{
|
||||
if (x < 0) return -x;
|
||||
return x;
|
||||
}
|
||||
|
||||
int rd_mod (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
if (abs(r) > abs((y-1) >> 1))
|
||||
{
|
||||
if ((x ^ y) < 0)
|
||||
r += y;
|
||||
else
|
||||
r -= y;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
int rd_div (int x, int y)
|
||||
{
|
||||
int r = x % y;
|
||||
int q = x / y;
|
||||
if (abs(r) > abs((y-1) >> 1))
|
||||
{
|
||||
if ((x ^ y) < 0)
|
||||
q--;
|
||||
else
|
||||
q++;
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
unsigned int rd_umod (unsigned int x, unsigned int y)
|
||||
{
|
||||
unsigned int r = x % y;
|
||||
if (r > ((y-1) >> 1))
|
||||
r -= y;
|
||||
return r;
|
||||
}
|
||||
|
||||
unsigned int rd_udiv (unsigned int x, unsigned int y)
|
||||
{
|
||||
unsigned int r = x % y;
|
||||
unsigned int q = x / y;
|
||||
if (r > ((y-1) >> 1))
|
||||
q++;
|
||||
return q;
|
||||
}
|
|
@ -4838,6 +4838,281 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* Function add_code_for_floorceilround_divmod
|
||||
A helper function to add compensation code for implementing FLOOR_MOD_EXPR,
|
||||
FLOOR_DIV_EXPR, CEIL_MOD_EXPR, CEIL_DIV_EXPR, ROUND_MOD_EXPR and
|
||||
ROUND_DIV_EXPR
|
||||
The quotient and remainder are needed for implemented these operators.
|
||||
FLOOR cases
|
||||
r = x %[fl] y; r = x/[fl] y;
|
||||
is
|
||||
r = x % y; if (r && (x ^ y) < 0) r += y;
|
||||
r = x % y; d = x/y; if (r && (x ^ y) < 0) d--; Respectively
|
||||
Produce following sequence
|
||||
v0 = x^y
|
||||
v1 = -r
|
||||
v2 = r | -r
|
||||
v3 = v0 & v2
|
||||
v4 = v3 < 0
|
||||
if (floor_mod)
|
||||
v5 = v4 ? y : 0
|
||||
v6 = r + v5
|
||||
if (floor_div)
|
||||
v5 = v4 ? 1 : 0
|
||||
v6 = d - 1
|
||||
Similar sequences of vector instructions are produces for following cases
|
||||
CEIL cases
|
||||
r = x %[cl] y; r = x/[cl] y;
|
||||
is
|
||||
r = x % y; if (r && (x ^ y) >= 0) r -= y;
|
||||
r = x % y; if (r) r -= y; (unsigned)
|
||||
r = x % y; d = x/y; if (r && (x ^ y) >= 0) d++;
|
||||
r = x % y; d = x/y; if (r) d++; (unsigned)
|
||||
ROUND cases
|
||||
r = x %[rd] y; r = x/[rd] y;
|
||||
is
|
||||
r = x % y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) r -= y; else r += y;
|
||||
r = x % y; if (r > ((y-1)/2)) r -= y; (unsigned)
|
||||
r = x % y; d = x/y; if (r > ((y-1)/2)) if ((x ^ y) >= 0) d++; else d--;
|
||||
r = x % y; d = x/y; if (r > ((y-1)/2)) d++; (unsigned)
|
||||
Inputs:
|
||||
VECTYPE: Vector type of the operands
|
||||
STMT_VINFO: Statement where pattern begins
|
||||
RHS_CODE: Should either be FLOOR_MOD_EXPR or FLOOR_DIV_EXPR
|
||||
Q: The quotient of division
|
||||
R: Remainder of division
|
||||
OPRDN0/OPRND1: Actual operands involved
|
||||
ITYPE: tree type of oprnd0
|
||||
Output:
|
||||
NULL if vectorization not possible
|
||||
Gimple statement based on rhs_code
|
||||
*/
|
||||
static gimple *
|
||||
add_code_for_floorceilround_divmod (tree vectype, vec_info *vinfo,
|
||||
stmt_vec_info stmt_vinfo,
|
||||
enum tree_code rhs_code, tree q, tree r,
|
||||
tree oprnd0, tree oprnd1, tree itype)
|
||||
{
|
||||
gimple *def_stmt;
|
||||
tree mask_vectype = truth_type_for (vectype);
|
||||
if (!mask_vectype)
|
||||
return NULL;
|
||||
tree bool_cond;
|
||||
bool unsigned_p = TYPE_UNSIGNED (itype);
|
||||
|
||||
switch (rhs_code)
|
||||
{
|
||||
case FLOOR_MOD_EXPR:
|
||||
case FLOOR_DIV_EXPR:
|
||||
case CEIL_MOD_EXPR:
|
||||
case CEIL_DIV_EXPR:
|
||||
{
|
||||
if (!target_has_vecop_for_code (NEGATE_EXPR, vectype)
|
||||
|| !target_has_vecop_for_code (BIT_XOR_EXPR, vectype)
|
||||
|| !target_has_vecop_for_code (BIT_IOR_EXPR, vectype)
|
||||
|| !target_has_vecop_for_code (PLUS_EXPR, vectype)
|
||||
|| !target_has_vecop_for_code (MINUS_EXPR, vectype)
|
||||
|| !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
|
||||
|| !expand_vec_cond_expr_p (vectype, mask_vectype))
|
||||
return NULL;
|
||||
if (unsigned_p)
|
||||
{
|
||||
gcc_assert (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
|
||||
|
||||
if (!expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR))
|
||||
return NULL;
|
||||
bool is_mod = rhs_code == CEIL_MOD_EXPR;
|
||||
// r > 0
|
||||
bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
|
||||
def_stmt = gimple_build_assign (bool_cond, GT_EXPR, r,
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
|
||||
itype);
|
||||
|
||||
// (r > 0) ? y : 0 (mod)
|
||||
// (r > 0) ? 1 : 0 (ceil)
|
||||
tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
|
||||
is_mod ? oprnd1 : build_int_cst (itype, 1),
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// r -= (r > 0) ? y : 0 (mod)
|
||||
// d += (x^y < 0 && r) ? -1 : 0 (ceil)
|
||||
tree result = vect_recog_temp_ssa_var (itype, NULL);
|
||||
return gimple_build_assign (result, is_mod ? MINUS_EXPR : PLUS_EXPR,
|
||||
is_mod ? r : q, extr_cond);
|
||||
}
|
||||
else
|
||||
{
|
||||
bool ceil_p
|
||||
= (rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR);
|
||||
if (ceil_p && !target_has_vecop_for_code (BIT_NOT_EXPR, vectype))
|
||||
return NULL;
|
||||
// x ^ y
|
||||
tree xort = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (xort, BIT_XOR_EXPR, oprnd0, oprnd1);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
tree cond_reg = xort;
|
||||
// ~(x ^ y) (ceil)
|
||||
if (ceil_p)
|
||||
{
|
||||
cond_reg = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (cond_reg, BIT_NOT_EXPR, xort);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
}
|
||||
|
||||
// -r
|
||||
tree negate_r = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (negate_r, NEGATE_EXPR, r);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// r | -r , sign bit is set if r!=0
|
||||
tree r_or_negr = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (r_or_negr, BIT_IOR_EXPR, r, negate_r);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// (x ^ y) & (r | -r)
|
||||
// ~(x ^ y) & (r | -r) (ceil)
|
||||
tree r_or_negr_and_xor = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (r_or_negr_and_xor, BIT_AND_EXPR,
|
||||
r_or_negr, cond_reg);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// (x ^ y) & (r | -r) < 0 which is equivalent to (x^y < 0 && r!=0)
|
||||
bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (bool_cond, LT_EXPR, r_or_negr_and_xor,
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
|
||||
itype);
|
||||
|
||||
// (x^y < 0 && r) ? y : 0 (mod)
|
||||
// (x^y < 0 && r) ? -1 : 0 (div)
|
||||
bool is_mod
|
||||
= (rhs_code == FLOOR_MOD_EXPR || rhs_code == CEIL_MOD_EXPR);
|
||||
tree extr_cond = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (extr_cond, COND_EXPR, bool_cond,
|
||||
is_mod ? oprnd1
|
||||
: build_int_cst (itype, -1),
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// r += (x ^ y < 0 && r) ? y : 0 (floor mod)
|
||||
// d += (x^y < 0 && r) ? -1 : 0 (floor div)
|
||||
// r -= (x ^ y < 0 && r) ? y : 0 (ceil mod)
|
||||
// d -= (x^y < 0 && r) ? -1 : 0 (ceil div)
|
||||
tree result = vect_recog_temp_ssa_var (itype, NULL);
|
||||
return gimple_build_assign (result,
|
||||
(rhs_code == FLOOR_MOD_EXPR
|
||||
|| rhs_code == FLOOR_DIV_EXPR)
|
||||
? PLUS_EXPR
|
||||
: MINUS_EXPR,
|
||||
is_mod ? r : q, extr_cond);
|
||||
}
|
||||
}
|
||||
case ROUND_MOD_EXPR:
|
||||
case ROUND_DIV_EXPR:
|
||||
{
|
||||
if (!target_has_vecop_for_code (BIT_AND_EXPR, vectype)
|
||||
|| !target_has_vecop_for_code (PLUS_EXPR, vectype)
|
||||
|| !expand_vec_cmp_expr_p (vectype, mask_vectype, LT_EXPR)
|
||||
|| !expand_vec_cmp_expr_p (vectype, mask_vectype, GT_EXPR)
|
||||
|| !expand_vec_cond_expr_p (vectype, mask_vectype))
|
||||
return NULL;
|
||||
|
||||
bool is_mod = rhs_code == ROUND_MOD_EXPR;
|
||||
HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
|
||||
unsigned HOST_WIDE_INT abs_d
|
||||
= (d >= 0 ? (unsigned HOST_WIDE_INT) d : -(unsigned HOST_WIDE_INT) d);
|
||||
unsigned HOST_WIDE_INT mid_d = (abs_d - 1) >> 1;
|
||||
if (!unsigned_p)
|
||||
{
|
||||
// check availibility of abs expression for vector
|
||||
if (!target_has_vecop_for_code (ABS_EXPR, vectype))
|
||||
return NULL;
|
||||
// abs (r)
|
||||
tree abs_r = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (abs_r, ABS_EXPR, r);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// abs (r) > (abs (y-1) >> 1)
|
||||
tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
|
||||
def_stmt = gimple_build_assign (round_p, GT_EXPR, abs_r,
|
||||
build_int_cst (itype, mid_d));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
|
||||
itype);
|
||||
|
||||
// x ^ y
|
||||
tree cond_reg = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (cond_reg, BIT_XOR_EXPR, oprnd0, oprnd1);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
// x ^ y < 0
|
||||
bool_cond = vect_recog_temp_ssa_var (boolean_type_node, NULL);
|
||||
def_stmt = gimple_build_assign (bool_cond, LT_EXPR, cond_reg,
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
|
||||
itype);
|
||||
|
||||
// x ^ y < 0 ? y : -y (mod)
|
||||
// x ^ y < 0 ? -1 : 1 (div)
|
||||
tree val1 = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (val1, COND_EXPR, bool_cond,
|
||||
build_int_cst (itype, is_mod ? d : -1),
|
||||
build_int_cst (itype, is_mod ? -d : 1));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
int precision = TYPE_PRECISION (itype);
|
||||
wide_int wmask = wi::mask (precision, false, precision);
|
||||
|
||||
// abs (r) > (abs (y-1) >> 1) ? 0xffffffff : 0
|
||||
tree val2 = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (val2, COND_EXPR, round_p,
|
||||
wide_int_to_tree (itype, wmask),
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
tree fval = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt = gimple_build_assign (fval, BIT_AND_EXPR, val1, val2);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
tree result = vect_recog_temp_ssa_var (itype, NULL);
|
||||
return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
|
||||
fval);
|
||||
}
|
||||
else
|
||||
{
|
||||
// r > (y-1 >> 1)
|
||||
tree round_p = vect_recog_temp_ssa_var (boolean_type_node, NULL);
|
||||
def_stmt = gimple_build_assign (round_p, GT_EXPR, r,
|
||||
build_int_cst (itype, mid_d));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, mask_vectype,
|
||||
itype);
|
||||
|
||||
// (r > (y-1)>>1) ? -d : 1
|
||||
tree val2 = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (val2, COND_EXPR, round_p,
|
||||
build_int_cst (itype, is_mod ? -d : 1),
|
||||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
tree result = vect_recog_temp_ssa_var (itype, NULL);
|
||||
return gimple_build_assign (result, PLUS_EXPR, is_mod ? r : q,
|
||||
val2);
|
||||
}
|
||||
}
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Detect a signed division by a constant that wouldn't be
|
||||
otherwise vectorized:
|
||||
|
||||
|
@ -4882,7 +5157,8 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
{
|
||||
gimple *last_stmt = stmt_vinfo->stmt;
|
||||
tree oprnd0, oprnd1, vectype, itype, cond;
|
||||
gimple *pattern_stmt, *def_stmt;
|
||||
gimple *pattern_stmt = NULL;
|
||||
gimple *def_stmt = NULL;
|
||||
enum tree_code rhs_code;
|
||||
optab optab;
|
||||
tree q, cst;
|
||||
|
@ -4899,6 +5175,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
case TRUNC_DIV_EXPR:
|
||||
case EXACT_DIV_EXPR:
|
||||
case TRUNC_MOD_EXPR:
|
||||
case FLOOR_MOD_EXPR:
|
||||
case FLOOR_DIV_EXPR:
|
||||
case CEIL_MOD_EXPR:
|
||||
case CEIL_DIV_EXPR:
|
||||
case ROUND_MOD_EXPR:
|
||||
case ROUND_DIV_EXPR:
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
|
@ -4930,9 +5212,16 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
}
|
||||
|
||||
prec = TYPE_PRECISION (itype);
|
||||
|
||||
bool is_flclrd_moddiv_p
|
||||
= rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR
|
||||
|| rhs_code == CEIL_MOD_EXPR || rhs_code == CEIL_DIV_EXPR
|
||||
|| rhs_code == ROUND_MOD_EXPR || rhs_code == ROUND_DIV_EXPR;
|
||||
if (integer_pow2p (oprnd1))
|
||||
{
|
||||
if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
|
||||
if ((TYPE_UNSIGNED (itype)
|
||||
&& (rhs_code == FLOOR_MOD_EXPR || rhs_code == FLOOR_DIV_EXPR))
|
||||
|| tree_int_cst_sgn (oprnd1) != 1)
|
||||
return NULL;
|
||||
|
||||
/* Pattern detected. */
|
||||
|
@ -4949,18 +5238,27 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
tree var_div = vect_recog_temp_ssa_var (itype, NULL);
|
||||
gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
|
||||
gimple_call_set_lhs (div_stmt, var_div);
|
||||
|
||||
if (rhs_code == TRUNC_MOD_EXPR)
|
||||
if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
|
||||
{
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
|
||||
tree t1 = vect_recog_temp_ssa_var (itype, NULL);
|
||||
def_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
LSHIFT_EXPR, var_div, shift);
|
||||
= gimple_build_assign (t1, LSHIFT_EXPR, var_div, shift);
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
pattern_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
MINUS_EXPR, oprnd0,
|
||||
gimple_assign_lhs (def_stmt));
|
||||
MINUS_EXPR, oprnd0, t1);
|
||||
if (is_flclrd_moddiv_p)
|
||||
{
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
|
||||
pattern_stmt
|
||||
= add_code_for_floorceilround_divmod (vectype, vinfo,
|
||||
stmt_vinfo, rhs_code,
|
||||
var_div, t1, oprnd0,
|
||||
oprnd1, itype);
|
||||
if (pattern_stmt == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
pattern_stmt = div_stmt;
|
||||
|
@ -4974,8 +5272,12 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
build_int_cst (itype, 0));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt,
|
||||
truth_type_for (vectype), itype);
|
||||
tree div_result = NULL_TREE;
|
||||
if (rhs_code == TRUNC_DIV_EXPR
|
||||
|| rhs_code == EXACT_DIV_EXPR)
|
||||
|| rhs_code == EXACT_DIV_EXPR
|
||||
|| rhs_code == FLOOR_DIV_EXPR
|
||||
|| rhs_code == CEIL_DIV_EXPR
|
||||
|| rhs_code == ROUND_DIV_EXPR)
|
||||
{
|
||||
tree var = vect_recog_temp_ssa_var (itype, NULL);
|
||||
tree shift;
|
||||
|
@ -4992,12 +5294,17 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
shift = build_int_cst (itype, tree_log2 (oprnd1));
|
||||
div_result = vect_recog_temp_ssa_var (itype, NULL);
|
||||
pattern_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
RSHIFT_EXPR, var, shift);
|
||||
= gimple_build_assign (div_result, RSHIFT_EXPR, var, shift);
|
||||
}
|
||||
else
|
||||
if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
|
||||
{
|
||||
if (rhs_code == FLOOR_DIV_EXPR
|
||||
|| rhs_code == CEIL_DIV_EXPR
|
||||
|| rhs_code == ROUND_DIV_EXPR)
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
|
||||
|
||||
tree signmask;
|
||||
if (compare_tree_int (oprnd1, 2) == 0)
|
||||
{
|
||||
|
@ -5042,10 +5349,21 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
build_int_cst (itype, 1)));
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
|
||||
|
||||
tree r = vect_recog_temp_ssa_var (itype, NULL);
|
||||
pattern_stmt
|
||||
= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
|
||||
MINUS_EXPR, gimple_assign_lhs (def_stmt),
|
||||
= gimple_build_assign (r, MINUS_EXPR, gimple_assign_lhs (def_stmt),
|
||||
signmask);
|
||||
if (is_flclrd_moddiv_p)
|
||||
{
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
|
||||
pattern_stmt
|
||||
= add_code_for_floorceilround_divmod (vectype, vinfo,
|
||||
stmt_vinfo, rhs_code,
|
||||
div_result, r, oprnd0,
|
||||
oprnd1, itype);
|
||||
if (pattern_stmt == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return pattern_stmt;
|
||||
|
@ -5352,7 +5670,7 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
}
|
||||
}
|
||||
|
||||
if (rhs_code == TRUNC_MOD_EXPR)
|
||||
if (rhs_code == TRUNC_MOD_EXPR || is_flclrd_moddiv_p)
|
||||
{
|
||||
tree r, t1;
|
||||
|
||||
|
@ -5367,6 +5685,17 @@ vect_recog_divmod_pattern (vec_info *vinfo,
|
|||
|
||||
r = vect_recog_temp_ssa_var (itype, NULL);
|
||||
pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
|
||||
|
||||
if (is_flclrd_moddiv_p)
|
||||
{
|
||||
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
|
||||
pattern_stmt
|
||||
= add_code_for_floorceilround_divmod (vectype, vinfo, stmt_vinfo,
|
||||
rhs_code, q, r, oprnd0, oprnd1,
|
||||
itype);
|
||||
if (pattern_stmt == NULL)
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pattern detected. */
|
||||
|
|
Loading…
Reference in New Issue