mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/79920 (Incorrect floating point results when compiling with -O3)
2017-03-08  Richard Biener  <rguenther@suse.de>

PR tree-optimization/79920
* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline with ncopies == 1 to ...
(vect_transform_slp_perm_load): ... here.  Properly compute all element loads by iterating VF times over the group.  Do not handle ncopies (computed in a broken way) in vect_create_mask_and_perm.
* gcc.dg/vect/pr79920.c: New testcase.

From-SVN: r245968
This commit is contained in:
parent
4b48e88382
commit
61fdfd8c51
|
|
@ -1,3 +1,13 @@
|
||||||
|
2017-03-08 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/79920
|
||||||
|
* tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
|
||||||
|
with ncopies == 1 to ...
|
||||||
|
(vect_transform_slp_perm_load): ... here. Properly compute
|
||||||
|
all element loads by iterating VF times over the group. Do
|
||||||
|
not handle ncopies (computed in a broken way) in
|
||||||
|
vect_create_mask_and_perm.
|
||||||
|
|
||||||
2017-03-08 Jakub Jelinek <jakub@redhat.com>
|
2017-03-08 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
PR sanitizer/79904
|
PR sanitizer/79904
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
2017-03-08 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/79920
|
||||||
|
* gcc.dg/vect/pr79920.c: New testcase.
|
||||||
|
|
||||||
2017-03-08 Jakub Jelinek <jakub@redhat.com>
|
2017-03-08 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
PR sanitizer/79904
|
PR sanitizer/79904
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
/* { dg-do run } */
|
||||||
|
/* { dg-additional-options "-O3" } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
double __attribute__((noinline,noclone))
|
||||||
|
compute_integral (double w_1[18])
|
||||||
|
{
|
||||||
|
double A = 0;
|
||||||
|
double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
|
||||||
|
{0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
|
||||||
|
double t43[2] = {0.0, 0.0};
|
||||||
|
double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}};
|
||||||
|
double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}};
|
||||||
|
|
||||||
|
for (int ip_1 = 0; ip_1 < 2; ++ip_1)
|
||||||
|
{
|
||||||
|
for (int i_0 = 0; i_0 < 6; ++i_0)
|
||||||
|
t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
|
||||||
|
+ (w_1[i_0*3+2] * t32[ip_1][2]));
|
||||||
|
t43[ip_1] = 2.0;
|
||||||
|
}
|
||||||
|
for (int i_0 = 0; i_0 < 6; ++i_0)
|
||||||
|
A += t43[1]*t33[1][i_0];
|
||||||
|
return A;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
check_vect ();
|
||||||
|
|
||||||
|
double w_1[18] = {0., 1.0, 1.0,
|
||||||
|
0., 1.0, 1.0,
|
||||||
|
0., 1.0, 1.0,
|
||||||
|
0., 1.0, 1.0,
|
||||||
|
0., 1.0, 1.0,
|
||||||
|
0., 1.0, 1.0};
|
||||||
|
double A = compute_integral(w_1);
|
||||||
|
if (A != 12.0)
|
||||||
|
__builtin_abort ();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */
|
||||||
|
|
@ -3379,66 +3379,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
|
|
||||||
building a vector of type MASK_TYPE from it) and two input vectors placed in
|
|
||||||
DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
|
|
||||||
shifting by STRIDE elements of DR_CHAIN for every copy.
|
|
||||||
(STRIDE is the number of vectorized stmts for NODE divided by the number of
|
|
||||||
copies).
|
|
||||||
VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
|
|
||||||
the created stmts must be inserted. */
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
vect_create_mask_and_perm (gimple *stmt,
|
|
||||||
tree mask, int first_vec_indx, int second_vec_indx,
|
|
||||||
gimple_stmt_iterator *gsi, slp_tree node,
|
|
||||||
tree vectype, vec<tree> dr_chain,
|
|
||||||
int ncopies, int vect_stmts_counter)
|
|
||||||
{
|
|
||||||
tree perm_dest;
|
|
||||||
gimple *perm_stmt = NULL;
|
|
||||||
int i, stride_in, stride_out;
|
|
||||||
tree first_vec, second_vec, data_ref;
|
|
||||||
|
|
||||||
stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
|
|
||||||
stride_in = dr_chain.length () / ncopies;
|
|
||||||
|
|
||||||
/* Initialize the vect stmts of NODE to properly insert the generated
|
|
||||||
stmts later. */
|
|
||||||
for (i = SLP_TREE_VEC_STMTS (node).length ();
|
|
||||||
i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
|
|
||||||
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
|
|
||||||
|
|
||||||
perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
|
|
||||||
for (i = 0; i < ncopies; i++)
|
|
||||||
{
|
|
||||||
first_vec = dr_chain[first_vec_indx];
|
|
||||||
second_vec = dr_chain[second_vec_indx];
|
|
||||||
|
|
||||||
/* Generate the permute statement if necessary. */
|
|
||||||
if (mask)
|
|
||||||
{
|
|
||||||
perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
|
|
||||||
first_vec, second_vec, mask);
|
|
||||||
data_ref = make_ssa_name (perm_dest, perm_stmt);
|
|
||||||
gimple_set_lhs (perm_stmt, data_ref);
|
|
||||||
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
/* If mask was NULL_TREE generate the requested identity transform. */
|
|
||||||
perm_stmt = SSA_NAME_DEF_STMT (first_vec);
|
|
||||||
|
|
||||||
/* Store the vector statement in NODE. */
|
|
||||||
SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
|
|
||||||
= perm_stmt;
|
|
||||||
|
|
||||||
first_vec_indx += stride_in;
|
|
||||||
second_vec_indx += stride_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Generate vector permute statements from a list of loads in DR_CHAIN.
|
/* Generate vector permute statements from a list of loads in DR_CHAIN.
|
||||||
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
|
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
|
||||||
permute statements for the SLP node NODE of the SLP instance
|
permute statements for the SLP node NODE of the SLP instance
|
||||||
|
|
@ -3456,7 +3396,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||||
int nunits, vec_index = 0;
|
int nunits, vec_index = 0;
|
||||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||||
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
||||||
int unroll_factor, mask_element, ncopies;
|
int mask_element;
|
||||||
unsigned char *mask;
|
unsigned char *mask;
|
||||||
machine_mode mode;
|
machine_mode mode;
|
||||||
|
|
||||||
|
|
@ -3474,11 +3414,13 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||||
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
||||||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
mask = XALLOCAVEC (unsigned char, nunits);
|
mask = XALLOCAVEC (unsigned char, nunits);
|
||||||
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
|
||||||
|
|
||||||
/* Number of copies is determined by the final vectorization factor
|
/* Initialize the vect stmts of NODE to properly insert the generated
|
||||||
relatively to SLP_NODE_INSTANCE unrolling factor. */
|
stmts later. */
|
||||||
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
if (! analyze_only)
|
||||||
|
for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
|
||||||
|
i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
|
||||||
|
SLP_TREE_VEC_STMTS (node).quick_push (NULL);
|
||||||
|
|
||||||
/* Generate permutation masks for every NODE. Number of masks for each NODE
|
/* Generate permutation masks for every NODE. Number of masks for each NODE
|
||||||
is equal to GROUP_SIZE.
|
is equal to GROUP_SIZE.
|
||||||
|
|
@ -3505,7 +3447,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||||
bool noop_p = true;
|
bool noop_p = true;
|
||||||
*n_perms = 0;
|
*n_perms = 0;
|
||||||
|
|
||||||
for (int j = 0; j < unroll_factor; j++)
|
for (int j = 0; j < vf; j++)
|
||||||
{
|
{
|
||||||
for (int k = 0; k < group_size; k++)
|
for (int k = 0; k < group_size; k++)
|
||||||
{
|
{
|
||||||
|
|
@ -3578,10 +3520,30 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
|
||||||
|
|
||||||
if (second_vec_index == -1)
|
if (second_vec_index == -1)
|
||||||
second_vec_index = first_vec_index;
|
second_vec_index = first_vec_index;
|
||||||
vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
|
|
||||||
second_vec_index,
|
/* Generate the permute statement if necessary. */
|
||||||
gsi, node, vectype, dr_chain,
|
tree first_vec = dr_chain[first_vec_index];
|
||||||
ncopies, vect_stmts_counter++);
|
tree second_vec = dr_chain[second_vec_index];
|
||||||
|
gimple *perm_stmt;
|
||||||
|
if (! noop_p)
|
||||||
|
{
|
||||||
|
tree perm_dest
|
||||||
|
= vect_create_destination_var (gimple_assign_lhs (stmt),
|
||||||
|
vectype);
|
||||||
|
perm_dest = make_ssa_name (perm_dest);
|
||||||
|
perm_stmt = gimple_build_assign (perm_dest,
|
||||||
|
VEC_PERM_EXPR,
|
||||||
|
first_vec, second_vec,
|
||||||
|
mask_vec);
|
||||||
|
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* If mask was NULL_TREE generate the requested
|
||||||
|
identity transform. */
|
||||||
|
perm_stmt = SSA_NAME_DEF_STMT (first_vec);
|
||||||
|
|
||||||
|
/* Store the vector statement in NODE. */
|
||||||
|
SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
|
||||||
}
|
}
|
||||||
|
|
||||||
index = 0;
|
index = 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue