mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])
2018-10-26 Richard Biener <rguenther@suse.de> PR tree-optimization/87105 * tree-vectorizer.h (_slp_tree::refcnt): New member. * tree-vect-slp.c (vect_free_slp_tree): Decrement and honor refcnt. (vect_create_new_slp_node): Initialize refcnt to one. (bst_traits): Move. (scalar_stmts_set_t, bst_fail): Remove. (vect_build_slp_tree_2): Add bst_map argument and adjust calls. (vect_build_slp_tree): Add bst_map argument and lookup already created SLP nodes. (vect_print_slp_tree): Handle a SLP graph, print SLP node addresses. (vect_slp_rearrange_stmts): Handle a SLP graph. (vect_analyze_slp_instance): Adjust and free SLP nodes from the CSE map. Fix indenting. (vect_schedule_slp_instance): Add short-cut. * g++.dg/vect/slp-pr87105.cc: Adjust. * gcc.dg/torture/20181024-1.c: New testcase. * g++.dg/opt/20181025-1.C: Likewise. From-SVN: r265522
This commit is contained in:
parent
74ca1c01d0
commit
a1f072e244
|
|
@ -1,3 +1,22 @@
|
||||||
|
2018-10-26 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/87105
|
||||||
|
* tree-vectorizer.h (_slp_tree::refcnt): New member.
|
||||||
|
* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
|
||||||
|
refcnt.
|
||||||
|
(vect_create_new_slp_node): Initialize refcnt to one.
|
||||||
|
(bst_traits): Move.
|
||||||
|
(scalar_stmts_set_t, bst_fail): Remove.
|
||||||
|
(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
|
||||||
|
(vect_build_slp_tree): Add bst_map argument and lookup
|
||||||
|
already created SLP nodes.
|
||||||
|
(vect_print_slp_tree): Handle a SLP graph, print SLP node
|
||||||
|
addresses.
|
||||||
|
(vect_slp_rearrange_stmts): Handle a SLP graph.
|
||||||
|
(vect_analyze_slp_instance): Adjust and free SLP nodes from
|
||||||
|
the CSE map. Fix indenting.
|
||||||
|
(vect_schedule_slp_instance): Add short-cut.
|
||||||
|
|
||||||
2018-10-26 Martin Liska <mliska@suse.cz>
|
2018-10-26 Martin Liska <mliska@suse.cz>
|
||||||
|
|
||||||
PR testsuite/86158
|
PR testsuite/86158
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,10 @@
|
||||||
|
2018-10-26 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/87105
|
||||||
|
* g++.dg/vect/slp-pr87105.cc: Adjust.
|
||||||
|
* gcc.dg/torture/20181024-1.c: New testcase.
|
||||||
|
* g++.dg/opt/20181025-1.C: Likewise.
|
||||||
|
|
||||||
2018-10-26 Richard Biener <rguenther@suse.de>
|
2018-10-26 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
PR testsuite/87754
|
PR testsuite/87754
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
// { dg-do compile }
|
||||||
|
// { dg-options "-Ofast" }
|
||||||
|
|
||||||
|
template <typename Number>
|
||||||
|
class Vector {
|
||||||
|
typedef Number value_type;
|
||||||
|
typedef const value_type *const_iterator;
|
||||||
|
Number norm_sqr () const;
|
||||||
|
const_iterator begin () const;
|
||||||
|
unsigned int dim;
|
||||||
|
};
|
||||||
|
template <typename Number>
|
||||||
|
static inline Number
|
||||||
|
local_sqr (const Number x)
|
||||||
|
{
|
||||||
|
return x*x;
|
||||||
|
}
|
||||||
|
template <typename Number>
|
||||||
|
Number
|
||||||
|
Vector<Number>::norm_sqr () const
|
||||||
|
{
|
||||||
|
Number sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
||||||
|
const_iterator ptr = begin(), eptr = ptr + (dim/4)*4;
|
||||||
|
while (ptr!=eptr)
|
||||||
|
{
|
||||||
|
sum0 += ::local_sqr(*ptr++);
|
||||||
|
sum1 += ::local_sqr(*ptr++);
|
||||||
|
}
|
||||||
|
return sum0+sum1+sum2+sum3;
|
||||||
|
}
|
||||||
|
template class Vector<double>;
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
// { dg-require-effective-target c++11 }
|
// { dg-require-effective-target c++11 }
|
||||||
// { dg-require-effective-target vect_double }
|
// { dg-require-effective-target vect_double }
|
||||||
// For MIN/MAX recognition
|
// For MIN/MAX recognition
|
||||||
// { dg-additional-options "-ffast-math -fvect-cost-model" }
|
// { dg-additional-options "-ffast-math" }
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
|
||||||
|
|
||||||
// We should have if-converted everything down to straight-line code
|
// We should have if-converted everything down to straight-line code
|
||||||
// { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
|
// { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
|
||||||
// We fail to elide an earlier store which makes us not handle a later
|
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } }
|
||||||
// duplicate one for vectorization.
|
// It's a bit awkward to detect that all stores were vectorized but the
|
||||||
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } }
|
// following more or less does the trick
|
||||||
|
// { dg-final { scan-tree-dump "vect_iftmp\[^\r\n\]* = MIN" "slp2" } }
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
C = 0, N, S, E, W, T, B, NE, NW, SE, SW, NT, NB, ST, SB, ET, EB, WT, WB, FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
|
||||||
|
typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES];
|
||||||
|
void foo( LBM_Grid srcGrid )
|
||||||
|
{
|
||||||
|
double ux , uy , uz , rho , ux1, uy1, uz1, rho1, ux2, uy2, uz2, rho2, u2, px, py;
|
||||||
|
int i;
|
||||||
|
for( i = 0;
|
||||||
|
i < (N_CELL_ENTRIES*( 100*100));
|
||||||
|
i += N_CELL_ENTRIES )
|
||||||
|
{
|
||||||
|
rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)])
|
||||||
|
+ ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]);
|
||||||
|
rho = 2.0*rho1 - rho2;
|
||||||
|
px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0;
|
||||||
|
uz = 0.01 * (1.0-px*px) * (1.0-py*py);
|
||||||
|
u2 = 1.5 * (ux*ux + uy*uy + uz*uz);
|
||||||
|
(((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2);
|
||||||
|
(((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p)
|
||||||
int i;
|
int i;
|
||||||
slp_tree child;
|
slp_tree child;
|
||||||
|
|
||||||
|
if (--node->refcnt != 0)
|
||||||
|
return;
|
||||||
|
|
||||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||||
vect_free_slp_tree (child, final_p);
|
vect_free_slp_tree (child, final_p);
|
||||||
|
|
||||||
|
|
@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p)
|
||||||
free (node);
|
free (node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Free the memory allocated for the SLP instance. FINAL_P is true if we
|
/* Free the memory allocated for the SLP instance. FINAL_P is true if we
|
||||||
have vectorized the instance or if we have made a final decision not
|
have vectorized the instance or if we have made a final decision not
|
||||||
to vectorize the statements in any way. */
|
to vectorize the statements in any way. */
|
||||||
|
|
@ -126,6 +128,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
|
||||||
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
|
||||||
SLP_TREE_TWO_OPERATORS (node) = false;
|
SLP_TREE_TWO_OPERATORS (node) = false;
|
||||||
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
|
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
|
||||||
|
node->refcnt = 1;
|
||||||
|
|
||||||
unsigned i;
|
unsigned i;
|
||||||
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
|
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
|
||||||
|
|
@ -1021,9 +1024,6 @@ bst_traits::equal (value_type existing, value_type candidate)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
|
|
||||||
static scalar_stmts_set_t *bst_fail;
|
|
||||||
|
|
||||||
typedef hash_map <vec <gimple *>, slp_tree,
|
typedef hash_map <vec <gimple *>, slp_tree,
|
||||||
simple_hashmap_traits <bst_traits, slp_tree> >
|
simple_hashmap_traits <bst_traits, slp_tree> >
|
||||||
scalar_stmts_to_slp_tree_map_t;
|
scalar_stmts_to_slp_tree_map_t;
|
||||||
|
|
@ -1034,30 +1034,33 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
poly_uint64 *max_nunits,
|
poly_uint64 *max_nunits,
|
||||||
vec<slp_tree> *loads,
|
vec<slp_tree> *loads,
|
||||||
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
||||||
unsigned max_tree_size);
|
unsigned max_tree_size,
|
||||||
|
scalar_stmts_to_slp_tree_map_t *bst_map);
|
||||||
|
|
||||||
static slp_tree
|
static slp_tree
|
||||||
vect_build_slp_tree (vec_info *vinfo,
|
vect_build_slp_tree (vec_info *vinfo,
|
||||||
vec<stmt_vec_info> stmts, unsigned int group_size,
|
vec<stmt_vec_info> stmts, unsigned int group_size,
|
||||||
poly_uint64 *max_nunits, vec<slp_tree> *loads,
|
poly_uint64 *max_nunits, vec<slp_tree> *loads,
|
||||||
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
||||||
unsigned max_tree_size)
|
unsigned max_tree_size,
|
||||||
|
scalar_stmts_to_slp_tree_map_t *bst_map)
|
||||||
{
|
{
|
||||||
if (bst_fail->contains (stmts))
|
if (slp_tree *leader = bst_map->get (stmts))
|
||||||
return NULL;
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
|
||||||
|
*leader ? "" : "failed ", *leader);
|
||||||
|
if (*leader)
|
||||||
|
(*leader)->refcnt++;
|
||||||
|
return *leader;
|
||||||
|
}
|
||||||
slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
|
slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
|
||||||
loads, matches, npermutes, tree_size,
|
loads, matches, npermutes, tree_size,
|
||||||
max_tree_size);
|
max_tree_size, bst_map);
|
||||||
/* When SLP build fails for stmts record this, otherwise SLP build
|
/* Keep a reference for the bst_map use. */
|
||||||
can be exponential in time when we allow to construct parts from
|
if (res)
|
||||||
scalars, see PR81723. */
|
res->refcnt++;
|
||||||
if (! res)
|
bst_map->put (stmts.copy (), res);
|
||||||
{
|
|
||||||
vec <stmt_vec_info> x;
|
|
||||||
x.create (stmts.length ());
|
|
||||||
x.splice (stmts);
|
|
||||||
bst_fail->add (x);
|
|
||||||
}
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1074,7 +1077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
poly_uint64 *max_nunits,
|
poly_uint64 *max_nunits,
|
||||||
vec<slp_tree> *loads,
|
vec<slp_tree> *loads,
|
||||||
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
bool *matches, unsigned *npermutes, unsigned *tree_size,
|
||||||
unsigned max_tree_size)
|
unsigned max_tree_size,
|
||||||
|
scalar_stmts_to_slp_tree_map_t *bst_map)
|
||||||
{
|
{
|
||||||
unsigned nops, i, this_tree_size = 0;
|
unsigned nops, i, this_tree_size = 0;
|
||||||
poly_uint64 this_max_nunits = *max_nunits;
|
poly_uint64 this_max_nunits = *max_nunits;
|
||||||
|
|
@ -1205,7 +1209,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
group_size, &this_max_nunits,
|
group_size, &this_max_nunits,
|
||||||
&this_loads, matches, npermutes,
|
&this_loads, matches, npermutes,
|
||||||
&this_tree_size,
|
&this_tree_size,
|
||||||
max_tree_size)) != NULL)
|
max_tree_size, bst_map)) != NULL)
|
||||||
{
|
{
|
||||||
/* If we have all children of child built up from scalars then just
|
/* If we have all children of child built up from scalars then just
|
||||||
throw that away and build it up this node from scalars. */
|
throw that away and build it up this node from scalars. */
|
||||||
|
|
@ -1348,7 +1352,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
group_size, &this_max_nunits,
|
group_size, &this_max_nunits,
|
||||||
&this_loads, tem, npermutes,
|
&this_loads, tem, npermutes,
|
||||||
&this_tree_size,
|
&this_tree_size,
|
||||||
max_tree_size)) != NULL)
|
max_tree_size, bst_map)) != NULL)
|
||||||
{
|
{
|
||||||
/* ... so if successful we can apply the operand swapping
|
/* ... so if successful we can apply the operand swapping
|
||||||
to the GIMPLE IL. This is necessary because for example
|
to the GIMPLE IL. This is necessary because for example
|
||||||
|
|
@ -1441,21 +1445,37 @@ fail:
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
|
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
|
||||||
slp_tree node)
|
slp_tree node, hash_set<slp_tree> &visited)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
stmt_vec_info stmt_info;
|
stmt_vec_info stmt_info;
|
||||||
slp_tree child;
|
slp_tree child;
|
||||||
|
|
||||||
dump_printf_loc (dump_kind, loc, "node%s\n",
|
if (visited.add (node))
|
||||||
|
return;
|
||||||
|
|
||||||
|
dump_printf_loc (dump_kind, loc, "node%s %p\n",
|
||||||
SLP_TREE_DEF_TYPE (node) != vect_internal_def
|
SLP_TREE_DEF_TYPE (node) != vect_internal_def
|
||||||
? " (external)" : "");
|
? " (external)" : "", node);
|
||||||
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
|
||||||
dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
|
dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
|
||||||
|
if (SLP_TREE_CHILDREN (node).is_empty ())
|
||||||
|
return;
|
||||||
|
dump_printf_loc (dump_kind, loc, "\tchildren");
|
||||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||||
vect_print_slp_tree (dump_kind, loc, child);
|
dump_printf (dump_kind, " %p", (void *)child);
|
||||||
|
dump_printf (dump_kind, "\n");
|
||||||
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||||
|
vect_print_slp_tree (dump_kind, loc, child, visited);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
|
||||||
|
slp_tree node)
|
||||||
|
{
|
||||||
|
hash_set<slp_tree> visited;
|
||||||
|
vect_print_slp_tree (dump_kind, loc, node, visited);
|
||||||
|
}
|
||||||
|
|
||||||
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
|
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
|
||||||
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
|
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
|
||||||
|
|
@ -1509,15 +1529,19 @@ vect_mark_slp_stmts_relevant (slp_tree node)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
|
||||||
vec<unsigned> permutation)
|
vec<unsigned> permutation,
|
||||||
|
hash_set<slp_tree> &visited)
|
||||||
{
|
{
|
||||||
stmt_vec_info stmt_info;
|
stmt_vec_info stmt_info;
|
||||||
vec<stmt_vec_info> tmp_stmts;
|
vec<stmt_vec_info> tmp_stmts;
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
slp_tree child;
|
slp_tree child;
|
||||||
|
|
||||||
|
if (visited.add (node))
|
||||||
|
return;
|
||||||
|
|
||||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||||
vect_slp_rearrange_stmts (child, group_size, permutation);
|
vect_slp_rearrange_stmts (child, group_size, permutation, visited);
|
||||||
|
|
||||||
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
|
||||||
tmp_stmts.create (group_size);
|
tmp_stmts.create (group_size);
|
||||||
|
|
@ -1578,8 +1602,9 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
|
||||||
statements in the nodes is not important unless they are memory
|
statements in the nodes is not important unless they are memory
|
||||||
accesses, we can rearrange the statements in all the nodes
|
accesses, we can rearrange the statements in all the nodes
|
||||||
according to the order of the loads. */
|
according to the order of the loads. */
|
||||||
|
hash_set<slp_tree> visited;
|
||||||
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
|
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
|
||||||
node->load_permutation);
|
node->load_permutation, visited);
|
||||||
|
|
||||||
/* We are done, no actual permutations need to be generated. */
|
/* We are done, no actual permutations need to be generated. */
|
||||||
poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
|
poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
|
||||||
|
|
@ -1889,12 +1914,18 @@ vect_analyze_slp_instance (vec_info *vinfo,
|
||||||
/* Build the tree for the SLP instance. */
|
/* Build the tree for the SLP instance. */
|
||||||
bool *matches = XALLOCAVEC (bool, group_size);
|
bool *matches = XALLOCAVEC (bool, group_size);
|
||||||
unsigned npermutes = 0;
|
unsigned npermutes = 0;
|
||||||
bst_fail = new scalar_stmts_set_t ();
|
scalar_stmts_to_slp_tree_map_t *bst_map
|
||||||
|
= new scalar_stmts_to_slp_tree_map_t ();
|
||||||
poly_uint64 max_nunits = nunits;
|
poly_uint64 max_nunits = nunits;
|
||||||
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
|
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
|
||||||
&max_nunits, &loads, matches, &npermutes,
|
&max_nunits, &loads, matches, &npermutes,
|
||||||
NULL, max_tree_size);
|
NULL, max_tree_size, bst_map);
|
||||||
delete bst_fail;
|
/* The map keeps a reference on SLP nodes built, release that. */
|
||||||
|
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
|
||||||
|
it != bst_map->end (); ++it)
|
||||||
|
if ((*it).second)
|
||||||
|
vect_free_slp_tree ((*it).second, false);
|
||||||
|
delete bst_map;
|
||||||
if (node != NULL)
|
if (node != NULL)
|
||||||
{
|
{
|
||||||
/* Calculate the unrolling factor based on the smallest type. */
|
/* Calculate the unrolling factor based on the smallest type. */
|
||||||
|
|
@ -3749,8 +3780,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
||||||
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/* See if we have already vectorized the node in the graph of the
|
||||||
|
SLP instance. */
|
||||||
|
if (SLP_TREE_VEC_STMTS (node).exists ())
|
||||||
|
return;
|
||||||
|
|
||||||
/* See if we have already vectorized the same set of stmts and reuse their
|
/* See if we have already vectorized the same set of stmts and reuse their
|
||||||
vectorized stmts. */
|
vectorized stmts across instances. */
|
||||||
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
|
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
|
||||||
{
|
{
|
||||||
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
|
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
|
||||||
|
|
@ -3778,7 +3814,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
||||||
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
||||||
|
|
||||||
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
|
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
|
||||||
if (!SLP_TREE_VEC_STMTS (node).exists ())
|
|
||||||
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
|
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
|
||||||
|
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,8 @@ struct _slp_tree {
|
||||||
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
|
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
|
||||||
divided by vector size. */
|
divided by vector size. */
|
||||||
unsigned int vec_stmts_size;
|
unsigned int vec_stmts_size;
|
||||||
|
/* Reference count in the SLP graph. */
|
||||||
|
unsigned int refcnt;
|
||||||
/* Whether the scalar computations use two different operators. */
|
/* Whether the scalar computations use two different operators. */
|
||||||
bool two_operators;
|
bool two_operators;
|
||||||
/* The DEF type of this node. */
|
/* The DEF type of this node. */
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue