mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/58626 (possible array wrong code bug)
2013-10-25 Richard Biener <rguenther@suse.de> PR tree-optimization/58626 * tree-loop-distribution.c (enum rdg_dep_type): Remove anti_dd, output_dd and input_dd. (struct rdg_edge): Remove level and relation members. (RDGE_LEVEL, RDGE_RELATION): Remove. (dot_rdg_1): Adjust. (create_rdg_edge_for_ddr): Remove. (create_rdg_edges_for_scalar): Adjust. (create_edge_for_control_dependence): Likewise. (create_rdg_edges): Split into ... (create_rdg_flow_edges): ... this (create_rdg_cd_edges): ... and this. (free_rdg): Adjust. (build_rdg): Likewise, do not compute data dependences or add edges for them. (pg_add_dependence_edges): New function. (pgcmp): Likewise. (distribute_loop): First apply all non-dependence based partition mergings. Then compute dependences between partitions and merge and order partitions according to them. * gcc.dg/torture/pr58626.c: New testcase. From-SVN: r204062
This commit is contained in:
parent
5970573cc1
commit
447f322380
|
|
@ -1,3 +1,26 @@
|
||||||
|
2013-10-25 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/58626
|
||||||
|
* tree-loop-distribution.c (enum rdg_dep_type): Remove
|
||||||
|
anti_dd, output_dd and input_dd.
|
||||||
|
(struct rdg_edge): Remove level and relation members.
|
||||||
|
(RDGE_LEVEL, RDGE_RELATION): Remove.
|
||||||
|
(dot_rdg_1): Adjust.
|
||||||
|
(create_rdg_edge_for_ddr): Remove.
|
||||||
|
(create_rdg_edges_for_scalar): Adjust.
|
||||||
|
(create_edge_for_control_dependence): Likewise.
|
||||||
|
(create_rdg_edges): Split into ...
|
||||||
|
(create_rdg_flow_edges): ... this
|
||||||
|
(create_rdg_cd_edges): ... and this.
|
||||||
|
(free_rdg): Adjust.
|
||||||
|
(build_rdg): Likewise, do not compute data dependences or
|
||||||
|
add edges for them.
|
||||||
|
(pg_add_dependence_edges): New function.
|
||||||
|
(pgcmp): Likewise.
|
||||||
|
(distribute_loop): First apply all non-dependence based
|
||||||
|
partition mergings. Then compute dependences between partitions
|
||||||
|
and merge and order partitions according to them.
|
||||||
|
|
||||||
2013-10-25 Eric Botcazou <ebotcazou@adacore.com>
|
2013-10-25 Eric Botcazou <ebotcazou@adacore.com>
|
||||||
|
|
||||||
PR rtl-optimization/58831
|
PR rtl-optimization/58831
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,8 @@
|
||||||
|
2013-10-25 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/58626
|
||||||
|
* gcc.dg/torture/pr58626.c: New testcase.
|
||||||
|
|
||||||
2013-10-25 Paolo Carlini <paolo.carlini@oracle.com>
|
2013-10-25 Paolo Carlini <paolo.carlini@oracle.com>
|
||||||
|
|
||||||
PR c++/54812
|
PR c++/54812
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
/* { dg-do run } */
|
||||||
|
|
||||||
|
extern void abort (void);
|
||||||
|
|
||||||
|
int a[8][6] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
|
||||||
|
int b;
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
for (b = 0; b <= 1; b++) {
|
||||||
|
a[1][3] = 0;
|
||||||
|
int c;
|
||||||
|
for (c = 0; c <= 1; c++) {
|
||||||
|
a[c + 1][b] = a[c + 2][b];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (a[1][1] != 1)
|
||||||
|
abort ();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -96,15 +96,6 @@ enum rdg_dep_type
|
||||||
/* Read After Write (RAW). */
|
/* Read After Write (RAW). */
|
||||||
flow_dd = 'f',
|
flow_dd = 'f',
|
||||||
|
|
||||||
/* Write After Read (WAR). */
|
|
||||||
anti_dd = 'a',
|
|
||||||
|
|
||||||
/* Write After Write (WAW). */
|
|
||||||
output_dd = 'o',
|
|
||||||
|
|
||||||
/* Read After Read (RAR). */
|
|
||||||
input_dd = 'i',
|
|
||||||
|
|
||||||
/* Control dependence (execute conditional on). */
|
/* Control dependence (execute conditional on). */
|
||||||
control_dd = 'c'
|
control_dd = 'c'
|
||||||
};
|
};
|
||||||
|
|
@ -115,19 +106,9 @@ typedef struct rdg_edge
|
||||||
{
|
{
|
||||||
/* Type of the dependence. */
|
/* Type of the dependence. */
|
||||||
enum rdg_dep_type type;
|
enum rdg_dep_type type;
|
||||||
|
|
||||||
/* Levels of the dependence: the depth of the loops that carry the
|
|
||||||
dependence. */
|
|
||||||
unsigned level;
|
|
||||||
|
|
||||||
/* Dependence relation between data dependences, NULL when one of
|
|
||||||
the vertices is a scalar. */
|
|
||||||
ddr_p relation;
|
|
||||||
} *rdg_edge_p;
|
} *rdg_edge_p;
|
||||||
|
|
||||||
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
|
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
|
||||||
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
|
|
||||||
#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
|
|
||||||
|
|
||||||
/* Dump vertex I in RDG to FILE. */
|
/* Dump vertex I in RDG to FILE. */
|
||||||
|
|
||||||
|
|
@ -215,23 +196,11 @@ dot_rdg_1 (FILE *file, struct graph *rdg)
|
||||||
for (e = v->succ; e; e = e->succ_next)
|
for (e = v->succ; e; e = e->succ_next)
|
||||||
switch (RDGE_TYPE (e))
|
switch (RDGE_TYPE (e))
|
||||||
{
|
{
|
||||||
case input_dd:
|
|
||||||
fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case output_dd:
|
|
||||||
fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case flow_dd:
|
case flow_dd:
|
||||||
/* These are the most common dependences: don't print these. */
|
/* These are the most common dependences: don't print these. */
|
||||||
fprintf (file, "%d -> %d \n", i, e->dest);
|
fprintf (file, "%d -> %d \n", i, e->dest);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case anti_dd:
|
|
||||||
fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case control_dd:
|
case control_dd:
|
||||||
fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
|
fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
|
||||||
break;
|
break;
|
||||||
|
|
@ -273,52 +242,6 @@ rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Creates an edge in RDG for each distance vector from DDR. The
|
|
||||||
order that we keep track of in the RDG is the order in which
|
|
||||||
statements have to be executed. */
|
|
||||||
|
|
||||||
static void
|
|
||||||
create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
|
|
||||||
{
|
|
||||||
struct graph_edge *e;
|
|
||||||
int va, vb;
|
|
||||||
data_reference_p dra = DDR_A (ddr);
|
|
||||||
data_reference_p drb = DDR_B (ddr);
|
|
||||||
unsigned level = ddr_dependence_level (ddr);
|
|
||||||
|
|
||||||
/* For non scalar dependences, when the dependence is REVERSED,
|
|
||||||
statement B has to be executed before statement A. */
|
|
||||||
if (level > 0
|
|
||||||
&& !DDR_REVERSED_P (ddr))
|
|
||||||
{
|
|
||||||
data_reference_p tmp = dra;
|
|
||||||
dra = drb;
|
|
||||||
drb = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
|
|
||||||
vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
|
|
||||||
|
|
||||||
if (va < 0 || vb < 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
e = add_edge (rdg, va, vb);
|
|
||||||
e->data = XNEW (struct rdg_edge);
|
|
||||||
|
|
||||||
RDGE_LEVEL (e) = level;
|
|
||||||
RDGE_RELATION (e) = ddr;
|
|
||||||
|
|
||||||
/* Determines the type of the data dependence. */
|
|
||||||
if (DR_IS_READ (dra) && DR_IS_READ (drb))
|
|
||||||
RDGE_TYPE (e) = input_dd;
|
|
||||||
else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
|
|
||||||
RDGE_TYPE (e) = output_dd;
|
|
||||||
else if (DR_IS_WRITE (dra) && DR_IS_READ (drb))
|
|
||||||
RDGE_TYPE (e) = flow_dd;
|
|
||||||
else if (DR_IS_READ (dra) && DR_IS_WRITE (drb))
|
|
||||||
RDGE_TYPE (e) = anti_dd;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
|
/* Creates dependence edges in RDG for all the uses of DEF. IDEF is
|
||||||
the index of DEF in RDG. */
|
the index of DEF in RDG. */
|
||||||
|
|
||||||
|
|
@ -339,7 +262,6 @@ create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
|
||||||
e = add_edge (rdg, idef, use);
|
e = add_edge (rdg, idef, use);
|
||||||
e->data = XNEW (struct rdg_edge);
|
e->data = XNEW (struct rdg_edge);
|
||||||
RDGE_TYPE (e) = flow_dd;
|
RDGE_TYPE (e) = flow_dd;
|
||||||
RDGE_RELATION (e) = NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -366,7 +288,6 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
|
||||||
e = add_edge (rdg, c, v);
|
e = add_edge (rdg, c, v);
|
||||||
e->data = XNEW (struct rdg_edge);
|
e->data = XNEW (struct rdg_edge);
|
||||||
RDGE_TYPE (e) = control_dd;
|
RDGE_TYPE (e) = control_dd;
|
||||||
RDGE_RELATION (e) = NULL;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -374,25 +295,25 @@ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
|
||||||
/* Creates the edges of the reduced dependence graph RDG. */
|
/* Creates the edges of the reduced dependence graph RDG. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
|
create_rdg_flow_edges (struct graph *rdg)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct data_dependence_relation *ddr;
|
|
||||||
def_operand_p def_p;
|
def_operand_p def_p;
|
||||||
ssa_op_iter iter;
|
ssa_op_iter iter;
|
||||||
|
|
||||||
FOR_EACH_VEC_ELT (ddrs, i, ddr)
|
|
||||||
if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
|
|
||||||
create_rdg_edge_for_ddr (rdg, ddr);
|
|
||||||
else
|
|
||||||
free_dependence_relation (ddr);
|
|
||||||
|
|
||||||
for (i = 0; i < rdg->n_vertices; i++)
|
for (i = 0; i < rdg->n_vertices; i++)
|
||||||
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
|
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
|
||||||
iter, SSA_OP_DEF)
|
iter, SSA_OP_DEF)
|
||||||
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
|
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Creates the control dependence edges of the reduced dependence graph RDG. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
if (cd)
|
|
||||||
for (i = 0; i < rdg->n_vertices; i++)
|
for (i = 0; i < rdg->n_vertices; i++)
|
||||||
{
|
{
|
||||||
gimple stmt = RDG_STMT (rdg, i);
|
gimple stmt = RDG_STMT (rdg, i);
|
||||||
|
|
@ -494,10 +415,7 @@ free_rdg (struct graph *rdg)
|
||||||
struct graph_edge *e;
|
struct graph_edge *e;
|
||||||
|
|
||||||
for (e = v->succ; e; e = e->succ_next)
|
for (e = v->succ; e; e = e->succ_next)
|
||||||
{
|
|
||||||
free_dependence_relation (RDGE_RELATION (e));
|
|
||||||
free (e->data);
|
free (e->data);
|
||||||
}
|
|
||||||
|
|
||||||
if (v->data)
|
if (v->data)
|
||||||
{
|
{
|
||||||
|
|
@ -520,7 +438,6 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
|
||||||
struct graph *rdg;
|
struct graph *rdg;
|
||||||
vec<gimple> stmts;
|
vec<gimple> stmts;
|
||||||
vec<data_reference_p> datarefs;
|
vec<data_reference_p> datarefs;
|
||||||
vec<ddr_p> dependence_relations;
|
|
||||||
|
|
||||||
/* Create the RDG vertices from the stmts of the loop nest. */
|
/* Create the RDG vertices from the stmts of the loop nest. */
|
||||||
stmts.create (10);
|
stmts.create (10);
|
||||||
|
|
@ -536,19 +453,10 @@ build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
|
||||||
}
|
}
|
||||||
stmts.release ();
|
stmts.release ();
|
||||||
|
|
||||||
/* Create the RDG edges from the data dependences in the loop nest. */
|
create_rdg_flow_edges (rdg);
|
||||||
dependence_relations.create (100);
|
if (cd)
|
||||||
if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest,
|
create_rdg_cd_edges (rdg, cd);
|
||||||
false)
|
|
||||||
|| !known_dependences_p (dependence_relations))
|
|
||||||
{
|
|
||||||
free_dependence_relations (dependence_relations);
|
|
||||||
datarefs.release ();
|
|
||||||
free_rdg (rdg);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
create_rdg_edges (rdg, dependence_relations, cd);
|
|
||||||
dependence_relations.release ();
|
|
||||||
datarefs.release ();
|
datarefs.release ();
|
||||||
|
|
||||||
return rdg;
|
return rdg;
|
||||||
|
|
@ -1405,6 +1313,70 @@ partition_contains_all_rw (struct graph *rdg,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Compute partition dependence created by the data references in DRS1
|
||||||
|
and DRS2 and modify and return DIR according to that. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
|
||||||
|
vec<data_reference_p> drs1,
|
||||||
|
vec<data_reference_p> drs2)
|
||||||
|
{
|
||||||
|
data_reference_p dr1, dr2;
|
||||||
|
|
||||||
|
/* dependence direction - 0 is no dependence, -1 is back,
|
||||||
|
1 is forth, 2 is both (we can stop then, merging will occur). */
|
||||||
|
for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
|
||||||
|
for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
|
||||||
|
{
|
||||||
|
int this_dir = 1;
|
||||||
|
ddr_p ddr;
|
||||||
|
/* Re-shuffle data-refs to be in dominator order. */
|
||||||
|
if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
|
||||||
|
> rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
|
||||||
|
{
|
||||||
|
data_reference_p tem = dr1;
|
||||||
|
dr1 = dr2;
|
||||||
|
dr2 = tem;
|
||||||
|
this_dir = -this_dir;
|
||||||
|
}
|
||||||
|
ddr = initialize_data_dependence_relation (dr1, dr2, loops);
|
||||||
|
compute_affine_dependence (ddr, loops[0]);
|
||||||
|
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||||
|
this_dir = 2;
|
||||||
|
else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
|
||||||
|
{
|
||||||
|
if (DDR_REVERSED_P (ddr))
|
||||||
|
{
|
||||||
|
data_reference_p tem = dr1;
|
||||||
|
dr1 = dr2;
|
||||||
|
dr2 = tem;
|
||||||
|
this_dir = -this_dir;
|
||||||
|
}
|
||||||
|
/* Known dependences can still be unordered throughout the
|
||||||
|
iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
|
||||||
|
if (DDR_NUM_DIST_VECTS (ddr) == 0)
|
||||||
|
this_dir = 2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
this_dir = 0;
|
||||||
|
free_dependence_relation (ddr);
|
||||||
|
if (dir == 0)
|
||||||
|
dir = this_dir;
|
||||||
|
else if (dir != this_dir)
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
return dir;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compare postorder number of the partition graph vertices V1 and V2. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
pgcmp (const void *v1_, const void *v2_)
|
||||||
|
{
|
||||||
|
const vertex *v1 = (const vertex *)v1_;
|
||||||
|
const vertex *v2 = (const vertex *)v2_;
|
||||||
|
return v2->post - v1->post;
|
||||||
|
}
|
||||||
|
|
||||||
/* Distributes the code from LOOP in such a way that producer
|
/* Distributes the code from LOOP in such a way that producer
|
||||||
statements are placed before consumer statements. Tries to separate
|
statements are placed before consumer statements. Tries to separate
|
||||||
|
|
@ -1421,6 +1393,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
|
||||||
partition_t partition;
|
partition_t partition;
|
||||||
bool any_builtin;
|
bool any_builtin;
|
||||||
int i, nbp;
|
int i, nbp;
|
||||||
|
graph *pg = NULL;
|
||||||
|
int num_sccs = 1;
|
||||||
|
|
||||||
*nb_calls = 0;
|
*nb_calls = 0;
|
||||||
loop_nest.create (3);
|
loop_nest.create (3);
|
||||||
|
|
@ -1455,8 +1429,8 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
|
||||||
any_builtin |= partition_builtin_p (partition);
|
any_builtin |= partition_builtin_p (partition);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we did not detect any builtin but are not asked to apply
|
/* If we are only distributing patterns but did not detect any,
|
||||||
regular loop distribution simply bail out. */
|
simply bail out. */
|
||||||
if (!flag_tree_loop_distribution
|
if (!flag_tree_loop_distribution
|
||||||
&& !any_builtin)
|
&& !any_builtin)
|
||||||
{
|
{
|
||||||
|
|
@ -1464,9 +1438,56 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
|
||||||
goto ldist_done;
|
goto ldist_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If we are only distributing patterns fuse all partitions that
|
||||||
|
were not classified as builtins. This also avoids chopping
|
||||||
|
a loop into pieces, separated by builtin calls. That is, we
|
||||||
|
only want no or a single loop body remaining. */
|
||||||
|
partition_t into;
|
||||||
|
if (!flag_tree_loop_distribution)
|
||||||
|
{
|
||||||
|
for (i = 0; partitions.iterate (i, &into); ++i)
|
||||||
|
if (!partition_builtin_p (into))
|
||||||
|
break;
|
||||||
|
for (++i; partitions.iterate (i, &partition); ++i)
|
||||||
|
if (!partition_builtin_p (partition))
|
||||||
|
{
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
{
|
||||||
|
fprintf (dump_file, "fusing non-builtin partitions\n");
|
||||||
|
dump_bitmap (dump_file, into->stmts);
|
||||||
|
dump_bitmap (dump_file, partition->stmts);
|
||||||
|
}
|
||||||
|
partition_merge_into (into, partition);
|
||||||
|
partitions.unordered_remove (i);
|
||||||
|
partition_free (partition);
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Due to limitations in the transform phase we have to fuse all
|
||||||
|
reduction partitions into the last partition so the existing
|
||||||
|
loop will contain all loop-closed PHI nodes. */
|
||||||
|
for (i = 0; partitions.iterate (i, &into); ++i)
|
||||||
|
if (partition_reduction_p (into))
|
||||||
|
break;
|
||||||
|
for (i = i + 1; partitions.iterate (i, &partition); ++i)
|
||||||
|
if (partition_reduction_p (partition))
|
||||||
|
{
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
{
|
||||||
|
fprintf (dump_file, "fusing partitions\n");
|
||||||
|
dump_bitmap (dump_file, into->stmts);
|
||||||
|
dump_bitmap (dump_file, partition->stmts);
|
||||||
|
fprintf (dump_file, "because they have reductions\n");
|
||||||
|
}
|
||||||
|
partition_merge_into (into, partition);
|
||||||
|
partitions.unordered_remove (i);
|
||||||
|
partition_free (partition);
|
||||||
|
i--;
|
||||||
|
}
|
||||||
|
|
||||||
/* Apply our simple cost model - fuse partitions with similar
|
/* Apply our simple cost model - fuse partitions with similar
|
||||||
memory accesses. */
|
memory accesses. */
|
||||||
partition_t into;
|
|
||||||
for (i = 0; partitions.iterate (i, &into); ++i)
|
for (i = 0; partitions.iterate (i, &into); ++i)
|
||||||
{
|
{
|
||||||
if (partition_builtin_p (into))
|
if (partition_builtin_p (into))
|
||||||
|
|
@ -1486,61 +1507,119 @@ distribute_loop (struct loop *loop, vec<gimple> stmts,
|
||||||
"memory accesses\n");
|
"memory accesses\n");
|
||||||
}
|
}
|
||||||
partition_merge_into (into, partition);
|
partition_merge_into (into, partition);
|
||||||
partitions.ordered_remove (j);
|
partitions.unordered_remove (j);
|
||||||
partition_free (partition);
|
partition_free (partition);
|
||||||
j--;
|
j--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If we are only distributing patterns fuse all partitions that
|
/* Build the partition dependency graph. */
|
||||||
were not properly classified as builtins. */
|
|
||||||
if (!flag_tree_loop_distribution)
|
|
||||||
{
|
|
||||||
partition_t into;
|
|
||||||
/* Only fuse adjacent non-builtin partitions, see PR53616.
|
|
||||||
??? Use dependence information to improve partition ordering. */
|
|
||||||
i = 0;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
for (; partitions.iterate (i, &into); ++i)
|
|
||||||
if (!partition_builtin_p (into))
|
|
||||||
break;
|
|
||||||
for (++i; partitions.iterate (i, &partition); ++i)
|
|
||||||
if (!partition_builtin_p (partition))
|
|
||||||
{
|
|
||||||
partition_merge_into (into, partition);
|
|
||||||
partitions.ordered_remove (i);
|
|
||||||
partition_free (partition);
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
while ((unsigned) i < partitions.length ());
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fuse all reduction partitions into the last. */
|
|
||||||
if (partitions.length () > 1)
|
if (partitions.length () > 1)
|
||||||
{
|
{
|
||||||
partition_t into = partitions.last ();
|
pg = new_graph (partitions.length ());
|
||||||
for (i = partitions.length () - 2; i >= 0; --i)
|
struct pgdata {
|
||||||
|
partition_t partition;
|
||||||
|
vec<data_reference_p> writes;
|
||||||
|
vec<data_reference_p> reads;
|
||||||
|
};
|
||||||
|
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
|
||||||
|
for (i = 0; partitions.iterate (i, &partition); ++i)
|
||||||
{
|
{
|
||||||
partition_t what = partitions[i];
|
vertex *v = &pg->vertices[i];
|
||||||
if (partition_reduction_p (what))
|
pgdata *data = new pgdata;
|
||||||
|
data_reference_p dr;
|
||||||
|
/* FIXME - leaks. */
|
||||||
|
v->data = data;
|
||||||
|
bitmap_iterator bi;
|
||||||
|
unsigned j;
|
||||||
|
data->partition = partition;
|
||||||
|
data->reads = vNULL;
|
||||||
|
data->writes = vNULL;
|
||||||
|
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
|
||||||
|
for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
|
||||||
|
if (DR_IS_READ (dr))
|
||||||
|
data->reads.safe_push (dr);
|
||||||
|
else
|
||||||
|
data->writes.safe_push (dr);
|
||||||
|
}
|
||||||
|
partition_t partition1, partition2;
|
||||||
|
for (i = 0; partitions.iterate (i, &partition1); ++i)
|
||||||
|
for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
|
||||||
|
{
|
||||||
|
/* dependence direction - 0 is no dependence, -1 is back,
|
||||||
|
1 is forth, 2 is both (we can stop then, merging will occur). */
|
||||||
|
int dir = 0;
|
||||||
|
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
|
||||||
|
PGDATA(i)->writes,
|
||||||
|
PGDATA(j)->reads);
|
||||||
|
if (dir != 2)
|
||||||
|
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
|
||||||
|
PGDATA(i)->reads,
|
||||||
|
PGDATA(j)->writes);
|
||||||
|
if (dir != 2)
|
||||||
|
dir = pg_add_dependence_edges (rdg, loop_nest, dir,
|
||||||
|
PGDATA(i)->writes,
|
||||||
|
PGDATA(j)->writes);
|
||||||
|
if (dir == 1 || dir == 2)
|
||||||
|
add_edge (pg, i, j);
|
||||||
|
if (dir == -1 || dir == 2)
|
||||||
|
add_edge (pg, j, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add edges to the reduction partition (if any) to force it last. */
|
||||||
|
unsigned j;
|
||||||
|
for (j = 0; partitions.iterate (j, &partition); ++j)
|
||||||
|
if (partition_reduction_p (partition))
|
||||||
|
break;
|
||||||
|
if (j < partitions.length ())
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
|
||||||
|
if (i != j)
|
||||||
|
add_edge (pg, i, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute partitions we cannot separate and fuse them. */
|
||||||
|
num_sccs = graphds_scc (pg, NULL);
|
||||||
|
for (i = 0; i < num_sccs; ++i)
|
||||||
|
{
|
||||||
|
partition_t first;
|
||||||
|
int j;
|
||||||
|
for (j = 0; partitions.iterate (j, &first); ++j)
|
||||||
|
if (pg->vertices[j].component == i)
|
||||||
|
break;
|
||||||
|
for (j = j + 1; partitions.iterate (j, &partition); ++j)
|
||||||
|
if (pg->vertices[j].component == i)
|
||||||
{
|
{
|
||||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
{
|
{
|
||||||
fprintf (dump_file, "fusing partitions\n");
|
fprintf (dump_file, "fusing partitions\n");
|
||||||
dump_bitmap (dump_file, into->stmts);
|
dump_bitmap (dump_file, first->stmts);
|
||||||
dump_bitmap (dump_file, what->stmts);
|
dump_bitmap (dump_file, partition->stmts);
|
||||||
fprintf (dump_file, "because the latter has reductions\n");
|
fprintf (dump_file, "because they are in the same "
|
||||||
|
"dependence SCC\n");
|
||||||
}
|
}
|
||||||
partition_merge_into (into, what);
|
partition_merge_into (first, partition);
|
||||||
partitions.ordered_remove (i);
|
partitions[j] = NULL;
|
||||||
partition_free (what);
|
partition_free (partition);
|
||||||
|
PGDATA (j)->partition = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now order the remaining nodes in postorder. */
|
||||||
|
qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
|
||||||
|
partitions.truncate (0);
|
||||||
|
for (i = 0; i < pg->n_vertices; ++i)
|
||||||
|
{
|
||||||
|
pgdata *data = PGDATA (i);
|
||||||
|
if (data->partition)
|
||||||
|
partitions.safe_push (data->partition);
|
||||||
|
data->reads.release ();
|
||||||
|
data->writes.release ();
|
||||||
|
delete data;
|
||||||
|
}
|
||||||
|
gcc_assert (partitions.length () == (unsigned)num_sccs);
|
||||||
|
free_graph (pg);
|
||||||
}
|
}
|
||||||
|
|
||||||
nbp = partitions.length ();
|
nbp = partitions.length ();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue