mirror of git://gcc.gnu.org/git/gcc.git
Refactor tree-loop-distribution.c for thread safety
This patch refactors tree-loop-distribution.c for thread safety without use of C11 __thread feature. All global variables were moved to `class loop_distribution` which is initialized at ::execute time. From-SVN: r278421
This commit is contained in:
parent
8d890d37e0
commit
eef99cd956
|
@ -1,3 +1,36 @@
|
|||
2019-11-18 Giuliano Belinassi <giuliano.belinassi@usp.br>
|
||||
|
||||
* cfgloop.c (get_loop_body_in_custom_order): New.
|
||||
* cfgloop.h (get_loop_body_in_custom_order): New prototype.
|
||||
* tree-loop-distribution.c (class loop_distribution): New.
|
||||
(bb_top_order_cmp): Remove.
|
||||
(bb_top_order_cmp_r): New.
|
||||
(create_rdg_vertices): Move into class loop_distribution.
|
||||
(stmts_from_loop): Same as above.
|
||||
(update_for_merge): Same as above.
|
||||
(partition_merge_into): Same as above.
|
||||
(get_data_dependence): Same as above.
|
||||
(data_dep_in_cycle_p): Same as above.
|
||||
(update_type_for_merge): Same as above.
|
||||
(build_rdg_partition_for-vertex): Same as above.
|
||||
(classify_builtin_ldst): Same as above.
|
||||
(classify_partition): Same as above.
|
||||
(share_memory_accesses): Same as above.
|
||||
(rdg_build_partitions): Same as above.
|
||||
(pg_add_dependence_edges): Same as above.
|
||||
(build_partition_graph): Same as above.
|
||||
(merge_dep_scc_partitions): Same as above.
|
||||
(break_alias_scc_partitions): Same as above.
|
||||
(finalize_partitions): Same as above.
|
||||
(distribute_loop): Same as above.
|
||||
(bb_top_order_init): New method
|
||||
(bb_top_order_destroy): New method.
|
||||
(get_bb_top_order_index_size): New method.
|
||||
(get_bb_top_order_index_index): New method.
|
||||
(get_bb_top_order_index_index): New method.
|
||||
(loop_distribution::execute): New method.
|
||||
(pass_loop_distribution::execute): Instantiate loop_distribution.
|
||||
|
||||
2019-11-18 Jan Hubicka <jh@suse.cz>
|
||||
|
||||
PR ipa/92508
|
||||
|
|
|
@ -980,6 +980,19 @@ get_loop_body_in_custom_order (const class loop *loop,
|
|||
return bbs;
|
||||
}
|
||||
|
||||
/* Same as above, but use gcc_sort_r instead of qsort. */
|
||||
|
||||
basic_block *
|
||||
get_loop_body_in_custom_order (const class loop *loop, void *data,
|
||||
int (*bb_comparator) (const void *, const void *, void *))
|
||||
{
|
||||
basic_block *bbs = get_loop_body (loop);
|
||||
|
||||
gcc_sort_r (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, data);
|
||||
|
||||
return bbs;
|
||||
}
|
||||
|
||||
/* Get body of a LOOP in breadth first sort order. */
|
||||
|
||||
basic_block *
|
||||
|
|
|
@ -376,6 +376,8 @@ extern basic_block *get_loop_body_in_dom_order (const class loop *);
|
|||
extern basic_block *get_loop_body_in_bfs_order (const class loop *);
|
||||
extern basic_block *get_loop_body_in_custom_order (const class loop *,
|
||||
int (*) (const void *, const void *));
|
||||
extern basic_block *get_loop_body_in_custom_order (const class loop *, void *,
|
||||
int (*) (const void *, const void *, void *));
|
||||
|
||||
extern vec<edge> get_loop_exit_edges (const class loop *);
|
||||
extern edge single_exit (const class loop *);
|
||||
|
|
|
@ -154,21 +154,10 @@ ddr_hasher::equal (const data_dependence_relation *ddr1,
|
|||
return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
|
||||
}
|
||||
|
||||
/* The loop (nest) to be distributed. */
|
||||
static vec<loop_p> loop_nest;
|
||||
|
||||
/* Vector of data references in the loop to be distributed. */
|
||||
static vec<data_reference_p> datarefs_vec;
|
||||
|
||||
/* If there is nonaddressable data reference in above vector. */
|
||||
static bool has_nonaddressable_dataref_p;
|
||||
|
||||
/* Store index of data reference in aux field. */
|
||||
#define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
|
||||
|
||||
/* Hash table for data dependence relation in the loop to be distributed. */
|
||||
static hash_table<ddr_hasher> *ddrs_table;
|
||||
|
||||
/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
|
||||
struct rdg_vertex
|
||||
{
|
||||
|
@ -215,6 +204,83 @@ struct rdg_edge
|
|||
|
||||
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
|
||||
|
||||
/* Kind of distributed loop. */
|
||||
enum partition_kind {
|
||||
PKIND_NORMAL,
|
||||
/* Partial memset stands for a paritition can be distributed into a loop
|
||||
of memset calls, rather than a single memset call. It's handled just
|
||||
like a normal parition, i.e, distributed as separate loop, no memset
|
||||
call is generated.
|
||||
|
||||
Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
|
||||
loop nest as deep as possible. As a result, parloop achieves better
|
||||
parallelization by parallelizing deeper loop nest. This hack should
|
||||
be unnecessary and removed once distributed memset can be understood
|
||||
and analyzed in data reference analysis. See PR82604 for more. */
|
||||
PKIND_PARTIAL_MEMSET,
|
||||
PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
|
||||
};
|
||||
|
||||
/* Type of distributed loop. */
|
||||
enum partition_type {
|
||||
/* The distributed loop can be executed parallelly. */
|
||||
PTYPE_PARALLEL = 0,
|
||||
/* The distributed loop has to be executed sequentially. */
|
||||
PTYPE_SEQUENTIAL
|
||||
};
|
||||
|
||||
/* Builtin info for loop distribution. */
|
||||
struct builtin_info
|
||||
{
|
||||
/* data-references a kind != PKIND_NORMAL partition is about. */
|
||||
data_reference_p dst_dr;
|
||||
data_reference_p src_dr;
|
||||
/* Base address and size of memory objects operated by the builtin. Note
|
||||
both dest and source memory objects must have the same size. */
|
||||
tree dst_base;
|
||||
tree src_base;
|
||||
tree size;
|
||||
/* Base and offset part of dst_base after stripping constant offset. This
|
||||
is only used in memset builtin distribution for now. */
|
||||
tree dst_base_base;
|
||||
unsigned HOST_WIDE_INT dst_base_offset;
|
||||
};
|
||||
|
||||
/* Partition for loop distribution. */
|
||||
struct partition
|
||||
{
|
||||
/* Statements of the partition. */
|
||||
bitmap stmts;
|
||||
/* True if the partition defines variable which is used outside of loop. */
|
||||
bool reduction_p;
|
||||
location_t loc;
|
||||
enum partition_kind kind;
|
||||
enum partition_type type;
|
||||
/* Data references in the partition. */
|
||||
bitmap datarefs;
|
||||
/* Information of builtin parition. */
|
||||
struct builtin_info *builtin;
|
||||
};
|
||||
|
||||
/* Partitions are fused because of different reasons. */
|
||||
enum fuse_type
|
||||
{
|
||||
FUSE_NON_BUILTIN = 0,
|
||||
FUSE_REDUCTION = 1,
|
||||
FUSE_SHARE_REF = 2,
|
||||
FUSE_SAME_SCC = 3,
|
||||
FUSE_FINALIZE = 4
|
||||
};
|
||||
|
||||
/* Description on different fusing reason. */
|
||||
static const char *fuse_message[] = {
|
||||
"they are non-builtins",
|
||||
"they have reductions",
|
||||
"they have shared memory refs",
|
||||
"they are in the same dependence scc",
|
||||
"there is no point to distribute loop"};
|
||||
|
||||
|
||||
/* Dump vertex I in RDG to FILE. */
|
||||
|
||||
static void
|
||||
|
@ -435,11 +501,205 @@ create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
|
|||
}
|
||||
}
|
||||
|
||||
/* Build the vertices of the reduced dependence graph RDG. Return false
|
||||
if that failed. */
|
||||
|
||||
static bool
|
||||
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
|
||||
class loop_distribution
|
||||
{
|
||||
private:
|
||||
/* The loop (nest) to be distributed. */
|
||||
vec<loop_p> loop_nest;
|
||||
|
||||
/* Vector of data references in the loop to be distributed. */
|
||||
vec<data_reference_p> datarefs_vec;
|
||||
|
||||
/* If there is nonaddressable data reference in above vector. */
|
||||
bool has_nonaddressable_dataref_p;
|
||||
|
||||
/* Store index of data reference in aux field. */
|
||||
|
||||
/* Hash table for data dependence relation in the loop to be distributed. */
|
||||
hash_table<ddr_hasher> *ddrs_table;
|
||||
|
||||
/* Array mapping basic block's index to its topological order. */
|
||||
int *bb_top_order_index;
|
||||
/* And size of the array. */
|
||||
int bb_top_order_index_size;
|
||||
|
||||
/* Build the vertices of the reduced dependence graph RDG. Return false
|
||||
if that failed. */
|
||||
bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop);
|
||||
|
||||
/* Initialize STMTS with all the statements of LOOP. We use topological
|
||||
order to discover all statements. The order is important because
|
||||
generate_loops_for_partition is using the same traversal for identifying
|
||||
statements in loop copies. */
|
||||
void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);
|
||||
|
||||
|
||||
/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
|
||||
LOOP, and one edge per flow dependence or control dependence from control
|
||||
dependence CD. During visiting each statement, data references are also
|
||||
collected and recorded in global data DATAREFS_VEC. */
|
||||
struct graph * build_rdg (class loop *loop, control_dependences *cd);
|
||||
|
||||
/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
|
||||
graph and we update type for result partition if it is non-NULL. */
|
||||
void partition_merge_into (struct graph *rdg,
|
||||
partition *dest, partition *partition,
|
||||
enum fuse_type ft);
|
||||
|
||||
|
||||
/* Return data dependence relation for data references A and B. The two
|
||||
data references must be in lexicographic order wrto reduced dependence
|
||||
graph RDG. We firstly try to find ddr from global ddr hash table. If
|
||||
it doesn't exist, compute the ddr and cache it. */
|
||||
data_dependence_relation * get_data_dependence (struct graph *rdg,
|
||||
data_reference_p a,
|
||||
data_reference_p b);
|
||||
|
||||
|
||||
/* In reduced dependence graph RDG for loop distribution, return true if
|
||||
dependence between references DR1 and DR2 leads to a dependence cycle
|
||||
and such dependence cycle can't be resolved by runtime alias check. */
|
||||
bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
|
||||
data_reference_p dr2);
|
||||
|
||||
|
||||
/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
|
||||
PARTITION1's type after merging PARTITION2 into PARTITION1. */
|
||||
void update_type_for_merge (struct graph *rdg,
|
||||
partition *partition1, partition *partition2);
|
||||
|
||||
|
||||
/* Returns a partition with all the statements needed for computing
|
||||
the vertex V of the RDG, also including the loop exit conditions. */
|
||||
partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);
|
||||
|
||||
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
|
||||
if it forms builtin memcpy or memmove call. */
|
||||
void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
|
||||
data_reference_p dst_dr, data_reference_p src_dr);
|
||||
|
||||
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
|
||||
For the moment we detect memset, memcpy and memmove patterns. Bitmap
|
||||
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
|
||||
Returns true if there is a reduction in all partitions and we
|
||||
possibly did not mark PARTITION as having one for this reason. */
|
||||
|
||||
bool
|
||||
classify_partition (loop_p loop,
|
||||
struct graph *rdg, partition *partition,
|
||||
bitmap stmt_in_all_partitions);
|
||||
|
||||
|
||||
/* Returns true when PARTITION1 and PARTITION2 access the same memory
|
||||
object in RDG. */
|
||||
bool share_memory_accesses (struct graph *rdg,
|
||||
partition *partition1, partition *partition2);
|
||||
|
||||
/* For each seed statement in STARTING_STMTS, this function builds
|
||||
partition for it by adding depended statements according to RDG.
|
||||
All partitions are recorded in PARTITIONS. */
|
||||
void rdg_build_partitions (struct graph *rdg,
|
||||
vec<gimple *> starting_stmts,
|
||||
vec<partition *> *partitions);
|
||||
|
||||
/* Compute partition dependence created by the data references in DRS1
|
||||
and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
|
||||
not NULL, we record dependence introduced by possible alias between
|
||||
two data references in ALIAS_DDRS; otherwise, we simply ignore such
|
||||
dependence as if it doesn't exist at all. */
|
||||
int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
|
||||
bitmap drs2, vec<ddr_p> *alias_ddrs);
|
||||
|
||||
|
||||
/* Build and return partition dependence graph for PARTITIONS. RDG is
|
||||
reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
|
||||
is true, data dependence caused by possible alias between references
|
||||
is ignored, as if it doesn't exist at all; otherwise all depdendences
|
||||
are considered. */
|
||||
struct graph *build_partition_graph (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
bool ignore_alias_p);
|
||||
|
||||
/* Given reduced dependence graph RDG merge strong connected components
|
||||
of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
|
||||
possible alias between references is ignored, as if it doesn't exist
|
||||
at all; otherwise all depdendences are considered. */
|
||||
void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
|
||||
*partitions, bool ignore_alias_p);
|
||||
|
||||
/* This is the main function breaking strong conected components in
|
||||
PARTITIONS giving reduced depdendence graph RDG. Store data dependence
|
||||
relations for runtime alias check in ALIAS_DDRS. */
|
||||
void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
|
||||
*partitions, vec<ddr_p> *alias_ddrs);
|
||||
|
||||
|
||||
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
|
||||
ALIAS_DDRS contains ddrs which need runtime alias check. */
|
||||
void finalize_partitions (class loop *loop, vec<struct partition *>
|
||||
*partitions, vec<ddr_p> *alias_ddrs);
|
||||
|
||||
/* Distributes the code from LOOP in such a way that producer statements
|
||||
are placed before consumer statements. Tries to separate only the
|
||||
statements from STMTS into separate loops. Returns the number of
|
||||
distributed loops. Set NB_CALLS to number of generated builtin calls.
|
||||
Set *DESTROY_P to whether LOOP needs to be destroyed. */
|
||||
int distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
control_dependences *cd, int *nb_calls, bool *destroy_p,
|
||||
bool only_patterns_p);
|
||||
|
||||
/* Compute topological order for basic blocks. Topological order is
|
||||
needed because data dependence is computed for data references in
|
||||
lexicographical order. */
|
||||
void bb_top_order_init (void);
|
||||
|
||||
void bb_top_order_destroy (void);
|
||||
|
||||
public:
|
||||
|
||||
/* Getter for bb_top_order. */
|
||||
|
||||
inline int get_bb_top_order_index_size (void)
|
||||
{
|
||||
return bb_top_order_index_size;
|
||||
}
|
||||
|
||||
inline int get_bb_top_order_index (int i)
|
||||
{
|
||||
return bb_top_order_index[i];
|
||||
}
|
||||
|
||||
unsigned int execute (function *fun);
|
||||
};
|
||||
|
||||
|
||||
/* If X has a smaller topological sort number than Y, returns -1;
|
||||
if greater, returns 1. */
|
||||
static int
|
||||
bb_top_order_cmp_r (const void *x, const void *y, void *loop)
|
||||
{
|
||||
loop_distribution *_loop =
|
||||
(loop_distribution *) loop;
|
||||
|
||||
basic_block bb1 = *(const basic_block *) x;
|
||||
basic_block bb2 = *(const basic_block *) y;
|
||||
|
||||
int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();
|
||||
|
||||
gcc_assert (bb1->index < bb_top_order_index_size
|
||||
&& bb2->index < bb_top_order_index_size);
|
||||
gcc_assert (bb1 == bb2
|
||||
|| _loop->get_bb_top_order_index(bb1->index)
|
||||
!= _loop->get_bb_top_order_index(bb2->index));
|
||||
|
||||
return (_loop->get_bb_top_order_index(bb1->index) -
|
||||
_loop->get_bb_top_order_index(bb2->index));
|
||||
}
|
||||
|
||||
bool
|
||||
loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
|
||||
loop_p loop)
|
||||
{
|
||||
int i;
|
||||
gimple *stmt;
|
||||
|
@ -476,39 +736,11 @@ create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Array mapping basic block's index to its topological order. */
|
||||
static int *bb_top_order_index;
|
||||
/* And size of the array. */
|
||||
static int bb_top_order_index_size;
|
||||
|
||||
/* If X has a smaller topological sort number than Y, returns -1;
|
||||
if greater, returns 1. */
|
||||
|
||||
static int
|
||||
bb_top_order_cmp (const void *x, const void *y)
|
||||
{
|
||||
basic_block bb1 = *(const basic_block *) x;
|
||||
basic_block bb2 = *(const basic_block *) y;
|
||||
|
||||
gcc_assert (bb1->index < bb_top_order_index_size
|
||||
&& bb2->index < bb_top_order_index_size);
|
||||
gcc_assert (bb1 == bb2
|
||||
|| bb_top_order_index[bb1->index]
|
||||
!= bb_top_order_index[bb2->index]);
|
||||
|
||||
return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
|
||||
}
|
||||
|
||||
/* Initialize STMTS with all the statements of LOOP. We use topological
|
||||
order to discover all statements. The order is important because
|
||||
generate_loops_for_partition is using the same traversal for identifying
|
||||
statements in loop copies. */
|
||||
|
||||
static void
|
||||
stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
|
||||
void
|
||||
loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
|
||||
{
|
||||
unsigned int i;
|
||||
basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
|
||||
basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r);
|
||||
|
||||
for (i = 0; i < loop->num_nodes; i++)
|
||||
{
|
||||
|
@ -557,13 +789,8 @@ free_rdg (struct graph *rdg)
|
|||
free_graph (rdg);
|
||||
}
|
||||
|
||||
/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
|
||||
LOOP, and one edge per flow dependence or control dependence from control
|
||||
dependence CD. During visiting each statement, data references are also
|
||||
collected and recorded in global data DATAREFS_VEC. */
|
||||
|
||||
static struct graph *
|
||||
build_rdg (class loop *loop, control_dependences *cd)
|
||||
struct graph *
|
||||
loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
|
||||
{
|
||||
struct graph *rdg;
|
||||
|
||||
|
@ -586,65 +813,6 @@ build_rdg (class loop *loop, control_dependences *cd)
|
|||
}
|
||||
|
||||
|
||||
/* Kind of distributed loop. */
|
||||
enum partition_kind {
|
||||
PKIND_NORMAL,
|
||||
/* Partial memset stands for a paritition can be distributed into a loop
|
||||
of memset calls, rather than a single memset call. It's handled just
|
||||
like a normal parition, i.e, distributed as separate loop, no memset
|
||||
call is generated.
|
||||
|
||||
Note: This is a hacking fix trying to distribute ZERO-ing stmt in a
|
||||
loop nest as deep as possible. As a result, parloop achieves better
|
||||
parallelization by parallelizing deeper loop nest. This hack should
|
||||
be unnecessary and removed once distributed memset can be understood
|
||||
and analyzed in data reference analysis. See PR82604 for more. */
|
||||
PKIND_PARTIAL_MEMSET,
|
||||
PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
|
||||
};
|
||||
|
||||
/* Type of distributed loop. */
|
||||
enum partition_type {
|
||||
/* The distributed loop can be executed parallelly. */
|
||||
PTYPE_PARALLEL = 0,
|
||||
/* The distributed loop has to be executed sequentially. */
|
||||
PTYPE_SEQUENTIAL
|
||||
};
|
||||
|
||||
/* Builtin info for loop distribution. */
|
||||
struct builtin_info
|
||||
{
|
||||
/* data-references a kind != PKIND_NORMAL partition is about. */
|
||||
data_reference_p dst_dr;
|
||||
data_reference_p src_dr;
|
||||
/* Base address and size of memory objects operated by the builtin. Note
|
||||
both dest and source memory objects must have the same size. */
|
||||
tree dst_base;
|
||||
tree src_base;
|
||||
tree size;
|
||||
/* Base and offset part of dst_base after stripping constant offset. This
|
||||
is only used in memset builtin distribution for now. */
|
||||
tree dst_base_base;
|
||||
unsigned HOST_WIDE_INT dst_base_offset;
|
||||
};
|
||||
|
||||
/* Partition for loop distribution. */
|
||||
struct partition
|
||||
{
|
||||
/* Statements of the partition. */
|
||||
bitmap stmts;
|
||||
/* True if the partition defines variable which is used outside of loop. */
|
||||
bool reduction_p;
|
||||
location_t loc;
|
||||
enum partition_kind kind;
|
||||
enum partition_type type;
|
||||
/* Data references in the partition. */
|
||||
bitmap datarefs;
|
||||
/* Information of builtin parition. */
|
||||
struct builtin_info *builtin;
|
||||
};
|
||||
|
||||
|
||||
/* Allocate and initialize a partition from BITMAP. */
|
||||
|
||||
static partition *
|
||||
|
@ -689,33 +857,9 @@ partition_reduction_p (partition *partition)
|
|||
return partition->reduction_p;
|
||||
}
|
||||
|
||||
/* Partitions are fused because of different reasons. */
|
||||
enum fuse_type
|
||||
{
|
||||
FUSE_NON_BUILTIN = 0,
|
||||
FUSE_REDUCTION = 1,
|
||||
FUSE_SHARE_REF = 2,
|
||||
FUSE_SAME_SCC = 3,
|
||||
FUSE_FINALIZE = 4
|
||||
};
|
||||
|
||||
/* Description on different fusing reason. */
|
||||
static const char *fuse_message[] = {
|
||||
"they are non-builtins",
|
||||
"they have reductions",
|
||||
"they have shared memory refs",
|
||||
"they are in the same dependence scc",
|
||||
"there is no point to distribute loop"};
|
||||
|
||||
static void
|
||||
update_type_for_merge (struct graph *, partition *, partition *);
|
||||
|
||||
/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
|
||||
graph and we update type for result partition if it is non-NULL. */
|
||||
|
||||
static void
|
||||
partition_merge_into (struct graph *rdg, partition *dest,
|
||||
partition *partition, enum fuse_type ft)
|
||||
void
|
||||
loop_distribution::partition_merge_into (struct graph *rdg,
|
||||
partition *dest, partition *partition, enum fuse_type ft)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
|
@ -1201,13 +1345,9 @@ generate_code_for_partition (class loop *loop,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Return data dependence relation for data references A and B. The two
|
||||
data references must be in lexicographic order wrto reduced dependence
|
||||
graph RDG. We firstly try to find ddr from global ddr hash table. If
|
||||
it doesn't exist, compute the ddr and cache it. */
|
||||
|
||||
static data_dependence_relation *
|
||||
get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
|
||||
data_dependence_relation *
|
||||
loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
|
||||
data_reference_p b)
|
||||
{
|
||||
struct data_dependence_relation ent, **slot;
|
||||
struct data_dependence_relation *ddr;
|
||||
|
@ -1228,13 +1368,10 @@ get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
|
|||
return *slot;
|
||||
}
|
||||
|
||||
/* In reduced dependence graph RDG for loop distribution, return true if
|
||||
dependence between references DR1 and DR2 leads to a dependence cycle
|
||||
and such dependence cycle can't be resolved by runtime alias check. */
|
||||
|
||||
static bool
|
||||
data_dep_in_cycle_p (struct graph *rdg,
|
||||
data_reference_p dr1, data_reference_p dr2)
|
||||
bool
|
||||
loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
|
||||
data_reference_p dr1,
|
||||
data_reference_p dr2)
|
||||
{
|
||||
struct data_dependence_relation *ddr;
|
||||
|
||||
|
@ -1264,12 +1401,10 @@ data_dep_in_cycle_p (struct graph *rdg,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
|
||||
PARTITION1's type after merging PARTITION2 into PARTITION1. */
|
||||
|
||||
static void
|
||||
update_type_for_merge (struct graph *rdg,
|
||||
partition *partition1, partition *partition2)
|
||||
void
|
||||
loop_distribution::update_type_for_merge (struct graph *rdg,
|
||||
partition *partition1,
|
||||
partition *partition2)
|
||||
{
|
||||
unsigned i, j;
|
||||
bitmap_iterator bi, bj;
|
||||
|
@ -1297,11 +1432,8 @@ update_type_for_merge (struct graph *rdg,
|
|||
}
|
||||
}
|
||||
|
||||
/* Returns a partition with all the statements needed for computing
|
||||
the vertex V of the RDG, also including the loop exit conditions. */
|
||||
|
||||
static partition *
|
||||
build_rdg_partition_for_vertex (struct graph *rdg, int v)
|
||||
partition *
|
||||
loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
|
||||
{
|
||||
partition *partition = partition_alloc ();
|
||||
auto_vec<int, 3> nodes;
|
||||
|
@ -1595,9 +1727,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
|
|||
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
|
||||
if it forms builtin memcpy or memmove call. */
|
||||
|
||||
static void
|
||||
classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
|
||||
data_reference_p dst_dr, data_reference_p src_dr)
|
||||
void
|
||||
loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
|
||||
partition *partition,
|
||||
data_reference_p dst_dr,
|
||||
data_reference_p src_dr)
|
||||
{
|
||||
tree base, size, src_base, src_size;
|
||||
auto_vec<tree> dst_steps, src_steps;
|
||||
|
@ -1655,15 +1789,10 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
|
|||
return;
|
||||
}
|
||||
|
||||
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
|
||||
For the moment we detect memset, memcpy and memmove patterns. Bitmap
|
||||
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
|
||||
Returns true if there is a reduction in all partitions and we
|
||||
possibly did not mark PARTITION as having one for this reason. */
|
||||
|
||||
static bool
|
||||
classify_partition (loop_p loop, struct graph *rdg, partition *partition,
|
||||
bitmap stmt_in_all_partitions)
|
||||
bool
|
||||
loop_distribution::classify_partition (loop_p loop,
|
||||
struct graph *rdg, partition *partition,
|
||||
bitmap stmt_in_all_partitions)
|
||||
{
|
||||
bitmap_iterator bi;
|
||||
unsigned i;
|
||||
|
@ -1721,11 +1850,8 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
|
|||
return has_reduction;
|
||||
}
|
||||
|
||||
/* Returns true when PARTITION1 and PARTITION2 access the same memory
|
||||
object in RDG. */
|
||||
|
||||
static bool
|
||||
share_memory_accesses (struct graph *rdg,
|
||||
bool
|
||||
loop_distribution::share_memory_accesses (struct graph *rdg,
|
||||
partition *partition1, partition *partition2)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
@ -1772,10 +1898,10 @@ share_memory_accesses (struct graph *rdg,
|
|||
partition for it by adding depended statements according to RDG.
|
||||
All partitions are recorded in PARTITIONS. */
|
||||
|
||||
static void
|
||||
rdg_build_partitions (struct graph *rdg,
|
||||
vec<gimple *> starting_stmts,
|
||||
vec<partition *> *partitions)
|
||||
void
|
||||
loop_distribution::rdg_build_partitions (struct graph *rdg,
|
||||
vec<gimple *> starting_stmts,
|
||||
vec<partition *> *partitions)
|
||||
{
|
||||
auto_bitmap processed;
|
||||
int i;
|
||||
|
@ -1891,14 +2017,8 @@ partition_contains_all_rw (struct graph *rdg,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Compute partition dependence created by the data references in DRS1
|
||||
and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
|
||||
not NULL, we record dependence introduced by possible alias between
|
||||
two data references in ALIAS_DDRS; otherwise, we simply ignore such
|
||||
dependence as if it doesn't exist at all. */
|
||||
|
||||
static int
|
||||
pg_add_dependence_edges (struct graph *rdg, int dir,
|
||||
int
|
||||
loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
|
||||
bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
|
||||
{
|
||||
unsigned i, j;
|
||||
|
@ -2114,10 +2234,10 @@ free_partition_graph_vdata (struct graph *pg)
|
|||
is ignored, as if it doesn't exist at all; otherwise all depdendences
|
||||
are considered. */
|
||||
|
||||
static struct graph *
|
||||
build_partition_graph (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
bool ignore_alias_p)
|
||||
struct graph *
|
||||
loop_distribution::build_partition_graph (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
bool ignore_alias_p)
|
||||
{
|
||||
int i, j;
|
||||
struct partition *partition1, *partition2;
|
||||
|
@ -2199,15 +2319,10 @@ sort_partitions_by_post_order (struct graph *pg,
|
|||
}
|
||||
}
|
||||
|
||||
/* Given reduced dependence graph RDG merge strong connected components
|
||||
of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
|
||||
possible alias between references is ignored, as if it doesn't exist
|
||||
at all; otherwise all depdendences are considered. */
|
||||
|
||||
static void
|
||||
merge_dep_scc_partitions (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
bool ignore_alias_p)
|
||||
void
|
||||
loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
bool ignore_alias_p)
|
||||
{
|
||||
struct partition *partition1, *partition2;
|
||||
struct pg_vdata *data;
|
||||
|
@ -2280,11 +2395,10 @@ pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
|
|||
/* This is the main function breaking strong conected components in
|
||||
PARTITIONS giving reduced depdendence graph RDG. Store data dependence
|
||||
relations for runtime alias check in ALIAS_DDRS. */
|
||||
|
||||
static void
|
||||
break_alias_scc_partitions (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
vec<ddr_p> *alias_ddrs)
|
||||
void
|
||||
loop_distribution::break_alias_scc_partitions (struct graph *rdg,
|
||||
vec<struct partition *> *partitions,
|
||||
vec<ddr_p> *alias_ddrs)
|
||||
{
|
||||
int i, j, k, num_sccs, num_sccs_no_alias;
|
||||
/* Build partition dependence graph. */
|
||||
|
@ -2710,12 +2824,10 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
|
|||
}
|
||||
}
|
||||
|
||||
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
|
||||
ALIAS_DDRS contains ddrs which need runtime alias check. */
|
||||
|
||||
static void
|
||||
finalize_partitions (class loop *loop, vec<struct partition *> *partitions,
|
||||
vec<ddr_p> *alias_ddrs)
|
||||
void
|
||||
loop_distribution::finalize_partitions (class loop *loop,
|
||||
vec<struct partition *> *partitions,
|
||||
vec<ddr_p> *alias_ddrs)
|
||||
{
|
||||
unsigned i;
|
||||
struct partition *partition, *a;
|
||||
|
@ -2770,8 +2882,8 @@ finalize_partitions (class loop *loop, vec<struct partition *> *partitions,
|
|||
distributed loops. Set NB_CALLS to number of generated builtin calls.
|
||||
Set *DESTROY_P to whether LOOP needs to be destroyed. */
|
||||
|
||||
static int
|
||||
distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
int
|
||||
loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
control_dependences *cd, int *nb_calls, bool *destroy_p,
|
||||
bool only_patterns_p)
|
||||
{
|
||||
|
@ -3011,40 +3123,27 @@ distribute_loop (class loop *loop, vec<gimple *> stmts,
|
|||
return nbp - *nb_calls;
|
||||
}
|
||||
|
||||
/* Distribute all loops in the current function. */
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_loop_distribution =
|
||||
void loop_distribution::bb_top_order_init (void)
|
||||
{
|
||||
GIMPLE_PASS, /* type */
|
||||
"ldist", /* name */
|
||||
OPTGROUP_LOOP, /* optinfo_flags */
|
||||
TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
|
||||
( PROP_cfg | PROP_ssa ), /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
0, /* todo_flags_finish */
|
||||
};
|
||||
int rpo_num;
|
||||
int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
||||
|
||||
class pass_loop_distribution : public gimple_opt_pass
|
||||
bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
||||
bb_top_order_index_size = last_basic_block_for_fn (cfun);
|
||||
rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
|
||||
for (int i = 0; i < rpo_num; i++)
|
||||
bb_top_order_index[rpo[i]] = i;
|
||||
|
||||
free (rpo);
|
||||
}
|
||||
|
||||
void loop_distribution::bb_top_order_destroy ()
|
||||
{
|
||||
public:
|
||||
pass_loop_distribution (gcc::context *ctxt)
|
||||
: gimple_opt_pass (pass_data_loop_distribution, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
return flag_tree_loop_distribution
|
||||
|| flag_tree_loop_distribute_patterns;
|
||||
}
|
||||
|
||||
virtual unsigned int execute (function *);
|
||||
|
||||
}; // class pass_loop_distribution
|
||||
free (bb_top_order_index);
|
||||
bb_top_order_index = NULL;
|
||||
bb_top_order_index_size = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Given LOOP, this function records seed statements for distribution in
|
||||
|
@ -3131,8 +3230,9 @@ prepare_perfect_loop_nest (class loop *loop)
|
|||
return loop;
|
||||
}
|
||||
|
||||
|
||||
unsigned int
|
||||
pass_loop_distribution::execute (function *fun)
|
||||
loop_distribution::execute (function *fun)
|
||||
{
|
||||
class loop *loop;
|
||||
bool changed = false;
|
||||
|
@ -3143,22 +3243,7 @@ pass_loop_distribution::execute (function *fun)
|
|||
if (number_of_loops (fun) <= 1)
|
||||
return 0;
|
||||
|
||||
/* Compute topological order for basic blocks. Topological order is
|
||||
needed because data dependence is computed for data references in
|
||||
lexicographical order. */
|
||||
if (bb_top_order_index == NULL)
|
||||
{
|
||||
int rpo_num;
|
||||
int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
||||
|
||||
bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
||||
bb_top_order_index_size = last_basic_block_for_fn (cfun);
|
||||
rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
|
||||
for (int i = 0; i < rpo_num; i++)
|
||||
bb_top_order_index[rpo[i]] = i;
|
||||
|
||||
free (rpo);
|
||||
}
|
||||
bb_top_order_init ();
|
||||
|
||||
FOR_ALL_BB_FN (bb, fun)
|
||||
{
|
||||
|
@ -3233,11 +3318,7 @@ pass_loop_distribution::execute (function *fun)
|
|||
delete cd;
|
||||
|
||||
if (bb_top_order_index != NULL)
|
||||
{
|
||||
free (bb_top_order_index);
|
||||
bb_top_order_index = NULL;
|
||||
bb_top_order_index_size = 0;
|
||||
}
|
||||
bb_top_order_destroy ();
|
||||
|
||||
if (changed)
|
||||
{
|
||||
|
@ -3259,6 +3340,48 @@ pass_loop_distribution::execute (function *fun)
|
|||
return changed ? TODO_cleanup_cfg : 0;
|
||||
}
|
||||
|
||||
|
||||
/* Distribute all loops in the current function. */
|
||||
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_loop_distribution =
|
||||
{
|
||||
GIMPLE_PASS, /* type */
|
||||
"ldist", /* name */
|
||||
OPTGROUP_LOOP, /* optinfo_flags */
|
||||
TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
|
||||
( PROP_cfg | PROP_ssa ), /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
0, /* todo_flags_finish */
|
||||
};
|
||||
|
||||
class pass_loop_distribution : public gimple_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_loop_distribution (gcc::context *ctxt)
|
||||
: gimple_opt_pass (pass_data_loop_distribution, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
return flag_tree_loop_distribution
|
||||
|| flag_tree_loop_distribute_patterns;
|
||||
}
|
||||
|
||||
virtual unsigned int execute (function *);
|
||||
|
||||
}; // class pass_loop_distribution
|
||||
|
||||
unsigned int
|
||||
pass_loop_distribution::execute (function *fun)
|
||||
{
|
||||
return loop_distribution ().execute (fun);
|
||||
}
|
||||
|
||||
} // anon namespace
|
||||
|
||||
gimple_opt_pass *
|
||||
|
|
Loading…
Reference in New Issue