mirror of git://gcc.gnu.org/git/gcc.git
lto-partition.c: Include sreal.h
* lto-partition.c: Include sreal.h (add_symbol_to_partition_1): Use size instead of self_size for size estimate. (account_reference_p): New. (lto_balanced_map): Use 64-bit arithmetic for size calculations; cleanup; fix accounting errors in boundary size; add debug output; combine cost as cost/size instead of cost/internal; reduce the partitioning error to +- 1/8 of the partition size. From-SVN: r259749
This commit is contained in:
parent
8d70b61edd
commit
ddb0b8247d
|
|
@ -1,3 +1,14 @@
|
||||||
|
2018-04-19 Jan Hubicka <jh@suse.cz>
|
||||||
|
|
||||||
|
* lto-partition.c: Include sreal.h
|
||||||
|
(add_symbol_to_partition_1): Use size instead of self_size
|
||||||
|
for size estimate.
|
||||||
|
(account_reference_p): New.
|
||||||
|
(lto_balanced_map): Use 64-bit arithmetic for size calculations; cleanup;
|
||||||
|
fix accounting errors in boundary size; add debug output; combine cost
|
||||||
|
as cost/size instead of cost/internal; reduce the partitioning error to
|
||||||
|
+- 1/8 of the partition size.
|
||||||
|
|
||||||
2018-04-19 Martin Liska <mliska@suse.cz>
|
2018-04-19 Martin Liska <mliska@suse.cz>
|
||||||
|
|
||||||
* lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out
|
* lto-symtab.c (lto_symtab_resolve_symbols): Do not bail out
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
#include "ipa-prop.h"
|
#include "ipa-prop.h"
|
||||||
#include "ipa-fnsummary.h"
|
#include "ipa-fnsummary.h"
|
||||||
#include "lto-partition.h"
|
#include "lto-partition.h"
|
||||||
|
#include "sreal.h"
|
||||||
|
|
||||||
vec<ltrans_partition> ltrans_partitions;
|
vec<ltrans_partition> ltrans_partitions;
|
||||||
|
|
||||||
|
|
@ -152,8 +153,8 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node)
|
||||||
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
|
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
|
||||||
{
|
{
|
||||||
struct cgraph_edge *e;
|
struct cgraph_edge *e;
|
||||||
if (!node->alias)
|
if (!node->alias && c == SYMBOL_PARTITION)
|
||||||
part->insns += ipa_fn_summaries->get (cnode)->self_size;
|
part->insns += ipa_fn_summaries->get (cnode)->size;
|
||||||
|
|
||||||
/* Add all inline clones and callees that are duplicated. */
|
/* Add all inline clones and callees that are duplicated. */
|
||||||
for (e = cnode->callees; e; e = e->next_callee)
|
for (e = cnode->callees; e; e = e->next_callee)
|
||||||
|
|
@ -276,8 +277,9 @@ undo_partition (ltrans_partition partition, unsigned int n_nodes)
|
||||||
delete partition->initializers_visited;
|
delete partition->initializers_visited;
|
||||||
partition->initializers_visited = NULL;
|
partition->initializers_visited = NULL;
|
||||||
|
|
||||||
if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node)))
|
if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
|
||||||
partition->insns -= ipa_fn_summaries->get (cnode)->self_size;
|
&& node->get_partitioning_class () == SYMBOL_PARTITION)
|
||||||
|
partition->insns -= ipa_fn_summaries->get (cnode)->size;
|
||||||
lto_symtab_encoder_delete_node (partition->encoder, node);
|
lto_symtab_encoder_delete_node (partition->encoder, node);
|
||||||
node->aux = (void *)((size_t)node->aux - 1);
|
node->aux = (void *)((size_t)node->aux - 1);
|
||||||
}
|
}
|
||||||
|
|
@ -408,6 +410,24 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
|
||||||
add_symbol_to_partition (partition, node);
|
add_symbol_to_partition (partition, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return true if we should account reference from N1 to N2 in cost
|
||||||
|
of partition boundary. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
account_reference_p (symtab_node *n1, symtab_node *n2)
|
||||||
|
{
|
||||||
|
if (cgraph_node *cnode = dyn_cast <cgraph_node *> (n1))
|
||||||
|
n1 = cnode;
|
||||||
|
/* Do not account recursion - the code below will handle it incorrectly
|
||||||
|
otherwise. Also do not account references to external symbols.
|
||||||
|
They will never become local. */
|
||||||
|
if (n1 == n2
|
||||||
|
|| DECL_EXTERNAL (n2->decl)
|
||||||
|
|| !n2->definition)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Group cgraph nodes into equally-sized partitions.
|
/* Group cgraph nodes into equally-sized partitions.
|
||||||
|
|
||||||
|
|
@ -457,14 +477,14 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
auto_vec<varpool_node *> varpool_order;
|
auto_vec<varpool_node *> varpool_order;
|
||||||
int i;
|
int i;
|
||||||
struct cgraph_node *node;
|
struct cgraph_node *node;
|
||||||
int original_total_size, total_size = 0, best_total_size = 0;
|
int64_t original_total_size, total_size = 0;
|
||||||
int partition_size;
|
int64_t partition_size;
|
||||||
ltrans_partition partition;
|
ltrans_partition partition;
|
||||||
int last_visited_node = 0;
|
int last_visited_node = 0;
|
||||||
varpool_node *vnode;
|
varpool_node *vnode;
|
||||||
int cost = 0, internal = 0;
|
int64_t cost = 0, internal = 0;
|
||||||
int best_n_nodes = 0, best_i = 0, best_cost =
|
int best_n_nodes = 0, best_i = 0;
|
||||||
INT_MAX, best_internal = 0;
|
int64_t best_cost = -1, best_internal = 0, best_size = 0;
|
||||||
int npartitions;
|
int npartitions;
|
||||||
int current_order = -1;
|
int current_order = -1;
|
||||||
int noreorder_pos = 0;
|
int noreorder_pos = 0;
|
||||||
|
|
@ -513,7 +533,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
|
|
||||||
/* Compute partition size and create the first partition. */
|
/* Compute partition size and create the first partition. */
|
||||||
if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size)
|
if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size)
|
||||||
fatal_error (input_location, "min partition size cannot be greater than max partition size");
|
fatal_error (input_location, "min partition size cannot be greater "
|
||||||
|
"than max partition size");
|
||||||
|
|
||||||
partition_size = total_size / n_lto_partitions;
|
partition_size = total_size / n_lto_partitions;
|
||||||
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
||||||
|
|
@ -521,7 +542,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
npartitions = 1;
|
npartitions = 1;
|
||||||
partition = new_partition ("");
|
partition = new_partition ("");
|
||||||
if (symtab->dump_file)
|
if (symtab->dump_file)
|
||||||
fprintf (symtab->dump_file, "Total unit size: %i, partition size: %i\n",
|
fprintf (symtab->dump_file, "Total unit size: %" PRId64 ", partition size: %" PRId64 "\n",
|
||||||
total_size, partition_size);
|
total_size, partition_size);
|
||||||
|
|
||||||
auto_vec<symtab_node *> next_nodes;
|
auto_vec<symtab_node *> next_nodes;
|
||||||
|
|
@ -540,17 +561,11 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
next_nodes.safe_push (varpool_order[varpool_pos++]);
|
next_nodes.safe_push (varpool_order[varpool_pos++]);
|
||||||
while (noreorder_pos < (int)noreorder.length ()
|
while (noreorder_pos < (int)noreorder.length ()
|
||||||
&& noreorder[noreorder_pos]->order < current_order)
|
&& noreorder[noreorder_pos]->order < current_order)
|
||||||
{
|
|
||||||
if (!noreorder[noreorder_pos]->alias)
|
|
||||||
total_size -= ipa_fn_summaries->get (noreorder[noreorder_pos])->size;
|
|
||||||
next_nodes.safe_push (noreorder[noreorder_pos++]);
|
next_nodes.safe_push (noreorder[noreorder_pos++]);
|
||||||
}
|
|
||||||
add_sorted_nodes (next_nodes, partition);
|
add_sorted_nodes (next_nodes, partition);
|
||||||
|
|
||||||
if (!symbol_partitioned_p (order[i]))
|
if (!symbol_partitioned_p (order[i]))
|
||||||
add_symbol_to_partition (partition, order[i]);
|
add_symbol_to_partition (partition, order[i]);
|
||||||
if (!order[i]->alias)
|
|
||||||
total_size -= ipa_fn_summaries->get (order[i])->size;
|
|
||||||
|
|
||||||
|
|
||||||
/* Once we added a new node to the partition, we also want to add
|
/* Once we added a new node to the partition, we also want to add
|
||||||
|
|
@ -567,7 +582,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
it and thus we need to subtract it from COST. */
|
it and thus we need to subtract it from COST. */
|
||||||
while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
|
while (last_visited_node < lto_symtab_encoder_size (partition->encoder))
|
||||||
{
|
{
|
||||||
symtab_node *refs_node;
|
|
||||||
int j;
|
int j;
|
||||||
struct ipa_ref *ref = NULL;
|
struct ipa_ref *ref = NULL;
|
||||||
symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
|
symtab_node *snode = lto_symtab_encoder_deref (partition->encoder,
|
||||||
|
|
@ -577,7 +591,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
{
|
{
|
||||||
struct cgraph_edge *edge;
|
struct cgraph_edge *edge;
|
||||||
|
|
||||||
refs_node = node;
|
|
||||||
|
|
||||||
last_visited_node++;
|
last_visited_node++;
|
||||||
|
|
||||||
|
|
@ -585,7 +598,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
|
|
||||||
/* Compute boundary cost of callgraph edges. */
|
/* Compute boundary cost of callgraph edges. */
|
||||||
for (edge = node->callees; edge; edge = edge->next_callee)
|
for (edge = node->callees; edge; edge = edge->next_callee)
|
||||||
if (edge->callee->definition)
|
/* Inline edges will always end up local. */
|
||||||
|
if (edge->inline_failed
|
||||||
|
&& account_reference_p (node, edge->callee))
|
||||||
{
|
{
|
||||||
int edge_cost = edge->frequency ();
|
int edge_cost = edge->frequency ();
|
||||||
int index;
|
int index;
|
||||||
|
|
@ -602,6 +617,8 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
cost += edge_cost;
|
cost += edge_cost;
|
||||||
}
|
}
|
||||||
for (edge = node->callers; edge; edge = edge->next_caller)
|
for (edge = node->callers; edge; edge = edge->next_caller)
|
||||||
|
if (edge->inline_failed
|
||||||
|
&& account_reference_p (edge->caller, node))
|
||||||
{
|
{
|
||||||
int edge_cost = edge->frequency ();
|
int edge_cost = edge->frequency ();
|
||||||
int index;
|
int index;
|
||||||
|
|
@ -614,27 +631,24 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
edge->caller);
|
edge->caller);
|
||||||
if (index != LCC_NOT_FOUND
|
if (index != LCC_NOT_FOUND
|
||||||
&& index < last_visited_node - 1)
|
&& index < last_visited_node - 1)
|
||||||
cost -= edge_cost;
|
cost -= edge_cost, internal += edge_cost;
|
||||||
else
|
else
|
||||||
cost += edge_cost;
|
cost += edge_cost;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
|
||||||
refs_node = snode;
|
|
||||||
last_visited_node++;
|
last_visited_node++;
|
||||||
}
|
|
||||||
|
|
||||||
/* Compute boundary cost of IPA REF edges and at the same time look into
|
/* Compute boundary cost of IPA REF edges and at the same time look into
|
||||||
variables referenced from current partition and try to add them. */
|
variables referenced from current partition and try to add them. */
|
||||||
for (j = 0; refs_node->iterate_reference (j, ref); j++)
|
for (j = 0; snode->iterate_reference (j, ref); j++)
|
||||||
if (is_a <varpool_node *> (ref->referred))
|
if (!account_reference_p (snode, ref->referred))
|
||||||
|
;
|
||||||
|
else if (is_a <varpool_node *> (ref->referred))
|
||||||
{
|
{
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
vnode = dyn_cast <varpool_node *> (ref->referred);
|
vnode = dyn_cast <varpool_node *> (ref->referred);
|
||||||
if (!vnode->definition)
|
|
||||||
continue;
|
|
||||||
if (!symbol_partitioned_p (vnode)
|
if (!symbol_partitioned_p (vnode)
|
||||||
&& !vnode->no_reorder
|
&& !vnode->no_reorder
|
||||||
&& vnode->get_partitioning_class () == SYMBOL_PARTITION)
|
&& vnode->get_partitioning_class () == SYMBOL_PARTITION)
|
||||||
|
|
@ -652,8 +666,6 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
node = dyn_cast <cgraph_node *> (ref->referred);
|
node = dyn_cast <cgraph_node *> (ref->referred);
|
||||||
if (!node->definition)
|
|
||||||
continue;
|
|
||||||
index = lto_symtab_encoder_lookup (partition->encoder,
|
index = lto_symtab_encoder_lookup (partition->encoder,
|
||||||
node);
|
node);
|
||||||
if (index != LCC_NOT_FOUND
|
if (index != LCC_NOT_FOUND
|
||||||
|
|
@ -662,8 +674,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
else
|
else
|
||||||
cost++;
|
cost++;
|
||||||
}
|
}
|
||||||
for (j = 0; refs_node->iterate_referring (j, ref); j++)
|
for (j = 0; snode->iterate_referring (j, ref); j++)
|
||||||
if (is_a <varpool_node *> (ref->referring))
|
if (!account_reference_p (ref->referring, snode))
|
||||||
|
;
|
||||||
|
else if (is_a <varpool_node *> (ref->referring))
|
||||||
{
|
{
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
|
|
@ -682,7 +696,7 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
vnode);
|
vnode);
|
||||||
if (index != LCC_NOT_FOUND
|
if (index != LCC_NOT_FOUND
|
||||||
&& index < last_visited_node - 1)
|
&& index < last_visited_node - 1)
|
||||||
cost--;
|
cost--, internal++;
|
||||||
else
|
else
|
||||||
cost++;
|
cost++;
|
||||||
}
|
}
|
||||||
|
|
@ -696,36 +710,41 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
node);
|
node);
|
||||||
if (index != LCC_NOT_FOUND
|
if (index != LCC_NOT_FOUND
|
||||||
&& index < last_visited_node - 1)
|
&& index < last_visited_node - 1)
|
||||||
cost--;
|
cost--, internal++;
|
||||||
else
|
else
|
||||||
cost++;
|
cost++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the partition is large enough, start looking for smallest boundary cost. */
|
gcc_assert (cost >= 0 && internal >= 0);
|
||||||
if (partition->insns < partition_size * 3 / 4
|
|
||||||
|| best_cost == INT_MAX
|
/* If the partition is large enough, start looking for smallest boundary cost.
|
||||||
|| ((!cost
|
If partition still seems too small (less than 7/8 of target weight) accept
|
||||||
|| (best_internal * (HOST_WIDE_INT) cost
|
any cost. If partition has right size, optimize for highest internal/cost.
|
||||||
> (internal * (HOST_WIDE_INT)best_cost)))
|
Later we stop building partition if its size is 9/8 of the target weight. */
|
||||||
&& partition->insns < partition_size * 5 / 4))
|
if (partition->insns < partition_size * 7 / 8
|
||||||
|
|| best_cost == -1
|
||||||
|
|| (!cost
|
||||||
|
|| ((sreal)best_internal * (sreal) cost
|
||||||
|
< ((sreal) internal * (sreal)best_cost))))
|
||||||
{
|
{
|
||||||
best_cost = cost;
|
best_cost = cost;
|
||||||
best_internal = internal;
|
best_internal = internal;
|
||||||
|
best_size = partition->insns;
|
||||||
best_i = i;
|
best_i = i;
|
||||||
best_n_nodes = lto_symtab_encoder_size (partition->encoder);
|
best_n_nodes = lto_symtab_encoder_size (partition->encoder);
|
||||||
best_total_size = total_size;
|
|
||||||
best_varpool_pos = varpool_pos;
|
best_varpool_pos = varpool_pos;
|
||||||
}
|
}
|
||||||
if (symtab->dump_file)
|
if (symtab->dump_file)
|
||||||
fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, cost %i/%i "
|
fprintf (symtab->dump_file, "Step %i: added %s/%i, size %i, "
|
||||||
"best %i/%i, step %i\n", i,
|
"cost %" PRId64 "/%" PRId64 " "
|
||||||
|
"best %" PRId64 "/%" PRId64", step %i\n", i,
|
||||||
order[i]->name (), order[i]->order,
|
order[i]->name (), order[i]->order,
|
||||||
partition->insns, cost, internal,
|
partition->insns, cost, internal,
|
||||||
best_cost, best_internal, best_i);
|
best_cost, best_internal, best_i);
|
||||||
/* Partition is too large, unwind into step when best cost was reached and
|
/* Partition is too large, unwind into step when best cost was reached and
|
||||||
start new partition. */
|
start new partition. */
|
||||||
if (partition->insns > 2 * partition_size
|
if (partition->insns > 9 * partition_size / 8
|
||||||
|| partition->insns > max_partition_size)
|
|| partition->insns > max_partition_size)
|
||||||
{
|
{
|
||||||
if (best_i != i)
|
if (best_i != i)
|
||||||
|
|
@ -736,21 +755,26 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
undo_partition (partition, best_n_nodes);
|
undo_partition (partition, best_n_nodes);
|
||||||
varpool_pos = best_varpool_pos;
|
varpool_pos = best_varpool_pos;
|
||||||
}
|
}
|
||||||
|
gcc_assert (best_size == partition->insns);
|
||||||
i = best_i;
|
i = best_i;
|
||||||
|
if (symtab->dump_file)
|
||||||
|
fprintf (symtab->dump_file,
|
||||||
|
"Partition insns: %i (want %" PRId64 ")\n",
|
||||||
|
partition->insns, partition_size);
|
||||||
/* When we are finished, avoid creating empty partition. */
|
/* When we are finished, avoid creating empty partition. */
|
||||||
while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1]))
|
while (i < n_nodes - 1 && symbol_partitioned_p (order[i + 1]))
|
||||||
i++;
|
i++;
|
||||||
if (i == n_nodes - 1)
|
if (i == n_nodes - 1)
|
||||||
break;
|
break;
|
||||||
|
total_size -= partition->insns;
|
||||||
partition = new_partition ("");
|
partition = new_partition ("");
|
||||||
last_visited_node = 0;
|
last_visited_node = 0;
|
||||||
total_size = best_total_size;
|
|
||||||
cost = 0;
|
cost = 0;
|
||||||
|
|
||||||
if (symtab->dump_file)
|
if (symtab->dump_file)
|
||||||
fprintf (symtab->dump_file, "New partition\n");
|
fprintf (symtab->dump_file, "New partition\n");
|
||||||
best_n_nodes = 0;
|
best_n_nodes = 0;
|
||||||
best_cost = INT_MAX;
|
best_cost = -1;
|
||||||
|
|
||||||
/* Since the size of partitions is just approximate, update the size after
|
/* Since the size of partitions is just approximate, update the size after
|
||||||
we finished current one. */
|
we finished current one. */
|
||||||
|
|
@ -760,6 +784,10 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
/* Watch for overflow. */
|
/* Watch for overflow. */
|
||||||
partition_size = INT_MAX / 16;
|
partition_size = INT_MAX / 16;
|
||||||
|
|
||||||
|
if (symtab->dump_file)
|
||||||
|
fprintf (symtab->dump_file,
|
||||||
|
"Total size: %" PRId64 " partition_size: %" PRId64 "\n",
|
||||||
|
total_size, partition_size);
|
||||||
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
|
||||||
partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
|
partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
|
||||||
npartitions ++;
|
npartitions ++;
|
||||||
|
|
@ -779,6 +807,9 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size)
|
||||||
next_nodes.safe_push (varpool_order[varpool_pos++]);
|
next_nodes.safe_push (varpool_order[varpool_pos++]);
|
||||||
while (noreorder_pos < (int)noreorder.length ())
|
while (noreorder_pos < (int)noreorder.length ())
|
||||||
next_nodes.safe_push (noreorder[noreorder_pos++]);
|
next_nodes.safe_push (noreorder[noreorder_pos++]);
|
||||||
|
/* For one partition the cost of boundary should be 0 unless we added final
|
||||||
|
symbols here (these are not accounted) or we have accounting bug. */
|
||||||
|
gcc_assert (next_nodes.length () || npartitions != 1 || !best_cost);
|
||||||
add_sorted_nodes (next_nodes, partition);
|
add_sorted_nodes (next_nodes, partition);
|
||||||
|
|
||||||
free (order);
|
free (order);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue