mirror of git://gcc.gnu.org/git/gcc.git
predict.c (drop_profile): New function.
2013-11-12 Teresa Johnson <tejohnson@google.com> Jan Hubicka <jh@suse.cz> * predict.c (drop_profile): New function. (handle_missing_profiles): Ditto. (counts_to_freqs): Don't overwrite estimated frequencies when function has no profile counts. * predict.h (handle_missing_profiles): Declare. * tree-inline.c (freqs_to_counts): New function. (copy_cfg_body): Invoke freqs_to_counts as needed. * tree-profile.c (tree_profiling): Invoke handle_missing_profiles. Co-Authored-By: Jan Hubicka <jh@suse.cz> From-SVN: r204704
This commit is contained in:
parent
a720b48e18
commit
eb4b92c13b
|
|
@ -1,3 +1,15 @@
|
|||
2013-11-12 Teresa Johnson <tejohnson@google.com>
|
||||
Jan Hubicka <jh@suse.cz>
|
||||
|
||||
* predict.c (drop_profile): New function.
|
||||
(handle_missing_profiles): Ditto.
|
||||
(counts_to_freqs): Don't overwrite estimated frequencies
|
||||
when function has no profile counts.
|
||||
* predict.h (handle_missing_profiles): Declare.
|
||||
* tree-inline.c (freqs_to_counts): New function.
|
||||
(copy_cfg_body): Invoke freqs_to_counts as needed.
|
||||
* tree-profile.c (tree_profiling): Invoke handle_missing_profiles.
|
||||
|
||||
2013-11-12 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR target/59088
|
||||
|
|
|
|||
116
gcc/predict.c
116
gcc/predict.c
|
|
@ -2765,6 +2765,116 @@ estimate_loops (void)
|
|||
BITMAP_FREE (tovisit);
|
||||
}
|
||||
|
||||
/* Drop the profile for NODE to guessed, and update its frequency based on
|
||||
whether it is expected to be HOT. */
|
||||
|
||||
static void
|
||||
drop_profile (struct cgraph_node *node, bool hot)
|
||||
{
|
||||
struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
|
||||
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"Dropping 0 profile for %s/%i. %s based on calls.\n",
|
||||
cgraph_node_name (node), node->order,
|
||||
hot ? "Function is hot" : "Function is normal");
|
||||
/* We only expect to miss profiles for functions that are reached
|
||||
via non-zero call edges in cases where the function may have
|
||||
been linked from another module or library (COMDATs and extern
|
||||
templates). See the comments below for handle_missing_profiles. */
|
||||
if (!DECL_COMDAT (node->decl) && !DECL_EXTERNAL (node->decl))
|
||||
{
|
||||
if (flag_profile_correction)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file,
|
||||
"Missing counts for called function %s/%i\n",
|
||||
cgraph_node_name (node), node->order);
|
||||
}
|
||||
else
|
||||
error ("Missing counts for called function %s/%i",
|
||||
cgraph_node_name (node), node->order);
|
||||
}
|
||||
|
||||
profile_status_for_function (fn)
|
||||
= (flag_guess_branch_prob ? PROFILE_GUESSED : PROFILE_ABSENT);
|
||||
node->frequency
|
||||
= hot ? NODE_FREQUENCY_HOT : NODE_FREQUENCY_NORMAL;
|
||||
}
|
||||
|
||||
/* In the case of COMDAT routines, multiple object files will contain the same
|
||||
function and the linker will select one for the binary. In that case
|
||||
all the other copies from the profile instrument binary will be missing
|
||||
profile counts. Look for cases where this happened, due to non-zero
|
||||
call counts going to 0-count functions, and drop the profile to guessed
|
||||
so that we can use the estimated probabilities and avoid optimizing only
|
||||
for size.
|
||||
|
||||
The other case where the profile may be missing is when the routine
|
||||
is not going to be emitted to the object file, e.g. for "extern template"
|
||||
class methods. Those will be marked DECL_EXTERNAL. Emit a warning in
|
||||
all other cases of non-zero calls to 0-count functions. */
|
||||
|
||||
void
|
||||
handle_missing_profiles (void)
|
||||
{
|
||||
struct cgraph_node *node;
|
||||
int unlikely_count_fraction = PARAM_VALUE (UNLIKELY_BB_COUNT_FRACTION);
|
||||
vec<struct cgraph_node *> worklist;
|
||||
worklist.create (64);
|
||||
|
||||
/* See if 0 count function has non-0 count callers. In this case we
|
||||
lost some profile. Drop its function profile to PROFILE_GUESSED. */
|
||||
FOR_EACH_DEFINED_FUNCTION (node)
|
||||
{
|
||||
struct cgraph_edge *e;
|
||||
gcov_type call_count = 0;
|
||||
struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
|
||||
|
||||
if (node->count)
|
||||
continue;
|
||||
for (e = node->callers; e; e = e->next_caller)
|
||||
call_count += e->count;
|
||||
if (call_count
|
||||
&& fn && fn->cfg
|
||||
&& (call_count * unlikely_count_fraction >= profile_info->runs))
|
||||
{
|
||||
bool maybe_hot = maybe_hot_count_p (NULL, call_count);
|
||||
|
||||
drop_profile (node, maybe_hot);
|
||||
worklist.safe_push (node);
|
||||
}
|
||||
}
|
||||
|
||||
/* Propagate the profile dropping to other 0-count COMDATs that are
|
||||
potentially called by COMDATs we already dropped the profile on. */
|
||||
while (worklist.length () > 0)
|
||||
{
|
||||
struct cgraph_edge *e;
|
||||
|
||||
node = worklist.pop ();
|
||||
for (e = node->callees; e; e = e->next_caller)
|
||||
{
|
||||
struct cgraph_node *callee = e->callee;
|
||||
struct function *fn = DECL_STRUCT_FUNCTION (callee->decl);
|
||||
|
||||
if (callee->count > 0)
|
||||
continue;
|
||||
if (DECL_COMDAT (callee->decl) && fn && fn->cfg
|
||||
&& profile_status_for_function (fn) == PROFILE_READ)
|
||||
{
|
||||
/* Since there are no non-0 call counts to this function,
|
||||
we don't know for sure whether it is hot. Indicate to
|
||||
the drop_profile routine that function should be marked
|
||||
normal, rather than hot. */
|
||||
drop_profile (node, false);
|
||||
worklist.safe_push (callee);
|
||||
}
|
||||
}
|
||||
}
|
||||
worklist.release ();
|
||||
}
|
||||
|
||||
/* Convert counts measured by profile driven feedback to frequencies.
|
||||
Return nonzero iff there was any nonzero execution count. */
|
||||
|
||||
|
|
@ -2774,6 +2884,12 @@ counts_to_freqs (void)
|
|||
gcov_type count_max, true_count_max = 0;
|
||||
basic_block bb;
|
||||
|
||||
/* Don't overwrite the estimated frequencies when the profile for
|
||||
the function is missing. We may drop this function PROFILE_GUESSED
|
||||
later in drop_profile (). */
|
||||
if (!ENTRY_BLOCK_PTR->count)
|
||||
return 0;
|
||||
|
||||
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
|
||||
true_count_max = MAX (bb->count, true_count_max);
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ enum prediction
|
|||
|
||||
extern void predict_insn_def (rtx, enum br_predictor, enum prediction);
|
||||
extern int counts_to_freqs (void);
|
||||
extern void handle_missing_profiles (void);
|
||||
extern void estimate_bb_frequencies (bool);
|
||||
extern const char *predictor_name (enum br_predictor);
|
||||
extern tree build_predict_expr (enum br_predictor, enum prediction);
|
||||
|
|
|
|||
|
|
@ -2353,6 +2353,29 @@ redirect_all_calls (copy_body_data * id, basic_block bb)
|
|||
}
|
||||
}
|
||||
|
||||
/* Convert estimated frequencies into counts for NODE, scaling COUNT
|
||||
with each bb's frequency. Used when NODE has a 0-weight entry
|
||||
but we are about to inline it into a non-zero count call bb.
|
||||
See the comments for handle_missing_profiles() in predict.c for
|
||||
when this can happen for COMDATs. */
|
||||
|
||||
void
|
||||
freqs_to_counts (struct cgraph_node *node, gcov_type count)
|
||||
{
|
||||
basic_block bb;
|
||||
edge_iterator ei;
|
||||
edge e;
|
||||
struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
|
||||
|
||||
FOR_ALL_BB_FN(bb, fn)
|
||||
{
|
||||
bb->count = apply_scale (count,
|
||||
GCOV_COMPUTE_SCALE (bb->frequency, BB_FREQ_MAX));
|
||||
FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
e->count = apply_probability (e->src->count, e->probability);
|
||||
}
|
||||
}
|
||||
|
||||
/* Make a copy of the body of FN so that it can be inserted inline in
|
||||
another function. Walks FN via CFG, returns new fndecl. */
|
||||
|
||||
|
|
@ -2373,6 +2396,24 @@ copy_cfg_body (copy_body_data * id, gcov_type count, int frequency_scale,
|
|||
int incoming_frequency = 0;
|
||||
gcov_type incoming_count = 0;
|
||||
|
||||
/* This can happen for COMDAT routines that end up with 0 counts
|
||||
despite being called (see the comments for handle_missing_profiles()
|
||||
in predict.c as to why). Apply counts to the blocks in the callee
|
||||
before inlining, using the guessed edge frequencies, so that we don't
|
||||
end up with a 0-count inline body which can confuse downstream
|
||||
optimizations such as function splitting. */
|
||||
if (!ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count && count)
|
||||
{
|
||||
/* Apply the larger of the call bb count and the total incoming
|
||||
call edge count to the callee. */
|
||||
gcov_type in_count = 0;
|
||||
struct cgraph_edge *in_edge;
|
||||
for (in_edge = id->src_node->callers; in_edge;
|
||||
in_edge = in_edge->next_caller)
|
||||
in_count += in_edge->count;
|
||||
freqs_to_counts (id->src_node, count > in_count ? count : in_count);
|
||||
}
|
||||
|
||||
if (ENTRY_BLOCK_PTR_FOR_FUNCTION (src_cfun)->count)
|
||||
count_scale
|
||||
= GCOV_COMPUTE_SCALE (count,
|
||||
|
|
|
|||
|
|
@ -645,6 +645,8 @@ tree_profiling (void)
|
|||
pop_cfun ();
|
||||
}
|
||||
|
||||
handle_missing_profiles ();
|
||||
|
||||
del_node_map ();
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue