predict.h (force_edge_cold): Declare.

* predict.h (force_edge_cold): Declare. * predict.c (force_edge_cold): New function. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile updating. (canonicalize_loop_induction_variables): Fix formating. * gcc.dg/tree-ssa/cunroll-12.c: New testcase. * gcc.dg/tree-ssa/cunroll-13.c: New testcase. * gcc.dg/tree-ssa/cunroll-14.c: New testcase. From-SVN: r236874
2016-05-30 12:40:33 +02:00 · 2016-05-30 12:40:33 +02:00 · 98bdbb39a6
parent d5cf3d8da6
commit 98bdbb39a6
8 changed files with 176 additions and 5 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,11 @@
 2016-05-30  Jan Hubicka  <hubicka@ucw.cz>
 	* predict.h (force_edge_cold): Declare.
 	* predict.c (force_edge_cold): New function.
 	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile
 	updating.
 	(canonicalize_loop_induction_variables): Fix formating.
 2016-05-30  Eric Botcazou  <ebotcazou@adacore.com>
 	* config/visium/visium.c (visium_split_double_add): Minor tweaks.
--- a/gcc/predict.c
+++ b/gcc/predict.c
@ -3249,3 +3249,99 @@ report_predictor_hitrates (void)
  loop_optimizer_finalize ();
 }
 /* Force edge E to be cold.
   If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
   keep low probability to represent possible error in a guess.  This is used
   i.e. in case we predict loop to likely iterate given number of times but
   we are not 100% sure.
   This function locally updates profile without attempt to keep global
   consistency which can not be reached in full generality without full profile
   rebuild from probabilities alone.  Doing so is not necessarily a good idea
   because frequencies and counts may be more realistic then probabilities.
   In some cases (such as for elimination of early exits during full loop
   unrolling) the caller can ensure that profile will get consistent
   afterwards.  */
 void
 force_edge_cold (edge e, bool impossible)
 {
  gcov_type count_sum = 0;
  int prob_sum = 0;
  edge_iterator ei;
  edge e2;
  gcov_type old_count = e->count;
  int old_probability = e->probability;
  gcov_type gcov_scale = REG_BR_PROB_BASE;
  int prob_scale = REG_BR_PROB_BASE;
  /* If edge is already improbably or cold, just return.  */
  if (e->probability <= impossible ? PROB_VERY_UNLIKELY : 0
      && (!impossible || !e->count))
    return;
  FOR_EACH_EDGE (e2, ei, e->src->succs)
    if (e2 != e)
      {
 	count_sum += e2->count;
 	prob_sum += e2->probability;
      }
  /* If there are other edges out of e->src, redistribute probabilitity
     there.  */
  if (prob_sum)
    {
      e->probability
 	 = MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
      if (old_probability)
 	e->count = RDIV (e->count * e->probability, old_probability);
      else
        e->count = MIN (e->count, impossible ? 0 : 1);
      if (count_sum)
 	gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
 			   count_sum);
      prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
 			 prob_sum);
      if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "Making edge %i->%i %s by redistributing "
 		 "probability to other edges.\n",
 		 e->src->index, e->dest->index,
 		 impossible ? "imposisble" : "cold");
      FOR_EACH_EDGE (e2, ei, e->src->succs)
 	if (e2 != e)
 	  {
 	    e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
 	    e2->probability = RDIV (e2->probability * prob_scale,
 				    REG_BR_PROB_BASE);
 	  }
    }
  /* If all edges out of e->src are unlikely, the basic block itself
     is unlikely.  */
  else
    {
      e->probability = REG_BR_PROB_BASE;
      /* If we did not adjusting, the source basic block has no likely edeges
 	 leaving other direction. In that case force that bb cold, too.
 	 This in general is difficult task to do, but handle special case when
 	 BB has only one predecestor.  This is common case when we are updating
 	 after loop transforms.  */
      if (!prob_sum && !count_sum && single_pred_p (e->src)
 	  && e->src->frequency > (impossible ? 0 : 1))
 	{
 	  int old_frequency = e->src->frequency;
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
 		     impossible ? "imposisble" : "cold");
 	  e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
 	  e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
 					   old_frequency);
 	  force_edge_cold (single_pred_edge (e->src), impossible);
 	}
      else if (dump_file && (dump_flags & TDF_DETAILS)
 	       && maybe_hot_bb_p (cfun, e->src))
 	fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
 		 impossible ? "imposisble" : "cold");
    }
 }
--- a/gcc/predict.h
+++ b/gcc/predict.h
@ -91,5 +91,6 @@ extern tree build_predict_expr (enum br_predictor, enum prediction);
 extern const char *predictor_name (enum br_predictor);
 extern void rebuild_frequencies (void);
 extern void report_predictor_hitrates (void);
 extern void force_edge_cold (edge, bool);
 #endif  /* GCC_PREDICT_H */
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@ -1,3 +1,9 @@
 2016-05-30  Jan Hubicka  <hubicka@ucw.cz>
 	* gcc.dg/tree-ssa/cunroll-12.c: New testcase.
 	* gcc.dg/tree-ssa/cunroll-13.c: New testcase.
 	* gcc.dg/tree-ssa/cunroll-14.c: New testcase.
 2016-05-30  Tom de Vries  <tom@codesourcery.com>
 	PR tree-optimization/69067
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c
@ -0,0 +1,11 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-blocks-details" } */
 struct a {int a[8];int b;};
 void
 t(struct a *a)
 {
  for (int i=0;a->a[i];i++)
    a->a[i]++;
 }
 /* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c
@ -0,0 +1,16 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdisable-tree-cunrolli -fdisable-tree-vrp1 -fdump-tree-cunroll-blocks-details" } */
 struct a {int a[8];int b;};
 void
 t(struct a *a)
 {
  for (int i=0;i<123456 && a->a[i];i++)
    a->a[i]++;
 }
 /* This pass relies on the fact that we do not eliminate the redundant test for i early.
   It is necessary to disable all passes that do so.  At the moment it is vrp1 and cunrolli.  */
 /* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
--- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c
@ -0,0 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */
 struct a {int a[100];};
 void
 t(struct a *a)
 {
  for (int i=0;i<5 && a->a[i];i++)
    a->a[i]++;
 }
 /* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
 /* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@ -730,8 +730,14 @@ try_unroll_loop_completely (struct loop *loop,
      if (ul == UL_SINGLE_ITER)
 	return false;
      /* EXIT can be removed only if we are sure it passes first N_UNROLL
 	 iterations.  */
      bool remove_exit = (exit && niter
 			  && TREE_CODE (niter) == INTEGER_CST
 			  && wi::leu_p (n_unroll, wi::to_widest (niter)));
      large = tree_estimate_loop_size
-		 (loop, exit, edge_to_cancel, &size,
+		 (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
 		  PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
      ninsns = size.overall;
      if (large)
@ -837,8 +843,20 @@ try_unroll_loop_completely (struct loop *loop,
      initialize_original_copy_tables ();
      wont_exit = sbitmap_alloc (n_unroll + 1);
      if (exit && niter
 	  && TREE_CODE (niter) == INTEGER_CST
 	  && wi::leu_p (n_unroll, wi::to_widest (niter)))
 	{
 	  bitmap_ones (wont_exit);
 	  if (wi::eq_p (wi::to_widest (niter), n_unroll)
 	      || edge_to_cancel)
 	    bitmap_clear_bit (wont_exit, 0);
 	}
      else
 	{
 	  exit = NULL;
 	  bitmap_clear (wont_exit);
 	}
      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 						 n_unroll, wont_exit,
@ -869,6 +887,7 @@ try_unroll_loop_completely (struct loop *loop,
  if (edge_to_cancel)
    {
      gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
      force_edge_cold (edge_to_cancel, true);
      if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
 	gimple_cond_make_false (cond);
      else
@ -1112,8 +1131,8 @@ canonicalize_loop_induction_variables (struct loop *loop,
  if (dump_file && (dump_flags & TDF_DETAILS)
      && likely_max_loop_iterations_int (loop) >= 0)
    {
-      fprintf (dump_file, "Loop likely %d iterates at most %i times.\n", loop->num,
+      fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
-	       (int)likely_max_loop_iterations_int (loop));
+	       loop->num, (int)likely_max_loop_iterations_int (loop));
    }
  /* Remove exits that are known to be never taken based on loop bound.