re PR middle-end/45098 (Missed induction variable optimization)

2011-06-14 Zdenek Dvorak <ook@ucw.cz> Tom de Vries <tom@codesourcery.com> PR target/45098 * cfgloop.h (nb_iterations_upper_bound, nb_iterations_estimate): Document changed semantics. (max_stmt_executions, max_stmt_executions_int): Declare. * tree-data-ref.c (estimated_loop_iterations) (estimated_loop_iterations_int): Move functions... * tree-ssa-loop-niter.c (estimated_loop_iterations) (estimated_loop_iterations_int): here. (record_estimate): Change nb_iterations_upper_bound and nb_iterations_estimate semantics. (max_stmt_executions, max_stmt_executions_int): New function. * tree-data-ref.c (estimated_loop_iterations_tree): Rename to ... (max_stmt_executions_tree): this. (analyze_miv_subscript): Use max_stmt_executions_tree instead of estimated_loop_iterations_tree. * tree-ssa-loop-ivopts.c (avg_loop_niter): Use max_stmt_executions_int instead of estimated_loop_iterations_int. * predict.c (predict_loops): Idem. * tree-parloops.c (parallelize_loops): Idem. * tree-data-ref.c (analyze_siv_subscript_cst_affine) (compute_overlap_steps_for_affine_1_2, analyze_subscript_affine_affine) (init_omega_for_ddr_1): Idem. * tree-ssa-loop-prefetch.c (determine_loop_nest_reuse) (loop_prefetch_arrays): Idem. * graphite-sese-to-poly.c (build_loop_iteration_domains): Use max_stmt_executions instead of estimated_loop_iterations. * tree-data-ref.c (estimated_loop_iterations_tree): Idem. * tree-vrp.c (adjust_range_with_scev): Use estimated_loop_iterations instead of nb_iterations_upper_bound. Co-Authored-By: Tom de Vries <tom@codesourcery.com> From-SVN: r175022
2011-06-14 16:29:58 +02:00 · 2011-06-14 16:29:58 +02:00 · b4a9343cf5
parent d2640c430f
commit b4a9343cf5
10 changed files with 183 additions and 114 deletions
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@ -1,3 +1,36 @@
 2011-06-14  Zdenek Dvorak  <ook@ucw.cz>
 	    Tom de Vries  <tom@codesourcery.com>
 	PR target/45098
 	* cfgloop.h (nb_iterations_upper_bound, nb_iterations_estimate):
 	Document changed semantics.
 	(max_stmt_executions, max_stmt_executions_int): Declare.
 	* tree-data-ref.c (estimated_loop_iterations)
 	(estimated_loop_iterations_int): Move functions...
 	* tree-ssa-loop-niter.c (estimated_loop_iterations)
 	(estimated_loop_iterations_int): here.
 	(record_estimate): Change nb_iterations_upper_bound and
 	nb_iterations_estimate semantics.
 	(max_stmt_executions, max_stmt_executions_int): New function.
 	* tree-data-ref.c (estimated_loop_iterations_tree): Rename to ...
 	(max_stmt_executions_tree): this.
 	(analyze_miv_subscript): Use max_stmt_executions_tree instead of
 	estimated_loop_iterations_tree.
 	* tree-ssa-loop-ivopts.c (avg_loop_niter): Use
 	max_stmt_executions_int instead of estimated_loop_iterations_int.
 	* predict.c (predict_loops): Idem.
 	* tree-parloops.c (parallelize_loops): Idem.
 	* tree-data-ref.c (analyze_siv_subscript_cst_affine)
 	(compute_overlap_steps_for_affine_1_2, analyze_subscript_affine_affine)
 	(init_omega_for_ddr_1): Idem.
 	* tree-ssa-loop-prefetch.c (determine_loop_nest_reuse)
 	(loop_prefetch_arrays): Idem.
 	* graphite-sese-to-poly.c (build_loop_iteration_domains): Use
 	max_stmt_executions instead of estimated_loop_iterations.
 	* tree-data-ref.c (estimated_loop_iterations_tree): Idem.
 	* tree-vrp.c (adjust_range_with_scev): Use estimated_loop_iterations
 	instead of nb_iterations_upper_bound.
 2011-06-13  Jan Hubicka  <jh@suse.cz>
 	* ipa.c (cgraph_address_taken_from_non_vtable_p): Check the ref type.
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@ -143,11 +143,13 @@ struct GTY ((chain_next ("%h.next"))) loop {
     computes and caches the computed information in this field.  */
  tree nb_iterations;
-  /* An integer guaranteed to bound the number of iterations of the loop
+  /* An integer guaranteed to be greater or equal to nb_iterations.  Only
-     from above.  */
+     valid if any_upper_bound is true.  */
  double_int nb_iterations_upper_bound;
-  /* An integer giving the expected number of iterations of the loop.  */
+  /* An integer giving an estimate on nb_iterations.  Unlike
     nb_iterations_upper_bound, there is no guarantee that it is at least
     nb_iterations.  */
  double_int nb_iterations_estimate;
  bool any_upper_bound;
@ -278,7 +280,9 @@ extern rtx doloop_condition_get (rtx);
 void estimate_numbers_of_iterations_loop (struct loop *, bool);
 HOST_WIDE_INT estimated_loop_iterations_int (struct loop *, bool);
 HOST_WIDE_INT max_stmt_executions_int (struct loop *, bool);
 bool estimated_loop_iterations (struct loop *, bool, double_int *);
 bool max_stmt_executions (struct loop *, bool, double_int *);
 /* Loop manipulation.  */
 extern bool can_duplicate_loop_p (const struct loop *loop);
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@ -1092,7 +1092,7 @@ build_loop_iteration_domains (scop_p scop, struct loop *loop,
      scan_tree_for_params (SCOP_REGION (scop), nb_iters, ub_expr, one);
      mpz_clear (one);
-      if (estimated_loop_iterations (loop, true, &nit))
+      if (max_stmt_executions (loop, true, &nit))
 	add_upper_bounds_from_estimated_nit (scop, nit, dim, ub_expr);
      /* loop_i <= expr_nb_iters */
--- a/gcc/predict.c
+++ b/gcc/predict.c
@ -994,7 +994,7 @@ predict_loops (void)
 	     the loop, use it to predict this exit.  */
 	  else if (n_exits == 1)
 	    {
-	      nitercst = estimated_loop_iterations_int (loop, false);
+	      nitercst = max_stmt_executions_int (loop, false);
 	      if (nitercst < 0)
 		continue;
 	      if (nitercst > max)
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@ -1621,66 +1621,18 @@ analyze_ziv_subscript (tree chrec_a,
    fprintf (dump_file, ")\n");
 }
-/* Sets NIT to the estimated number of executions of the statements in
+/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   LOOP.  If CONSERVATIVE is true, we must be sure that NIT is at least as
   large as the number of iterations.  If we have no reliable estimate,
   the function returns false, otherwise returns true.  */
 bool
 estimated_loop_iterations (struct loop *loop, bool conservative,
 			   double_int *nit)
 {
  estimate_numbers_of_iterations_loop (loop, true);
  if (conservative)
    {
      if (!loop->any_upper_bound)
 	return false;
      *nit = loop->nb_iterations_upper_bound;
    }
  else
    {
      if (!loop->any_estimate)
 	return false;
      *nit = loop->nb_iterations_estimate;
    }
  return true;
 }
 /* Similar to estimated_loop_iterations, but returns the estimate only
   if it fits to HOST_WIDE_INT.  If this is not the case, or the estimate
   on the number of iterations of LOOP could not be derived, returns -1.  */
 HOST_WIDE_INT
 estimated_loop_iterations_int (struct loop *loop, bool conservative)
 {
  double_int nit;
  HOST_WIDE_INT hwi_nit;
  if (!estimated_loop_iterations (loop, conservative, &nit))
    return -1;
  if (!double_int_fits_in_shwi_p (nit))
    return -1;
  hwi_nit = double_int_to_shwi (nit);
  return hwi_nit < 0 ? -1 : hwi_nit;
 }
 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
   and only if it fits to the int type.  If this is not the case, or the
-   estimate on the number of iterations of LOOP could not be derived, returns
+   bound on the number of iterations of LOOP could not be derived, returns
   chrec_dont_know.  */
 static tree
-estimated_loop_iterations_tree (struct loop *loop, bool conservative)
+max_stmt_executions_tree (struct loop *loop)
 {
  double_int nit;
  tree type;
-  if (!estimated_loop_iterations (loop, conservative, &nit))
+  if (!max_stmt_executions (loop, true, &nit))
    return chrec_dont_know;
  type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
@ -1763,7 +1715,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a,
 		      /* Perform weak-zero siv test to see if overlap is
 			 outside the loop bounds.  */
-		      numiter = estimated_loop_iterations_int (loop, false);
+		      numiter = max_stmt_executions_int (loop, true);
 		      if (numiter >= 0
 			  && compare_tree_int (tmp, numiter) > 0)
@ -1841,7 +1793,7 @@ analyze_siv_subscript_cst_affine (tree chrec_a,
 		      /* Perform weak-zero siv test to see if overlap is
 			 outside the loop bounds.  */
-		      numiter = estimated_loop_iterations_int (loop, false);
+		      numiter = max_stmt_executions_int (loop, true);
 		      if (numiter >= 0
 			  && compare_tree_int (tmp, numiter) > 0)
@ -2022,10 +1974,9 @@ compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));
  niter_x =
-    estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
+    max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)), true);
-				   false);
+  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
-  niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
+  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
  niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
@ -2350,10 +2301,8 @@ analyze_subscript_affine_affine (tree chrec_a,
 	  HOST_WIDE_INT niter, niter_a, niter_b;
 	  affine_fn ova, ovb;
-	  niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
+	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a), true);
-						   false);
+	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b), true);
 	  niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
 						   false);
 	  niter = MIN (niter_a, niter_b);
 	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
 	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
@ -2460,10 +2409,10 @@ analyze_subscript_affine_affine (tree chrec_a,
 	  if (i1 > 0 && j1 > 0)
 	    {
-	      HOST_WIDE_INT niter_a = estimated_loop_iterations_int
+	      HOST_WIDE_INT niter_a = max_stmt_executions_int
-		(get_chrec_loop (chrec_a), false);
+		(get_chrec_loop (chrec_a), true);
-	      HOST_WIDE_INT niter_b = estimated_loop_iterations_int
+	      HOST_WIDE_INT niter_b = max_stmt_executions_int
-		(get_chrec_loop (chrec_b), false);
+		(get_chrec_loop (chrec_b), true);
 	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
 	      /* (X0, Y0) is a solution of the Diophantine equation:
@ -2740,8 +2689,7 @@ analyze_miv_subscript (tree chrec_a,
 	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
-      *last_conflicts = estimated_loop_iterations_tree
+      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
 				(get_chrec_loop (chrec_a), true);
      dependence_stats.num_miv_dependent++;
    }
@ -3754,7 +3702,7 @@ init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
  for (i = 0; i <= DDR_INNER_LOOP (ddr)
 	 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
    {
-      HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
+      HOST_WIDE_INT nbi = max_stmt_executions_int (loopi, true);
      /* 0 <= loop_x */
      ineq = omega_add_zero_geq (pb, omega_black);
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@ -2134,7 +2134,7 @@ parallelize_loops (void)
 	  /* FIXME: the check for vector phi nodes could be removed.  */
 	  || loop_has_vector_phi_nodes (loop))
 	continue;
-      estimated = estimated_loop_iterations_int (loop, false);
+      estimated = max_stmt_executions_int (loop, false);
      /* FIXME: Bypass this check as graphite doesn't update the
      count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@ -115,7 +115,7 @@ along with GCC; see the file COPYING3.  If not see
 static inline HOST_WIDE_INT
 avg_loop_niter (struct loop *loop)
 {
-  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
+  HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@ -2568,18 +2568,17 @@ record_estimate (struct loop *loop, tree bound, double_int i_bound,
    }
  /* Update the number of iteration estimates according to the bound.
-     If at_stmt is an exit, then every statement in the loop is
+     If at_stmt is an exit or dominates the single exit from the loop,
-     executed at most BOUND + 1 times.  If it is not an exit, then
+     then the loop latch is executed at most BOUND times, otherwise
-     some of the statements before it could be executed BOUND + 2
+     it can be executed BOUND + 1 times.  */
     times, if an exit of LOOP is before stmt.  */
  exit = single_exit (loop);
  if (is_exit
      || (exit != NULL
 	  && dominated_by_p (CDI_DOMINATORS,
 			     exit->src, gimple_bb (at_stmt))))
-    delta = double_int_one;
+    delta = double_int_zero;
  else
-    delta = double_int_two;
+    delta = double_int_one;
  i_bound = double_int_add (i_bound, delta);
  /* If an overflow occurred, ignore the result.  */
@ -3042,6 +3041,93 @@ estimate_numbers_of_iterations_loop (struct loop *loop, bool use_undefined_p)
    loop->nb_iterations_estimate = loop->nb_iterations_upper_bound;
 }
 /* Sets NIT to the estimated number of executions of the latch of the
   LOOP.  If CONSERVATIVE is true, we must be sure that NIT is at least as
   large as the number of iterations.  If we have no reliable estimate,
   the function returns false, otherwise returns true.

   With CONSERVATIVE set, the value is taken from
   LOOP->nb_iterations_upper_bound and is available only if
   LOOP->any_upper_bound is set.  Otherwise it is taken from
   LOOP->nb_iterations_estimate and is available only if
   LOOP->any_estimate is set.  NIT is not written when false is
   returned.  */
 bool
 estimated_loop_iterations (struct loop *loop, bool conservative,
 			   double_int *nit)
 {
  /* Have the estimates stored in LOOP computed before reading them.  */
  estimate_numbers_of_iterations_loop (loop, true);
  if (conservative)
    {
      /* A guaranteed bound was requested; fail if none is recorded.  */
      if (!loop->any_upper_bound)
 	return false;
      *nit = loop->nb_iterations_upper_bound;
    }
  else
    {
      /* A (possibly inexact) estimate is enough; fail if none is
	 recorded.  */
      if (!loop->any_estimate)
 	return false;
      *nit = loop->nb_iterations_estimate;
    }
  return true;
 }
 /* Similar to estimated_loop_iterations, but returns the estimate only
   if it fits to HOST_WIDE_INT.  If this is not the case, or the estimate
   on the number of iterations of LOOP could not be derived, returns -1.
   CONSERVATIVE has the same meaning as in estimated_loop_iterations.
   The result is therefore either -1 or a non-negative value.  */
 HOST_WIDE_INT
 estimated_loop_iterations_int (struct loop *loop, bool conservative)
 {
  double_int nit;
  HOST_WIDE_INT hwi_nit;
  /* No estimate of the requested kind is available.  */
  if (!estimated_loop_iterations (loop, conservative, &nit))
    return -1;
  /* The estimate cannot be represented as a signed HOST_WIDE_INT.  */
  if (!double_int_fits_in_shwi_p (nit))
    return -1;
  hwi_nit = double_int_to_shwi (nit);
  /* A negative converted value is reported the same way as a missing
     estimate.  */
  return hwi_nit < 0 ? -1 : hwi_nit;
 }
 /* Returns an upper bound on the number of executions of statements
   in the LOOP.  For statements before the loop exit, this exceeds
   the number of executions of the latch by one.  CONSERVATIVE is passed
   on unchanged to estimated_loop_iterations_int, whose result is
   incremented by one here.  Returns -1 if estimated_loop_iterations_int
   returns -1 or if the increment overflows HOST_WIDE_INT.  */
 HOST_WIDE_INT
 max_stmt_executions_int (struct loop *loop, bool conservative)
 {
  HOST_WIDE_INT nit = estimated_loop_iterations_int (loop, conservative);
  HOST_WIDE_INT snit;
  /* Propagate the "no usable estimate" result.  */
  if (nit == -1)
    return -1;
  /* Perform the increment in the unsigned type, so that an overflow
     shows up as a negative value after converting back instead of
     being a signed overflow.  */
  snit = (HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) nit + 1);
  /* If the computation overflows, return -1.  */
  return snit < 0 ? -1 : snit;
 }
 /* Sets NIT to the estimated number of executions of the latch of the
   LOOP, plus one.  If CONSERVATIVE is true, we must be sure that NIT is at
   least as large as the number of iterations.  If we have no reliable
   estimate, the function returns false, otherwise returns true.

   False is also returned when adding one wraps around, i.e. when the
   incremented value does not compare greater (as unsigned) than the
   value obtained from estimated_loop_iterations.  In that case *NIT has
   already been overwritten with the wrapped value and must not be
   used.  */
 bool
 max_stmt_executions (struct loop *loop, bool conservative, double_int *nit)
 {
  double_int nit_minus_one;
  if (!estimated_loop_iterations (loop, conservative, nit))
    return false;
  /* Remember the latch count so that the increment below can be checked
     for wrap-around.  */
  nit_minus_one = *nit;
  *nit = double_int_add (*nit, double_int_one);
  /* Succeed only if the increment really produced a larger value.  */
  return double_int_ucmp (*nit, nit_minus_one) > 0;
 }
 /* Records estimates on numbers of iterations of loops.  */
 void
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@ -1549,7 +1549,7 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
 	continue;
      aloop = VEC_index (loop_p, vloops, i);
-      vol = estimated_loop_iterations_int (aloop, false);
+      vol = max_stmt_executions_int (aloop, false);
      if (vol < 0)
 	vol = expected_loop_iterations (aloop);
      volume *= vol;
@ -1801,7 +1801,7 @@ loop_prefetch_arrays (struct loop *loop)
    return false;
  ahead = (PREFETCH_LATENCY + time - 1) / time;
-  est_niter = estimated_loop_iterations_int (loop, false);
+  est_niter = max_stmt_executions_int (loop, false);
  /* Prefetching is not likely to be profitable if the trip count to ahead
     ratio is too small.  */
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@ -3403,44 +3403,42 @@ adjust_range_with_scev (value_range_t *vr, struct loop *loop,
    tmax = TYPE_MAX_VALUE (type);
  /* Try to use estimated number of iterations for the loop to constrain the
-     final value in the evolution.
+     final value in the evolution.  */
     We are interested in the number of executions of the latch, while
     nb_iterations_upper_bound includes the last execution of the exit test.  */
  if (TREE_CODE (step) == INTEGER_CST
      && loop->any_upper_bound
      && !double_int_zero_p (loop->nb_iterations_upper_bound)
      && is_gimple_val (init)
      && (TREE_CODE (init) != SSA_NAME
 	  || get_value_range (init)->type == VR_RANGE))
    {
-      value_range_t maxvr = { VR_UNDEFINED, NULL_TREE, NULL_TREE, NULL };
+      double_int nit;
      double_int dtmp;
      bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (step));
      int overflow = 0;
-      dtmp = double_int_mul_with_sign (tree_to_double_int (step),
+      if (estimated_loop_iterations (loop, true, &nit))
                                       double_int_sub (
                                           loop->nb_iterations_upper_bound,
                                           double_int_one),
                                       unsigned_p, &overflow);
      /* If the multiplication overflowed we can't do a meaningful
 	 adjustment.  Likewise if the result doesn't fit in the type
 	 of the induction variable.  For a signed type we have to
 	 check whether the result has the expected signedness which
 	 is that of the step as nb_iterations_upper_bound is unsigned.  */
      if (!overflow
 	  && double_int_fits_to_tree_p (TREE_TYPE (init), dtmp)
 	  && (unsigned_p
 	      || ((dtmp.high ^ TREE_INT_CST_HIGH (step)) >= 0)))
 	{
-	  tem = double_int_to_tree (TREE_TYPE (init), dtmp);
+	  value_range_t maxvr = { VR_UNDEFINED, NULL_TREE, NULL_TREE, NULL };
-	  extract_range_from_binary_expr (&maxvr, PLUS_EXPR,
+	  double_int dtmp;
-					  TREE_TYPE (init), init, tem);
+	  bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (step));
-	  /* Likewise if the addition did.  */
+	  int overflow = 0;
-	  if (maxvr.type == VR_RANGE)
+
 	  dtmp = double_int_mul_with_sign (tree_to_double_int (step), nit,
 					   unsigned_p, &overflow);
 	  /* If the multiplication overflowed we can't do a meaningful
 	     adjustment.  Likewise if the result doesn't fit in the type
 	     of the induction variable.  For a signed type we have to
 	     check whether the result has the expected signedness which
 	     is that of the step as number of iterations is unsigned.  */
 	  if (!overflow
 	      && double_int_fits_to_tree_p (TREE_TYPE (init), dtmp)
 	      && (unsigned_p
 		  || ((dtmp.high ^ TREE_INT_CST_HIGH (step)) >= 0)))
 	    {
-	      tmin = maxvr.min;
+	      tem = double_int_to_tree (TREE_TYPE (init), dtmp);
-	      tmax = maxvr.max;
+	      extract_range_from_binary_expr (&maxvr, PLUS_EXPR,
 					      TREE_TYPE (init), init, tem);
 	      /* Likewise if the addition did.  */
 	      if (maxvr.type == VR_RANGE)
 		{
 		  tmin = maxvr.min;
 		  tmax = maxvr.max;
 		}
 	    }
 	}
    }