mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/59058 (wrong code at -O3 on x86_64-linux-gnu (affecting gcc 4.6 to trunk))
2013-11-21 Richard Biener <rguenther@suse.de> PR tree-optimization/59058 * tree-scalar-evolution.h (number_of_exit_cond_executions): Remove. * tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise. * tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ... (LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this. (NITERS_KNOWN_P): Fold into ... (LOOP_VINFO_NITERS_KNOWN_P): ... this. (LOOP_VINFO_PEELING_FOR_NITER): Add. * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. (vect_do_peeling_for_alignment): Re-use precomputed niter instead of re-emitting it. * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. * tree-vect-loop.c (vect_get_loop_niters): Use number_of_latch_executions. (new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER. (vect_analyze_loop_form): Simplify. (vect_analyze_loop_operations): Move epilogue peeling code ... (vect_analyze_loop_2): ... here and adjust it to compute LOOP_VINFO_PEELING_FOR_NITER. (vect_estimate_min_profitable_iters): Use LOOP_VINFO_PEELING_FOR_ALIGNMENT. (vect_build_loop_niters): Emit on the preheader. (vect_generate_tmps_on_preheader): Likewise. (vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead of recomputing it. Adjust. From-SVN: r205217
This commit is contained in:
parent
b05e02332f
commit
15e693cc59
|
|
@ -1,3 +1,33 @@
|
||||||
|
2013-11-21 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
|
PR tree-optimization/59058
|
||||||
|
* tree-scalar-evolution.h (number_of_exit_cond_executions): Remove.
|
||||||
|
* tree-scalar-evolution.c (number_of_exit_cond_executions): Likewise.
|
||||||
|
* tree-vectorizer.h (LOOP_PEELING_FOR_ALIGNMENT): Rename to ...
|
||||||
|
(LOOP_VINFO_PEELING_FOR_ALIGNMENT): ... this.
|
||||||
|
(NITERS_KNOWN_P): Fold into ...
|
||||||
|
(LOOP_VINFO_NITERS_KNOWN_P): ... this.
|
||||||
|
(LOOP_VINFO_PEELING_FOR_NITER): Add.
|
||||||
|
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop):
|
||||||
|
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
|
||||||
|
(vect_do_peeling_for_alignment): Re-use precomputed niter
|
||||||
|
instead of re-emitting it.
|
||||||
|
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
|
||||||
|
Use LOOP_VINFO_PEELING_FOR_ALIGNMENT.
|
||||||
|
* tree-vect-loop.c (vect_get_loop_niters): Use
|
||||||
|
number_of_latch_executions.
|
||||||
|
(new_loop_vec_info): Initialize LOOP_VINFO_PEELING_FOR_NITER.
|
||||||
|
(vect_analyze_loop_form): Simplify.
|
||||||
|
(vect_analyze_loop_operations): Move epilogue peeling code ...
|
||||||
|
(vect_analyze_loop_2): ... here and adjust it to compute
|
||||||
|
LOOP_VINFO_PEELING_FOR_NITER.
|
||||||
|
(vect_estimate_min_profitable_iters): Use
|
||||||
|
LOOP_VINFO_PEELING_FOR_ALIGNMENT.
|
||||||
|
(vect_build_loop_niters): Emit on the preheader.
|
||||||
|
(vect_generate_tmps_on_preheader): Likewise.
|
||||||
|
(vect_transform_loop): Use LOOP_VINFO_PEELING_FOR_NITER instead
|
||||||
|
of recomputing it. Adjust.
|
||||||
|
|
||||||
2013-11-21 Richard Biener <rguenther@suse.de>
|
2013-11-21 Richard Biener <rguenther@suse.de>
|
||||||
|
|
||||||
* tree-vectorizer.h (LOC, UNKNOWN_LOC, EXPR_LOC, LOC_FILE,
|
* tree-vectorizer.h (LOC, UNKNOWN_LOC, EXPR_LOC, LOC_FILE,
|
||||||
|
|
|
||||||
|
|
@ -2910,34 +2910,6 @@ number_of_latch_executions (struct loop *loop)
|
||||||
loop->nb_iterations = res;
|
loop->nb_iterations = res;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns the number of executions of the exit condition of LOOP,
|
|
||||||
i.e., the number by one higher than number_of_latch_executions.
|
|
||||||
Note that unlike number_of_latch_executions, this number does
|
|
||||||
not necessarily fit in the unsigned variant of the type of
|
|
||||||
the control variable -- if the number of iterations is a constant,
|
|
||||||
we return chrec_dont_know if adding one to number_of_latch_executions
|
|
||||||
overflows; however, in case the number of iterations is symbolic
|
|
||||||
expression, the caller is responsible for dealing with this
|
|
||||||
the possible overflow. */
|
|
||||||
|
|
||||||
tree
|
|
||||||
number_of_exit_cond_executions (struct loop *loop)
|
|
||||||
{
|
|
||||||
tree ret = number_of_latch_executions (loop);
|
|
||||||
tree type = chrec_type (ret);
|
|
||||||
|
|
||||||
if (chrec_contains_undetermined (ret))
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = chrec_fold_plus (type, ret, build_int_cst (type, 1));
|
|
||||||
if (TREE_CODE (ret) == INTEGER_CST
|
|
||||||
&& TREE_OVERFLOW (ret))
|
|
||||||
return chrec_dont_know;
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Counters for the stats. */
|
/* Counters for the stats. */
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
#define GCC_TREE_SCALAR_EVOLUTION_H
|
#define GCC_TREE_SCALAR_EVOLUTION_H
|
||||||
|
|
||||||
extern tree number_of_latch_executions (struct loop *);
|
extern tree number_of_latch_executions (struct loop *);
|
||||||
extern tree number_of_exit_cond_executions (struct loop *);
|
|
||||||
extern gimple get_loop_exit_condition (const struct loop *);
|
extern gimple get_loop_exit_condition (const struct loop *);
|
||||||
|
|
||||||
extern void scev_initialize (void);
|
extern void scev_initialize (void);
|
||||||
|
|
|
||||||
|
|
@ -1735,9 +1735,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||||
|
|
||||||
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
|
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
|
||||||
if (npeel)
|
if (npeel)
|
||||||
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
|
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
|
||||||
else
|
else
|
||||||
LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
|
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
|
||||||
|
= DR_MISALIGNMENT (dr0);
|
||||||
SET_DR_MISALIGNMENT (dr0, 0);
|
SET_DR_MISALIGNMENT (dr0, 0);
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1736,16 +1736,16 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int
|
||||||
|
|
||||||
pe = loop_preheader_edge (loop);
|
pe = loop_preheader_edge (loop);
|
||||||
|
|
||||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
|
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
|
||||||
{
|
{
|
||||||
int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
||||||
|
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_NOTE, vect_location,
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
"known peeling = %d.\n", npeel);
|
"known peeling = %d.\n", npeel);
|
||||||
|
|
||||||
iters = build_int_cst (niters_type, npeel);
|
iters = build_int_cst (niters_type, npeel);
|
||||||
*bound = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
*bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -1876,7 +1876,6 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
|
||||||
{
|
{
|
||||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||||
tree niters_of_prolog_loop;
|
tree niters_of_prolog_loop;
|
||||||
tree n_iters;
|
|
||||||
tree wide_prolog_niters;
|
tree wide_prolog_niters;
|
||||||
struct loop *new_loop;
|
struct loop *new_loop;
|
||||||
int max_iter;
|
int max_iter;
|
||||||
|
|
@ -1918,9 +1917,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
|
||||||
"loop to %d\n", max_iter);
|
"loop to %d\n", max_iter);
|
||||||
|
|
||||||
/* Update number of times loop executes. */
|
/* Update number of times loop executes. */
|
||||||
n_iters = LOOP_VINFO_NITERS (loop_vinfo);
|
|
||||||
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
|
LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
|
||||||
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
|
TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
|
||||||
|
|
||||||
if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
|
if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
|
||||||
wide_prolog_niters = niters_of_prolog_loop;
|
wide_prolog_niters = niters_of_prolog_loop;
|
||||||
|
|
|
||||||
|
|
@ -771,11 +771,12 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
|
||||||
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
|
vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Function vect_get_loop_niters.
|
/* Function vect_get_loop_niters.
|
||||||
|
|
||||||
Determine how many iterations the loop is executed.
|
Determine how many iterations the loop is executed and place it
|
||||||
If an expression that represents the number of iterations
|
in NUMBER_OF_ITERATIONS.
|
||||||
can be constructed, place it in NUMBER_OF_ITERATIONS.
|
|
||||||
Return the loop exit condition. */
|
Return the loop exit condition. */
|
||||||
|
|
||||||
static gimple
|
static gimple
|
||||||
|
|
@ -786,20 +787,16 @@ vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_NOTE, vect_location,
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
"=== get_loop_niters ===\n");
|
"=== get_loop_niters ===\n");
|
||||||
niters = number_of_exit_cond_executions (loop);
|
|
||||||
|
|
||||||
if (niters != NULL_TREE
|
niters = number_of_latch_executions (loop);
|
||||||
&& niters != chrec_dont_know)
|
/* We want the number of loop header executions which is the number
|
||||||
{
|
of latch executions plus one.
|
||||||
*number_of_iterations = niters;
|
??? For UINT_MAX latch executions this number overflows to zero
|
||||||
|
for loops like do { n++; } while (n != 0); */
|
||||||
if (dump_enabled_p ())
|
if (niters && !chrec_contains_undetermined (niters))
|
||||||
{
|
niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), niters,
|
||||||
dump_printf_loc (MSG_NOTE, vect_location, "==> get_loop_niters:");
|
build_int_cst (TREE_TYPE (niters), 1));
|
||||||
dump_generic_expr (MSG_NOTE, TDF_SLIM, *number_of_iterations);
|
*number_of_iterations = niters;
|
||||||
dump_printf (MSG_NOTE, "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return get_loop_exit_condition (loop);
|
return get_loop_exit_condition (loop);
|
||||||
}
|
}
|
||||||
|
|
@ -907,7 +904,7 @@ new_loop_vec_info (struct loop *loop)
|
||||||
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
|
LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
|
||||||
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
|
LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
|
||||||
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
|
LOOP_VINFO_VECTORIZABLE_P (res) = 0;
|
||||||
LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
|
LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
|
||||||
LOOP_VINFO_VECT_FACTOR (res) = 0;
|
LOOP_VINFO_VECT_FACTOR (res) = 0;
|
||||||
LOOP_VINFO_LOOP_NEST (res).create (3);
|
LOOP_VINFO_LOOP_NEST (res).create (3);
|
||||||
LOOP_VINFO_DATAREFS (res).create (10);
|
LOOP_VINFO_DATAREFS (res).create (10);
|
||||||
|
|
@ -924,6 +921,7 @@ new_loop_vec_info (struct loop *loop)
|
||||||
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
|
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
|
||||||
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
|
LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
|
||||||
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
|
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
|
||||||
|
LOOP_VINFO_PEELING_FOR_NITER (res) = false;
|
||||||
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
|
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
|
@ -1091,12 +1089,12 @@ vect_analyze_loop_form (struct loop *loop)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (empty_block_p (loop->header))
|
if (empty_block_p (loop->header))
|
||||||
{
|
{
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
"not vectorized: empty loop.\n");
|
"not vectorized: empty loop.\n");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -1243,7 +1241,8 @@ vect_analyze_loop_form (struct loop *loop)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!number_of_iterations)
|
if (!number_of_iterations
|
||||||
|
|| chrec_contains_undetermined (number_of_iterations))
|
||||||
{
|
{
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
|
|
@ -1254,27 +1253,7 @@ vect_analyze_loop_form (struct loop *loop)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (chrec_contains_undetermined (number_of_iterations))
|
if (integer_zerop (number_of_iterations))
|
||||||
{
|
|
||||||
if (dump_enabled_p ())
|
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
||||||
"Infinite number of iterations.\n");
|
|
||||||
if (inner_loop_vinfo)
|
|
||||||
destroy_loop_vec_info (inner_loop_vinfo, true);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!NITERS_KNOWN_P (number_of_iterations))
|
|
||||||
{
|
|
||||||
if (dump_enabled_p ())
|
|
||||||
{
|
|
||||||
dump_printf_loc (MSG_NOTE, vect_location,
|
|
||||||
"Symbolic number of iterations is ");
|
|
||||||
dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
|
|
||||||
dump_printf (MSG_NOTE, "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
|
|
||||||
{
|
{
|
||||||
if (dump_enabled_p ())
|
if (dump_enabled_p ())
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
|
|
@ -1288,6 +1267,17 @@ vect_analyze_loop_form (struct loop *loop)
|
||||||
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
|
LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
|
||||||
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
|
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
|
||||||
|
|
||||||
|
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
{
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
"Symbolic number of iterations is ");
|
||||||
|
dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
|
||||||
|
dump_printf (MSG_NOTE, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
|
STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
|
||||||
|
|
||||||
/* CHECKME: May want to keep it around it in the future. */
|
/* CHECKME: May want to keep it around it in the future. */
|
||||||
|
|
@ -1588,23 +1578,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
|
||||||
|| ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
|
||||||
< exact_log2 (vectorization_factor)))
|
|
||||||
{
|
|
||||||
if (dump_enabled_p ())
|
|
||||||
dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
|
|
||||||
if (!vect_can_advance_ivs_p (loop_vinfo)
|
|
||||||
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
|
|
||||||
{
|
|
||||||
if (dump_enabled_p ())
|
|
||||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
|
||||||
"not vectorized: can't create required "
|
|
||||||
"epilog loop\n");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1760,6 +1733,40 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Decide whether we need to create an epilogue loop to handle
|
||||||
|
remaining scalar iterations. */
|
||||||
|
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||||
|
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
|
||||||
|
{
|
||||||
|
if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
|
||||||
|
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
||||||
|
< exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
|
||||||
|
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
|
||||||
|
}
|
||||||
|
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
|
||||||
|
|| (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
||||||
|
< (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))))
|
||||||
|
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
|
||||||
|
|
||||||
|
/* If an epilogue loop is required make sure we can create one. */
|
||||||
|
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|
||||||
|
|| LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
|
||||||
|
if (!vect_can_advance_ivs_p (loop_vinfo)
|
||||||
|
|| !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
|
||||||
|
single_exit (LOOP_VINFO_LOOP
|
||||||
|
(loop_vinfo))))
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
|
"not vectorized: can't create required "
|
||||||
|
"epilog loop\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -2689,7 +2696,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
||||||
int scalar_single_iter_cost = 0;
|
int scalar_single_iter_cost = 0;
|
||||||
int scalar_outside_cost = 0;
|
int scalar_outside_cost = 0;
|
||||||
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||||
int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
||||||
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
|
||||||
|
|
||||||
/* Cost model disabled. */
|
/* Cost model disabled. */
|
||||||
|
|
@ -2880,7 +2887,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Cost model check occurs at prologue generation. */
|
/* Cost model check occurs at prologue generation. */
|
||||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
|
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
|
||||||
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
|
scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
|
||||||
+ vect_get_stmt_cost (cond_branch_not_taken);
|
+ vect_get_stmt_cost (cond_branch_not_taken);
|
||||||
/* Cost model check occurs at epilogue generation. */
|
/* Cost model check occurs at epilogue generation. */
|
||||||
|
|
@ -5574,47 +5581,51 @@ vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
|
||||||
|
|
||||||
|
|
||||||
/* This function builds ni_name = number of iterations. Statements
|
/* This function builds ni_name = number of iterations. Statements
|
||||||
are queued onto SEQ. */
|
are emitted on the loop preheader edge. */
|
||||||
|
|
||||||
static tree
|
static tree
|
||||||
vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq *seq)
|
vect_build_loop_niters (loop_vec_info loop_vinfo)
|
||||||
{
|
{
|
||||||
tree ni_name, var;
|
|
||||||
gimple_seq stmts = NULL;
|
|
||||||
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
|
tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
|
||||||
|
if (TREE_CODE (ni) == INTEGER_CST)
|
||||||
|
return ni;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
tree ni_name, var;
|
||||||
|
gimple_seq stmts = NULL;
|
||||||
|
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
|
||||||
|
|
||||||
var = create_tmp_var (TREE_TYPE (ni), "niters");
|
var = create_tmp_var (TREE_TYPE (ni), "niters");
|
||||||
ni_name = force_gimple_operand (ni, &stmts, false, var);
|
ni_name = force_gimple_operand (ni, &stmts, false, var);
|
||||||
|
if (stmts)
|
||||||
|
gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||||
|
|
||||||
if (stmts)
|
return ni_name;
|
||||||
gimple_seq_add_seq (seq, stmts);
|
}
|
||||||
|
|
||||||
return ni_name;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* This function generates the following statements:
|
/* This function generates the following statements:
|
||||||
|
|
||||||
ni_name = number of iterations loop executes
|
ni_name = number of iterations loop executes
|
||||||
ratio = ni_name / vf
|
ratio = ni_name / vf
|
||||||
ratio_mult_vf_name = ratio * vf
|
ratio_mult_vf_name = ratio * vf
|
||||||
|
|
||||||
and places them in COND_EXPR_STMT_LIST. */
|
and places them on the loop preheader edge. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
||||||
tree ni_name,
|
tree ni_name,
|
||||||
tree *ratio_mult_vf_name_ptr,
|
tree *ratio_mult_vf_name_ptr,
|
||||||
tree *ratio_name_ptr,
|
tree *ratio_name_ptr)
|
||||||
gimple_seq *cond_expr_stmt_list)
|
|
||||||
{
|
{
|
||||||
gimple_seq stmts;
|
|
||||||
tree ni_minus_gap_name;
|
tree ni_minus_gap_name;
|
||||||
tree var;
|
tree var;
|
||||||
tree ratio_name;
|
tree ratio_name;
|
||||||
tree ratio_mult_vf_name;
|
tree ratio_mult_vf_name;
|
||||||
tree ni = LOOP_VINFO_NITERS (loop_vinfo);
|
tree ni = LOOP_VINFO_NITERS (loop_vinfo);
|
||||||
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||||
|
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
|
||||||
tree log_vf;
|
tree log_vf;
|
||||||
|
|
||||||
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
|
log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
|
||||||
|
|
@ -5630,11 +5641,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
||||||
if (!is_gimple_val (ni_minus_gap_name))
|
if (!is_gimple_val (ni_minus_gap_name))
|
||||||
{
|
{
|
||||||
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
|
var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
|
||||||
|
gimple stmts = NULL;
|
||||||
stmts = NULL;
|
|
||||||
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
|
ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
|
||||||
true, var);
|
true, var);
|
||||||
gimple_seq_add_seq (cond_expr_stmt_list, stmts);
|
gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
@ -5647,10 +5657,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
||||||
if (!is_gimple_val (ratio_name))
|
if (!is_gimple_val (ratio_name))
|
||||||
{
|
{
|
||||||
var = create_tmp_var (TREE_TYPE (ni), "bnd");
|
var = create_tmp_var (TREE_TYPE (ni), "bnd");
|
||||||
|
gimple stmts = NULL;
|
||||||
stmts = NULL;
|
|
||||||
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
|
ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
|
||||||
gimple_seq_add_seq (cond_expr_stmt_list, stmts);
|
gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||||
}
|
}
|
||||||
*ratio_name_ptr = ratio_name;
|
*ratio_name_ptr = ratio_name;
|
||||||
|
|
||||||
|
|
@ -5663,11 +5672,10 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
|
||||||
if (!is_gimple_val (ratio_mult_vf_name))
|
if (!is_gimple_val (ratio_mult_vf_name))
|
||||||
{
|
{
|
||||||
var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
|
var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
|
||||||
|
gimple stmts = NULL;
|
||||||
stmts = NULL;
|
|
||||||
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
|
ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
|
||||||
true, var);
|
true, var);
|
||||||
gimple_seq_add_seq (cond_expr_stmt_list, stmts);
|
gsi_insert_seq_on_edge_immediate (pe, stmts);
|
||||||
}
|
}
|
||||||
*ratio_mult_vf_name_ptr = ratio_mult_vf_name;
|
*ratio_mult_vf_name_ptr = ratio_mult_vf_name;
|
||||||
}
|
}
|
||||||
|
|
@ -5739,20 +5747,20 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||||
check_profitability = false;
|
check_profitability = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Peel the loop if there are data refs with unknown alignment.
|
tree ni_name = vect_build_loop_niters (loop_vinfo);
|
||||||
Only one data ref with unknown store is allowed.
|
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
|
||||||
This clobbers LOOP_VINFO_NITERS but retains the original
|
|
||||||
in LOOP_VINFO_NITERS_UNCHANGED. So we cannot avoid re-computing
|
|
||||||
niters. */
|
|
||||||
|
|
||||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
/* Peel the loop if there are data refs with unknown alignment.
|
||||||
|
Only one data ref with unknown store is allowed. */
|
||||||
|
|
||||||
|
if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
|
||||||
{
|
{
|
||||||
gimple_seq stmts = NULL;
|
|
||||||
tree ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
|
|
||||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
|
|
||||||
vect_do_peeling_for_alignment (loop_vinfo, ni_name,
|
vect_do_peeling_for_alignment (loop_vinfo, ni_name,
|
||||||
th, check_profitability);
|
th, check_profitability);
|
||||||
check_profitability = false;
|
check_profitability = false;
|
||||||
|
/* The above adjusts LOOP_VINFO_NITERS, so cause ni_name to
|
||||||
|
be re-computed. */
|
||||||
|
ni_name = NULL_TREE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
|
/* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
|
||||||
|
|
@ -5763,16 +5771,14 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||||
will remain scalar and will compute the remaining (n%VF) iterations.
|
will remain scalar and will compute the remaining (n%VF) iterations.
|
||||||
(VF is the vectorization factor). */
|
(VF is the vectorization factor). */
|
||||||
|
|
||||||
if ((int) tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
|
if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
|
||||||
< exact_log2 (vectorization_factor)
|
|
||||||
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
|
||||||
{
|
{
|
||||||
tree ni_name, ratio_mult_vf;
|
tree ratio_mult_vf;
|
||||||
gimple_seq stmts = NULL;
|
if (!ni_name)
|
||||||
ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
|
ni_name = vect_build_loop_niters (loop_vinfo);
|
||||||
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
|
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
|
||||||
&ratio, &stmts);
|
&ratio);
|
||||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
|
|
||||||
vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
|
vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
|
||||||
th, check_profitability);
|
th, check_profitability);
|
||||||
}
|
}
|
||||||
|
|
@ -5781,12 +5787,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
||||||
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
|
LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tree ni_name;
|
if (!ni_name)
|
||||||
gimple_seq stmts = NULL;
|
ni_name = vect_build_loop_niters (loop_vinfo);
|
||||||
ni_name = vect_build_loop_niters (loop_vinfo, &stmts);
|
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
|
||||||
vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL,
|
|
||||||
&ratio, &stmts);
|
|
||||||
gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 1) Make sure the loop header has exactly two entries
|
/* 1) Make sure the loop header has exactly two entries
|
||||||
|
|
|
||||||
|
|
@ -361,7 +361,7 @@ typedef struct _loop_vec_info {
|
||||||
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs
|
#define LOOP_VINFO_DATAREFS(L) (L)->datarefs
|
||||||
#define LOOP_VINFO_DDRS(L) (L)->ddrs
|
#define LOOP_VINFO_DDRS(L) (L)->ddrs
|
||||||
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
|
#define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
|
||||||
#define LOOP_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
|
#define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
|
||||||
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
|
#define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
|
||||||
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
|
#define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
|
||||||
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
|
#define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
|
||||||
|
|
@ -375,18 +375,15 @@ typedef struct _loop_vec_info {
|
||||||
#define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
|
#define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
|
||||||
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
|
#define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
|
||||||
#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
|
#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
|
||||||
|
#define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter
|
||||||
|
|
||||||
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
||||||
(L)->may_misalign_stmts.length () > 0
|
(L)->may_misalign_stmts.length () > 0
|
||||||
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
|
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
|
||||||
(L)->may_alias_ddrs.length () > 0
|
(L)->may_alias_ddrs.length () > 0
|
||||||
|
|
||||||
#define NITERS_KNOWN_P(n) \
|
|
||||||
(tree_fits_shwi_p ((n)) \
|
|
||||||
&& tree_to_shwi ((n)) > 0)
|
|
||||||
|
|
||||||
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
|
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
|
||||||
NITERS_KNOWN_P ((L)->num_iters)
|
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
|
||||||
|
|
||||||
static inline loop_vec_info
|
static inline loop_vec_info
|
||||||
loop_vec_info_for_loop (struct loop *loop)
|
loop_vec_info_for_loop (struct loop *loop)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue