mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/50031 (Sphinx3 has a 10% regression going from GCC 4.5 to GCC 4.6 on powerpc)
2012-02-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>, Ira Rosen <irar@il.ibm.com>. PR tree-optimization/50031. * targhooks.c (default_builtin_vectorization_cost): Handle vec_promote_demote. * target.h (enum vect_cost_for_stmt): Add vec_promote_demote. * tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle all types of reduction and pattern statements. (vect_estimate_min_profitable_iters): Likewise. * tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function. (vect_get_load_cost): Use vec_perm for permutations; add dump logic for explicit realigns. (vectorizable_conversion): Call vect_model_promotion_demotion_cost. * config/spu/spu.c (spu_builtin_vectorization_cost): Handle vec_promote_demote. * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update vec_perm for VSX and handle vec_promote_demote. Co-Authored-By: Ira Rosen <irar@il.ibm.com>. From-SVN: r184102.
This commit is contained in:
parent
19f326e8ec
commit
8bd373026e
|
@ -1,3 +1,23 @@
|
||||||
|
2012-02-10 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||||
|
Ira Rosen <irar@il.ibm.com>
|
||||||
|
|
||||||
|
PR tree-optimization/50031
|
||||||
|
* targhooks.c (default_builtin_vectorization_cost): Handle
|
||||||
|
vec_promote_demote.
|
||||||
|
* target.h (enum vect_cost_for_stmt): Add vec_promote_demote.
|
||||||
|
* tree-vect-loop.c (vect_get_single_scalar_iteraion_cost): Handle
|
||||||
|
all types of reduction and pattern statements.
|
||||||
|
(vect_estimate_min_profitable_iters): Likewise.
|
||||||
|
* tree-vect-stmts.c (vect_model_promotion_demotion_cost): New function.
|
||||||
|
(vect_get_load_cost): Use vec_perm for permutations; add dump logic
|
||||||
|
for explicit realigns.
|
||||||
|
(vectorizable_conversion): Call vect_model_promotion_demotion_cost.
|
||||||
|
* config/spu/spu.c (spu_builtin_vectorization_cost): Handle
|
||||||
|
vec_promote_demote.
|
||||||
|
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
|
||||||
|
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Update
|
||||||
|
vec_perm for VSX and handle vec_promote_demote.
|
||||||
|
|
||||||
2012-02-10 Jakub Jelinek <jakub@redhat.com>
|
2012-02-10 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
PR middle-end/52177
|
PR middle-end/52177
|
||||||
|
|
|
@ -35336,6 +35336,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
return ix86_cost->cond_not_taken_branch_cost;
|
return ix86_cost->cond_not_taken_branch_cost;
|
||||||
|
|
||||||
case vec_perm:
|
case vec_perm:
|
||||||
|
case vec_promote_demote:
|
||||||
return ix86_cost->vec_stmt_cost;
|
return ix86_cost->vec_stmt_cost;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -3543,9 +3543,16 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case vec_perm:
|
case vec_perm:
|
||||||
if (!TARGET_VSX)
|
if (TARGET_VSX)
|
||||||
|
return 4;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case vec_promote_demote:
|
||||||
|
if (TARGET_VSX)
|
||||||
|
return 5;
|
||||||
|
else
|
||||||
return 1;
|
return 1;
|
||||||
return 2;
|
|
||||||
|
|
||||||
case cond_branch_taken:
|
case cond_branch_taken:
|
||||||
return 3;
|
return 3;
|
||||||
|
|
|
@ -6920,6 +6920,7 @@ spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
case scalar_to_vec:
|
case scalar_to_vec:
|
||||||
case cond_branch_not_taken:
|
case cond_branch_not_taken:
|
||||||
case vec_perm:
|
case vec_perm:
|
||||||
|
case vec_promote_demote:
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case scalar_store:
|
case scalar_store:
|
||||||
|
|
|
@ -145,7 +145,8 @@ enum vect_cost_for_stmt
|
||||||
scalar_to_vec,
|
scalar_to_vec,
|
||||||
cond_branch_not_taken,
|
cond_branch_not_taken,
|
||||||
cond_branch_taken,
|
cond_branch_taken,
|
||||||
vec_perm
|
vec_perm,
|
||||||
|
vec_promote_demote
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The target structure. This holds all the backend hooks. */
|
/* The target structure. This holds all the backend hooks. */
|
||||||
|
|
|
@ -514,6 +514,7 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
case scalar_to_vec:
|
case scalar_to_vec:
|
||||||
case cond_branch_not_taken:
|
case cond_branch_not_taken:
|
||||||
case vec_perm:
|
case vec_perm:
|
||||||
|
case vec_promote_demote:
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case unaligned_load:
|
case unaligned_load:
|
||||||
|
|
|
@ -2417,7 +2417,8 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
|
||||||
if (stmt_info
|
if (stmt_info
|
||||||
&& !STMT_VINFO_RELEVANT_P (stmt_info)
|
&& !STMT_VINFO_RELEVANT_P (stmt_info)
|
||||||
&& (!STMT_VINFO_LIVE_P (stmt_info)
|
&& (!STMT_VINFO_LIVE_P (stmt_info)
|
||||||
|| STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
|
|| !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
|
||||||
|
&& !STMT_VINFO_IN_PATTERN_P (stmt_info))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
|
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
|
||||||
|
@ -2564,15 +2565,46 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
||||||
{
|
{
|
||||||
gimple stmt = gsi_stmt (si);
|
gimple stmt = gsi_stmt (si);
|
||||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||||
|
|
||||||
|
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
|
||||||
|
{
|
||||||
|
stmt = STMT_VINFO_RELATED_STMT (stmt_info);
|
||||||
|
stmt_info = vinfo_for_stmt (stmt);
|
||||||
|
}
|
||||||
|
|
||||||
/* Skip stmts that are not vectorized inside the loop. */
|
/* Skip stmts that are not vectorized inside the loop. */
|
||||||
if (!STMT_VINFO_RELEVANT_P (stmt_info)
|
if (!STMT_VINFO_RELEVANT_P (stmt_info)
|
||||||
&& (!STMT_VINFO_LIVE_P (stmt_info)
|
&& (!STMT_VINFO_LIVE_P (stmt_info)
|
||||||
|| STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
|
|| !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
|
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
|
||||||
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
|
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
|
||||||
some of the "outside" costs are generated inside the outer-loop. */
|
some of the "outside" costs are generated inside the outer-loop. */
|
||||||
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
|
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
|
||||||
|
if (is_pattern_stmt_p (stmt_info)
|
||||||
|
&& STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))
|
||||||
|
{
|
||||||
|
gimple_stmt_iterator gsi;
|
||||||
|
|
||||||
|
for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
|
||||||
|
!gsi_end_p (gsi); gsi_next (&gsi))
|
||||||
|
{
|
||||||
|
gimple pattern_def_stmt = gsi_stmt (gsi);
|
||||||
|
stmt_vec_info pattern_def_stmt_info
|
||||||
|
= vinfo_for_stmt (pattern_def_stmt);
|
||||||
|
if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
|
||||||
|
|| STMT_VINFO_LIVE_P (pattern_def_stmt_info))
|
||||||
|
{
|
||||||
|
vec_inside_cost
|
||||||
|
+= STMT_VINFO_INSIDE_OF_LOOP_COST
|
||||||
|
(pattern_def_stmt_info) * factor;
|
||||||
|
vec_outside_cost
|
||||||
|
+= STMT_VINFO_OUTSIDE_OF_LOOP_COST
|
||||||
|
(pattern_def_stmt_info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -811,6 +811,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Model cost for type demotion and promotion operations. PWR is normally
|
||||||
|
zero for single-step promotions and demotions. It will be one if
|
||||||
|
two-step promotion/demotion is required, and so on. Each additional
|
||||||
|
step doubles the number of instructions required. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
|
||||||
|
enum vect_def_type *dt, int pwr)
|
||||||
|
{
|
||||||
|
int i, tmp;
|
||||||
|
int inside_cost = 0, outside_cost = 0, single_stmt_cost;
|
||||||
|
|
||||||
|
/* The SLP costs were already calculated during SLP tree build. */
|
||||||
|
if (PURE_SLP_STMT (stmt_info))
|
||||||
|
return;
|
||||||
|
|
||||||
|
single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
|
||||||
|
for (i = 0; i < pwr + 1; i++)
|
||||||
|
{
|
||||||
|
tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
|
||||||
|
(i + 1) : i;
|
||||||
|
inside_cost += vect_pow2 (tmp) * single_stmt_cost;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FORNOW: Assuming maximum 2 args per stmt. */
|
||||||
|
for (i = 0; i < 2; i++)
|
||||||
|
{
|
||||||
|
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
|
||||||
|
outside_cost += vect_get_stmt_cost (vector_stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vect_print_dump_info (REPORT_COST))
|
||||||
|
fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
|
||||||
|
"outside_cost = %d .", inside_cost, outside_cost);
|
||||||
|
|
||||||
|
/* Set the costs in STMT_INFO. */
|
||||||
|
stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
|
||||||
|
stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
|
||||||
|
}
|
||||||
|
|
||||||
/* Function vect_cost_strided_group_size
|
/* Function vect_cost_strided_group_size
|
||||||
|
|
||||||
For strided load or store, return the group_size only if it is the first
|
For strided load or store, return the group_size only if it is the first
|
||||||
|
@ -887,7 +927,6 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
||||||
if (vect_print_dump_info (REPORT_COST))
|
if (vect_print_dump_info (REPORT_COST))
|
||||||
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
|
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
|
||||||
group_size);
|
group_size);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Costs of the stores. */
|
/* Costs of the stores. */
|
||||||
|
@ -1049,7 +1088,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
|
||||||
case dr_explicit_realign:
|
case dr_explicit_realign:
|
||||||
{
|
{
|
||||||
*inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
|
*inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
|
||||||
+ vect_get_stmt_cost (vector_stmt));
|
+ vect_get_stmt_cost (vec_perm));
|
||||||
|
|
||||||
/* FIXME: If the misalignment remains fixed across the iterations of
|
/* FIXME: If the misalignment remains fixed across the iterations of
|
||||||
the containing loop, the following cost should be added to the
|
the containing loop, the following cost should be added to the
|
||||||
|
@ -1057,6 +1096,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
|
||||||
if (targetm.vectorize.builtin_mask_for_load)
|
if (targetm.vectorize.builtin_mask_for_load)
|
||||||
*inside_cost += vect_get_stmt_cost (vector_stmt);
|
*inside_cost += vect_get_stmt_cost (vector_stmt);
|
||||||
|
|
||||||
|
if (vect_print_dump_info (REPORT_COST))
|
||||||
|
fprintf (vect_dump, "vect_model_load_cost: explicit realign");
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case dr_explicit_realign_optimized:
|
case dr_explicit_realign_optimized:
|
||||||
|
@ -1080,7 +1122,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
|
||||||
}
|
}
|
||||||
|
|
||||||
*inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
|
*inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
|
||||||
+ vect_get_stmt_cost (vector_stmt));
|
+ vect_get_stmt_cost (vec_perm));
|
||||||
|
|
||||||
|
if (vect_print_dump_info (REPORT_COST))
|
||||||
|
fprintf (vect_dump,
|
||||||
|
"vect_model_load_cost: explicit realign optimized");
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2392,16 +2439,19 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||||
if (vect_print_dump_info (REPORT_DETAILS))
|
if (vect_print_dump_info (REPORT_DETAILS))
|
||||||
fprintf (vect_dump, "=== vectorizable_conversion ===");
|
fprintf (vect_dump, "=== vectorizable_conversion ===");
|
||||||
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
|
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
|
||||||
|
{
|
||||||
STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
|
STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
|
||||||
|
vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
|
||||||
|
}
|
||||||
else if (modifier == NARROW)
|
else if (modifier == NARROW)
|
||||||
{
|
{
|
||||||
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
|
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
|
||||||
vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
|
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
|
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
|
||||||
vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
|
vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
|
||||||
}
|
}
|
||||||
VEC_free (tree, heap, interm_types);
|
VEC_free (tree, heap, interm_types);
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Reference in New Issue