mirror of git://gcc.gnu.org/git/gcc.git
tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update documentation.
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update documentation. * targhooks.c (default_builtin_vectorization_cost): New function. * targhooks.h (default_builtin_vectorization_cost): Declare. * target.h (enum vect_cost_for_stmt): Define. (builtin_vectorization_cost): Change argument and comment. * tree-vectorizer.h: Remove cost model macros. * tree-vect-loop.c: Include target.h. (vect_get_cost): New function. (vect_estimate_min_profitable_iters): Replace cost model macros with calls to vect_get_cost. (vect_model_reduction_cost, vect_model_induction_cost): Likewise. * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Add default implementation. * tree-vect-stmts.c (cost_for_stmt): Replace cost model macros with calls to target hook builtin_vectorization_cost. (vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost): Likewise. * Makefile.in (tree-vect-loop.o): Add dependency on TARGET_H. * config/spu/spu.c (spu_builtin_vectorization_cost): Replace with new implementation to return costs. * config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise. * config/spu/spu.h: Remove vectorizer cost model macros. * config/i386/i386.h: Likewise. * tree-vect-slp.c (vect_build_slp_tree): Replace cost model macro with a call to target hook builtin_vectorization_cost. From-SVN: r160360
This commit is contained in:
parent
81c566c2fa
commit
35e1a5e7cf
|
@ -1,3 +1,32 @@
|
|||
2010-06-07 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Update
|
||||
documentation.
|
||||
* targhooks.c (default_builtin_vectorization_cost): New function.
|
||||
* targhooks.h (default_builtin_vectorization_cost): Declare.
|
||||
* target.h (enum vect_cost_for_stmt): Define.
|
||||
(builtin_vectorization_cost): Change argument and comment.
|
||||
* tree-vectorizer.h: Remove cost model macros.
|
||||
* tree-vect-loop.c: Include target.h.
|
||||
(vect_get_cost): New function.
|
||||
(vect_estimate_min_profitable_iters): Replace cost model macros with
|
||||
calls to vect_get_cost.
|
||||
(vect_model_reduction_cost, vect_model_induction_cost): Likewise.
|
||||
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Add
|
||||
default implementation.
|
||||
* tree-vect-stmts.c (cost_for_stmt): Replace cost model macros with
|
||||
calls to target hook builtin_vectorization_cost.
|
||||
(vect_model_simple_cost, vect_model_store_cost, vect_model_load_cost):
|
||||
Likewise.
|
||||
* Makefile.in (tree-vect-loop.o): Add dependency on TARGET_H.
|
||||
* config/spu/spu.c (spu_builtin_vectorization_cost): Replace with new
|
||||
implementation to return costs.
|
||||
* config/i386/i386.c (ix86_builtin_vectorization_cost): Likewise.
|
||||
* config/spu/spu.h: Remove vectorizer cost model macros.
|
||||
* config/i386/i386.h: Likewise.
|
||||
* tree-vect-slp.c (vect_build_slp_tree): Replace cost model macro with
|
||||
a call to target hook builtin_vectorization_cost.
|
||||
|
||||
2010-06-06 Sriraman Tallam <tmsriram@google.com>
|
||||
|
||||
PR target/44319
|
||||
|
|
|
@ -2704,7 +2704,7 @@ tree-vect-loop.o: tree-vect-loop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
|||
$(TM_H) $(GGC_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
|
||||
$(TREE_DUMP_H) $(CFGLOOP_H) $(CFGLAYOUT_H) $(EXPR_H) $(RECOG_H) $(OPTABS_H) \
|
||||
$(TOPLEV_H) $(SCEV_H) $(TREE_VECTORIZER_H) tree-pretty-print.h \
|
||||
gimple-pretty-print.h
|
||||
gimple-pretty-print.h $(TARGET_H)
|
||||
tree-vect-loop-manip.o: tree-vect-loop-manip.c $(CONFIG_H) $(SYSTEM_H) \
|
||||
coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
|
||||
$(TREE_FLOW_H) $(TREE_DUMP_H) $(CFGLOOP_H) $(CFGLAYOUT_H) $(EXPR_H) $(TOPLEV_H) \
|
||||
|
|
|
@ -29296,28 +29296,52 @@ static const struct attribute_spec ix86_attribute_table[] =
|
|||
|
||||
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
||||
static int
|
||||
ix86_builtin_vectorization_cost (bool runtime_test)
|
||||
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
|
||||
{
|
||||
/* If the branch of the runtime test is taken - i.e. - the vectorized
|
||||
version is skipped - this incurs a misprediction cost (because the
|
||||
vectorized version is expected to be the fall-through). So we subtract
|
||||
the latency of a mispredicted branch from the costs that are incurred
|
||||
when the vectorized version is executed.
|
||||
|
||||
TODO: The values in individual target tables have to be tuned or new
|
||||
fields may be needed. E.g., on K8, the default branch path is the
|
||||
not-taken path. If the taken path is predicted correctly, the minimum
|
||||
penalty of going down the taken-path is 1 cycle. If the taken-path is
|
||||
not predicted correctly, then the minimum penalty is 10 cycles. */
|
||||
|
||||
if (runtime_test)
|
||||
switch (type_of_cost)
|
||||
{
|
||||
return (-(ix86_cost->cond_taken_branch_cost));
|
||||
case scalar_stmt:
|
||||
return ix86_cost->scalar_stmt_cost;
|
||||
|
||||
case scalar_load:
|
||||
return ix86_cost->scalar_load_cost;
|
||||
|
||||
case scalar_store:
|
||||
return ix86_cost->scalar_store_cost;
|
||||
|
||||
case vector_stmt:
|
||||
return ix86_cost->vec_stmt_cost;
|
||||
|
||||
case vector_load:
|
||||
return ix86_cost->vec_align_load_cost;
|
||||
|
||||
case vector_store:
|
||||
return ix86_cost->vec_store_cost;
|
||||
|
||||
case vec_to_scalar:
|
||||
return ix86_cost->vec_to_scalar_cost;
|
||||
|
||||
case scalar_to_vec:
|
||||
return ix86_cost->scalar_to_vec_cost;
|
||||
|
||||
case unaligned_load:
|
||||
return ix86_cost->vec_unalign_load_cost;
|
||||
|
||||
case cond_branch_taken:
|
||||
return ix86_cost->cond_taken_branch_cost;
|
||||
|
||||
case cond_branch_not_taken:
|
||||
return ix86_cost->cond_not_taken_branch_cost;
|
||||
|
||||
case vec_perm:
|
||||
return 1;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Implement targetm.vectorize.builtin_vec_perm. */
|
||||
|
||||
static tree
|
||||
|
|
|
@ -2420,57 +2420,6 @@ struct GTY(()) machine_function {
|
|||
#define SYMBOL_REF_DLLEXPORT_P(X) \
|
||||
((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
|
||||
|
||||
/* Model costs for vectorizer. */
|
||||
|
||||
/* Cost of conditional branch. */
|
||||
#undef TARG_COND_BRANCH_COST
|
||||
#define TARG_COND_BRANCH_COST ix86_cost->branch_cost
|
||||
|
||||
/* Cost of any scalar operation, excluding load and store. */
|
||||
#undef TARG_SCALAR_STMT_COST
|
||||
#define TARG_SCALAR_STMT_COST ix86_cost->scalar_stmt_cost
|
||||
|
||||
/* Cost of scalar load. */
|
||||
#undef TARG_SCALAR_LOAD_COST
|
||||
#define TARG_SCALAR_LOAD_COST ix86_cost->scalar_load_cost
|
||||
|
||||
/* Cost of scalar store. */
|
||||
#undef TARG_SCALAR_STORE_COST
|
||||
#define TARG_SCALAR_STORE_COST ix86_cost->scalar_store_cost
|
||||
|
||||
/* Cost of any vector operation, excluding load, store or vector to scalar
|
||||
operation. */
|
||||
#undef TARG_VEC_STMT_COST
|
||||
#define TARG_VEC_STMT_COST ix86_cost->vec_stmt_cost
|
||||
|
||||
/* Cost of vector to scalar operation. */
|
||||
#undef TARG_VEC_TO_SCALAR_COST
|
||||
#define TARG_VEC_TO_SCALAR_COST ix86_cost->vec_to_scalar_cost
|
||||
|
||||
/* Cost of scalar to vector operation. */
|
||||
#undef TARG_SCALAR_TO_VEC_COST
|
||||
#define TARG_SCALAR_TO_VEC_COST ix86_cost->scalar_to_vec_cost
|
||||
|
||||
/* Cost of aligned vector load. */
|
||||
#undef TARG_VEC_LOAD_COST
|
||||
#define TARG_VEC_LOAD_COST ix86_cost->vec_align_load_cost
|
||||
|
||||
/* Cost of misaligned vector load. */
|
||||
#undef TARG_VEC_UNALIGNED_LOAD_COST
|
||||
#define TARG_VEC_UNALIGNED_LOAD_COST ix86_cost->vec_unalign_load_cost
|
||||
|
||||
/* Cost of vector store. */
|
||||
#undef TARG_VEC_STORE_COST
|
||||
#define TARG_VEC_STORE_COST ix86_cost->vec_store_cost
|
||||
|
||||
/* Cost of conditional taken branch for vectorizer cost model. */
|
||||
#undef TARG_COND_TAKEN_BRANCH_COST
|
||||
#define TARG_COND_TAKEN_BRANCH_COST ix86_cost->cond_taken_branch_cost
|
||||
|
||||
/* Cost of conditional not taken branch for vectorizer cost model. */
|
||||
#undef TARG_COND_NOT_TAKEN_BRANCH_COST
|
||||
#define TARG_COND_NOT_TAKEN_BRANCH_COST ix86_cost->cond_not_taken_branch_cost
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
version-control: t
|
||||
|
|
|
@ -209,7 +209,7 @@ static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
|
|||
static tree spu_builtin_mul_widen_even (tree);
|
||||
static tree spu_builtin_mul_widen_odd (tree);
|
||||
static tree spu_builtin_mask_for_load (void);
|
||||
static int spu_builtin_vectorization_cost (bool);
|
||||
static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt);
|
||||
static bool spu_vector_alignment_reachable (const_tree, bool);
|
||||
static tree spu_builtin_vec_perm (tree, tree *);
|
||||
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
|
||||
|
@ -6695,17 +6695,36 @@ spu_builtin_mask_for_load (void)
|
|||
|
||||
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
||||
static int
|
||||
spu_builtin_vectorization_cost (bool runtime_test)
|
||||
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
|
||||
{
|
||||
/* If the branch of the runtime test is taken - i.e. - the vectorized
|
||||
version is skipped - this incurs a misprediction cost (because the
|
||||
vectorized version is expected to be the fall-through). So we subtract
|
||||
the latency of a mispredicted branch from the costs that are incurred
|
||||
when the vectorized version is executed. */
|
||||
if (runtime_test)
|
||||
return -19;
|
||||
else
|
||||
return 0;
|
||||
switch (type_of_cost)
|
||||
{
|
||||
case scalar_stmt:
|
||||
case vector_stmt:
|
||||
case vector_load:
|
||||
case vector_store:
|
||||
case vec_to_scalar:
|
||||
case scalar_to_vec:
|
||||
case cond_branch_not_taken:
|
||||
case vec_perm:
|
||||
return 1;
|
||||
|
||||
case scalar_store:
|
||||
return 10;
|
||||
|
||||
case scalar_load:
|
||||
/* Load + rotate. */
|
||||
return 2;
|
||||
|
||||
case unaligned_load:
|
||||
return 2;
|
||||
|
||||
case cond_branch_taken:
|
||||
return 6;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true iff, data reference of TYPE can reach vector alignment (16)
|
||||
|
|
|
@ -523,57 +523,6 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
|
|||
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
|
||||
do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
|
||||
|
||||
|
||||
/* Model costs for the vectorizer. */
|
||||
|
||||
/* Cost of conditional branch. */
|
||||
#ifndef TARG_COND_BRANCH_COST
|
||||
#define TARG_COND_BRANCH_COST 6
|
||||
#endif
|
||||
|
||||
/* Cost of any scalar operation, excluding load and store. */
|
||||
#ifndef TARG_SCALAR_STMT_COST
|
||||
#define TARG_SCALAR_STMT_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar load. */
|
||||
#undef TARG_SCALAR_LOAD_COST
|
||||
#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */
|
||||
|
||||
/* Cost of scalar store. */
|
||||
#undef TARG_SCALAR_STORE_COST
|
||||
#define TARG_SCALAR_STORE_COST 10
|
||||
|
||||
/* Cost of any vector operation, excluding load, store,
|
||||
or vector to scalar operation. */
|
||||
#undef TARG_VEC_STMT_COST
|
||||
#define TARG_VEC_STMT_COST 1
|
||||
|
||||
/* Cost of vector to scalar operation. */
|
||||
#undef TARG_VEC_TO_SCALAR_COST
|
||||
#define TARG_VEC_TO_SCALAR_COST 1
|
||||
|
||||
/* Cost of scalar to vector operation. */
|
||||
#undef TARG_SCALAR_TO_VEC_COST
|
||||
#define TARG_SCALAR_TO_VEC_COST 1
|
||||
|
||||
/* Cost of aligned vector load. */
|
||||
#undef TARG_VEC_LOAD_COST
|
||||
#define TARG_VEC_LOAD_COST 1
|
||||
|
||||
/* Cost of misaligned vector load. */
|
||||
#undef TARG_VEC_UNALIGNED_LOAD_COST
|
||||
#define TARG_VEC_UNALIGNED_LOAD_COST 2
|
||||
|
||||
/* Cost of vector store. */
|
||||
#undef TARG_VEC_STORE_COST
|
||||
#define TARG_VEC_STORE_COST 1
|
||||
|
||||
/* Cost of vector permutation. */
|
||||
#ifndef TARG_VEC_PERMUTE_COST
|
||||
#define TARG_VEC_PERMUTE_COST 1
|
||||
#endif
|
||||
|
||||
|
||||
/* Misc */
|
||||
|
||||
|
|
|
@ -5742,9 +5742,8 @@ preserved (e.g.@: used only by a reduction computation). Otherwise, the
|
|||
@code{widen_mult_hi/lo} idioms will be used.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (bool @var{runtime_test})
|
||||
Returns the cost to be added to the overhead involved with executing
|
||||
the vectorized version of a loop.
|
||||
@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost})
|
||||
Returns the cost of a scalar or vector statement of the kind given by @var{type_of_cost}, for use by the vectorization cost model.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed})
|
||||
|
|
|
@ -417,7 +417,8 @@
|
|||
default_builtin_vectorized_conversion
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
||||
default_builtin_vectorization_cost
|
||||
#define TARGET_VECTOR_ALIGNMENT_REACHABLE \
|
||||
default_builtin_vector_alignment_reachable
|
||||
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM 0
|
||||
|
|
23
gcc/target.h
23
gcc/target.h
|
@ -110,6 +110,23 @@ struct asm_int_op
|
|||
const char *ti;
|
||||
};
|
||||
|
||||
/* Kinds of statement (or branch) whose cost the vectorizer cost model
   requests from the target.  A value of this type is the sole argument
   of the builtin_vectorization_cost target hook.  */
|
||||
enum vect_cost_for_stmt
|
||||
{
|
||||
scalar_stmt, /* Any scalar operation, excluding load and store.  */
|
||||
scalar_load, /* Scalar load.  */
|
||||
scalar_store, /* Scalar store.  */
|
||||
vector_stmt, /* Any vector operation, excluding load, store or vector to scalar operation.  */
|
||||
vector_load, /* Aligned vector load.  */
|
||||
unaligned_load, /* Misaligned vector load.  */
|
||||
vector_store, /* Vector store.  */
|
||||
vec_to_scalar, /* Vector to scalar operation.  */
|
||||
scalar_to_vec, /* Scalar to vector operation.  */
|
||||
cond_branch_not_taken, /* Conditional branch that is not taken.  */
|
||||
cond_branch_taken, /* Conditional branch that is taken.  */
|
||||
vec_perm /* Vector permutation.  */
|
||||
};
|
||||
|
||||
/* The target structure. This holds all the backend hooks. */
|
||||
|
||||
struct gcc_target
|
||||
|
@ -505,9 +522,9 @@ struct gcc_target
|
|||
tree (* builtin_mul_widen_even) (tree);
|
||||
tree (* builtin_mul_widen_odd) (tree);
|
||||
|
||||
/* Returns the cost to be added to the overheads involved with
|
||||
executing the vectorized version of a loop. */
|
||||
int (*builtin_vectorization_cost) (bool);
|
||||
/* Cost of different vector/scalar statements in vectorization cost
|
||||
model. */
|
||||
int (* builtin_vectorization_cost) (enum vect_cost_for_stmt);
|
||||
|
||||
/* Return true if vector alignment is reachable (by peeling N
|
||||
iterations) for the given type. */
|
||||
|
|
|
@ -476,6 +476,36 @@ default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED,
|
|||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Default implementation of the builtin_vectorization_cost target hook,
   used when a target does not override
   TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.

   Return the cost for a statement of kind TYPE_OF_COST: 2 for an
   unaligned load, 3 for a taken conditional branch, and 1 for every
   other kind.  These are the same numbers as the generic TARG_*_COST
   macro defaults that this hook replaces.  Any value of TYPE_OF_COST
   not listed below reaches gcc_unreachable.  */
|
||||
|
||||
int
|
||||
default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
|
||||
{
|
||||
switch (type_of_cost)
|
||||
{
|
||||
/* All of the following kinds share the unit cost.  */
case scalar_stmt:
|
||||
case scalar_load:
|
||||
case scalar_store:
|
||||
case vector_stmt:
|
||||
case vector_load:
|
||||
case vector_store:
|
||||
case vec_to_scalar:
|
||||
case scalar_to_vec:
|
||||
case cond_branch_not_taken:
|
||||
case vec_perm:
|
||||
return 1;
|
||||
|
||||
case unaligned_load:
|
||||
return 2;
|
||||
|
||||
case cond_branch_taken:
|
||||
return 3;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Reciprocal. */
|
||||
|
||||
tree
|
||||
|
|
|
@ -77,6 +77,8 @@ extern tree default_builtin_vectorized_function (tree, tree, tree);
|
|||
|
||||
extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree);
|
||||
|
||||
extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt);
|
||||
|
||||
extern tree default_builtin_reciprocal (unsigned int, bool, bool);
|
||||
|
||||
extern bool default_builtin_vector_alignment_reachable (const_tree, bool);
|
||||
|
|
|
@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "tree-chrec.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "target.h"
|
||||
|
||||
/* Loop Vectorization Pass.
|
||||
|
||||
|
@ -1116,6 +1117,15 @@ vect_analyze_loop_form (struct loop *loop)
|
|||
}
|
||||
|
||||
|
||||
/* Return the cost of a statement of kind TYPE_OF_COST, as reported by
   the target's builtin_vectorization_cost hook.  */
|
||||
|
||||
static inline
|
||||
int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
|
||||
{
|
||||
return targetm.vectorize.builtin_vectorization_cost (type_of_cost);
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_analyze_loop_operations.
|
||||
|
||||
Scan the loop stmts and make sure they are all vectorizable. */
|
||||
|
@ -2056,7 +2066,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
|
||||
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|
||||
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
|
||||
vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
|
||||
vec_outside_cost += vect_get_cost (cond_branch_taken);
|
||||
|
||||
/* Count statements in scalar loop. Using this as scalar cost for a single
|
||||
iteration for now.
|
||||
|
@ -2125,8 +2135,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
branch per peeled loop. Even if scalar loop iterations are known,
|
||||
vector iterations are not known since peeled prologue iterations are
|
||||
not known. Hence guards remain the same. */
|
||||
peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
|
||||
+ TARG_COND_NOT_TAKEN_BRANCH_COST);
|
||||
peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
|
||||
+ vect_get_cost (cond_branch_not_taken));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2152,8 +2162,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
|
||||
/* If peeled iterations are known but number of scalar loop
|
||||
iterations are unknown, count a taken branch per peeled loop. */
|
||||
peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
|
||||
|
||||
peel_guard_costs += 2 * vect_get_cost (cond_branch_taken);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2228,16 +2237,16 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
/* Cost model check occurs at versioning. */
|
||||
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|
||||
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
|
||||
scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
|
||||
scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
|
||||
else
|
||||
{
|
||||
/* Cost model check occurs at prologue generation. */
|
||||
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
|
||||
scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
|
||||
+ TARG_COND_NOT_TAKEN_BRANCH_COST;
|
||||
scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
|
||||
+ vect_get_cost (cond_branch_not_taken);
|
||||
/* Cost model check occurs at epilogue generation. */
|
||||
else
|
||||
scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
|
||||
scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2347,7 +2356,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
|
||||
|
||||
/* Cost of reduction op inside loop. */
|
||||
STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
|
||||
STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
|
||||
+= ncopies * vect_get_cost (vector_stmt);
|
||||
|
||||
stmt = STMT_VINFO_STMT (stmt_info);
|
||||
|
||||
|
@ -2387,7 +2397,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
code = gimple_assign_rhs_code (orig_stmt);
|
||||
|
||||
/* Add in cost for initial definition. */
|
||||
outer_cost += TARG_SCALAR_TO_VEC_COST;
|
||||
outer_cost += vect_get_cost (scalar_to_vec);
|
||||
|
||||
/* Determine cost of epilogue code.
|
||||
|
||||
|
@ -2397,7 +2407,8 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
if (!nested_in_vect_loop_p (loop, orig_stmt))
|
||||
{
|
||||
if (reduc_code != ERROR_MARK)
|
||||
outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
|
||||
outer_cost += vect_get_cost (vector_stmt)
|
||||
+ vect_get_cost (vec_to_scalar);
|
||||
else
|
||||
{
|
||||
int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
|
||||
|
@ -2414,12 +2425,14 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
&& optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
|
||||
/* Final reduction via vector shifts and the reduction operator. Also
|
||||
requires scalar extract. */
|
||||
outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
|
||||
+ TARG_VEC_TO_SCALAR_COST);
|
||||
outer_cost += ((exact_log2(nelements) * 2)
|
||||
* vect_get_cost (vector_stmt)
|
||||
+ vect_get_cost (vec_to_scalar));
|
||||
else
|
||||
/* Use extracts and reduction op for final reduction. For N elements,
|
||||
we have N extracts and N-1 reduction ops. */
|
||||
outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
|
||||
outer_cost += ((nelements + nelements - 1)
|
||||
* vect_get_cost (vector_stmt));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2442,9 +2455,11 @@ static void
|
|||
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
|
||||
{
|
||||
/* loop cost for vec_loop. */
|
||||
STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
|
||||
STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
|
||||
= ncopies * vect_get_cost (vector_stmt);
|
||||
/* prologue cost for vec_init and vec_step. */
|
||||
STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
|
||||
STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
|
||||
= 2 * vect_get_cost (scalar_to_vec);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
|
||||
|
|
|
@ -645,7 +645,9 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
|||
if (permutation)
|
||||
{
|
||||
VEC_safe_push (slp_tree, heap, *loads, *node);
|
||||
*inside_cost += TARG_VEC_PERMUTE_COST * group_size;
|
||||
*inside_cost
|
||||
+= targetm.vectorize.builtin_vectorization_cost (vec_perm)
|
||||
* group_size;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
|
@ -553,9 +553,9 @@ cost_for_stmt (gimple stmt)
|
|||
switch (STMT_VINFO_TYPE (stmt_info))
|
||||
{
|
||||
case load_vec_info_type:
|
||||
return TARG_SCALAR_LOAD_COST;
|
||||
return targetm.vectorize.builtin_vectorization_cost (scalar_load);
|
||||
case store_vec_info_type:
|
||||
return TARG_SCALAR_STORE_COST;
|
||||
return targetm.vectorize.builtin_vectorization_cost (scalar_store);
|
||||
case op_vec_info_type:
|
||||
case condition_vec_info_type:
|
||||
case assignment_vec_info_type:
|
||||
|
@ -565,7 +565,7 @@ cost_for_stmt (gimple stmt)
|
|||
case type_demotion_vec_info_type:
|
||||
case type_conversion_vec_info_type:
|
||||
case call_vec_info_type:
|
||||
return TARG_SCALAR_STMT_COST;
|
||||
return targetm.vectorize.builtin_vectorization_cost (scalar_stmt);
|
||||
case undef_vec_info_type:
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
|
@ -589,13 +589,15 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
|
|||
if (PURE_SLP_STMT (stmt_info))
|
||||
return;
|
||||
|
||||
inside_cost = ncopies * TARG_VEC_STMT_COST;
|
||||
inside_cost = ncopies
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
|
||||
/* FORNOW: Assuming maximum 2 args per stmts. */
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
|
||||
outside_cost += TARG_SCALAR_TO_VEC_COST;
|
||||
outside_cost
|
||||
+= targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
}
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
|
@ -643,7 +645,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
|||
return;
|
||||
|
||||
if (dt == vect_constant_def || dt == vect_external_def)
|
||||
outside_cost = TARG_SCALAR_TO_VEC_COST;
|
||||
outside_cost
|
||||
= targetm.vectorize.builtin_vectorization_cost (scalar_to_vec);
|
||||
|
||||
/* Strided access? */
|
||||
if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
|
||||
|
@ -658,7 +661,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
|||
{
|
||||
/* Uses a high and low interleave operation for each needed permute. */
|
||||
inside_cost = ncopies * exact_log2(group_size) * group_size
|
||||
* TARG_VEC_STMT_COST;
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
|
||||
|
@ -667,7 +670,8 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
|
|||
}
|
||||
|
||||
/* Costs of the stores. */
|
||||
inside_cost += ncopies * TARG_VEC_STORE_COST;
|
||||
inside_cost += ncopies
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_store);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
|
||||
|
@ -722,7 +726,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
|
|||
{
|
||||
/* Uses an even and odd extract operations for each needed permute. */
|
||||
inside_cost = ncopies * exact_log2(group_size) * group_size
|
||||
* TARG_VEC_STMT_COST;
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
|
||||
|
@ -735,7 +739,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
|
|||
{
|
||||
case dr_aligned:
|
||||
{
|
||||
inside_cost += ncopies * TARG_VEC_LOAD_COST;
|
||||
inside_cost += ncopies
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_load);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_load_cost: aligned.");
|
||||
|
@ -745,7 +750,8 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
|
|||
case dr_unaligned_supported:
|
||||
{
|
||||
/* Here, we assign an additional cost for the unaligned load. */
|
||||
inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
|
||||
inside_cost += ncopies
|
||||
* targetm.vectorize.builtin_vectorization_cost (unaligned_load);
|
||||
|
||||
if (vect_print_dump_info (REPORT_COST))
|
||||
fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
|
||||
|
@ -755,13 +761,16 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
|
|||
}
|
||||
case dr_explicit_realign:
|
||||
{
|
||||
inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
|
||||
inside_cost += ncopies * (2
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_load)
|
||||
+ targetm.vectorize.builtin_vectorization_cost (vector_stmt));
|
||||
|
||||
/* FIXME: If the misalignment remains fixed across the iterations of
|
||||
the containing loop, the following cost should be added to the
|
||||
outside costs. */
|
||||
if (targetm.vectorize.builtin_mask_for_load)
|
||||
inside_cost += TARG_VEC_STMT_COST;
|
||||
inside_cost
|
||||
+= targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -780,13 +789,16 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
|
|||
|
||||
if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
|
||||
{
|
||||
outside_cost = 2*TARG_VEC_STMT_COST;
|
||||
outside_cost = 2
|
||||
* targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
if (targetm.vectorize.builtin_mask_for_load)
|
||||
outside_cost += TARG_VEC_STMT_COST;
|
||||
outside_cost
|
||||
+= targetm.vectorize.builtin_vectorization_cost (vector_stmt);
|
||||
}
|
||||
|
||||
inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
|
||||
|
||||
inside_cost += ncopies
|
||||
* (targetm.vectorize.builtin_vectorization_cost (vector_load)
|
||||
+ targetm.vectorize.builtin_vectorization_cost (vector_stmt));
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -543,70 +543,6 @@ typedef struct _stmt_vec_info {
|
|||
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
|
||||
#define STMT_SLP_TYPE(S) (S)->slp_type
|
||||
|
||||
/* These are some defines for the initial implementation of the vectorizer's
|
||||
cost model. These will later be target specific hooks. */
|
||||
|
||||
/* Cost of conditional taken branch. */
|
||||
#ifndef TARG_COND_TAKEN_BRANCH_COST
|
||||
#define TARG_COND_TAKEN_BRANCH_COST 3
|
||||
#endif
|
||||
|
||||
/* Cost of conditional not taken branch. */
|
||||
#ifndef TARG_COND_NOT_TAKEN_BRANCH_COST
|
||||
#define TARG_COND_NOT_TAKEN_BRANCH_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of any scalar operation, excluding load and store. */
|
||||
#ifndef TARG_SCALAR_STMT_COST
|
||||
#define TARG_SCALAR_STMT_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar load. */
|
||||
#ifndef TARG_SCALAR_LOAD_COST
|
||||
#define TARG_SCALAR_LOAD_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar store. */
|
||||
#ifndef TARG_SCALAR_STORE_COST
|
||||
#define TARG_SCALAR_STORE_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of any vector operation, excluding load, store or vector to scalar
|
||||
operation. */
|
||||
#ifndef TARG_VEC_STMT_COST
|
||||
#define TARG_VEC_STMT_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of vector to scalar operation. */
|
||||
#ifndef TARG_VEC_TO_SCALAR_COST
|
||||
#define TARG_VEC_TO_SCALAR_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar to vector operation. */
|
||||
#ifndef TARG_SCALAR_TO_VEC_COST
|
||||
#define TARG_SCALAR_TO_VEC_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of aligned vector load. */
|
||||
#ifndef TARG_VEC_LOAD_COST
|
||||
#define TARG_VEC_LOAD_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of misaligned vector load. */
|
||||
#ifndef TARG_VEC_UNALIGNED_LOAD_COST
|
||||
#define TARG_VEC_UNALIGNED_LOAD_COST 2
|
||||
#endif
|
||||
|
||||
/* Cost of vector store. */
|
||||
#ifndef TARG_VEC_STORE_COST
|
||||
#define TARG_VEC_STORE_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of vector permutation. */
|
||||
#ifndef TARG_VEC_PERMUTE_COST
|
||||
#define TARG_VEC_PERMUTE_COST 1
|
||||
#endif
|
||||
|
||||
/* The maximum number of intermediate steps required in multi-step type
|
||||
conversion. */
|
||||
#define MAX_INTERM_CVT_STEPS 3
|
||||
|
|
Loading…
Reference in New Issue