tree-vectorizer.c: Fix documentation.

	* tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
	is_pattern_stmt_p, is_loop_header_bb_p,
	stmt_vinfo_set_inside_of_loop_cost,
	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
	known_alignment_for_access_p): Likewise.
	* tree-vect-loop.c: Fix documentation.
	(vect_get_cost): Start function name from new line.
	* tree-vect-data-refs.c: Fix documentation.
	* tree-vect-stmts.c: Likewise.
	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
	(vectorizable_store): Free vec_oprnds if allocated.
	(vectorizable_condition): Initialize several variables to avoid
	warnings.
	* tree-vect-slp.c: Fix documentation.

From-SVN: r164332
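
The ChangeLog entries above for vect_create_vectorized_promotion_stmts and vectorizable_store describe a cleanup fix: a locally allocated operand vector must be released on every exit path, but only when it was actually allocated. The snippet below is a minimal stand-alone sketch of that pattern in plain C; it is not the committed GCC hunk (the vectorizer of that era uses its VEC() API, e.g. VEC_free, rather than malloc/free), and all names in it are illustrative only.

#include <stdlib.h>

/* Sketch of the "free it only if it was allocated" pattern.  */
static int
process (int n)
{
  int *scratch = NULL;

  if (n > 4)
    scratch = malloc (n * sizeof *scratch);  /* allocated only on some paths */

  /* ... work that may or may not have used SCRATCH ... */

  if (scratch)  /* release it only if it exists */
    free (scratch);
  return 0;
}

int
main (void)
{
  return process (8);
}
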
Ira Rosen, 2010-09-16 10:35:14 +00:00 (committed by Ira Rosen)
commit ff802fa1f3, parent 6be14c0ebc
11 changed files with 296 additions and 228 deletions


@@ -1,3 +1,22 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* tree-vectorizer.c: Fix documentation.
+	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
+	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
+	is_pattern_stmt_p, is_loop_header_bb_p,
+	stmt_vinfo_set_inside_of_loop_cost,
+	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
+	known_alignment_for_access_p): Likewise.
+	* tree-vect-loop.c: Fix documentation.
+	(vect_get_cost): Start function name from new line.
+	* tree-vect-data-refs.c: Fix documentation.
+	* tree-vect-stmts.c: Likewise.
+	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
+	(vectorizable_store): Free vec_oprnds if allocated.
+	(vectorizable_condition): Initialize several variables to avoid
+	warnings.
+	* tree-vect-slp.c: Fix documentation.
+
 2010-09-16  Richard Guenther  <rguenther@suse.de>
 
 	* tree.c (tree_node_structure_for_code): TRANSLATION_UNIT_DECL


@@ -1,3 +1,9 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* gcc.dg/vect/bb-slp-8.c: Fix documentation, add space between function
+	name and parentheses.
+	* gcc.dg/vect/bb-slp-8a.c, gcc.dg/vect/bb-slp-8b.c: Likewise.
+
 2010-09-15  Jason Merrill  <jason@redhat.com>
 
 	* g++.dg/parse/parameter-declaration-2.C: New.


@@ -15,8 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
 int i;
 unsigned int a0, a1, a2, a3;
-/* pin and pout may alias. But since all the loads are before the first store
-the basic block is vectorizable. */
+/* pin and pout may alias. But since all the loads are before the first
+store the basic block is vectorizable. */
 a0 = *pin++ + 23;
 a1 = *pin++ + 142;
 a2 = *pin++ + 2;
@@ -35,7 +35,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
 || out[1] != (in[1] + 142) * y
 || out[2] != (in[2] + 2) * x
 || out[3] != (in[3] + 31) * y)
-abort();
+abort ();
 return 0;
 }


@@ -15,7 +15,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
 int i;
 unsigned int a0, a1, a2, a3;
 /* pin and pout may alias, and loads and stores are mixed. The basic block
 cannot be vectorized. */
 a0 = *pin++ + 23;
 *pout++ = a0 * x;
@@ -34,7 +34,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
 || out[1] != (in[1] + 142) * y
 || out[2] != (in[2] + 2) * x
 || out[3] != (in[3] + 31) * y)
-abort();
+abort ();
 return 0;
 }


@@ -36,7 +36,7 @@ main1 (unsigned int x, unsigned int y)
 || out[1] != (in[1] + 142) * y
 || out[2] != (in[2] + 2) * x
 || out[3] != (in[3] + 31) * y)
-abort();
+abort ();
 return 0;
 }


@@ -45,19 +45,19 @@ along with GCC; see the file COPYING3. If not see
 #include "optabs.h"
 /* Return the smallest scalar part of STMT.
 This is used to determine the vectype of the stmt. We generally set the
 vectype according to the type of the result (lhs). For stmts whose
 result-type is different than the type of the arguments (e.g., demotion,
 promotion), vectype will be reset appropriately (later). Note that we have
 to visit the smallest datatype in this function, because that determines the
 VF. If the smallest datatype in the loop is present only as the rhs of a
 promotion operation - we'd miss it.
 Such a case, where a variable of this datatype does not appear in the lhs
 anywhere in the loop, can only occur if it's an invariant: e.g.:
 'int_x = (int) short_inv', which we'd expect to have been optimized away by
-invariant motion. However, we cannot rely on invariant motion to always take
-invariants out of the loop, and so in the case of promotion we also have to
-check the rhs.
+invariant motion. However, we cannot rely on invariant motion to always
+take invariants out of the loop, and so in the case of promotion we also
+have to check the rhs.
 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
 types. */
@@ -89,7 +89,7 @@ vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
 int
 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
@@ -151,7 +151,7 @@ vect_insert_into_interleaving_chain (struct data_reference *dra,
 /* Function vect_update_interleaving_chain.
 For two data-refs DRA and DRB that are a part of a chain interleaved data
 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
 There are four possible cases:
 1. New stmts - both DRA and DRB are not a part of any chain:
@@ -211,7 +211,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
 {
 /* DRB's init is smaller than the init of the stmt previously marked
 as the first stmt of the interleaving chain of DRA. Therefore, we
 update FIRST_STMT and put DRB in the head of the list. */
 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
@@ -323,7 +323,11 @@ vect_equal_offsets (tree offset1, tree offset2)
 }
-/* Check dependence between DRA and DRB for basic block vectorization. */
+/* Check dependence between DRA and DRB for basic block vectorization.
+If the accesses share same bases and offsets, we can compare their initial
+constant offsets to decide whether they differ or not. In case of a read-
+write dependence we check that the load is before the store to ensure that
+vectorization will not change the order of the accesses. */
 static bool
 vect_drs_dependent_in_basic_block (struct data_reference *dra,
@@ -342,7 +346,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
 return true;
 }
 /* Check that the data-refs have same bases and offsets. If not, we can't
 determine if they are dependent. */
 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
@@ -368,10 +372,10 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
 if (init_a != init_b)
 return false;
 /* We have a read-write dependence. Check that the load is before the store.
 When we vectorize basic blocks, vector load can be only before
 corresponding scalar load, and vector store can be only after its
 corresponding scalar store. So the order of the acceses is preserved in
 case the load is before the store. */
 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
@@ -383,7 +387,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
 /* Function vect_check_interleaving.
 Check if DRA and DRB are a part of interleaving. In case they are, insert
 DRA and DRB in an interleaving chain. */
 static bool
@@ -813,7 +817,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 /* In case the dataref is in an inner-loop of the loop that is being
 vectorized (LOOP), we use the base and misalignment information
 relative to the outer-loop (LOOP). This is ok only if the misalignment
 stays the same throughout the execution of the inner-loop, which is why
 we have to check that the stride of the dataref in the inner-loop evenly
 divides by the vector size. */
@@ -1241,8 +1245,8 @@ vect_peeling_hash_get_most_frequent (void **slot, void *data)
 }
-/* Traverse peeling hash table and calculate cost for each peeling option. Find
-one with the lowest cost. */
+/* Traverse peeling hash table and calculate cost for each peeling option.
+Find the one with the lowest cost. */
 static int
 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
@@ -1326,15 +1330,15 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
 the alignment of data references in the loop.
 FOR NOW: we assume that whatever versioning/peeling takes place, only the
-original loop is to be vectorized; Any other loops that are created by
+original loop is to be vectorized. Any other loops that are created by
 the transformations performed in this pass - are not supposed to be
 vectorized. This restriction will be relaxed.
 This pass will require a cost model to guide it whether to apply peeling
 or versioning or a combination of the two. For example, the scheme that
 intel uses when given a loop with several memory accesses, is as follows:
 choose one memory access ('p') which alignment you want to force by doing
 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
 other accesses are not necessarily aligned, or (2) use loop versioning to
 generate one loop in which all accesses are aligned, and another loop in
 which only 'p' is necessarily aligned.
@@ -1343,9 +1347,9 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
 Devising a cost model is the most critical aspect of this work. It will
 guide us on which access to peel for, whether to use loop versioning, how
 many versions to create, etc. The cost model will probably consist of
 generic considerations as well as target specific considerations (on
 powerpc for example, misaligned stores are more painful than misaligned
 loads).
@@ -1406,7 +1410,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
 }
 }
 These loops are later passed to loop_transform to be vectorized. The
 vectorizer will use the alignment information to guide the transformation
 (whether to generate regular loads/stores, or with special handling for
 misalignment). */
@@ -1500,11 +1504,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 npeel_tmp = (nelements - mis) % vf;
 /* For multiple types, it is possible that the bigger type access
 will have more than one peeling option. E.g., a loop with two
 types: one of size (vector size / 4), and the other one of
 size (vector size / 8). Vectorization factor will 8. If both
 access are misaligned by 3, the first one needs one scalar
 iteration to be aligned, and the second one needs 5. But the
 the first one will be aligned also by peeling 5 scalar
 iterations, and in that case both accesses will be aligned.
 Hence, except for the immediate peeling amount, we also want
@@ -1996,7 +2000,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
 /* Analyze groups of strided accesses: check that DR belongs to a group of
 strided accesses of legal size, step, etc. Detect gaps, single element
 interleaving, and other special cases. Set strided access info.
 Collect groups of strided stores for further use in SLP analysis. */
@@ -2072,9 +2076,10 @@ vect_analyze_group_access (struct data_reference *dr)
 while (next)
 {
-/* Skip same data-refs. In case that two or more stmts share data-ref
-(supported only for loads), we vectorize only the first stmt, and
-the rest get their vectorized loads from the first one. */
+/* Skip same data-refs. In case that two or more stmts share
+data-ref (supported only for loads), we vectorize only the first
+stmt, and the rest get their vectorized loads from the first
+one. */
 if (!tree_int_cst_compare (DR_INIT (data_ref),
 DR_INIT (STMT_VINFO_DATA_REF (
 vinfo_for_stmt (next)))))
@@ -2196,7 +2201,7 @@ vect_analyze_group_access (struct data_reference *dr)
 /* FORNOW: we handle only interleaving that is a power of 2.
 We don't fail here if it may be still possible to vectorize the
 group using SLP. If not, the size of the group will be checked in
 vect_analyze_operations, and the vectorization will fail. */
 if (exact_log2 (stride) == -1)
 {
@@ -2483,8 +2488,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
 }
-/* Go through the data-refs, check that the analysis succeeded. Update pointer
-from stmt_vec_info struct to DR and vectype. */
+/* Go through the data-refs, check that the analysis succeeded. Update
+pointer from stmt_vec_info struct to DR and vectype. */
 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
 {
@@ -2572,7 +2577,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 tree dinit;
 /* Build a reference to the first location accessed by the
 inner-loop: *(BASE+INIT). (The first location is actually
 BASE+INIT+OFFSET, but we add OFFSET separately later). */
 tree inner_base = build_fold_indirect_ref
 (fold_build2 (POINTER_PLUS_EXPR,
@@ -2712,7 +2717,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 /* Function vect_get_new_vect_var.
 Returns a name for a new variable. The current naming scheme appends the
 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
 the name of vectorizer generated variables, and appends that to NAME if
 provided. */
@@ -2767,7 +2772,7 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
 LOOP: Specify relative to which loop-nest should the address be computed.
 For example, when the dataref is in an inner-loop nested in an
 outer-loop that is now being vectorized, LOOP can be either the
 outer-loop, or the inner-loop. The first memory location accessed
 by the following dataref ('in' points to short):
 for (i=0; i<N; i++)
@@ -2937,7 +2942,7 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
 Return the increment stmt that updates the pointer in PTR_INCR.
 3. Set INV_P to true if the access pattern of the data reference in the
 vectorized loop is invariant. Set it to false otherwise.
 4. Return the pointer. */
@@ -3017,7 +3022,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 print_generic_expr (vect_dump, base_name, TDF_SLIM);
 }
-/** (1) Create the new vector-pointer variable: **/
+/* (1) Create the new vector-pointer variable. */
 vect_ptr_type = build_pointer_type (vectype);
 base = get_base_address (DR_REF (dr));
 if (base
@@ -3067,16 +3072,16 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 add_referenced_var (vect_ptr);
-/** Note: If the dataref is in an inner-loop nested in LOOP, and we are
-vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
-def-use update cycles for the pointer: One relative to the outer-loop
+/* Note: If the dataref is in an inner-loop nested in LOOP, and we are
+vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
+def-use update cycles for the pointer: one relative to the outer-loop
 (LOOP), which is what steps (3) and (4) below do. The other is relative
 to the inner-loop (which is the inner-most loop containing the dataref),
 and this is done be step (5) below.
 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
 inner-most loop, and so steps (3),(4) work the same, and step (5) is
 redundant. Steps (3),(4) create the following:
 vp0 = &base_addr;
 LOOP: vp1 = phi(vp0,vp2)
@@ -3085,8 +3090,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 vp2 = vp1 + step
 goto LOOP
 If there is an inner-loop nested in loop, then step (5) will also be
 applied, and an additional update in the inner-loop will be created:
 vp0 = &base_addr;
 LOOP: vp1 = phi(vp0,vp2)
@@ -3098,8 +3103,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 vp2 = vp1 + step
 if () goto LOOP */
-/** (3) Calculate the initial address the vector-pointer, and set
-the vector-pointer to point to it before the loop: **/
+/* (2) Calculate the initial address the vector-pointer, and set
+the vector-pointer to point to it before the loop. */
 /* Create: (&(base[init_val+offset]) in the loop preheader. */
@@ -3140,10 +3145,9 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 else
 vect_ptr_init = new_temp;
-/** (4) Handle the updating of the vector-pointer inside the loop.
-This is needed when ONLY_INIT is false, and also when AT_LOOP
-is the inner-loop nested in LOOP (during outer-loop vectorization).
-**/
+/* (3) Handle the updating of the vector-pointer inside the loop.
+This is needed when ONLY_INIT is false, and also when AT_LOOP is the
+inner-loop nested in LOOP (during outer-loop vectorization). */
 /* No update in loop is required. */
 if (only_init && (!loop_vinfo || at_loop == loop))
@@ -3182,8 +3186,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 return vptr;
-/** (5) Handle the updating of the vector-pointer inside the inner-loop
-nested in LOOP, if exists: **/
+/* (4) Handle the updating of the vector-pointer inside the inner-loop
+nested in LOOP, if exists. */
 gcc_assert (nested_in_vect_loop);
 if (!only_init)
@@ -3358,12 +3362,12 @@ vect_strided_store_supported (tree vectype)
 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
 a power of 2, generate interleave_high/low stmts to reorder the data
 correctly for the stores. Return the final references for stores in
 RESULT_CHAIN.
 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-The input is 4 vectors each containing 8 elements. We assign a number to each
-element, the input sequence is:
+The input is 4 vectors each containing 8 elements. We assign a number to
+each element, the input sequence is:
 1st vec: 0 1 2 3 4 5 6 7
 2nd vec: 8 9 10 11 12 13 14 15
@@ -3379,18 +3383,18 @@ vect_strided_store_supported (tree vectype)
 i.e., we interleave the contents of the four vectors in their order.
 We use interleave_high/low instructions to create such output. The input of
 each interleave_high/low operation is two vectors:
 1st vec 2nd vec
 0 1 2 3 4 5 6 7
 the even elements of the result vector are obtained left-to-right from the
 high/low elements of the first vector. The odd elements of the result are
 obtained left-to-right from the high/low elements of the second vector.
 The output of interleave_high will be: 0 4 1 5
 and of interleave_low: 2 6 3 7
 The permutation is done in log LENGTH stages. In each stage interleave_high
 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
 where the first argument is taken from the first half of DR_CHAIN and the
 second argument from it's second half.
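
The interleave_high/low example in the hunk above (1st vec = 0 1 2 3, 2nd vec = 4 5 6 7, giving 0 4 1 5 and 2 6 3 7) can be reproduced with a small stand-alone program. This is a hedged illustration in plain C of the reordering that comment describes, not GCC code.

#include <stdio.h>

/* Interleave the first halves of A and B into HIGH and the second
   halves into LOW, mimicking interleave_high/interleave_low.  */
static void
interleave (const int *a, const int *b, int n, int *high, int *low)
{
  int i;

  for (i = 0; i < n / 2; i++)
    {
      high[2 * i] = a[i];
      high[2 * i + 1] = b[i];
      low[2 * i] = a[n / 2 + i];
      low[2 * i + 1] = b[n / 2 + i];
    }
}

int
main (void)
{
  int a[4] = { 0, 1, 2, 3 }, b[4] = { 4, 5, 6, 7 };
  int high[4], low[4], i;

  interleave (a, b, 4, high, low);
  for (i = 0; i < 4; i++)
    printf ("%d ", high[i]);   /* prints: 0 4 1 5 */
  printf ("\n");
  for (i = 0; i < 4; i++)
    printf ("%d ", low[i]);    /* prints: 2 6 3 7 */
  printf ("\n");
  return 0;
}
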
@@ -3582,8 +3586,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
 1. the misalignment computation
 2. the extra vector load (for the optimized realignment scheme).
 3. the phi node for the two vectors from which the realignment is
-done (for the optimized realignment scheme).
-*/
+done (for the optimized realignment scheme). */
 /* 1. Determine where to generate the misalignment computation.
@@ -3807,7 +3810,7 @@ vect_strided_load_supported (tree vectype)
 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
 a power of 2, generate extract_even/odd stmts to reorder the input data
 correctly. Return the final references for loads in RESULT_CHAIN.
 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
 The input is 4 vectors each containing 8 elements. We assign a number to each
@@ -3828,19 +3831,19 @@ vect_strided_load_supported (tree vectype)
 i.e., the first output vector should contain the first elements of each
 interleaving group, etc.
-We use extract_even/odd instructions to create such output. The input of each
-extract_even/odd operation is two vectors
+We use extract_even/odd instructions to create such output. The input of
+each extract_even/odd operation is two vectors
 1st vec 2nd vec
 0 1 2 3 4 5 6 7
 and the output is the vector of extracted even/odd elements. The output of
 extract_even will be: 0 2 4 6
 and of extract_odd: 1 3 5 7
-The permutation is done in log LENGTH stages. In each stage extract_even and
-extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
-order. In our example,
+The permutation is done in log LENGTH stages. In each stage extract_even
+and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
+their order. In our example,
 E1: extract_even (1st vec, 2nd vec)
 E2: extract_odd (1st vec, 2nd vec)
@@ -3977,13 +3980,12 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
 if (!next_stmt)
 break;
 /* Skip the gaps. Loads created for the gaps will be removed by dead
 code elimination pass later. No need to check for the first stmt in
 the group, since it always exists.
 DR_GROUP_GAP is the number of steps in elements from the previous
 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
-correspond to the gaps.
-*/
+correspond to the gaps. */
 if (next_stmt != first_stmt
 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
 {
@@ -4088,8 +4090,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
 /* We can choose between using the implicit realignment scheme (generating
 a misaligned_move stmt) and the explicit realignment scheme (generating
-aligned loads with a REALIGN_LOAD). There are two variants to the explicit
-realignment scheme: optimized, and unoptimized.
+aligned loads with a REALIGN_LOAD). There are two variants to the
+explicit realignment scheme: optimized, and unoptimized.
 We can optimize the realignment only if the step between consecutive
 vector loads is equal to the vector size. Since the vector memory
 accesses advance in steps of VS (Vector Size) in the vectorized loop, it


@@ -76,7 +76,7 @@ along with GCC; see the file COPYING3. If not see
 had successfully passed the analysis phase.
 Throughout this pass we make a distinction between two types of
 data: scalars (which are represented by SSA_NAMES), and memory references
 ("data-refs"). These two types of data require different handling both
 during analysis and transformation. The types of data-refs that the
 vectorizer currently supports are ARRAY_REFS which base is an array DECL
 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
@@ -97,10 +97,10 @@ along with GCC; see the file COPYING3. If not see
 =====================
 The loop transformation phase scans all the stmts in the loop, and
 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
 the loop that needs to be vectorized. It inserts the vector code sequence
 just before the scalar stmt S, and records a pointer to the vector code
 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
 attached to S). This pointer will be used for the vectorization of following
 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
 otherwise, we rely on dead code elimination for removing it.
@@ -112,7 +112,7 @@ along with GCC; see the file COPYING3. If not see
 To vectorize stmt S2, the vectorizer first finds the stmt that defines
 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
 resulting sequence would be:
 VS1: vb = px[i];
@@ -128,13 +128,13 @@ along with GCC; see the file COPYING3. If not see
 Currently the only target specific information that is used is the
 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
 Targets that can support different sizes of vectors, for now will need
 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
 flexibility will be added in the future.
 Since we only vectorize operations which vector form can be
 expressed using existing tree codes, to verify that an operation is
 supported, the vectorizer checks the relevant optab at the relevant
 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
 the value found is CODE_FOR_nothing, then there's no target support, and
 we can't vectorize the stmt.
@@ -144,14 +144,14 @@ along with GCC; see the file COPYING3. If not see
 /* Function vect_determine_vectorization_factor
 Determine the vectorization factor (VF). VF is the number of data elements
 that are operated upon in parallel in a single iteration of the vectorized
 loop. For example, when vectorizing a loop that operates on 4byte elements,
 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
 elements can fit in a single vector register.
 We currently support vectorization of loops in which all types operated upon
 are of the same size. Therefore this function currently sets VF according to
 the size of the types operated upon, and fails if there are multiple sizes
 in the loop.
@@ -438,7 +438,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 /* Function vect_analyze_scalar_cycles_1.
 Examine the cross iteration def-use cycles of scalar variables
 in LOOP. LOOP_VINFO represents the loop that is now being
 considered for vectorization (can be LOOP, or an outer-loop
 enclosing LOOP). */
@@ -454,7 +454,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 if (vect_print_dump_info (REPORT_DETAILS))
 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
 /* First - identify all inductions. Reduction detection assumes that all the
 inductions have been identified, therefore, this order must not be
 changed. */
 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -470,7 +470,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
 }
 /* Skip virtual phi's. The data dependences that are associated with
 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
 if (!is_gimple_reg (SSA_NAME_VAR (def)))
 continue;
@@ -569,7 +569,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 /* Function vect_analyze_scalar_cycles.
 Examine the cross iteration def-use cycles of scalar variables, by
-analyzing the loop-header PHIs of scalar variables; Classify each
+analyzing the loop-header PHIs of scalar variables. Classify each
 cycle as one of the following: invariant, induction, reduction, unknown.
 We do that for the loop represented by LOOP_VINFO, and also to its
 inner-loop, if exists.
@@ -1125,8 +1125,8 @@ vect_analyze_loop_form (struct loop *loop)
 /* Get cost by calling cost target builtin. */
-static inline
-int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
+static inline int
+vect_get_cost (enum vect_cost_for_stmt type_of_cost)
 {
 tree dummy_type = NULL;
 int dummy = 0;
@@ -1301,7 +1301,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 return false;
 }
 /* Analyze cost. Decide if worth while to vectorize. */
 /* Once VF is set, SLP costs should be updated since the number of created
 vector stmts depends on VF. */
@@ -1374,7 +1374,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 /* Function vect_analyze_loop.
 Apply a set of analyses on LOOP, and create a loop_vec_info struct
 for it. The different analyses will record information in the
 loop_vec_info struct. */
 loop_vec_info
 vect_analyze_loop (struct loop *loop)
@@ -1594,7 +1594,7 @@ reduction_code_for_scalar_code (enum tree_code code,
 }
 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
 STMT is printed with a message MSG. */
 static void
@@ -1608,7 +1608,7 @@ report_vect_op (gimple stmt, const char *msg)
 /* Function vect_is_simple_reduction_1
 (1) Detect a cross-iteration def-use cycle that represents a simple
 reduction computation. We look for the following pattern:
 loop_header:
 a1 = phi < a0, a2 >
@@ -2023,7 +2023,7 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
 int innerloop_iters, i, stmt_cost;
 /* Count statements in scalar loop. Using this as scalar cost for a single
 iteration for now.
 TODO: Add outer loop support.
@@ -2308,7 +2308,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 something more reasonable. */
 /* If the number of iterations is known and we do not do versioning, we can
 decide whether to vectorize at compile time. Hence the scalar version
 do not carry cost model guard costs. */
 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
@@ -2339,7 +2339,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
 }
 /* Calculate number of iterations required to make the vector version
 profitable, relative to the loop bodies only. The following condition
 must hold true:
 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
 where
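
The hunk above stops at the profitability condition SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC. The stand-alone sketch below simply searches for the smallest niters that satisfies it with made-up numbers; the readings of the abbreviations (SIC/VIC = scalar/vector cost per iteration, SOC/VOC = one-time outside-of-loop costs, PL_ITERS/EP_ITERS = prologue/epilogue iterations, VF = vectorization factor) and all values are assumptions for illustration, not data from the commit.

#include <stdio.h>

int
main (void)
{
  /* Illustrative numbers only.  */
  int SIC = 4, SOC = 0, VIC = 6, VOC = 20, VF = 4;
  int PL_ITERS = 3, EP_ITERS = 3;
  int niters;

  /* Find the smallest niters satisfying
     SIC * niters + SOC > VIC * ((niters - PL_ITERS - EP_ITERS) / VF) + VOC.  */
  for (niters = 1; niters < 1000; niters++)
    if (SIC * niters + SOC
        > VIC * ((niters - PL_ITERS - EP_ITERS) / VF) + VOC)
      break;

  printf ("vector version profitable from niters = %d\n", niters);
  return 0;
}
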
@@ -2556,7 +2556,7 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
 Output:
 Return a vector variable, initialized with the first VF values of
 the induction variable. E.g., for an iv with IV_PHI='X' and
 evolution S, for a vector of 4 units, we want to return:
 [X, X + S, X + 2*S, X + 3*S]. */
@@ -2638,8 +2638,8 @@ get_initial_def_for_induction (gimple iv_phi)
 if (nested_in_vect_loop)
 {
 /* iv_loop is nested in the loop to be vectorized. init_expr had already
-been created during vectorization of previous stmts; We obtain it from
-the STMT_VINFO_VEC_STMT of the defining stmt. */
+been created during vectorization of previous stmts. We obtain it
+from the STMT_VINFO_VEC_STMT of the defining stmt. */
 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
 loop_preheader_edge (iv_loop));
 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
@@ -2905,7 +2905,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
 /* In case of double reduction we only create a vector variable to be put
 in the reduction phi node. The actual statement creation is done in
 vect_create_epilog_for_reduction. */
 if (adjustment_def && nested_in_vect_loop
 && TREE_CODE (init_val) == SSA_NAME
@@ -3023,7 +3023,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
 reduction statements.
 STMT is the scalar reduction stmt that is being vectorized.
 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
 number of elements that we can fit in a vectype (nunits). In this case
 we have to generate more than one vector stmt - i.e - we need to "unroll"
 the vector stmt by a factor VF/nunits. For more details see documentation
 in vectorizable_operation.
@@ -3314,7 +3314,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
 /* In case this is a reduction in an inner-loop while vectorizing an outer
 loop - we don't need to extract a single scalar result at the end of the
 inner-loop (unless it is double reduction, i.e., the use of reduction is
 outside the outer-loop). The final vector of partial results will be used
 in the vectorized outer-loop, or reduced to a scalar result at the end of
 the outer-loop. */
 if (nested_in_vect_loop && !double_reduc)
@@ -3473,7 +3473,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
 }
 /* The only case where we need to reduce scalar results in SLP, is
 unrolling. If the size of SCALAR_RESULTS is greater than
 GROUP_SIZE, we reduce them combining elements modulo
 GROUP_SIZE. */
 if (slp_node)
@@ -3579,7 +3579,7 @@ vect_finalize_reduction:
 VEC_replace (gimple, new_phis, 0, epilog_stmt);
 }
 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
 phis with new adjusted scalar results, i.e., replace use <s_out0>
 with use <s_out4>.
@@ -3605,8 +3605,8 @@
 use <s_out4> */
 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
 case that GROUP_SIZE is greater than vectorization factor). Therefore, we
 need to match SCALAR_RESULTS with corresponding statements. The first
 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
 the first vector stmt, etc.
 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
@@ -3639,7 +3639,7 @@
 phis = VEC_alloc (gimple, heap, 3);
 /* Find the loop-closed-use at the loop exit of the original scalar
 result. (The reduction result is expected to have two immediate uses -
 one at the latch block, and one at the loop exit). */
 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
@@ -3740,7 +3740,7 @@
 vect_phi_res = PHI_RESULT (vect_phi);
 /* Replace the use, i.e., set the correct vs1 in the regular
 reduction phi node. FORNOW, NCOPIES is always 1, so the
 loop is redundant. */
 use = reduction_phi;
 for (j = 0; j < ncopies; j++)
@@ -3764,8 +3764,8 @@
 phis = VEC_alloc (gimple, heap, 3);
 /* Find the loop-closed-use at the loop exit of the original scalar
-result. (The reduction result is expected to have two immediate uses -
+result. (The reduction result is expected to have two immediate uses,
 one at the latch block, and one at the loop exit). For double
 reductions we are looking for exit phis of the outer loop. */
 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
 {
@ -3814,7 +3814,7 @@ vect_finalize_reduction:
Return FALSE if not a vectorizable STMT, TRUE otherwise. Return FALSE if not a vectorizable STMT, TRUE otherwise.
This function also handles reduction idioms (patterns) that have been This function also handles reduction idioms (patterns) that have been
recognized in advance during vect_pattern_recog. In this case, STMT may be recognized in advance during vect_pattern_recog. In this case, STMT may be
of this form: of this form:
X = pattern_expr (arg0, arg1, ..., X) X = pattern_expr (arg0, arg1, ..., X)
and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
@ -3835,9 +3835,9 @@ vect_finalize_reduction:
Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
indicates what is the actual level of parallelism (V8HI in the example), so indicates what is the actual level of parallelism (V8HI in the example), so
that the right vectorization factor would be derived. This vectype that the right vectorization factor would be derived. This vectype
corresponds to the type of arguments to the reduction stmt, and should *NOT* corresponds to the type of arguments to the reduction stmt, and should *NOT*
be used to create the vectorized stmt. The right vectype for the vectorized be used to create the vectorized stmt. The right vectype for the vectorized
stmt is obtained from the type of the result X: stmt is obtained from the type of the result X:
get_vectype_for_scalar_type (TREE_TYPE (X)) get_vectype_for_scalar_type (TREE_TYPE (X))
@ -3934,7 +3934,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
} }
/* 3. Check the operands of the operation. The first operands are defined /* 3. Check the operands of the operation. The first operands are defined
inside the loop body. The last operand is the reduction variable, inside the loop body. The last operand is the reduction variable,
which is defined by the loop-header-phi. */ which is defined by the loop-header-phi. */
@ -3979,7 +3979,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
return false; return false;
/* All uses but the last are expected to be defined in the loop. /* All uses but the last are expected to be defined in the loop.
The last use is the reduction variable. In case of nested cycle this The last use is the reduction variable. In case of nested cycle this
assumption is not true: we use reduc_index to record the index of the assumption is not true: we use reduc_index to record the index of the
reduction variable. */ reduction variable. */
for (i = 0; i < op_type-1; i++) for (i = 0; i < op_type-1; i++)
@ -4110,7 +4110,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
1. The tree-code that is used to create the vector operation in the 1. The tree-code that is used to create the vector operation in the
epilog code (that reduces the partial results) is not the epilog code (that reduces the partial results) is not the
tree-code of STMT, but is rather the tree-code of the original tree-code of STMT, but is rather the tree-code of the original
stmt from the pattern that STMT is replacing. I.e, in the example stmt from the pattern that STMT is replacing. I.e, in the example
above we want to use 'widen_sum' in the loop, but 'plus' in the above we want to use 'widen_sum' in the loop, but 'plus' in the
epilog. epilog.
2. The type (mode) we use to check available target support 2. The type (mode) we use to check available target support
@ -4513,7 +4513,7 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
/* Function vectorizable_live_operation. /* Function vectorizable_live_operation.
STMT computes a value that is used outside the loop. Check if STMT computes a value that is used outside the loop. Check if
it can be supported. */ it can be supported. */
bool bool
@ -4554,7 +4554,7 @@ vectorizable_live_operation (gimple stmt,
gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op); gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op); gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
/* FORNOW: support only if all uses are invariant. This means /* FORNOW: support only if all uses are invariant. This means
that the scalar operations can remain in place, unvectorized. that the scalar operations can remain in place, unvectorized.
The original last scalar value that they compute will be used. */ The original last scalar value that they compute will be used. */
@ -4665,7 +4665,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
compile time constant), or it is a constant that is not divisible by the compile time constant), or it is a constant that is not divisible by the
vectorization factor, then an epilog loop needs to be created. vectorization factor, then an epilog loop needs to be created.
We therefore duplicate the loop: the original loop will be vectorized, We therefore duplicate the loop: the original loop will be vectorized,
and will compute the first (n/VF) iterations. The second copy of the loop and will compute the first (n/VF) iterations. The second copy of the loop
will remain scalar and will compute the remaining (n%VF) iterations. will remain scalar and will compute the remaining (n%VF) iterations.
(VF is the vectorization factor). */ (VF is the vectorization factor). */
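As an illustration only (a hand-written sketch assuming VF = 4, not code produced by the vectorizer), the duplication described above corresponds to this source-level transformation:

#include <stddef.h>

void
split_for_epilog (float *a, const float *b, size_t n)
{
  size_t i;
  size_t main_iters = (n / 4) * 4;   /* first (n/VF)*VF iterations */

  for (i = 0; i < main_iters; i++)   /* this copy gets vectorized */
    a[i] = b[i] + 1.0f;

  for (; i < n; i++)                 /* scalar epilog: n % VF iterations */
    a[i] = b[i] + 1.0f;
}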
@ -147,7 +147,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
} }
/* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
from the pattern. Check that all the stmts of the node are in the from the pattern. Check that all the stmts of the node are in the
pattern. */ pattern. */
if (loop && def_stmt && gimple_bb (def_stmt) if (loop && def_stmt && gimple_bb (def_stmt)
&& flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
@ -299,7 +299,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Recursively build an SLP tree starting from NODE. /* Recursively build an SLP tree starting from NODE.
Fail (and return FALSE) if def-stmts are not isomorphic, require data Fail (and return FALSE) if def-stmts are not isomorphic, require data
permutation or are of unsupported types of operation. Otherwise, return permutation or are of unsupported types of operation. Otherwise, return
TRUE. */ TRUE. */
static bool static bool
@ -542,7 +542,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (prev_first_load) if (prev_first_load)
{ {
/* Check that there are no loads from different interleaving /* Check that there are no loads from different interleaving
chains in the same node. The only exception is complex chains in the same node. The only exception is complex
numbers. */ numbers. */
if (prev_first_load != first_load if (prev_first_load != first_load
&& rhs_code != REALPART_EXPR && rhs_code != REALPART_EXPR
@ -582,7 +582,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ncopies_for_cost, *node); ncopies_for_cost, *node);
} }
/* Store the place of this load in the interleaving chain. In /* Store the place of this load in the interleaving chain. In
case that permutation is needed we later decide if a specific case that permutation is needed we later decide if a specific
permutation is supported. */ permutation is supported. */
load_place = vect_get_place_in_interleaving_chain (stmt, load_place = vect_get_place_in_interleaving_chain (stmt,
@ -729,7 +729,7 @@ vect_print_slp_tree (slp_tree node)
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
stmts in NODE are to be marked. */ stmts in NODE are to be marked. */
static void static void
@ -897,7 +897,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
/* In case of reduction every load permutation is allowed, since the order /* In case of reduction every load permutation is allowed, since the order
of the reduction statements is not important (as opposed to the case of of the reduction statements is not important (as opposed to the case of
strided stores). The only condition we need to check is that all the strided stores). The only condition we need to check is that all the
load nodes are of the same size and have the same permutation (and then load nodes are of the same size and have the same permutation (and then
rearrange all the nodes of the SLP instance according to this rearrange all the nodes of the SLP instance according to this
permutation). */ permutation). */
@ -920,7 +920,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
real_c = real_b + real_a; real_c = real_b + real_a;
imag_c = imag_a + imag_b; imag_c = imag_a + imag_b;
i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
{real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
chains are mixed, they match the above pattern. */ chains are mixed, they match the above pattern. */
if (complex_numbers) if (complex_numbers)
{ {
@ -969,7 +969,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0); stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
/* LOAD_PERMUTATION is a list of indices of all the loads of the SLP /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
instance, not all the loads belong to the same node or interleaving instance, not all the loads belong to the same node or interleaving
group. Hence, we need to divide them into groups according to group. Hence, we need to divide them into groups according to
GROUP_SIZE. */ GROUP_SIZE. */
number_of_groups = VEC_length (int, load_permutation) / group_size; number_of_groups = VEC_length (int, load_permutation) / group_size;
@ -1002,7 +1002,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
if (!bad_permutation) if (!bad_permutation)
{ {
/* This permutation is valid for reduction. Since the order of the /* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */ according to the order of the loads. */
@ -1064,9 +1064,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
/* Find the first load in the loop that belongs to INSTANCE. /* Find the first load in the loop that belongs to INSTANCE.
When loads are in several SLP nodes, there can be a case in which the first When loads are in several SLP nodes, there can be a case in which the first
load does not appear in the first SLP node to be transformed, causing load does not appear in the first SLP node to be transformed, causing
incorrect order of statements. Since we generate all the loads together, incorrect order of statements. Since we generate all the loads together,
they must be inserted before the first load of the SLP instance and not they must be inserted before the first load of the SLP instance and not
before the first load of the first node of the instance. */ before the first load of the first node of the instance. */
static gimple static gimple
vect_find_first_load_in_slp_instance (slp_instance instance) vect_find_first_load_in_slp_instance (slp_instance instance)
{ {
@ -1083,6 +1084,7 @@ vect_find_first_load_in_slp_instance (slp_instance instance)
/* Find the last store in SLP INSTANCE. */ /* Find the last store in SLP INSTANCE. */
static gimple static gimple
vect_find_last_store_in_slp_instance (slp_instance instance) vect_find_last_store_in_slp_instance (slp_instance instance)
{ {
@ -1100,7 +1102,7 @@ vect_find_last_store_in_slp_instance (slp_instance instance)
} }
/* Analyze an SLP instance starting from a group of strided stores. Call /* Analyze an SLP instance starting from a group of strided stores. Call
vect_build_slp_tree to build a tree of packed stmts if possible. vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the loop. */ Return FALSE if it's impossible to SLP any stmt in the loop. */
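For illustration, a hypothetical basic block of the kind this analysis starts from -- a group of four adjacent stores whose def-stmts are isomorphic (a sketch, not taken from the patch or the testsuite):

void
slp_candidate (int *restrict out, const int *restrict in)
{
  /* Four adjacent (grouped) stores; the additions feeding them are
     isomorphic, so vect_build_slp_tree can pack them into one tree.  */
  out[0] = in[0] + 1;
  out[1] = in[1] + 2;
  out[2] = in[2] + 3;
  out[3] = in[3] + 4;
}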
@ -1274,7 +1276,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
} }
/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
trees of packed scalar stmts if SLP is possible. */ trees of packed scalar stmts if SLP is possible. */
bool bool
@ -1339,9 +1341,9 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance)) if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance); unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
/* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
loop-based vectorization. Such stmts will be marked as HYBRID. */ loop-based vectorization. Such stmts will be marked as HYBRID. */
vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
decided_to_slp++; decided_to_slp++;
} }
@ -1355,7 +1357,7 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
static void static void
vect_detect_hybrid_slp_stmts (slp_tree node) vect_detect_hybrid_slp_stmts (slp_tree node)
@ -1493,7 +1495,7 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
} }
/* Analyze statements in SLP instances of the basic block. Return TRUE if the /* Analyze statements in SLP instances of the basic block. Return TRUE if the
operations are supported. */ operations are supported. */
static bool static bool
@ -1523,7 +1525,7 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo)
/* Check if loads and stores are mixed in the basic block (in that /* Check if loads and stores are mixed in the basic block (in that
case if we are not sure that the accesses differ, we can't vectorize the case if we are not sure that the accesses differ, we can't vectorize the
basic block). Also return FALSE in case that there is a statement marked as basic block). Also return FALSE in case that there is a statement marked as
not vectorizable. */ not vectorizable. */
static bool static bool
@ -1783,11 +1785,11 @@ vect_slp_analyze_bb (basic_block bb)
/* SLP costs are calculated according to SLP instance unrolling factor (i.e., /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
the number of created vector stmts depends on the unrolling factor). However, the number of created vector stmts depends on the unrolling factor).
the actual number of vector stmts for every SLP node depends on VF which is However, the actual number of vector stmts for every SLP node depends on
set later in vect_analyze_operations(). Hence, SLP costs should be updated. VF which is set later in vect_analyze_operations (). Hence, SLP costs
In this function we assume that the inside costs calculated in should be updated. In this function we assume that the inside costs
vect_model_xxx_cost are linear in ncopies. */ calculated in vect_model_xxx_cost are linear in ncopies. */
void void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo) vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
@ -1846,7 +1848,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
op_num = reduc_index - 1; op_num = reduc_index - 1;
op = gimple_op (stmt, op_num + 1); op = gimple_op (stmt, op_num + 1);
/* For additional copies (see the explanation of NUMBER_OF_COPIES below) /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
we need either neutral operands or the original operands. See we need either neutral operands or the original operands. See
get_initial_def_for_reduction() for details. */ get_initial_def_for_reduction() for details. */
switch (code) switch (code)
{ {
@ -2051,7 +2053,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Number of vector stmts was calculated according to LHS in /* Number of vector stmts was calculated according to LHS in
vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
necessary. See vect_get_smallest_scalar_type() for details. */ necessary. See vect_get_smallest_scalar_type () for details. */
vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
&rhs_size_unit); &rhs_size_unit);
if (rhs_size_unit != lhs_size_unit) if (rhs_size_unit != lhs_size_unit)
@ -2065,7 +2067,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
*vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects); *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
/* SLP_NODE corresponds either to a group of stores or to a group of /* SLP_NODE corresponds either to a group of stores or to a group of
unary/binary operations. We don't call this function for loads. unary/binary operations. We don't call this function for loads.
For reduction defs we call vect_get_constant_vectors(), since we are For reduction defs we call vect_get_constant_vectors(), since we are
looking for initial loop invariant values. */ looking for initial loop invariant values. */
if (SLP_TREE_LEFT (slp_node) && reduc_index == -1) if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
@ -2167,7 +2169,7 @@ vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
/* Given FIRST_MASK_ELEMENT - the mask element in element representation, /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
return in CURRENT_MASK_ELEMENT its equivalent in target specific return in CURRENT_MASK_ELEMENT its equivalent in target specific
representation. Check that the mask is valid and return FALSE if not. representation. Check that the mask is valid and return FALSE if not.
Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
the next vector, i.e., the current first vector is not needed. */ the next vector, i.e., the current first vector is not needed. */
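A small numeric sketch of the element-to-byte conversion (hypothetical values; the byte layout is only an illustrative assumption about a target-specific representation such as the Altivec one mentioned below):

#include <stdio.h>

int
main (void)
{
  unsigned first_mask_element = 3;   /* element index in a V4SI vector */
  unsigned elem_size = 4;            /* bytes per element */

  /* In a byte-based permute mask the element expands to the byte
     indices 12 13 14 15.  */
  for (unsigned b = 0; b < elem_size; b++)
    printf ("%u ", first_mask_element * elem_size + b);
  printf ("\n");
  return 0;
}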
@ -2321,8 +2323,8 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
specific type, e.g., in bytes for Altivec). specific type, e.g., in bytes for Altivec).
The last mask is illegal since we assume two operands for permute The last mask is illegal since we assume two operands for permute
operation, and the mask element values can't be outside that range. Hence, operation, and the mask element values can't be outside that range.
the last mask must be converted into {2,5,5,5}. Hence, the last mask must be converted into {2,5,5,5}.
For the first two permutations we need the first and the second input For the first two permutations we need the first and the second input
vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
we need the second and the third vectors: {b1,c1,a2,b2} and we need the second and the third vectors: {b1,c1,a2,b2} and
@ -2438,7 +2440,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
group_size = SLP_INSTANCE_GROUP_SIZE (instance); group_size = SLP_INSTANCE_GROUP_SIZE (instance);
/* For each SLP instance calculate number of vector stmts to be created /* For each SLP instance calculate number of vector stmts to be created
for the scalar stmts in each node of the SLP tree. Number of vector for the scalar stmts in each node of the SLP tree. Number of vector
elements in one vector iteration is the number of scalar elements in elements in one vector iteration is the number of scalar elements in
one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
size. */ size. */
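For example (hypothetical numbers), with GROUP_SIZE = 4 scalar stmts per node, VF = 8 and a vector size of 4 elements, each node needs 4 * 8 / 4 = 8 vector stmts. A trivial sketch of that computation:

unsigned
vec_stmts_per_node (unsigned group_size, unsigned vf, unsigned nunits)
{
  /* Scalar elements per scalar iteration times VF, divided by the
     number of elements that fit in one vector.  */
  return group_size * vf / nunits;
}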
@ -2492,6 +2494,8 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
} }
/* Generate vector code for all SLP instances in the loop/basic block. */
bool bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{ {
@ -166,7 +166,7 @@ vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
/* Function exist_non_indexing_operands_for_use_p /* Function exist_non_indexing_operands_for_use_p
USE is one of the uses attached to STMT. Check if USE is USE is one of the uses attached to STMT. Check if USE is
used in STMT for anything other than indexing an array. */ used in STMT for anything other than indexing an array. */
static bool static bool
@ -175,7 +175,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
tree operand; tree operand;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* USE corresponds to some operand in STMT. If there is no data /* USE corresponds to some operand in STMT. If there is no data
reference in STMT, then any operand that corresponds to USE reference in STMT, then any operand that corresponds to USE
is not indexing an array. */ is not indexing an array. */
if (!STMT_VINFO_DATA_REF (stmt_info)) if (!STMT_VINFO_DATA_REF (stmt_info))
@ -215,7 +215,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
Inputs: Inputs:
- a USE in STMT in a loop represented by LOOP_VINFO - a USE in STMT in a loop represented by LOOP_VINFO
- LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
that defined USE. This is done by calling mark_relevant and passing it that defined USE. This is done by calling mark_relevant and passing it
the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant). the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
Outputs: Outputs:
@ -466,7 +466,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
relevant = vect_used_by_reduction relevant = vect_used_by_reduction
This is because we distinguish between two kinds of relevant stmts - This is because we distinguish between two kinds of relevant stmts -
those that are used by a reduction computation, and those that are those that are used by a reduction computation, and those that are
(also) used by a regular computation. This allows us later on to (also) used by a regular computation. This allows us later on to
identify stmts that are used solely by a reduction, and therefore the identify stmts that are used solely by a reduction, and therefore the
order of the results that they produce does not have to be kept. */ order of the results that they produce does not have to be kept. */
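A minimal sketch (hypothetical code, not from the patch): in the dot-product loop below the products a[i] * b[i] are used only by the sum reduction, so they are vect_used_by_reduction and the order of the partial results they produce need not be preserved.

#include <stddef.h>

int
dot_product (const int *a, const int *b, size_t n)
{
  int sum = 0;
  for (size_t i = 0; i < n; i++)
    sum += a[i] * b[i];   /* the product feeds only the reduction */
  return sum;
}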
@ -558,6 +558,9 @@ int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
dummy_type, dummy); dummy_type, dummy);
} }
/* Get cost for STMT. */
int int
cost_for_stmt (gimple stmt) cost_for_stmt (gimple stmt)
{ {
@ -870,10 +873,10 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
"pipelined."); "pipelined.");
/* Unaligned software pipeline has a load of an address, an initial /* Unaligned software pipeline has a load of an address, an initial
load, and possibly a mask operation to "prime" the loop. However, load, and possibly a mask operation to "prime" the loop. However,
if this is an access in a group of loads, which provide strided if this is an access in a group of loads, which provide strided
access, then the above cost should only be considered for one access, then the above cost should only be considered for one
access in the group. Inside the loop, there is a load op access in the group. Inside the loop, there is a load op
and a realignment op. */ and a realignment op. */
if (add_realign_cost) if (add_realign_cost)
@ -897,8 +900,8 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
/* Function vect_init_vector. /* Function vect_init_vector.
Insert a new stmt (INIT_STMT) that initializes a new vector variable with Insert a new stmt (INIT_STMT) that initializes a new vector variable with
the vector elements of VECTOR_VAR. Place the initialization at BSI if it the vector elements of VECTOR_VAR. Place the initialization at BSI if it
is not NULL. Otherwise, place the initialization at the loop preheader. is not NULL. Otherwise, place the initialization at the loop preheader.
Return the DEF of INIT_STMT. Return the DEF of INIT_STMT.
It will be used in the vectorization of STMT. */ It will be used in the vectorization of STMT. */
@ -963,7 +966,7 @@ vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
/* Function vect_get_vec_def_for_operand. /* Function vect_get_vec_def_for_operand.
OP is an operand in STMT. This function returns a (vector) def that will be OP is an operand in STMT. This function returns a (vector) def that will be
used in the vectorized stmt for STMT. used in the vectorized stmt for STMT.
In the case that OP is an SSA_NAME which is defined in the loop, then In the case that OP is an SSA_NAME which is defined in the loop, then
@ -1117,10 +1120,10 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
/* Function vect_get_vec_def_for_stmt_copy /* Function vect_get_vec_def_for_stmt_copy
Return a vector-def for an operand. This function is used when the Return a vector-def for an operand. This function is used when the
vectorized stmt to be created (by the caller to this function) is a "copy" vectorized stmt to be created (by the caller to this function) is a "copy"
created in case the vectorized result cannot fit in one vector, and several created in case the vectorized result cannot fit in one vector, and several
copies of the vector-stmt are required. In this case the vector-def is copies of the vector-stmt are required. In this case the vector-def is
retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
of the stmt that defines VEC_OPRND. of the stmt that defines VEC_OPRND.
DT is the type of the vector def VEC_OPRND. DT is the type of the vector def VEC_OPRND.
@ -1128,7 +1131,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
Context: Context:
In case the vectorization factor (VF) is bigger than the number In case the vectorization factor (VF) is bigger than the number
of elements that can fit in a vectype (nunits), we have to generate of elements that can fit in a vectype (nunits), we have to generate
more than one vector stmt to vectorize the scalar stmt. This situation more than one vector stmt to vectorize the scalar stmt. This situation
arises when there are multiple data-types operated upon in the loop; the arises when there are multiple data-types operated upon in the loop; the
smallest data-type determines the VF, and as a result, when vectorizing smallest data-type determines the VF, and as a result, when vectorizing
stmts operating on wider types we need to create 'VF/nunits' "copies" of the stmts operating on wider types we need to create 'VF/nunits' "copies" of the
@ -1153,7 +1156,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
The vectorization of S2: The vectorization of S2:
To create the first vector-stmt out of the 4 copies - VSnew.0 - To create the first vector-stmt out of the 4 copies - VSnew.0 -
the function 'vect_get_vec_def_for_operand' is called to the function 'vect_get_vec_def_for_operand' is called to
get the relevant vector-def for each operand of S2. For operand x it get the relevant vector-def for each operand of S2. For operand x it
returns the vector-def 'vx.0'. returns the vector-def 'vx.0'.
To create the remaining copies of the vector-stmt (VSnew.j), this To create the remaining copies of the vector-stmt (VSnew.j), this
@ -1196,7 +1199,7 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
/* Get vectorized definitions for the operands to create a copy of an original /* Get vectorized definitions for the operands to create a copy of an original
stmt. See vect_get_vec_def_for_stmt_copy() for details. */ stmt. See vect_get_vec_def_for_stmt_copy () for details. */
static void static void
vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt, vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
@ -1217,7 +1220,8 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
} }
/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */ /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
NULL. */
static void static void
vect_get_vec_defs (tree op0, tree op1, gimple stmt, vect_get_vec_defs (tree op0, tree op1, gimple stmt,
@ -1594,7 +1598,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
Create a vector stmt whose code, type, number of arguments, and result Create a vector stmt whose code, type, number of arguments, and result
variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
In the case that CODE is a CALL_EXPR, this means that a call to DECL In the case that CODE is a CALL_EXPR, this means that a call to DECL
needs to be created (DECL is a function-decl of a target-builtin). needs to be created (DECL is a function-decl of a target-builtin).
STMT is the original scalar stmt that we are vectorizing. */ STMT is the original scalar stmt that we are vectorizing. */
@ -1742,8 +1746,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
else else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
/* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies /* Multiple types in SLP are handled by creating the appropriate number of
this, so we can safely override NCOPIES with 1 here. */ vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
if (slp_node) if (slp_node)
ncopies = 1; ncopies = 1;
@ -1900,6 +1905,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
return true; return true;
} }
/* Function vectorizable_assignment. /* Function vectorizable_assignment.
Check if STMT performs an assignment (copy) that can be vectorized. Check if STMT performs an assignment (copy) that can be vectorized.
@ -2156,7 +2163,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
vf = 1; vf = 1;
/* Multiple types in SLP are handled by creating the appropriate number of /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */ case of SLP. */
if (slp_node) if (slp_node)
ncopies = 1; ncopies = 1;
@ -2243,7 +2250,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "proceeding using word mode."); fprintf (vect_dump, "proceeding using word mode.");
} }
/* Worthwhile without SIMD support? Check only during analysis. */ /* Worthwhile without SIMD support? Check only during analysis. */
if (!VECTOR_MODE_P (TYPE_MODE (vectype)) if (!VECTOR_MODE_P (TYPE_MODE (vectype))
&& vf < vect_min_worthwhile_factor (code) && vf < vect_min_worthwhile_factor (code)
&& !vec_stmt) && !vec_stmt)
@ -2270,12 +2277,12 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* Handle def. */ /* Handle def. */
vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are /* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is created in the previous stages of the recursion, so no allocation is
needed, except for the case of shift with scalar shift argument. In that needed, except for the case of shift with scalar shift argument. In that
case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE. be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
In case of loop-based vectorization we allocate VECs of size 1. We In case of loop-based vectorization we allocate VECs of size 1. We
allocate VEC_OPRNDS1 only in case of binary operation. */ allocate VEC_OPRNDS1 only in case of binary operation. */
if (!slp_node) if (!slp_node)
{ {
@ -2289,13 +2296,13 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
/* In case the vectorization factor (VF) is bigger than the number /* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. In doing so, we record a pointer vector stmt by a factor VF/nunits. In doing so, we record a pointer
from one copy of the vector stmt to the next, in the field from one copy of the vector stmt to the next, in the field
STMT_VINFO_RELATED_STMT. This is necessary in order to allow following STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
stages to find the correct vector defs to be used when vectorizing stages to find the correct vector defs to be used when vectorizing
stmts that use the defs of the current stmt. The example below illustrates stmts that use the defs of the current stmt. The example below
the vectorization process when VF=16 and nunits=4 (i.e - we need to create illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4 vectorized stmts): we need to create 4 vectorized stmts):
before vectorization: before vectorization:
RELATED_STMT VEC_STMT RELATED_STMT VEC_STMT
@ -2314,18 +2321,18 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
step2: vectorize stmt S2 (done here): step2: vectorize stmt S2 (done here):
To vectorize stmt S2 we first need to find the relevant vector To vectorize stmt S2 we first need to find the relevant vector
def for the first operand 'x'. This is, as usual, obtained from def for the first operand 'x'. This is, as usual, obtained from
the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
that defines 'x' (S1). This way we find the stmt VS1_0, and the that defines 'x' (S1). This way we find the stmt VS1_0, and the
relevant vector def 'vx0'. Having found 'vx0' we can generate relevant vector def 'vx0'. Having found 'vx0' we can generate
the vector stmt VS2_0, and as usual, record it in the the vector stmt VS2_0, and as usual, record it in the
STMT_VINFO_VEC_STMT of stmt S2. STMT_VINFO_VEC_STMT of stmt S2.
When creating the second copy (VS2_1), we obtain the relevant vector When creating the second copy (VS2_1), we obtain the relevant vector
def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
stmt VS1_0. This way we find the stmt VS1_1 and the relevant stmt VS1_0. This way we find the stmt VS1_1 and the relevant
vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
Similarly when creating stmts VS2_2 and VS2_3. This is the resulting Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
chain of stmts and pointers: chain of stmts and pointers:
RELATED_STMT VEC_STMT RELATED_STMT VEC_STMT
VS1_0: vx0 = memref0 VS1_1 - VS1_0: vx0 = memref0 VS1_1 -
@ -2348,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (op_type == binary_op && scalar_shift_arg) if (op_type == binary_op && scalar_shift_arg)
{ {
/* Vector shl and shr insn patterns can be defined with scalar /* Vector shl and shr insn patterns can be defined with scalar
operand 2 (shift operand). In this case, use constant or loop operand 2 (shift operand). In this case, use constant or loop
invariant op1 directly, without extending it to vector mode invariant op1 directly, without extending it to vector mode
first. */ first. */
optab_op2_mode = insn_data[icode].operand[2].mode; optab_op2_mode = insn_data[icode].operand[2].mode;
@ -2361,8 +2368,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
if (slp_node) if (slp_node)
{ {
/* Store vec_oprnd1 for every vector stmt to be created /* Store vec_oprnd1 for every vector stmt to be created
for SLP_NODE. We check during the analysis that all the for SLP_NODE. We check during the analysis that all
shift arguments are the same. the shift arguments are the same.
TODO: Allow different constants for different vector TODO: Allow different constants for different vector
stmts generated for an SLP instance. */ stmts generated for an SLP instance. */
for (k = 0; k < slp_node->vec_stmts_size - 1; k++) for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
@ -2415,7 +2422,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
} }
/* Get vectorized definitions for loop-based vectorization. For the first /* Get vectorized definitions for loop-based vectorization. For the first
operand we call vect_get_vec_def_for_operand() (with OPRND containing operand we call vect_get_vec_def_for_operand() (with OPRND containing
scalar operand), and for the rest we get a copy with scalar operand), and for the rest we get a copy with
vect_get_vec_def_for_stmt_copy() using the previous vector definition vect_get_vec_def_for_stmt_copy() using the previous vector definition
@ -2612,7 +2619,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
return false; return false;
/* Multiple types in SLP are handled by creating the appropriate number of /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */ case of SLP. */
if (slp_node) if (slp_node)
ncopies = 1; ncopies = 1;
@ -2702,7 +2709,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
and VEC_OPRNDS1 (for binary operations). For multi-step conversions store and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
the resulting vectors and call the function recursively. */ the resulting vectors and call the function recursively. */
static void static void
@ -2779,17 +2786,18 @@ vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
if (multi_step_cvt) if (multi_step_cvt)
{ {
/* For a multi-step promotion operation we call the /* For a multi-step promotion operation we call the
function recursively for every stage. We start from the input type, function recursively for every stage. We start from the input type,
create promotion operations to the intermediate types, and then create promotion operations to the intermediate types, and then
create promotions to the output type. */ create promotions to the output type. */
*vec_oprnds0 = VEC_copy (tree, heap, vec_tmp); *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
VEC_free (tree, heap, vec_tmp);
vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1, vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
multi_step_cvt - 1, stmt, multi_step_cvt - 1, stmt,
vec_dsts, gsi, slp_node, code1, vec_dsts, gsi, slp_node, code1,
code2, decl2, decl2, op_type, code2, decl2, decl2, op_type,
prev_stmt_info); prev_stmt_info);
} }
VEC_free (tree, heap, vec_tmp);
} }
@ -2891,7 +2899,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
return false; return false;
/* Multiple types in SLP are handled by creating the appropriate number of /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */ case of SLP. */
if (slp_node) if (slp_node)
ncopies = 1; ncopies = 1;
@ -3259,7 +3267,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
the documentation of vect_permute_store_chain()). the documentation of vect_permute_store_chain()).
In case of both multiple types and interleaving, above vector stores and In case of both multiple types and interleaving, above vector stores and
permutation stmts are created for every copy. The result vector stmts are permutation stmts are created for every copy. The result vector stmts are
put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
STMT_VINFO_RELATED_STMT for the next copies. STMT_VINFO_RELATED_STMT for the next copies.
*/ */
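A hand-written example of such an interleaved store group (stride 2; hypothetical, for illustration only): the vectorizer builds the vectors of x and y values, interleaves them with the permutation stmts described for vect_permute_store_chain(), and emits the vector stores, repeating the permutation for every copy when multiple types are involved.

#include <stddef.h>

void
interleave_store (int *restrict out, const int *restrict x,
                  const int *restrict y, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      out[2 * i]     = x[i];   /* interleaved store group, stride 2 */
      out[2 * i + 1] = y[i];
    }
}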
@ -3411,6 +3419,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
VEC_free (tree, heap, oprnds); VEC_free (tree, heap, oprnds);
if (result_chain) if (result_chain)
VEC_free (tree, heap, result_chain); VEC_free (tree, heap, result_chain);
if (vec_oprnds)
VEC_free (tree, heap, vec_oprnds);
return true; return true;
} }
@ -3476,7 +3486,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
vf = 1; vf = 1;
/* Multiple types in SLP are handled by creating the appropriate number of /* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */ case of SLP. */
if (slp) if (slp)
ncopies = 1; ncopies = 1;
@ -3603,13 +3613,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* In case the vectorization factor (VF) is bigger than the number /* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the more than one vector stmt - i.e - we need to "unroll" the
vector stmt by a factor VF/nunits. In doing so, we record a pointer vector stmt by a factor VF/nunits. In doing so, we record a pointer
from one copy of the vector stmt to the next, in the field from one copy of the vector stmt to the next, in the field
STMT_VINFO_RELATED_STMT. This is necessary in order to allow following STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
stages to find the correct vector defs to be used when vectorizing stages to find the correct vector defs to be used when vectorizing
stmts that use the defs of the current stmt. The example below illustrates stmts that use the defs of the current stmt. The example below
the vectorization process when VF=16 and nunits=4 (i.e - we need to create illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4 vectorized stmts): need to create 4 vectorized stmts):
before vectorization: before vectorization:
RELATED_STMT VEC_STMT RELATED_STMT VEC_STMT
@ -3621,7 +3631,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1. pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
Next, we create the vector stmt VS1_1, and record a pointer to Next, we create the vector stmt VS1_1, and record a pointer to
it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0. it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
Similarly, for VS1_2 and VS1_3. This is the resulting chain of Similarly, for VS1_2 and VS1_3. This is the resulting chain of
stmts and pointers: stmts and pointers:
RELATED_STMT VEC_STMT RELATED_STMT VEC_STMT
VS1_0: vx0 = memref0 VS1_1 - VS1_0: vx0 = memref0 VS1_1 -
@ -3664,9 +3674,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
STMT_VINFO_VEC_STMT is done in vect_transform_strided_load(). STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
In case of both multiple types and interleaving, the vector loads and In case of both multiple types and interleaving, the vector loads and
permutation stmts above are created for every copy. The result vector stmts permutation stmts above are created for every copy. The result vector
are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
STMT_VINFO_RELATED_STMT for the next copies. */ corresponding STMT_VINFO_RELATED_STMT for the next copies. */
/* If the data reference is aligned (dr_aligned) or potentially unaligned /* If the data reference is aligned (dr_aligned) or potentially unaligned
on a target that supports unaligned accesses (dr_unaligned_supported) on a target that supports unaligned accesses (dr_unaligned_supported)
@ -3699,7 +3709,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
/* If the misalignment remains the same throughout the execution of the /* If the misalignment remains the same throughout the execution of the
loop, we can create the init_addr and permutation mask at the loop loop, we can create the init_addr and permutation mask at the loop
preheader. Otherwise, it needs to be created inside the loop. preheader. Otherwise, it needs to be created inside the loop.
This can only occur when vectorizing memory accesses in the inner-loop This can only occur when vectorizing memory accesses in the inner-loop
nested within an outer-loop that is being vectorized. */ nested within an outer-loop that is being vectorized. */
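A sketch of a case where the misalignment is not invariant (hypothetical example): when the outer loop below is vectorized, the inner-loop access a[i][j] starts at a different offset for every outer iteration (rows of 17 floats are not a multiple of the vector size), so the realignment data would have to be created inside the loop rather than in the preheader.

void
outer_loop_example (float a[][17], float *sums, int n)
{
  for (int i = 0; i < n; i++)        /* outer loop being vectorized */
    {
      float s = 0.0f;
      for (int j = 0; j < 17; j++)   /* inner-loop memory accesses */
        s += a[i][j];
      sums[i] = s;
    }
}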
@ -3854,7 +3864,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
vect_finish_stmt_generation (stmt, new_stmt, gsi); vect_finish_stmt_generation (stmt, new_stmt, gsi);
mark_symbols_for_renaming (new_stmt); mark_symbols_for_renaming (new_stmt);
/* 3. Handle explicit realignment if necessary/supported. Create in /* 3. Handle explicit realignment if necessary/supported. Create in
loop: vec_dest = realign_load (msq, lsq, realignment_token) */ loop: vec_dest = realign_load (msq, lsq, realignment_token) */
if (alignment_support_scheme == dr_explicit_realign_optimized if (alignment_support_scheme == dr_explicit_realign_optimized
|| alignment_support_scheme == dr_explicit_realign) || alignment_support_scheme == dr_explicit_realign)
@ -4035,7 +4045,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
tree cond_expr, then_clause, else_clause; tree cond_expr, then_clause, else_clause;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause; tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
tree vec_compare, vec_cond_expr; tree vec_compare, vec_cond_expr;
tree new_temp; tree new_temp;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
@ -4365,7 +4376,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
if (!PURE_SLP_STMT (stmt_info)) if (!PURE_SLP_STMT (stmt_info))
{ {
/* Groups of strided accesses whose size is not a power of 2 are not /* Groups of strided accesses whose size is not a power of 2 are not
vectorizable yet using loop-vectorization. Therefore, if this stmt vectorizable yet using loop-vectorization. Therefore, if this stmt
feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
loop-based vectorized), the loop cannot be vectorized. */ loop-based vectorized), the loop cannot be vectorized. */
if (STMT_VINFO_STRIDED_ACCESS (stmt_info) if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
@ -4447,7 +4458,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node) if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
{ {
/* In case of interleaving, the whole chain is vectorized when the /* In case of interleaving, the whole chain is vectorized when the
last store in the chain is reached. Store stmts before the last last store in the chain is reached. Store stmts before the last
one are skipped, and there vec_stmt_info shouldn't be freed one are skipped, and there vec_stmt_info shouldn't be freed
meanwhile. */ meanwhile. */
*strided_store = true; *strided_store = true;
@ -4747,7 +4758,7 @@ get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
Returns whether a stmt with OPERAND can be vectorized. Returns whether a stmt with OPERAND can be vectorized.
For loops, supportable operands are constants, loop invariants, and operands For loops, supportable operands are constants, loop invariants, and operands
that are defined by the current iteration of the loop. Unsupportable that are defined by the current iteration of the loop. Unsupportable
operands are those that are defined by a previous iteration of the loop (as operands are those that are defined by a previous iteration of the loop (as
is the case in reduction/induction computations). is the case in reduction/induction computations).
For basic blocks, supportable operands are constants and bb invariants. For basic blocks, supportable operands are constants and bb invariants.
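A minimal sketch of an unsupportable operand (hypothetical code): 'prev' below is defined by the previous iteration of the loop, so a use of it is not a supportable operand in the sense described above.

#include <stddef.h>

void
not_simple_use (int *a, size_t n)
{
  int prev = 0;
  for (size_t i = 0; i < n; i++)
    {
      a[i] = prev + 1;   /* 'prev' was defined by the previous iteration */
      prev = a[i];
    }
}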
@ -4929,7 +4940,7 @@ vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
- CODE1 and CODE2 are codes of vector operations to be used when - CODE1 and CODE2 are codes of vector operations to be used when
vectorizing the operation, if available. vectorizing the operation, if available.
- DECL1 and DECL2 are decls of target builtin functions to be used - DECL1 and DECL2 are decls of target builtin functions to be used
when vectorizing the operation, if available. In this case, when vectorizing the operation, if available. In this case,
CODE1 and CODE2 are CALL_EXPR. CODE1 and CODE2 are CALL_EXPR.
- MULTI_STEP_CVT determines the number of required intermediate steps in - MULTI_STEP_CVT determines the number of required intermediate steps in
case of multi-step conversion (like char->short->int - in that case case of multi-step conversion (like char->short->int - in that case
@ -4973,7 +4984,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
When vectorizing outer-loops, we execute the inner-loop sequentially When vectorizing outer-loops, we execute the inner-loop sequentially
(each vectorized inner-loop iteration contributes to VF outer-loop (each vectorized inner-loop iteration contributes to VF outer-loop
iterations in parallel). We therefore don't allow to change the order iterations in parallel). We therefore don't allow to change the order
of the computation in the inner-loop during outer-loop vectorization. */ of the computation in the inner-loop during outer-loop vectorization. */
if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
@ -5086,8 +5097,9 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
*code2 = c2; *code2 = c2;
/* We assume here that there will not be more than MAX_INTERM_CVT_STEPS /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS intermediate steps in promotion sequence. We try
to get to NARROW_VECTYPE, and fail if we do not. */ MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
not. */
*interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
@ -5138,7 +5150,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
and producing a result of type VECTYPE_OUT). and producing a result of type VECTYPE_OUT).
Narrowing operations we currently support are NOP (CONVERT) and Narrowing operations we currently support are NOP (CONVERT) and
FIX_TRUNC. This function checks if these operations are supported by FIX_TRUNC. This function checks if these operations are supported by
the target platform directly via vector tree-codes. the target platform directly via vector tree-codes.
Output: Output:
@ -5206,8 +5218,9 @@ supportable_narrowing_operation (enum tree_code code,
*code1 = c1; *code1 = c1;
prev_type = vectype; prev_type = vectype;
/* We assume here that there will not be more than MAX_INTERM_CVT_STEPS /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS intermediate steps in promotion sequence. We try
to get to NARROW_VECTYPE, and fail if we do not. */ MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
not. */
*interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS); *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
{ {
@ -208,7 +208,7 @@ vectorize_loops (void)
/* ----------- Analyze loops. ----------- */ /* ----------- Analyze loops. ----------- */
/* If some loop was duplicated, it gets bigger number /* If some loop was duplicated, it gets bigger number
than all previously defined loops. This fact allows us to run than all previously defined loops. This fact allows us to run
only over initial loops skipping newly generated ones. */ only over initial loops skipping newly generated ones. */
FOR_EACH_LOOP (li, loop, 0) FOR_EACH_LOOP (li, loop, 0)
if (optimize_loop_nest_for_speed_p (loop)) if (optimize_loop_nest_for_speed_p (loop))
@ -582,6 +582,8 @@ extern VEC(vec_void_p,heap) *stmt_vec_info_vec;
void init_stmt_vec_info_vec (void); void init_stmt_vec_info_vec (void);
void free_stmt_vec_info_vec (void); void free_stmt_vec_info_vec (void);
/* Return a stmt_vec_info corresponding to STMT. */
static inline stmt_vec_info static inline stmt_vec_info
vinfo_for_stmt (gimple stmt) vinfo_for_stmt (gimple stmt)
{ {
@ -592,6 +594,8 @@ vinfo_for_stmt (gimple stmt)
return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1); return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1);
} }
/* Set vectorizer information INFO for STMT. */
static inline void static inline void
set_vinfo_for_stmt (gimple stmt, stmt_vec_info info) set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
{ {
@ -607,6 +611,8 @@ set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info); VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info);
} }
/* Return the earlier statement between STMT1 and STMT2. */
static inline gimple static inline gimple
get_earlier_stmt (gimple stmt1, gimple stmt2) get_earlier_stmt (gimple stmt1, gimple stmt2)
{ {
@ -633,6 +639,8 @@ get_earlier_stmt (gimple stmt1, gimple stmt2)
return stmt2; return stmt2;
} }
/* Return the later statement between STMT1 and STMT2. */
static inline gimple static inline gimple
get_later_stmt (gimple stmt1, gimple stmt2) get_later_stmt (gimple stmt1, gimple stmt2)
{ {
@ -659,6 +667,9 @@ get_later_stmt (gimple stmt1, gimple stmt2)
return stmt2; return stmt2;
} }
/* Return TRUE if a statement represented by STMT_INFO is a part of a
pattern. */
static inline bool static inline bool
is_pattern_stmt_p (stmt_vec_info stmt_info) is_pattern_stmt_p (stmt_vec_info stmt_info)
{ {
@ -674,6 +685,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)
return false; return false;
} }
/* Return true if BB is a loop header. */
static inline bool static inline bool
is_loop_header_bb_p (basic_block bb) is_loop_header_bb_p (basic_block bb)
{ {
@ -683,6 +696,8 @@ is_loop_header_bb_p (basic_block bb)
return false; return false;
} }
/* Set inside loop vectorization cost. */
static inline void static inline void
stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node, stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
int cost) int cost)
@ -693,6 +708,8 @@ stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost; STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
} }
/* Set inside loop vectorization cost. */
static inline void static inline void
stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node, stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
int cost) int cost)
@ -703,6 +720,8 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost; STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
} }
/* Return pow2 (X). */
static inline int static inline int
vect_pow2 (int x) vect_pow2 (int x)
{ {
@ -723,12 +742,17 @@ vect_pow2 (int x)
#define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux) #define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux)
#define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL)) #define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL))
/* Return TRUE if the data access is aligned, and FALSE otherwise. */
static inline bool static inline bool
aligned_access_p (struct data_reference *data_ref_info) aligned_access_p (struct data_reference *data_ref_info)
{ {
return (DR_MISALIGNMENT (data_ref_info) == 0); return (DR_MISALIGNMENT (data_ref_info) == 0);
} }
/* Return TRUE if the alignment of the data access is known, and FALSE
otherwise. */
static inline bool static inline bool
known_alignment_for_access_p (struct data_reference *data_ref_info) known_alignment_for_access_p (struct data_reference *data_ref_info)
{ {