mirror of git://gcc.gnu.org/git/gcc.git
tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.c: Fix documentation.
	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
	is_pattern_stmt_p, is_loop_header_bb_p,
	stmt_vinfo_set_inside_of_loop_cost,
	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
	known_alignment_for_access_p): Likewise.
	* tree-vect-loop.c: Fix documentation.
	(vect_get_cost): Start function name from new line.
	* tree-vect-data-refs.c: Fix documentation.
	* tree-vect-stmts.c: Likewise.
	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
	(vectorizable_store): Free vec_oprnds if allocated.
	(vectorizable_condition): Initialize several variables to avoid
	warnings.
	* tree-vect-slp.c: Fix documentation.

From-SVN: r164332
This commit is contained in:
parent 6be14c0ebc
commit ff802fa1f3
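Two of the mechanical fixes recorded above come from the GNU coding standards: a function definition starts the function name on a new line after the return type (the vect_get_cost hunk in tree-vect-loop.c below), and a space separates a function name from its parenthesized argument list (the "abort ();" fixes in the bb-slp-8*.c tests). A minimal compilable sketch of both conventions follows; the enum and the function body here are stand-ins for illustration, not GCC's implementation:

#include <stdlib.h>

enum vect_cost_for_stmt { scalar_stmt, vector_stmt };

/* GNU style: return type (and qualifiers) on one line, the function
   name starting the next line, so grep '^vect_get_cost' finds the
   definition.  Before the fix this read "static inline" followed by
   "int vect_get_cost (...)".  */
static inline int
vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
  return type_of_cost == vector_stmt ? 1 : 0;  /* stand-in body */
}

int
main (void)
{
  /* GNU style at call sites: a space before the argument list,
     "abort ();" rather than "abort();".  */
  if (vect_get_cost (vector_stmt) < 0)
    abort ();
  return 0;
}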
@@ -1,3 +1,22 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* tree-vectorizer.c: Fix documentation.
+	* tree-vectorizer.h (vinfo_for_stmt): Add documentation.
+	(set_vinfo_for_stmt, get_earlier_stmt, get_later_stmt,
+	is_pattern_stmt_p, is_loop_header_bb_p,
+	stmt_vinfo_set_inside_of_loop_cost,
+	stmt_vinfo_set_outside_of_loop_cost, vect_pow2, aligned_access_p,
+	known_alignment_for_access_p): Likewise.
+	* tree-vect-loop.c: Fix documentation.
+	(vect_get_cost): Start function name from new line.
+	* tree-vect-data-refs.c: Fix documentation.
+	* tree-vect-stmts.c: Likewise.
+	(vect_create_vectorized_promotion_stmts): Always free vec_tmp.
+	(vectorizable_store): Free vec_oprnds if allocated.
+	(vectorizable_condition): Initialize several variables to avoid
+	warnings.
+	* tree-vect-slp.c: Fix documentation.
+
 2010-09-16  Richard Guenther  <rguenther@suse.de>
 
 	* tree.c (tree_node_structure_for_code): TRANSLATION_UNIT_DECL
@@ -1,3 +1,9 @@
+2010-09-16  Ira Rosen  <irar@il.ibm.com>
+
+	* gcc.dg/vect/bb-slp-8.c: Fix documentation, add space between function
+	name and parentheses.
+	* gcc.dg/vect/bb-slp-8a.c, gcc.dg/vect/bb-slp-8b.c: Likewise.
+
 2010-09-15  Jason Merrill  <jason@redhat.com>
 
 	* g++.dg/parse/parameter-declaration-2.C: New.
@@ -15,8 +15,8 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
   int i;
   unsigned int a0, a1, a2, a3;
 
-  /* pin and pout may alias. But since all the loads are before the first store
-     the basic block is vectorizable. */
+  /* pin and pout may alias.  But since all the loads are before the first
+     store the basic block is vectorizable.  */
   a0 = *pin++ + 23;
   a1 = *pin++ + 142;
   a2 = *pin++ + 2;
@@ -35,7 +35,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
       || out[1] != (in[1] + 142) * y
       || out[2] != (in[2] + 2) * x
       || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
   return 0;
 }
@@ -15,7 +15,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
   int i;
   unsigned int a0, a1, a2, a3;
 
-  /* pin and pout may alias, and loads and stores are mixed. The basic block
+  /* pin and pout may alias, and loads and stores are mixed.  The basic block
      cannot be vectorized.  */
   a0 = *pin++ + 23;
   *pout++ = a0 * x;
@@ -34,7 +34,7 @@ main1 (unsigned int x, unsigned int y, unsigned int *pin, unsigned int *pout)
      || out[1] != (in[1] + 142) * y
      || out[2] != (in[2] + 2) * x
      || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
  return 0;
 }
@@ -36,7 +36,7 @@ main1 (unsigned int x, unsigned int y)
      || out[1] != (in[1] + 142) * y
      || out[2] != (in[2] + 2) * x
      || out[3] != (in[3] + 31) * y)
-    abort();
+    abort ();
 
  return 0;
 }
@@ -45,19 +45,19 @@ along with GCC; see the file COPYING3. If not see
 #include "optabs.h"
 
 /* Return the smallest scalar part of STMT.
    This is used to determine the vectype of the stmt. We generally set the
    vectype according to the type of the result (lhs). For stmts whose
    result-type is different than the type of the arguments (e.g., demotion,
    promotion), vectype will be reset appropriately (later). Note that we have
    to visit the smallest datatype in this function, because that determines the
    VF. If the smallest datatype in the loop is present only as the rhs of a
    promotion operation - we'd miss it.
    Such a case, where a variable of this datatype does not appear in the lhs
    anywhere in the loop, can only occur if it's an invariant: e.g.:
    'int_x = (int) short_inv', which we'd expect to have been optimized away by
-   invariant motion. However, we cannot rely on invariant motion to always take
-   invariants out of the loop, and so in the case of promotion we also have to
-   check the rhs.
+   invariant motion.  However, we cannot rely on invariant motion to always
+   take invariants out of the loop, and so in the case of promotion we also
+   have to check the rhs.
    LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
    types.  */
 
@@ -89,7 +89,7 @@ vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
 
 
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
-   from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
+   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
 
 int
 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
@@ -151,7 +151,7 @@ vect_insert_into_interleaving_chain (struct data_reference *dra,
 /* Function vect_update_interleaving_chain.
 
    For two data-refs DRA and DRB that are a part of a chain interleaved data
-   accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
+   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
 
    There are four possible cases:
    1. New stmts - both DRA and DRB are not a part of any chain:
@@ -211,7 +211,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
   if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
     {
       /* DRB's init is smaller than the init of the stmt previously marked
-         as the first stmt of the interleaving chain of DRA. Therefore, we
+         as the first stmt of the interleaving chain of DRA.  Therefore, we
          update FIRST_STMT and put DRB in the head of the list.  */
       DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
       DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
@@ -323,7 +323,11 @@ vect_equal_offsets (tree offset1, tree offset2)
 }
 
 
-/* Check dependence between DRA and DRB for basic block vectorization.  */
+/* Check dependence between DRA and DRB for basic block vectorization.
+   If the accesses share same bases and offsets, we can compare their initial
+   constant offsets to decide whether they differ or not.  In case of a read-
+   write dependence we check that the load is before the store to ensure that
+   vectorization will not change the order of the accesses.  */
 
 static bool
 vect_drs_dependent_in_basic_block (struct data_reference *dra,
@@ -342,7 +346,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
       return true;
     }
 
-  /* Check that the data-refs have same bases and offsets. If not, we can't
+  /* Check that the data-refs have same bases and offsets.  If not, we can't
      determine if they are dependent.  */
   if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
@@ -368,10 +372,10 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
   if (init_a != init_b)
     return false;
 
-  /* We have a read-write dependence. Check that the load is before the store.
+  /* We have a read-write dependence.  Check that the load is before the store.
      When we vectorize basic blocks, vector load can be only before
      corresponding scalar load, and vector store can be only after its
-     corresponding scalar store. So the order of the acceses is preserved in
+     corresponding scalar store.  So the order of the acceses is preserved in
      case the load is before the store.  */
   earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
   if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
@@ -383,7 +387,7 @@ vect_drs_dependent_in_basic_block (struct data_reference *dra,
 
 /* Function vect_check_interleaving.
 
-   Check if DRA and DRB are a part of interleaving. In case they are, insert
+   Check if DRA and DRB are a part of interleaving.  In case they are, insert
    DRA and DRB in an interleaving chain.  */
 
 static bool
@@ -813,7 +817,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
 
   /* In case the dataref is in an inner-loop of the loop that is being
      vectorized (LOOP), we use the base and misalignment information
-     relative to the outer-loop (LOOP). This is ok only if the misalignment
+     relative to the outer-loop (LOOP).  This is ok only if the misalignment
      stays the same throughout the execution of the inner-loop, which is why
      we have to check that the stride of the dataref in the inner-loop evenly
      divides by the vector size.  */
@@ -1241,8 +1245,8 @@ vect_peeling_hash_get_most_frequent (void **slot, void *data)
 }
 
 
-/* Traverse peeling hash table and calculate cost for each peeling option. Find
-   one with the lowest cost.  */
+/* Traverse peeling hash table and calculate cost for each peeling option.
+   Find the one with the lowest cost.  */
 
 static int
 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
@@ -1326,15 +1330,15 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
    the alignment of data references in the loop.
 
    FOR NOW: we assume that whatever versioning/peeling takes place, only the
-   original loop is to be vectorized; Any other loops that are created by
+   original loop is to be vectorized.  Any other loops that are created by
    the transformations performed in this pass - are not supposed to be
    vectorized. This restriction will be relaxed.
 
    This pass will require a cost model to guide it whether to apply peeling
    or versioning or a combination of the two. For example, the scheme that
    intel uses when given a loop with several memory accesses, is as follows:
    choose one memory access ('p') which alignment you want to force by doing
    peeling. Then, either (1) generate a loop in which 'p' is aligned and all
    other accesses are not necessarily aligned, or (2) use loop versioning to
    generate one loop in which all accesses are aligned, and another loop in
    which only 'p' is necessarily aligned.
@@ -1343,9 +1347,9 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
 
-   Devising a cost model is the most critical aspect of this work. It will
+   Devising a cost model is the most critical aspect of this work.  It will
    guide us on which access to peel for, whether to use loop versioning, how
    many versions to create, etc. The cost model will probably consist of
    generic considerations as well as target specific considerations (on
    powerpc for example, misaligned stores are more painful than misaligned
    loads).
@@ -1406,7 +1410,7 @@ vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
         }
     }
 
-   These loops are later passed to loop_transform to be vectorized. The
+   These loops are later passed to loop_transform to be vectorized.  The
    vectorizer will use the alignment information to guide the transformation
    (whether to generate regular loads/stores, or with special handling for
    misalignment).  */
@@ -1500,11 +1504,11 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
           npeel_tmp = (nelements - mis) % vf;
 
           /* For multiple types, it is possible that the bigger type access
-             will have more than one peeling option. E.g., a loop with two
+             will have more than one peeling option.  E.g., a loop with two
              types: one of size (vector size / 4), and the other one of
             size (vector size / 8). Vectorization factor will 8. If both
             access are misaligned by 3, the first one needs one scalar
             iteration to be aligned, and the second one needs 5. But the
             the first one will be aligned also by peeling 5 scalar
             iterations, and in that case both accesses will be aligned.
             Hence, except for the immediate peeling amount, we also want
@@ -1996,7 +2000,7 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
 
 
 /* Analyze groups of strided accesses: check that DR belongs to a group of
-   strided accesses of legal size, step, etc. Detect gaps, single element
+   strided accesses of legal size, step, etc.  Detect gaps, single element
    interleaving, and other special cases. Set strided access info.
    Collect groups of strided stores for further use in SLP analysis.  */
 
@@ -2072,9 +2076,10 @@ vect_analyze_group_access (struct data_reference *dr)
 
       while (next)
         {
-          /* Skip same data-refs. In case that two or more stmts share data-ref
-             (supported only for loads), we vectorize only the first stmt, and
-             the rest get their vectorized loads from the first one. */
+          /* Skip same data-refs.  In case that two or more stmts share
+             data-ref (supported only for loads), we vectorize only the first
+             stmt, and the rest get their vectorized loads from the first
+             one.  */
           if (!tree_int_cst_compare (DR_INIT (data_ref),
                                      DR_INIT (STMT_VINFO_DATA_REF (
                                                    vinfo_for_stmt (next)))))
@@ -2196,7 +2201,7 @@ vect_analyze_group_access (struct data_reference *dr)
 
       /* FORNOW: we handle only interleaving that is a power of 2.
          We don't fail here if it may be still possible to vectorize the
-         group using SLP. If not, the size of the group will be checked in
+         group using SLP.  If not, the size of the group will be checked in
          vect_analyze_operations, and the vectorization will fail.  */
       if (exact_log2 (stride) == -1)
         {
@@ -2483,8 +2488,8 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
       datarefs = BB_VINFO_DATAREFS (bb_vinfo);
     }
 
-  /* Go through the data-refs, check that the analysis succeeded. Update pointer
-     from stmt_vec_info struct to DR and vectype.  */
+  /* Go through the data-refs, check that the analysis succeeded.  Update
+     pointer from stmt_vec_info struct to DR and vectype.  */
 
   FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
     {
@@ -2572,7 +2577,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
           tree dinit;
 
           /* Build a reference to the first location accessed by the
-             inner-loop: *(BASE+INIT). (The first location is actually
+             inner-loop: *(BASE+INIT).  (The first location is actually
              BASE+INIT+OFFSET, but we add OFFSET separately later).  */
           tree inner_base = build_fold_indirect_ref
                               (fold_build2 (POINTER_PLUS_EXPR,
@@ -2712,7 +2717,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
 
 /* Function vect_get_new_vect_var.
 
-   Returns a name for a new variable. The current naming scheme appends the
+   Returns a name for a new variable.  The current naming scheme appends the
    prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
    the name of vectorizer generated variables, and appends that to NAME if
    provided.  */
@@ -2767,7 +2772,7 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
    LOOP:    Specify relative to which loop-nest should the address be computed.
             For example, when the dataref is in an inner-loop nested in an
             outer-loop that is now being vectorized, LOOP can be either the
-            outer-loop, or the inner-loop. The first memory location accessed
+            outer-loop, or the inner-loop.  The first memory location accessed
             by the following dataref ('in' points to short):
 
                for (i=0; i<N; i++)
@@ -2937,7 +2942,7 @@ vect_create_addr_base_for_vector_ref (gimple stmt,
       Return the increment stmt that updates the pointer in PTR_INCR.
 
    3. Set INV_P to true if the access pattern of the data reference in the
-      vectorized loop is invariant. Set it to false otherwise.
+      vectorized loop is invariant.  Set it to false otherwise.
 
    4. Return the pointer.  */
 
@@ -3017,7 +3022,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
       print_generic_expr (vect_dump, base_name, TDF_SLIM);
     }
 
-  /** (1) Create the new vector-pointer variable:  **/
+  /* (1) Create the new vector-pointer variable.  */
   vect_ptr_type = build_pointer_type (vectype);
   base = get_base_address (DR_REF (dr));
   if (base
@@ -3067,16 +3072,16 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
 
   add_referenced_var (vect_ptr);
 
-  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
-      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
-      def-use update cycles for the pointer: One relative to the outer-loop
+  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
+     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
+     def-use update cycles for the pointer: one relative to the outer-loop
      (LOOP), which is what steps (3) and (4) below do. The other is relative
      to the inner-loop (which is the inner-most loop containing the dataref),
      and this is done be step (5) below.
 
     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant. Steps (3),(4) create the following:
 
        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
@@ -3085,8 +3090,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
                vp2 = vp1 + step
                goto LOOP
 
-      If there is an inner-loop nested in loop, then step (5) will also be
-      applied, and an additional update in the inner-loop will be created:
+     If there is an inner-loop nested in loop, then step (5) will also be
+     applied, and an additional update in the inner-loop will be created:
 
        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
@@ -3098,8 +3103,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
                vp2 = vp1 + step
                if () goto LOOP   */
 
-  /** (3) Calculate the initial address the vector-pointer, and set
-          the vector-pointer to point to it before the loop:  **/
+  /* (2) Calculate the initial address the vector-pointer, and set
+     the vector-pointer to point to it before the loop.  */
 
   /* Create: (&(base[init_val+offset]) in the loop preheader.  */
 
@@ -3140,10 +3145,9 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   else
     vect_ptr_init = new_temp;
 
-  /** (4) Handle the updating of the vector-pointer inside the loop.
-          This is needed when ONLY_INIT is false, and also when AT_LOOP
-          is the inner-loop nested in LOOP (during outer-loop vectorization).
-   **/
+  /* (3) Handle the updating of the vector-pointer inside the loop.
+     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
+     inner-loop nested in LOOP (during outer-loop vectorization).  */
 
   /* No update in loop is required.  */
   if (only_init && (!loop_vinfo || at_loop == loop))
@@ -3182,8 +3186,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
       return vptr;
 
 
-  /** (5) Handle the updating of the vector-pointer inside the inner-loop
-          nested in LOOP, if exists:  **/
+  /* (4) Handle the updating of the vector-pointer inside the inner-loop
+     nested in LOOP, if exists.  */
 
   gcc_assert (nested_in_vect_loop);
   if (!only_init)
@@ -3358,12 +3362,12 @@ vect_strided_store_supported (tree vectype)
 
    Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
    a power of 2, generate interleave_high/low stmts to reorder the data
    correctly for the stores. Return the final references for stores in
    RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements. We assign a number to each
-   element, the input sequence is:
+   The input is 4 vectors each containing 8 elements.  We assign a number to
+   each element, the input sequence is:
 
    1st vec:   0  1  2  3  4  5  6  7
    2nd vec:   8  9 10 11 12 13 14 15
@@ -3379,18 +3383,18 @@ vect_strided_store_supported (tree vectype)
 
    i.e., we interleave the contents of the four vectors in their order.
 
   We use interleave_high/low instructions to create such output. The input of
   each interleave_high/low operation is two vectors:
       1st vec    2nd vec
      0 1 2 3    4 5 6 7
 
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector. The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7
 
 
-   The permutation is done in log LENGTH stages. In each stage interleave_high
+   The permutation is done in log LENGTH stages.  In each stage interleave_high
    and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
    where the first argument is taken from the first half of DR_CHAIN and the
    second argument from it's second half.
@@ -3582,8 +3586,7 @@ vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
    1. the misalignment computation
    2. the extra vector load (for the optimized realignment scheme).
    3. the phi node for the two vectors from which the realignment is
-      done (for the optimized realignment scheme).
-   */
+      done (for the optimized realignment scheme).  */
 
   /* 1. Determine where to generate the misalignment computation.
 
@@ -3807,7 +3810,7 @@ vect_strided_load_supported (tree vectype)
 
    Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
    a power of 2, generate extract_even/odd stmts to reorder the input data
-   correctly. Return the final references for loads in RESULT_CHAIN.
+   correctly.  Return the final references for loads in RESULT_CHAIN.
 
    E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
    The input is 4 vectors each containing 8 elements. We assign a number to each
@@ -3828,19 +3831,19 @@ vect_strided_load_supported (tree vectype)
    i.e., the first output vector should contain the first elements of each
    interleaving group, etc.
 
-   We use extract_even/odd instructions to create such output. The input of each
-   extract_even/odd operation is two vectors
+   We use extract_even/odd instructions to create such output.  The input of
+   each extract_even/odd operation is two vectors
       1st vec    2nd vec
      0 1 2 3    4 5 6 7
 
   and the output is the vector of extracted even/odd elements. The output of
   extract_even will be:   0 2 4 6
   and of extract_odd:     1 3 5 7
 
 
-   The permutation is done in log LENGTH stages. In each stage extract_even and
-   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
-   order. In our example,
+   The permutation is done in log LENGTH stages.  In each stage extract_even
+   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
+   their order.  In our example,
 
    E1: extract_even (1st vec, 2nd vec)
    E2: extract_odd (1st vec, 2nd vec)
@@ -3977,13 +3980,12 @@ vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
       if (!next_stmt)
         break;
 
       /* Skip the gaps. Loads created for the gaps will be removed by dead
         code elimination pass later. No need to check for the first stmt in
         the group, since it always exists.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
-        correspond to the gaps.
-        */
+        correspond to the gaps.  */
       if (next_stmt != first_stmt
           && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
         {
@@ -4088,8 +4090,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
 
   /* We can choose between using the implicit realignment scheme (generating
      a misaligned_move stmt) and the explicit realignment scheme (generating
-     aligned loads with a REALIGN_LOAD). There are two variants to the explicit
-     realignment scheme: optimized, and unoptimized.
+     aligned loads with a REALIGN_LOAD).  There are two variants to the
+     explicit realignment scheme: optimized, and unoptimized.
      We can optimize the realignment only if the step between consecutive
      vector loads is equal to the vector size. Since the vector memory
      accesses advance in steps of VS (Vector Size) in the vectorized loop, it
@@ -76,7 +76,7 @@ along with GCC; see the file COPYING3. If not see
    had successfully passed the analysis phase.
    Throughout this pass we make a distinction between two types of
    data: scalars (which are represented by SSA_NAMES), and memory references
-   ("data-refs"). These two types of data require different handling both
+   ("data-refs").  These two types of data require different handling both
    during analysis and transformation. The types of data-refs that the
    vectorizer currently supports are ARRAY_REFS which base is an array DECL
    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
@@ -97,10 +97,10 @@ along with GCC; see the file COPYING3. If not see
    =====================
    The loop transformation phase scans all the stmts in the loop, and
    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
-   the loop that needs to be vectorized. It inserts the vector code sequence
+   the loop that needs to be vectorized.  It inserts the vector code sequence
    just before the scalar stmt S, and records a pointer to the vector code
    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
-   attached to S). This pointer will be used for the vectorization of following
+   attached to S).  This pointer will be used for the vectorization of following
    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
    otherwise, we rely on dead code elimination for removing it.
 
@@ -112,7 +112,7 @@ along with GCC; see the file COPYING3. If not see
 
    To vectorize stmt S2, the vectorizer first finds the stmt that defines
    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
-   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
+   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
    resulting sequence would be:
 
       VS1: vb = px[i];
@@ -128,13 +128,13 @@ along with GCC; see the file COPYING3. If not see
    Currently the only target specific information that is used is the
    size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
    Targets that can support different sizes of vectors, for now will need
-   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
+   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
    flexibility will be added in the future.
 
    Since we only vectorize operations which vector form can be
    expressed using existing tree codes, to verify that an operation is
    supported, the vectorizer checks the relevant optab at the relevant
-   machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
+   machine_mode (e.g, optab_handler (add_optab, V8HImode)).  If
    the value found is CODE_FOR_nothing, then there's no target support, and
    we can't vectorize the stmt.
 
@@ -144,14 +144,14 @@ along with GCC; see the file COPYING3. If not see
 
 /* Function vect_determine_vectorization_factor
 
-   Determine the vectorization factor (VF). VF is the number of data elements
+   Determine the vectorization factor (VF).  VF is the number of data elements
    that are operated upon in parallel in a single iteration of the vectorized
    loop. For example, when vectorizing a loop that operates on 4byte elements,
    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
    elements can fit in a single vector register.
 
    We currently support vectorization of loops in which all types operated upon
-   are of the same size. Therefore this function currently sets VF according to
+   are of the same size.  Therefore this function currently sets VF according to
    the size of the types operated upon, and fails if there are multiple sizes
    in the loop.
 
@@ -438,7 +438,7 @@ vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
 /* Function vect_analyze_scalar_cycles_1.
 
    Examine the cross iteration def-use cycles of scalar variables
-   in LOOP. LOOP_VINFO represents the loop that is now being
+   in LOOP.  LOOP_VINFO represents the loop that is now being
    considered for vectorization (can be LOOP, or an outer-loop
    enclosing LOOP).  */
 
@@ -454,7 +454,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
 
-  /* First - identify all inductions. Reduction detection assumes that all the
+  /* First - identify all inductions.  Reduction detection assumes that all the
      inductions have been identified, therefore, this order must not be
      changed.  */
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -470,7 +470,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
           print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
         }
 
-      /* Skip virtual phi's. The data dependences that are associated with
+      /* Skip virtual phi's.  The data dependences that are associated with
          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
       if (!is_gimple_reg (SSA_NAME_VAR (def)))
         continue;
@@ -569,7 +569,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
 /* Function vect_analyze_scalar_cycles.
 
    Examine the cross iteration def-use cycles of scalar variables, by
-   analyzing the loop-header PHIs of scalar variables; Classify each
+   analyzing the loop-header PHIs of scalar variables.  Classify each
    cycle as one of the following: invariant, induction, reduction, unknown.
    We do that for the loop represented by LOOP_VINFO, and also to its
    inner-loop, if exists.
@@ -1125,8 +1125,8 @@ vect_analyze_loop_form (struct loop *loop)
 
 /* Get cost by calling cost target builtin.  */
 
-static inline
-int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
+static inline int
+vect_get_cost (enum vect_cost_for_stmt type_of_cost)
 {
   tree dummy_type = NULL;
   int dummy = 0;
@@ -1301,7 +1301,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
           return false;
         }
 
-      /* Analyze cost. Decide if worth while to vectorize. */
+      /* Analyze cost.  Decide if worth while to vectorize.  */
 
      /* Once VF is set, SLP costs should be updated since the number of created
         vector stmts depends on VF.  */
@@ -1374,7 +1374,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 /* Function vect_analyze_loop.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
-   for it. The different analyses will record information in the
+   for it.  The different analyses will record information in the
    loop_vec_info struct.  */
 loop_vec_info
 vect_analyze_loop (struct loop *loop)
@@ -1594,7 +1594,7 @@ reduction_code_for_scalar_code (enum tree_code code,
 }
 
 
-/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
+/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE statement
    STMT is printed with a message MSG.  */
 
 static void
@@ -1608,7 +1608,7 @@ report_vect_op (gimple stmt, const char *msg)
 /* Function vect_is_simple_reduction_1
 
    (1) Detect a cross-iteration def-use cycle that represents a simple
-   reduction computation. We look for the following pattern:
+   reduction computation.  We look for the following pattern:
 
    loop_header:
      a1 = phi < a0, a2 >
@@ -2023,7 +2023,7 @@ vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
   int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
   int innerloop_iters, i, stmt_cost;
 
-  /* Count statements in scalar loop. Using this as scalar cost for a single
+  /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
 
      TODO: Add outer loop support.
@@ -2308,7 +2308,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
      something more reasonable.  */
 
   /* If the number of iterations is known and we do not do versioning, we can
-     decide whether to vectorize at compile time. Hence the scalar version
+     decide whether to vectorize at compile time.  Hence the scalar version
      do not carry cost model guard costs.  */
   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
@@ -2339,7 +2339,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
     }
 
   /* Calculate number of iterations required to make the vector version
-     profitable, relative to the loop bodies only. The following condition
+     profitable, relative to the loop bodies only.  The following condition
      must hold true:
      SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
      where
@@ -2556,7 +2556,7 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
 
    Output:
    Return a vector variable, initialized with the first VF values of
-   the induction variable. E.g., for an iv with IV_PHI='X' and
+   the induction variable.  E.g., for an iv with IV_PHI='X' and
    evolution S, for a vector of 4 units, we want to return:
    [X, X + S, X + 2*S, X + 3*S].  */
 
@@ -2638,8 +2638,8 @@ get_initial_def_for_induction (gimple iv_phi)
   if (nested_in_vect_loop)
     {
       /* iv_loop is nested in the loop to be vectorized. init_expr had already
-         been created during vectorization of previous stmts; We obtain it from
-         the STMT_VINFO_VEC_STMT of the defining stmt.  */
+         been created during vectorization of previous stmts.  We obtain it
+         from the STMT_VINFO_VEC_STMT of the defining stmt.  */
       tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                            loop_preheader_edge (iv_loop));
       vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
@@ -2905,7 +2905,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
   gcc_assert (loop == (gimple_bb (stmt))->loop_father);
 
   /* In case of double reduction we only create a vector variable to be put
-     in the reduction phi node. The actual statement creation is done in
+     in the reduction phi node.  The actual statement creation is done in
      vect_create_epilog_for_reduction.  */
   if (adjustment_def && nested_in_vect_loop
       && TREE_CODE (init_val) == SSA_NAME
@@ -3023,7 +3023,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
    reduction statements.
    STMT is the scalar reduction stmt that is being vectorized.
    NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
-   number of elements that we can fit in a vectype (nunits). In this case
+   number of elements that we can fit in a vectype (nunits).  In this case
    we have to generate more than one vector stmt - i.e - we need to "unroll"
-   the vector stmt by a factor VF/nunits. For more details see documentation
+   the vector stmt by a factor VF/nunits.  For more details see documentation
    in vectorizable_operation.
@ -3314,7 +3314,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
|
||||||
/* In case this is a reduction in an inner-loop while vectorizing an outer
|
/* In case this is a reduction in an inner-loop while vectorizing an outer
|
||||||
loop - we don't need to extract a single scalar result at the end of the
|
loop - we don't need to extract a single scalar result at the end of the
|
||||||
inner-loop (unless it is double reduction, i.e., the use of reduction is
|
inner-loop (unless it is double reduction, i.e., the use of reduction is
|
||||||
outside the outer-loop). The final vector of partial results will be used
|
outside the outer-loop). The final vector of partial results will be used
|
||||||
in the vectorized outer-loop, or reduced to a scalar result at the end of
|
in the vectorized outer-loop, or reduced to a scalar result at the end of
|
||||||
the outer-loop. */
|
the outer-loop. */
|
||||||
if (nested_in_vect_loop && !double_reduc)
|
if (nested_in_vect_loop && !double_reduc)
|
||||||
|
|
@@ -3473,7 +3473,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     }
 
   /* The only case where we need to reduce scalar results in SLP, is
      unrolling.  If the size of SCALAR_RESULTS is greater than
      GROUP_SIZE, we reduce them combining elements modulo
      GROUP_SIZE.  */
   if (slp_node)
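
The combining rule in this hunk's comment can be pictured outside GCC: with SLP unrolling there are more partial scalar results than reductions in the group, and results whose indices agree modulo GROUP_SIZE belong to the same reduction.  A minimal standalone sketch in plain C (illustrative values only, not GCC internals):

#include <stdio.h>

/* Standalone illustration: combine partial scalar results of an
   unrolled SLP reduction.  Results i and i + GROUP_SIZE belong to
   the same reduction chain, so they are added pairwise.  */
#define GROUP_SIZE 2

int
main (void)
{
  /* Four partial sums produced for two reductions (unroll factor 2).  */
  int scalar_results[] = { 1, 10, 2, 20 };
  int n = sizeof (scalar_results) / sizeof (scalar_results[0]);

  /* Fold element i into element i % GROUP_SIZE.  */
  for (int i = GROUP_SIZE; i < n; i++)
    scalar_results[i % GROUP_SIZE] += scalar_results[i];

  printf ("reduction 0 = %d, reduction 1 = %d\n",
          scalar_results[0], scalar_results[1]);  /* 3 and 30 */
  return 0;
}
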
@@ -3579,7 +3579,7 @@ vect_finalize_reduction:
           VEC_replace (gimple, new_phis, 0, epilog_stmt);
         }
 
   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
           phis with new adjusted scalar results, i.e., replace use <s_out0>
           with use <s_out4>.
 
@@ -3605,8 +3605,8 @@ vect_finalize_reduction:
           use <s_out4> */
 
   /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
      case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
      need to match SCALAR_RESULTS with corresponding statements.  The first
      (GROUP_SIZE / number of new vector stmts) scalar results correspond to
      the first vector stmt, etc.
      (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
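
The RATIO bookkeeping above is plain index arithmetic.  A standalone sketch with hypothetical numbers (not GCC internals): with GROUP_SIZE = 8 and 4 new vector stmts, RATIO = 2, so scalar results 0 and 1 pair with vector stmt 0, results 2 and 3 with stmt 1, and so on.

#include <stdio.h>

/* Standalone sketch of matching SCALAR_RESULTS to vector stmts via
   RATIO = GROUP_SIZE / number of new vector stmts.  */
int
main (void)
{
  int group_size = 8, new_vector_stmts = 4;
  int ratio = group_size / new_vector_stmts;

  for (int i = 0; i < group_size; i++)
    printf ("scalar result %d -> vector stmt %d\n", i, i / ratio);
  return 0;
}
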
@@ -3639,7 +3639,7 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
          result.  (The reduction result is expected to have two immediate uses -
          one at the latch block, and one at the loop exit).  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
@@ -3740,7 +3740,7 @@ vect_finalize_reduction:
                   vect_phi_res = PHI_RESULT (vect_phi);
 
                   /* Replace the use, i.e., set the correct vs1 in the regular
                      reduction phi node.  FORNOW, NCOPIES is always 1, so the
                      loop is redundant.  */
                   use = reduction_phi;
                   for (j = 0; j < ncopies; j++)
@@ -3764,8 +3764,8 @@ vect_finalize_reduction:
 
       phis = VEC_alloc (gimple, heap, 3);
       /* Find the loop-closed-use at the loop exit of the original scalar
-         result.  (The reduction result is expected to have two immediate uses -
+         result.  (The reduction result is expected to have two immediate uses,
          one at the latch block, and one at the loop exit).  For double
          reductions we are looking for exit phis of the outer loop.  */
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
         {
@@ -3814,7 +3814,7 @@ vect_finalize_reduction:
    Return FALSE if not a vectorizable STMT, TRUE otherwise.
 
    This function also handles reduction idioms (patterns) that have been
    recognized in advance during vect_pattern_recog.  In this case, STMT may be
    of this form:
      X = pattern_expr (arg0, arg1, ..., X)
    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
@@ -3835,9 +3835,9 @@ vect_finalize_reduction:
 
    Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
    indicates what is the actual level of parallelism (V8HI in the example), so
    that the right vectorization factor would be derived.  This vectype
    corresponds to the type of arguments to the reduction stmt, and should *NOT*
    be used to create the vectorized stmt.  The right vectype for the vectorized
    stmt is obtained from the type of the result X:
      get_vectype_for_scalar_type (TREE_TYPE (X))
 
@@ -3934,7 +3934,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
     }
 
   /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */
 
@@ -3979,7 +3979,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* All uses but the last are expected to be defined in the loop.
      The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
   for (i = 0; i < op_type-1; i++)
@@ -4110,7 +4110,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
           1. The tree-code that is used to create the vector operation in the
              epilog code (that reduces the partial results) is not the
              tree-code of STMT, but is rather the tree-code of the original
              stmt from the pattern that STMT is replacing.  I.e, in the example
              above we want to use 'widen_sum' in the loop, but 'plus' in the
              epilog.
           2. The type (mode) we use to check available target support
@@ -4513,7 +4513,7 @@ vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
 
 /* Function vectorizable_live_operation.
 
    STMT computes a value that is used outside the loop.  Check if
    it can be supported.  */
 
 bool
@@ -4554,7 +4554,7 @@ vectorizable_live_operation (gimple stmt,
   gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
   gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
 
   /* FORNOW: support only if all uses are invariant.  This means
      that the scalar operations can remain in place, unvectorized.
      The original last scalar value that they compute will be used.  */
 
@@ -4665,7 +4665,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
      compile time constant), or it is a constant that doesn't divide by the
      vectorization factor, then an epilog loop needs to be created.
      We therefore duplicate the loop: the original loop will be vectorized,
      and will compute the first (n/VF) iterations.  The second copy of the loop
      will remain scalar and will compute the remaining (n%VF) iterations.
      (VF is the vectorization factor).  */
 
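
The loop duplication this comment describes can be sketched in standalone C.  The following is illustrative only (VF = 4 is an assumed factor, and the "vector" body is written as an unrolled scalar loop): the first copy handles n - n % VF iterations in VF-sized steps, the scalar epilog handles the remaining n % VF.

#include <stdio.h>

#define VF 4  /* vectorization factor (illustrative) */

/* Standalone sketch: "vectorized" main loop plus scalar epilog.  */
int
sum (const int *a, int n)
{
  int s = 0, i;

  /* Main loop: one step per VF scalar iterations.  */
  for (i = 0; i + VF <= n; i += VF)
    s += a[i] + a[i + 1] + a[i + 2] + a[i + 3];

  /* Scalar epilog loop for the remaining n % VF iterations.  */
  for (; i < n; i++)
    s += a[i];

  return s;
}

int
main (void)
{
  int a[] = { 1, 2, 3, 4, 5, 6, 7 };
  printf ("%d\n", sum (a, 7));  /* 28 */
  return 0;
}
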
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -147,7 +147,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
     }
 
   /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
      from the pattern.  Check that all the stmts of the node are in the
      pattern.  */
   if (loop && def_stmt && gimple_bb (def_stmt)
       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
@@ -299,7 +299,7 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
 /* Recursively build an SLP tree starting from NODE.
    Fail (and return FALSE) if def-stmts are not isomorphic, require data
    permutation or are of unsupported types of operation.  Otherwise, return
    TRUE.  */
 
 static bool
@@ -542,7 +542,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
               if (prev_first_load)
                 {
                   /* Check that there are no loads from different interleaving
                      chains in the same node.  The only exception is complex
                      numbers.  */
                   if (prev_first_load != first_load
                       && rhs_code != REALPART_EXPR
@@ -582,7 +582,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                                         ncopies_for_cost, *node);
             }
 
           /* Store the place of this load in the interleaving chain.  In
              case that permutation is needed we later decide if a specific
              permutation is supported.  */
           load_place = vect_get_place_in_interleaving_chain (stmt,
@@ -729,7 +729,7 @@ vect_print_slp_tree (slp_tree node)
 
 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
    If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
    J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
    stmts in NODE are to be marked.  */
 
 static void
@@ -897,7 +897,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
   /* In case of reduction every load permutation is allowed, since the order
      of the reduction statements is not important (as opposed to the case of
      strided stores).  The only condition we need to check is that all the
      load nodes are of the same size and have the same permutation (and then
      rearrange all the nodes of the SLP instance according to this
      permutation).  */
@@ -920,7 +920,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
        real_c = real_b + real_a;
        imag_c = imag_a + imag_b;
    i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
    {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
    chains are mixed, they match the above pattern.  */
   if (complex_numbers)
     {
@@ -969,7 +969,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
       stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
       /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
          instance, not all the loads belong to the same node or interleaving
          group.  Hence, we need to divide them into groups according to
          GROUP_SIZE.  */
       number_of_groups = VEC_length (int, load_permutation) / group_size;
 
@@ -1002,7 +1002,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
       if (!bad_permutation)
         {
          /* This permutaion is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
@@ -1064,9 +1064,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 /* Find the first load in the loop that belongs to INSTANCE.
    When loads are in several SLP nodes, there can be a case in which the first
    load does not appear in the first SLP node to be transformed, causing
    incorrect order of statements.  Since we generate all the loads together,
    they must be inserted before the first load of the SLP instance and not
    before the first load of the first node of the instance.  */
+
 static gimple
 vect_find_first_load_in_slp_instance (slp_instance instance)
 {
@@ -1083,6 +1084,7 @@ vect_find_first_load_in_slp_instance (slp_instance instance)
 
+
 /* Find the last store in SLP INSTANCE.  */
 
 static gimple
 vect_find_last_store_in_slp_instance (slp_instance instance)
 {
@@ -1100,7 +1102,7 @@ vect_find_last_store_in_slp_instance (slp_instance instance)
 }
 
 
 /* Analyze an SLP instance starting from a group of strided stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
    Return FALSE if it's impossible to SLP any stmt in the loop.  */
 
@@ -1274,7 +1276,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 }
 
 
 /* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
    trees of packed scalar stmts if SLP is possible.  */
 
 bool
@@ -1339,9 +1341,9 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
       if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
         unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
 
       /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
       decided_to_slp++;
     }
@@ -1355,7 +1357,7 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
 
 
 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
    can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
 
 static void
 vect_detect_hybrid_slp_stmts (slp_tree node)
@@ -1493,7 +1495,7 @@ vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
 }
 
 
 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
    operations are supported.  */
 
 static bool
@@ -1523,7 +1525,7 @@ vect_slp_analyze_operations (bb_vec_info bb_vinfo)
 
 /* Check if loads and stores are mixed in the basic block (in that
    case if we are not sure that the accesses differ, we can't vectorize the
    basic block).  Also return FALSE in case that there is statement marked as
    not vectorizable.  */
 
 static bool
@@ -1783,11 +1785,11 @@ vect_slp_analyze_bb (basic_block bb)
 
 
 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
-   the number of created vector stmts depends on the unrolling factor). However,
-   the actual number of vector stmts for every SLP node depends on VF which is
-   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
-   In this function we assume that the inside costs calculated in
-   vect_model_xxx_cost are linear in ncopies. */
+   the number of created vector stmts depends on the unrolling factor).
+   However, the actual number of vector stmts for every SLP node depends on
+   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
+   should be updated.  In this function we assume that the inside costs
+   calculated in vect_model_xxx_cost are linear in ncopies.  */
 
 void
 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
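
The linearity assumption in that comment is what makes a cheap rescaling possible.  A standalone sketch with hypothetical numbers (not the GCC cost API): if the inside cost was computed for some provisional ncopies and the real VF implies a different ncopies, the cost can be scaled instead of recomputed.

#include <stdio.h>

/* Illustrative only: rescale an SLP inside-of-loop cost once VF is
   known, assuming the cost is linear in ncopies.  */
int
main (void)
{
  int cost_at_analysis = 12;      /* computed with ncopies = 2 */
  int ncopies_at_analysis = 2;
  int ncopies_final = 4;          /* known once VF is set */

  int cost_final = cost_at_analysis / ncopies_at_analysis * ncopies_final;
  printf ("updated SLP inside cost: %d\n", cost_final);  /* 24 */
  return 0;
}
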
@@ -1846,7 +1848,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
       op_num = reduc_index - 1;
       op = gimple_op (stmt, op_num + 1);
       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
          we need either neutral operands or the original operands.  See
          get_initial_def_for_reduction() for details.  */
       switch (code)
         {
@@ -2051,7 +2053,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
       number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
       /* Number of vector stmts was calculated according to LHS in
          vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
-         necessary.  See vect_get_smallest_scalar_type() for details.  */
+         necessary.  See vect_get_smallest_scalar_type () for details.  */
       vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                      &rhs_size_unit);
       if (rhs_size_unit != lhs_size_unit)
@@ -2065,7 +2067,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
   *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
 
   /* SLP_NODE corresponds either to a group of stores or to a group of
      unary/binary operations.  We don't call this function for loads.
      For reduction defs we call vect_get_constant_vectors(), since we are
      looking for initial loop invariant values.  */
   if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
@@ -2167,7 +2169,7 @@ vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
 
 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
    return in CURRENT_MASK_ELEMENT its equivalent in target specific
    representation.  Check that the mask is valid and return FALSE if not.
    Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
    the next vector, i.e., the current first vector is not needed.  */
 
@@ -2321,8 +2323,8 @@ vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
    The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
    scpecific type, e.g., in bytes for Altivec.
    The last mask is illegal since we assume two operands for permute
-   operation, and the mask element values can't be outside that range.  Hence,
-   the last mask must be converted into {2,5,5,5}.
+   operation, and the mask element values can't be outside that range.
+   Hence, the last mask must be converted into {2,5,5,5}.
    For the first two permutations we need the first and the second input
    vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
    we need the second and the third vectors: {b1,c1,a2,b2} and
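
The {6,9,9,9} to {2,5,5,5} conversion in the comment can be sketched in standalone C.  This is not the GCC function, only an illustration of the rule it describes: a permute takes two input vectors, so every mask element must be below 2 * nunits; when the whole mask has shifted past the first vector, that vector is dropped ("move to the next vector") and each element is reduced by nunits.

#include <stdbool.h>
#include <stdio.h>

#define NUNITS 4  /* elements per vector (illustrative) */

/* Illustrative mask check and conversion, not GCC code.  */
static bool
convert_mask (int *mask, int n, bool *need_next_vector)
{
  *need_next_vector = false;

  if (mask[0] >= NUNITS)
    {
      /* E.g., {6,9,9,9} becomes {2,5,5,5}.  */
      *need_next_vector = true;
      for (int i = 0; i < n; i++)
        mask[i] -= NUNITS;
    }

  for (int i = 0; i < n; i++)
    if (mask[i] < 0 || mask[i] >= 2 * NUNITS)
      return false;  /* mask not representable */
  return true;
}

int
main (void)
{
  int mask[] = { 6, 9, 9, 9 };
  bool next;

  if (convert_mask (mask, 4, &next))
    printf ("{%d,%d,%d,%d}, need next vector: %d\n",
            mask[0], mask[1], mask[2], mask[3], next);
  return 0;
}
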
@@ -2438,7 +2440,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
   group_size = SLP_INSTANCE_GROUP_SIZE (instance);
 
   /* For each SLP instance calculate number of vector stmts to be created
      for the scalar stmts in each node of the SLP tree.  Number of vector
      elements in one vector iteration is the number of scalar elements in
      one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
      size.  */
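
The arithmetic in that comment is worth spelling out once with concrete (illustrative) numbers: GROUP_SIZE scalar elements per scalar iteration, times VF iterations covered by one vector iteration, divided by the elements one vector holds.

#include <stdio.h>

/* Standalone sketch of the vector-stmt count, illustrative numbers.  */
int
main (void)
{
  int group_size = 4;   /* scalar stmts in the SLP group */
  int vf = 8;           /* vectorization factor */
  int nunits = 8;       /* elements in one vector */

  int vec_stmts = group_size * vf / nunits;
  printf ("vector stmts per node: %d\n", vec_stmts);  /* 4 */
  return 0;
}
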
@@ -2492,6 +2494,8 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
 }
 
 
+/* Generate vector code for all SLP instances in the loop/basic block.  */
+
 bool
 vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -166,7 +166,7 @@ vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
 
 /* Function exist_non_indexing_operands_for_use_p
 
    USE is one of the uses attached to STMT.  Check if USE is
    used in STMT for anything other than indexing an array.  */
 
 static bool
@@ -175,7 +175,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
   tree operand;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
   /* USE corresponds to some operand in STMT.  If there is no data
      reference in STMT, then any operand that corresponds to USE
      is not indexing an array.  */
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -215,7 +215,7 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
    Inputs:
    - a USE in STMT in a loop represented by LOOP_VINFO
    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
      that defined USE.  This is done by calling mark_relevant and passing it
      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
 
    Outputs:
@@ -466,7 +466,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
            relevant = vect_used_by_reduction
          This is because we distinguish between two kinds of relevant stmts -
          those that are used by a reduction computation, and those that are
          (also) used by a regular computation.  This allows us later on to
          identify stmts that are used solely by a reduction, and therefore the
          order of the results that they produce does not have to be kept.  */
 
@@ -558,6 +558,9 @@ int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
                        dummy_type, dummy);
 }
 
+
+/* Get cost for STMT.  */
+
 int
 cost_for_stmt (gimple stmt)
 {
@@ -870,10 +873,10 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
                  "pipelined.");
 
       /* Unaligned software pipeline has a load of an address, an initial
         load, and possibly a mask operation to "prime" the loop.  However,
        if this is an access in a group of loads, which provide strided
         access, then the above cost should only be considered for one
         access in the group.  Inside the loop, there is a load op
         and a realignment op.  */
 
       if (add_realign_cost)
@@ -897,8 +900,8 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
 /* Function vect_init_vector.
 
    Insert a new stmt (INIT_STMT) that initializes a new vector variable with
    the vector elements of VECTOR_VAR.  Place the initialization at BSI if it
    is not NULL.  Otherwise, place the initialization at the loop preheader.
    Return the DEF of INIT_STMT.
    It will be used in the vectorization of STMT.  */
 
@@ -963,7 +966,7 @@ vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
 
 /* Function vect_get_vec_def_for_operand.
 
    OP is an operand in STMT.  This function returns a (vector) def that will be
    used in the vectorized stmt for STMT.
 
    In the case that OP is an SSA_NAME which is defined in the loop, then
@@ -1117,10 +1120,10 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
 
 /* Function vect_get_vec_def_for_stmt_copy
 
    Return a vector-def for an operand.  This function is used when the
    vectorized stmt to be created (by the caller to this function) is a "copy"
    created in case the vectorized result cannot fit in one vector, and several
    copies of the vector-stmt are required.  In this case the vector-def is
    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
    of the stmt that defines VEC_OPRND.
    DT is the type of the vector def VEC_OPRND.
@@ -1128,7 +1131,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
    Context:
         In case the vectorization factor (VF) is bigger than the number
    of elements that can fit in a vectype (nunits), we have to generate
    more than one vector stmt to vectorize the scalar stmt.  This situation
    arises when there are multiple data-types operated upon in the loop; the
    smallest data-type determines the VF, and as a result, when vectorizing
    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
@@ -1153,7 +1156,7 @@ vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
    The vectorization of S2:
         To create the first vector-stmt out of the 4 copies - VSnew.0 -
    the function 'vect_get_vec_def_for_operand' is called to
    get the relevant vector-def for each operand of S2.  For operand x it
    returns the vector-def 'vx.0'.
 
         To create the remaining copies of the vector-stmt (VSnew.j), this
@@ -1196,7 +1199,7 @@ vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
 
 
 /* Get vectorized definitions for the operands to create a copy of an original
-   stmt.  See vect_get_vec_def_for_stmt_copy() for details.  */
+   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
 
 static void
 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
@@ -1217,7 +1220,8 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
 }
 
 
-/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
+/* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not
+   NULL.  */
 
 static void
 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
@@ -1594,7 +1598,7 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
 
    Create a vector stmt whose code, type, number of arguments, and result
    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
    In the case that CODE is a CALL_EXPR, this means that a call to DECL
    needs to be created (DECL is a function-decl of a target-builtin).
    STMT is the original scalar stmt that we are vectorizing.  */
@@ -1742,8 +1746,9 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   else
     ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
 
-  /* FORNOW: SLP with multiple types is not supported.  The SLP analysis verifies
-     this, so we can safely override NCOPIES with 1 here.  */
+  /* Multiple types in SLP are handled by creating the appropriate number of
+     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
+     case of SLP.  */
   if (slp_node)
     ncopies = 1;
 
@@ -1900,6 +1905,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
   return true;
 }
 
+
+
 /* Function vectorizable_assignment.
 
    Check if STMT performs an assignment (copy) that can be vectorized.
@@ -2156,7 +2163,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     vf = 1;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -2243,7 +2250,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
           fprintf (vect_dump, "proceeding using word mode.");
     }
 
   /* Worthwhile without SIMD support?  Check only during analysis.  */
   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
       && vf < vect_min_worthwhile_factor (code)
       && !vec_stmt)
@@ -2270,12 +2277,12 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* Handle def.  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
   /* Allocate VECs for vector operands.  In case of SLP, vector operands are
      created in the previous stages of the recursion, so no allocation is
      needed, except for the case of shift with scalar shift argument.  In that
      case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
      be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
      In case of loop-based vectorization we allocate VECs of size 1.  We
      allocate VEC_OPRNDS1 only in case of binary operation.  */
   if (!slp_node)
     {
@@ -2289,13 +2296,13 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
      from one copy of the vector stmt to the next, in the field
      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
      stages to find the correct vector defs to be used when vectorizing
-     stmts that use the defs of the current stmt.  The example below illustrates
-     the vectorization process when VF=16 and nunits=4 (i.e - we need to create
-     4 vectorized stmts):
+     stmts that use the defs of the current stmt.  The example below
+     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
+     we need to create 4 vectorized stmts):
 
      before vectorization:
                                 RELATED_STMT    VEC_STMT
@@ -2314,18 +2321,18 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 
      step2: vectorize stmt S2 (done here):
         To vectorize stmt S2 we first need to find the relevant vector
         def for the first operand 'x'.  This is, as usual, obtained from
         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
         relevant vector def 'vx0'.  Having found 'vx0' we can generate
         the vector stmt VS2_0, and as usual, record it in the
         STMT_VINFO_VEC_STMT of stmt S2.
         When creating the second copy (VS2_1), we obtain the relevant vector
         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
         chain of stmts and pointers:
                                 RELATED_STMT    VEC_STMT
         VS1_0:  vx0 = memref0   VS1_1           -
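
The VS1_0 -> VS1_1 -> VS1_2 -> VS1_3 chain described in that comment is in effect a singly linked list threaded through the stmt copies.  A standalone sketch with hypothetical types (not GCC's stmt_vec_info): the consumer of copy j of a def finds it by walking j links from the first vector stmt.

#include <stdio.h>

/* Illustrative stand-in for the RELATED_STMT chain, not GCC code.  */
struct vec_stmt
{
  const char *name;
  struct vec_stmt *related;  /* next copy, like STMT_VINFO_RELATED_STMT */
};

int
main (void)
{
  struct vec_stmt vs1_3 = { "VS1_3", 0 };
  struct vec_stmt vs1_2 = { "VS1_2", &vs1_3 };
  struct vec_stmt vs1_1 = { "VS1_1", &vs1_2 };
  struct vec_stmt vs1_0 = { "VS1_0", &vs1_1 };

  /* Find the def for copy j = 2 by following two RELATED_STMT links.  */
  struct vec_stmt *def = &vs1_0;
  for (int j = 0; j < 2; j++)
    def = def->related;
  printf ("copy 2 uses %s\n", def->name);  /* VS1_2 */
  return 0;
}
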
@@ -2348,7 +2355,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
           if (op_type == binary_op && scalar_shift_arg)
             {
               /* Vector shl and shr insn patterns can be defined with scalar
                  operand 2 (shift operand).  In this case, use constant or loop
                  invariant op1 directly, without extending it to vector mode
                  first.  */
               optab_op2_mode = insn_data[icode].operand[2].mode;
@@ -2361,8 +2368,8 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
                   if (slp_node)
                     {
                       /* Store vec_oprnd1 for every vector stmt to be created
-                         for SLP_NODE.  We check during the analysis that all the
-                         shift arguments are the same.
+                         for SLP_NODE.  We check during the analysis that all
+                         the shift arguments are the same.
                          TODO: Allow different constants for different vector
                          stmts generated for an SLP instance.  */
                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
@@ -2415,7 +2422,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
 
 /* Get vectorized definitions for loop-based vectorization.  For the first
    operand we call vect_get_vec_def_for_operand() (with OPRND containing
    scalar operand), and for the rest we get a copy with
    vect_get_vec_def_for_stmt_copy() using the previous vector definition
@@ -2612,7 +2619,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
     return false;
 
   /* Multiple types in SLP are handled by creating the appropriate number of
      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
      case of SLP.  */
   if (slp_node)
     ncopies = 1;
@@ -2702,7 +2709,7 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
 
 
 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
    the resulting vectors and call the function recursively.  */
 
 static void
@@ -2779,17 +2786,18 @@ vect_create_vectorized_promotion_stmts (VEC (tree, heap) **vec_oprnds0,
   if (multi_step_cvt)
     {
       /* For multi-step promotion operation we first generate we call the
         function recurcively for every stage.  We start from the input type,
         create promotion operations to the intermediate types, and then
         create promotions to the output type.  */
       *vec_oprnds0 = VEC_copy (tree, heap, vec_tmp);
-      VEC_free (tree, heap, vec_tmp);
       vect_create_vectorized_promotion_stmts (vec_oprnds0, vec_oprnds1,
                                               multi_step_cvt - 1, stmt,
                                               vec_dsts, gsi, slp_node, code1,
                                               code2, decl2, decl2, op_type,
                                               prev_stmt_info);
     }
+
+  VEC_free (tree, heap, vec_tmp);
 }
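
The hunk above moves the VEC_free out of the multi_step_cvt branch so the temporary is released on every path (the ChangeLog's "Always free vec_tmp").  A standalone C sketch of the same pattern, using plain malloc/free rather than GCC's VEC API:

#include <stdlib.h>

/* Illustrative only: release the temporary unconditionally, not just
   inside the branch, so the single-step case does not leak.  */
static void
process (int multi_step, int n)
{
  int *tmp = malloc (n * sizeof (int));
  if (!tmp)
    return;

  if (multi_step)
    {
      /* ... recurse using tmp ... */
    }

  /* Freed on every path, as in the fixed code above.  */
  free (tmp);
}

int
main (void)
{
  process (0, 8);  /* single-step: previously the leaking shape */
  process (1, 8);
  return 0;
}
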
@ -2891,7 +2899,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* Multiple types in SLP are handled by creating the appropriate number of
|
/* Multiple types in SLP are handled by creating the appropriate number of
|
||||||
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
|
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
|
||||||
case of SLP. */
|
case of SLP. */
|
||||||
if (slp_node)
|
if (slp_node)
|
||||||
ncopies = 1;
|
ncopies = 1;
|
||||||
|
|
@ -3259,7 +3267,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||||
the documentation of vect_permute_store_chain()).
|
the documentation of vect_permute_store_chain()).
|
||||||
|
|
||||||
In case of both multiple types and interleaving, above vector stores and
|
In case of both multiple types and interleaving, above vector stores and
|
||||||
permutation stmts are created for every copy. The result vector stmts are
|
permutation stmts are created for every copy. The result vector stmts are
|
||||||
put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
|
put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
|
||||||
STMT_VINFO_RELATED_STMT for the next copies.
|
STMT_VINFO_RELATED_STMT for the next copies.
|
||||||
*/
|
*/
|
||||||
|
|
@ -3411,6 +3419,8 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||||
VEC_free (tree, heap, oprnds);
|
VEC_free (tree, heap, oprnds);
|
||||||
if (result_chain)
|
if (result_chain)
|
||||||
VEC_free (tree, heap, result_chain);
|
VEC_free (tree, heap, result_chain);
|
||||||
|
if (vec_oprnds)
|
||||||
|
VEC_free (tree, heap, vec_oprnds);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -3476,7 +3486,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||||
vf = 1;
|
vf = 1;
|
||||||
|
|
||||||
/* Multiple types in SLP are handled by creating the appropriate number of
|
/* Multiple types in SLP are handled by creating the appropriate number of
|
||||||
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
|
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
|
||||||
case of SLP. */
|
case of SLP. */
|
||||||
if (slp)
|
if (slp)
|
||||||
ncopies = 1;
|
ncopies = 1;
|
||||||
|
|
@ -3603,13 +3613,13 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
|
||||||
/* In case the vectorization factor (VF) is bigger than the number
|
/* In case the vectorization factor (VF) is bigger than the number
|
||||||
of elements that we can fit in a vectype (nunits), we have to generate
|
of elements that we can fit in a vectype (nunits), we have to generate
|
||||||
more than one vector stmt - i.e - we need to "unroll" the
|
more than one vector stmt - i.e - we need to "unroll" the
|
||||||
vector stmt by a factor VF/nunits. In doing so, we record a pointer
|
vector stmt by a factor VF/nunits. In doing so, we record a pointer
|
||||||
from one copy of the vector stmt to the next, in the field
|
from one copy of the vector stmt to the next, in the field
|
||||||
STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
|
STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
|
||||||
stages to find the correct vector defs to be used when vectorizing
|
stages to find the correct vector defs to be used when vectorizing
|
||||||
stmts that use the defs of the current stmt. The example below illustrates
|
stmts that use the defs of the current stmt. The example below
|
||||||
the vectorization process when VF=16 and nunits=4 (i.e - we need to create
|
illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
|
||||||
4 vectorized stmts):
|
need to create 4 vectorized stmts):
|
||||||
|
|
||||||
before vectorization:
|
before vectorization:
|
||||||
RELATED_STMT VEC_STMT
|
RELATED_STMT VEC_STMT
|
||||||
|
|
@@ -3621,7 +3631,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
      Next, we create the vector stmt VS1_1, and record a pointer to
      it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
      Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
      stmts and pointers:
        RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0      VS1_1    -
@@ -3664,9 +3674,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
      STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
 
      In case of both multiple types and interleaving, the vector loads and
-     permutation stmts above are created for every copy.  The result vector stmts
-     are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies.  */
+     permutation stmts above are created for every copy.  The result vector
+     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
+     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
 
   /* If the data reference is aligned (dr_aligned) or potentially unaligned
      on a target that supports unaligned accesses (dr_unaligned_supported)
@@ -3699,7 +3709,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /* If the misalignment remains the same throughout the execution of the
      loop, we can create the init_addr and permutation mask at the loop
      preheader.  Otherwise, it needs to be created inside the loop.
      This can only occur when vectorizing memory accesses in the inner-loop
      nested within an outer-loop that is being vectorized.  */
@@ -3854,7 +3864,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          vect_finish_stmt_generation (stmt, new_stmt, gsi);
          mark_symbols_for_renaming (new_stmt);
 
          /* 3. Handle explicit realignment if necessary/supported.  Create in
                loop: vec_dest = realign_load (msq, lsq, realignment_token)  */
          if (alignment_support_scheme == dr_explicit_realign_optimized
              || alignment_support_scheme == dr_explicit_realign)
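The realign_load in the comment above combines two aligned loads (msq and lsq) into the misaligned vector the scalar code asked for. A toy element-wise model of that idea, assuming 4-element integer "vectors" (the real target operation works on bytes and uses a realignment token, which this sketch omits):

#include <stdio.h>

#define NUNITS 4   /* elements per "vector" in this toy model  */

/* Combine two aligned NUNITS-element loads MSQ and LSQ into the
   vector that starts OFFSET elements into MSQ.  */
static void
realign_load (const int *msq, const int *lsq, int offset, int *dest)
{
  for (int i = 0; i < NUNITS; i++)
    dest[i] = (i + offset < NUNITS) ? msq[i + offset]
                                    : lsq[i + offset - NUNITS];
}

int
main (void)
{
  int mem[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };   /* two aligned vectors  */
  int dest[NUNITS];
  int misalign = 3;                          /* load starts at mem[3]  */

  realign_load (mem, mem + NUNITS, misalign, dest);
  for (int i = 0; i < NUNITS; i++)
    printf ("%d ", dest[i]);                 /* prints: 3 4 5 6  */
  printf ("\n");
  return 0;
}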
@@ -4035,7 +4045,8 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   tree cond_expr, then_clause, else_clause;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
+  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
+  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
   tree vec_compare, vec_cond_expr;
   tree new_temp;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
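Per the ChangeLog, these initializers exist to silence warnings: the four operands are only assigned on the first pass through the copy loop and reused afterwards, which the compiler cannot always prove. A minimal sketch of the code shape that triggers -Wmaybe-uninitialized and the fix, with invented names:

#include <stdio.h>

int
main (void)
{
  int ncopies = 3;
  /* Initialized up front, as the hunk does with NULL_TREE: VAL is only
     assigned when j == 0, which some compilers cannot prove, so an
     explicit initializer silences -Wmaybe-uninitialized.  */
  int val = 0;

  for (int j = 0; j < ncopies; j++)
    {
      if (j == 0)
        val = 42;                             /* def on the first copy  */
      printf ("copy %d uses %d\n", j, val);   /* reused afterwards  */
    }
  return 0;
}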
@@ -4365,7 +4376,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
       if (!PURE_SLP_STMT (stmt_info))
         {
           /* Groups of strided accesses whose size is not a power of 2 are not
              vectorizable yet using loop-vectorization.  Therefore, if this stmt
              feeds non-SLP-able stmts (i.e., this stmt has to be both SLPed and
              loop-based vectorized), the loop cannot be vectorized.  */
           if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
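The comment above gates strided groups on their size being a power of 2. The standard constant-time check for that property, shown as a standalone sketch rather than the vectorizer's own helper:

#include <stdio.h>

/* True iff N is a nonzero power of two: exactly one bit set.  */
static int
pow2_p (unsigned int n)
{
  return n != 0 && (n & (n - 1)) == 0;
}

int
main (void)
{
  for (unsigned int size = 1; size <= 8; size++)
    printf ("group size %u: %s\n", size,
            pow2_p (size) ? "vectorizable shape" : "rejected");
  return 0;
}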
@@ -4447,7 +4458,7 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info) && !slp_node)
     {
       /* In case of interleaving, the whole chain is vectorized when the
          last store in the chain is reached.  Store stmts before the last
          one are skipped, and their vec_stmt_info shouldn't be freed
          meanwhile.  */
       *strided_store = true;
@@ -4747,7 +4758,7 @@ get_same_sized_vectype (tree scalar_type, tree vector_type ATTRIBUTE_UNUSED)
 
    Returns whether a stmt with OPERAND can be vectorized.
    For loops, supportable operands are constants, loop invariants, and operands
    that are defined by the current iteration of the loop.  Unsupportable
    operands are those that are defined by a previous iteration of the loop (as
    is the case in reduction/induction computations).
    For basic blocks, supportable operands are constants and bb invariants.
@@ -4929,7 +4940,7 @@ vect_is_simple_use_1 (tree operand, loop_vec_info loop_vinfo,
    - CODE1 and CODE2 are codes of vector operations to be used when
    vectorizing the operation, if available.
    - DECL1 and DECL2 are decls of target builtin functions to be used
    when vectorizing the operation, if available.  In this case,
    CODE1 and CODE2 are CALL_EXPR.
    - MULTI_STEP_CVT determines the number of required intermediate steps in
    case of multi-step conversion (like char->short->int - in that case
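The MULTI_STEP_CVT output described above counts the intermediate types a widening like char->short->int passes through. A scalar illustration of that bookkeeping by repeated doubling of the element width (this mirrors only the counting, not the real target queries):

#include <stdio.h>

/* Count the intermediate types needed to widen a FROM_BITS element to
   TO_BITS by repeated doubling (char -> short -> int has one
   intermediate type: short).  Returns -1 if TO_BITS is unreachable.  */
static int
multi_step_cvt (int from_bits, int to_bits)
{
  int steps = 0;
  while (from_bits < to_bits)
    {
      from_bits *= 2;
      if (from_bits < to_bits)
        steps++;                 /* an intermediate type is produced  */
    }
  return from_bits == to_bits ? steps : -1;
}

int
main (void)
{
  printf ("char->int:   %d intermediate step(s)\n", multi_step_cvt (8, 32));
  printf ("char->short: %d intermediate step(s)\n", multi_step_cvt (8, 16));
  return 0;
}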
@@ -4973,7 +4984,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
 
    When vectorizing outer-loops, we execute the inner-loop sequentially
    (each vectorized inner-loop iteration contributes to VF outer-loop
    iterations in parallel).  We therefore don't allow to change the order
    of the computation in the inner-loop during outer-loop vectorization.  */
 
   if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
@@ -5086,8 +5097,9 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
       *code2 = c2;
 
       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate steps in promotion sequence.  We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not.  */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
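The loop in this hunk is bounded by MAX_INTERM_CVT_STEPS (3, matching `i < 3` in the code): it tries successive intermediate vector types and fails if the target type is not reached within the bound. A standalone sketch of that bounded-search shape, with element widths standing in for vector types and the support test left out:

#include <stdio.h>

#define MAX_INTERM_CVT_STEPS 3   /* mirrors the bound in the loop above  */

/* Try to reach TARGET_BITS from CUR_BITS in at most
   MAX_INTERM_CVT_STEPS halvings, recording each intermediate width the
   way the real code records intermediate vector types; 0 on failure.  */
static int
narrow_in_steps (int cur_bits, int target_bits, int *interm, int *n_interm)
{
  *n_interm = 0;
  for (int i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      cur_bits /= 2;                 /* one conversion step  */
      if (cur_bits == target_bits)
        return 1;                    /* reached the target type  */
      interm[(*n_interm)++] = cur_bits;
    }
  return 0;                          /* too many steps: fail  */
}

int
main (void)
{
  int interm[MAX_INTERM_CVT_STEPS], n;
  if (narrow_in_steps (64, 8, interm, &n))
    {
      printf ("reached 8 bits via %d intermediate width(s):", n);
      for (int i = 0; i < n; i++)
        printf (" %d", interm[i]);
      printf ("\n");
    }
  else
    printf ("not reachable within %d steps\n", MAX_INTERM_CVT_STEPS);
  return 0;
}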
@@ -5138,7 +5150,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
    and producing a result of type VECTYPE_OUT).
 
    Narrowing operations we currently support are NOP (CONVERT) and
    FIX_TRUNC.  This function checks if these operations are supported by
    the target platform directly via vector tree-codes.
 
    Output:
@@ -5206,8 +5218,9 @@ supportable_narrowing_operation (enum tree_code code,
       *code1 = c1;
       prev_type = vectype;
       /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
-         intermediate steps in promotion sequence.  We try MAX_INTERM_CVT_STEPS
-         to get to NARROW_VECTYPE, and fail if we do not.  */
+         intermediate steps in promotion sequence.  We try
+         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
+         not.  */
       *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
       for (i = 0; i < 3; i++)
         {
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -208,7 +208,7 @@ vectorize_loops (void)
   /* ----------- Analyze loops. ----------- */
 
   /* If some loop was duplicated, it gets bigger number
      than all previously defined loops.  This fact allows us to run
      only over initial loops skipping newly generated ones.  */
   FOR_EACH_LOOP (li, loop, 0)
     if (optimize_loop_nest_for_speed_p (loop))
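The comment above relies on loop numbering: loops created by duplication get numbers larger than every pre-existing loop, so a walk bounded by the count taken before any transformation visits only the initial loops. A toy sketch of that invariant (the array stands in for the loop tree; names are invented):

#include <stdio.h>

int
main (void)
{
  /* Loops are numbered in creation order; duplicates created during
     vectorization get numbers >= the original count, so bounding the
     walk by a snapshot taken up front skips them.  */
  int loops[8] = { 0, 1, 2, 3 };     /* loop numbers  */
  int nloops = 4;                    /* snapshot taken before the pass  */
  int next_num = nloops;

  for (int i = 0; i < nloops; i++)
    {
      printf ("analyzing loop %d\n", loops[i]);
      if (loops[i] == 1)             /* suppose this loop is versioned  */
        loops[next_num++] = 4;       /* duplicate gets a bigger number  */
    }
  /* loops[4] == 4 was appended but never analyzed in this pass.  */
  return 0;
}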
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -582,6 +582,8 @@ extern VEC(vec_void_p,heap) *stmt_vec_info_vec;
 void init_stmt_vec_info_vec (void);
 void free_stmt_vec_info_vec (void);
 
+/* Return a stmt_vec_info corresponding to STMT.  */
+
 static inline stmt_vec_info
 vinfo_for_stmt (gimple stmt)
 {
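As the body visible in the next hunk shows, vinfo_for_stmt looks the stmt_vec_info up in a global vector indexed by the statement's UID minus 1. A standalone sketch of that side-table scheme with plain arrays (the uid-0 "no info" case and the VEC API are simplified away):

#include <stdio.h>

#define MAX_STMTS 16

/* Stand-ins for gimple statements and their side table: each stmt
   carries a 1-based UID, and its info lives at index uid - 1,
   mirroring vinfo_for_stmt / set_vinfo_for_stmt above.  */
struct stmt { int uid; };
static void *stmt_info_table[MAX_STMTS];

static void
set_info_for_stmt (struct stmt *s, void *info)
{
  stmt_info_table[s->uid - 1] = info;
}

static void *
info_for_stmt (struct stmt *s)
{
  return s->uid ? stmt_info_table[s->uid - 1] : NULL;
}

int
main (void)
{
  struct stmt s = { 1 };
  int cost = 7;
  set_info_for_stmt (&s, &cost);
  printf ("info for uid %d: %d\n", s.uid, *(int *) info_for_stmt (&s));
  return 0;
}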
@@ -592,6 +594,8 @@ vinfo_for_stmt (gimple stmt)
   return (stmt_vec_info) VEC_index (vec_void_p, stmt_vec_info_vec, uid - 1);
 }
 
+/* Set vectorizer information INFO for STMT.  */
+
 static inline void
 set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
 {
@@ -607,6 +611,8 @@ set_vinfo_for_stmt (gimple stmt, stmt_vec_info info)
   VEC_replace (vec_void_p, stmt_vec_info_vec, uid - 1, (vec_void_p) info);
 }
 
+/* Return the earlier statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_earlier_stmt (gimple stmt1, gimple stmt2)
 {
@@ -633,6 +639,8 @@ get_earlier_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }
 
+/* Return the later statement between STMT1 and STMT2.  */
+
 static inline gimple
 get_later_stmt (gimple stmt1, gimple stmt2)
 {
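Both helpers documented here order statements by comparing their UIDs, which increase in statement order. A small sketch of that comparison, under the assumption that UIDs are valid (the real helpers also bail out when a UID is 0):

#include <stdio.h>

struct stmt { int uid; };

/* UIDs increase in statement order, so comparing them orders the
   statements.  */
static struct stmt *
earlier_stmt (struct stmt *s1, struct stmt *s2)
{
  return s1->uid < s2->uid ? s1 : s2;
}

static struct stmt *
later_stmt (struct stmt *s1, struct stmt *s2)
{
  return s1->uid > s2->uid ? s1 : s2;
}

int
main (void)
{
  struct stmt a = { 3 }, b = { 5 };
  printf ("earlier: uid %d, later: uid %d\n",
          earlier_stmt (&a, &b)->uid, later_stmt (&a, &b)->uid);
  return 0;
}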
@@ -659,6 +667,9 @@ get_later_stmt (gimple stmt1, gimple stmt2)
   return stmt2;
 }
 
+/* Return TRUE if a statement represented by STMT_INFO is a part of a
+   pattern.  */
+
 static inline bool
 is_pattern_stmt_p (stmt_vec_info stmt_info)
 {
@@ -674,6 +685,8 @@ is_pattern_stmt_p (stmt_vec_info stmt_info)
   return false;
 }
 
+/* Return true if BB is a loop header.  */
+
 static inline bool
 is_loop_header_bb_p (basic_block bb)
 {
@@ -683,6 +696,8 @@ is_loop_header_bb_p (basic_block bb)
   return false;
 }
 
+/* Set inside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                     int cost)
@@ -693,6 +708,8 @@ stmt_vinfo_set_inside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
 
+/* Set outside loop vectorization cost.  */
+
 static inline void
 stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
                                      int cost)
@@ -703,6 +720,8 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
 }
 
+/* Return pow2 (X).  */
+
 static inline int
 vect_pow2 (int x)
 {
@@ -723,12 +742,17 @@ vect_pow2 (int x)
 #define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux)
 #define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL))
 
+/* Return TRUE if the data access is aligned, and FALSE otherwise.  */
+
 static inline bool
 aligned_access_p (struct data_reference *data_ref_info)
 {
   return (DR_MISALIGNMENT (data_ref_info) == 0);
 }
 
+/* Return TRUE if the alignment of the data access is known, and FALSE
+   otherwise.  */
+
 static inline bool
 known_alignment_for_access_p (struct data_reference *data_ref_info)
 {
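The DR_MISALIGNMENT macros visible in this hunk stash a small integer directly in the data_reference's pointer-sized aux slot via casts through size_t; aligned_access_p then just compares against 0, and an unknown alignment is encoded as -1. A standalone sketch of that round-trip (only the struct is a stand-in; the macro bodies match the hunk):

#include <stdio.h>
#include <stddef.h>

/* Minimal stand-in for struct data_reference: AUX is a pointer-sized
   slot that the macros reuse to hold a small integer.  */
struct data_reference { void *aux; };

#define DR_MISALIGNMENT(DR) ((int) (size_t) (DR)->aux)
#define SET_DR_MISALIGNMENT(DR, VAL) ((DR)->aux = (void *) (size_t) (VAL))

static int
aligned_access_p (struct data_reference *dr)
{
  return DR_MISALIGNMENT (dr) == 0;
}

/* -1 is the "alignment unknown" encoding.  */
static int
known_alignment_for_access_p (struct data_reference *dr)
{
  return DR_MISALIGNMENT (dr) != -1;
}

int
main (void)
{
  struct data_reference dr;
  SET_DR_MISALIGNMENT (&dr, 8);    /* 8 bytes off the aligned boundary  */
  printf ("misalign %d, aligned %d, known %d\n",
          DR_MISALIGNMENT (&dr), aligned_access_p (&dr),
          known_alignment_for_access_p (&dr));
  return 0;
}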