mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/43432 (Missed vectorization: "complicated access pattern" for increasing and decreasing data indexing)
        PR tree-optimization/43432
        * tree-vect-data-refs.c (vect_analyze_data_ref_access):
        Accept backwards consecutive accesses.
        (vect_create_data_ref_ptr): If step is negative generate
        decreasing IVs.
        * tree-vect-stmts.c (vectorizable_store): Reject negative steps.
        (perm_mask_for_reverse, reverse_vec_elements): New functions.
        (vectorizable_load): Handle loads with negative steps when easily
        possible.

testsuite/
        PR tree-optimization/43432
        * lib/target-supports.exp (check_effective_target_vect_perm_byte,
        check_effective_target_vect_perm_short): New predicates.
        (check_effective_target_vect_perm): Include x86_64.
        * gcc.dg/vect/pr43432.c: New test.
        * gcc.dg/vect/vect-114.c: Adjust.
        * gcc.dg/vect/vect-15.c: Ditto.
        * gcc.dg/vect/slp-perm-8.c: Use new predicate.
        * gcc.dg/vect/slp-perm-9.c: Ditto.

From-SVN: r164367
parent 4c588abff3
commit 18ba3ce7e3
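
For illustration only (not part of the commit; the function and variable names below are invented), a minimal example of the "backwards consecutive" access pattern this patch teaches the loop vectorizer to handle: the load from b walks through memory with a negative step while the store to a walks forwards, so on targets with vector permute support the loop can now be vectorized by loading vectors that end at the current position and reversing their lanes. The new gcc.dg/vect/pr43432.c test below exercises the same pattern with a multiply.

void
copy_reversed (float *a, const float *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[n - 1 - i];   /* load has a negative step, store a positive one */
}
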
gcc/ChangeLog
@@ -1,3 +1,15 @@
+2010-09-17  Michael Matz  <matz@suse.de>
+
+        PR tree-optimization/43432
+        * tree-vect-data-refs.c (vect_analyze_data_ref_access):
+        Accept backwards consecutive accesses.
+        (vect_create_data_ref_ptr): If step is negative generate
+        decreasing IVs.
+        * tree-vect-stmts.c (vectorizable_store): Reject negative steps.
+        (perm_mask_for_reverse, reverse_vec_elements): New functions.
+        (vectorizable_load): Handle loads with negative steps when easily
+        possible.
+
 2010-09-03  Jan Hubicka  <jh@suse.cz>
 
         * lto-cgraph.c (compute_ltrans_boundary): Use const_value_known.

gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2010-09-17  Michael Matz  <matz@suse.de>
+
+        PR tree-optimization/43432
+        * lib/target-supports.exp (check_effective_target_vect_perm_byte,
+        check_effective_target_vect_perm_short): New predicates.
+        (check_effective_target_vect_perm): Include x86_64.
+        * gcc.dg/vect/pr43432.c: New test.
+        * gcc.dg/vect/vect-114.c: Adjust.
+        * gcc.dg/vect/vect-15.c: Ditto.
+        * gcc.dg/vect/slp-perm-8.c: Use new predicate.
+        * gcc.dg/vect/slp-perm-9.c: Ditto.
+
 2010-09-17  Nicola Pero  <nicola.pero@meta-innovation.com>
 
         PR testsuite/45692

gcc/testsuite/gcc.dg/vect/pr43432.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+                           int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

gcc/testsuite/gcc.dg/vect/slp-perm-8.c
@@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm_byte } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
gcc/testsuite/gcc.dg/vect/slp-perm-9.c
@@ -54,7 +54,7 @@ int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
gcc/testsuite/gcc.dg/vect/vect-114.c
@@ -34,6 +34,7 @@ int main (void)
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_perm } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
gcc/testsuite/gcc.dg/vect/vect-15.c
@@ -35,5 +35,5 @@ int main (void)
   return main1 ();
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

gcc/testsuite/lib/target-supports.exp
@@ -2425,7 +2425,8 @@ proc check_effective_target_vect_perm { } {
     } else {
         set et_vect_perm_saved 0
         if { [istarget powerpc*-*-*]
-             || [istarget spu-*-*] } {
+             || [istarget spu-*-*]
+             || [istarget x86_64-*-*] } {
             set et_vect_perm_saved 1
         }
     }
@@ -2433,6 +2434,48 @@ proc check_effective_target_vect_perm { } {
     return $et_vect_perm_saved
 }
 
+# Return 1 if the target plus current options supports vector permutation
+# on byte-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_byte { } {
+    global et_vect_perm_byte
+
+    if [info exists et_vect_perm_byte_saved] {
+        verbose "check_effective_target_vect_perm_byte: using cached result" 2
+    } else {
+        set et_vect_perm_byte_saved 0
+        if { [istarget powerpc*-*-*]
+             || [istarget spu-*-*] } {
+            set et_vect_perm_byte_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_perm_byte: returning $et_vect_perm_byte_saved" 2
+    return $et_vect_perm_byte_saved
+}
+
+# Return 1 if the target plus current options supports vector permutation
+# on short-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_short { } {
+    global et_vect_perm_short
+
+    if [info exists et_vect_perm_short_saved] {
+        verbose "check_effective_target_vect_perm_short: using cached result" 2
+    } else {
+        set et_vect_perm_short_saved 0
+        if { [istarget powerpc*-*-*]
+             || [istarget spu-*-*] } {
+            set et_vect_perm_short_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_perm_short: returning $et_vect_perm_short_saved" 2
+    return $et_vect_perm_short_saved
+}
+
 # Return 1 if the target plus current options supports a vector
 # widening summation of *short* args into *int* result, 0 otherwise.
 #

gcc/tree-vect-data-refs.c
@@ -2287,7 +2287,8 @@ vect_analyze_data_ref_access (struct data_reference *dr)
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+          && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2970,6 +2972,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -3002,6 +3005,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3160,6 +3164,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
      LOOP is zero.  In this case the step here is also zero.  */
  if (*inv_p)
    step = size_zero_node;
+  else if (negative)
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 

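A rough sketch of what the vect_create_data_ref_ptr change means (illustration only; next_dataref_ptr, vf and backwards are invented names, not GCC identifiers): when the data reference's step is negative, the step of the pointer induction variable is negated, so the pointer decreases by one vector's worth of elements per vector iteration instead of increasing.

/* Emulate the data-ref pointer IV update for a float access with vf lanes:
   forward accesses advance by +vf elements per vector iteration,
   backwards (negative step) accesses now advance by -vf.  */
const float *
next_dataref_ptr (const float *ptr, int vf, int backwards)
{
  return backwards ? ptr - vf : ptr + vf;
}
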
gcc/tree-vect-stmts.c
@@ -3145,6 +3145,13 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;

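For context, a hedged example (the function name is illustrative, not from the commit) of the kind of store the new check rejects: the store walks backwards through a, so DR_STEP is negative and vectorizable_store now bails out early; only backwards loads are handled by this patch.

void
fill_reversed (float *a, float x, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[n - 1 - i] = x;   /* the store's step is negative: rejected above */
}
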
@@ -3425,6 +3432,68 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   return true;
 }
 
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+   for vector permutation and stores a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do
+   returns NULL (and *MASK is unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them a *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that

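A plain-C sketch of the lane reversal the two new helpers implement (illustration only; reverse4 is a hypothetical name and the fixed width of four lanes is an assumption): perm_mask_for_reverse builds an index mask that reverses the lane order, e.g. { 3, 2, 1, 0 } for a four-element vector, and reverse_vec_elements emits a call to the target's builtin_vec_perm builtin with that mask.

/* What the generated permute computes for one vector of four floats.  */
void
reverse4 (const float in[4], float out[4])
{
  int i;
  for (i = 0; i < 4; i++)
    out[i] = in[3 - i];   /* lane 3 -> lane 0, lane 2 -> lane 1, ...  */
}
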
@@ -3467,6 +3536,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;

@@ -3529,6 +3599,14 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
   mode = TYPE_MODE (vectype);
 

@@ -3563,6 +3641,25 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+          && alignment_support_scheme != dr_unaligned_supported)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step but alignment required.");
+          return false;
+        }
+      if (!perm_mask_for_reverse (vectype, NULL))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step and reversing not supported.");
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;

@@ -3737,6 +3834,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {

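Why the offset is -TYPE_VECTOR_SUBPARTS (vectype) + 1, as a small reasoning sketch (the helper below is invented for illustration, not taken from the commit): with a negative unit step, the scalar iterations starting at element pos consume pos, pos-1, ..., pos-(nunits-1), so the forward vector load that covers them has to begin nunits - 1 elements below the current position; reverse_vec_elements then puts the lanes back into scalar order.

/* For a vector of nunits lanes and a current element index pos, return the
   index of the first element covered by the forward vector load.  */
int
first_loaded_element (int pos, int nunits)
{
  return pos + (-nunits + 1);
}
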
@@ -3925,6 +4025,12 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
             gcc_unreachable (); /* FORNOW.  */
           }
 
+          if (negative)
+            {
+              new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+              new_stmt = SSA_NAME_DEF_STMT (new_temp);
+            }
+
           /* Collect vector loads and later create their permutation in
              vect_transform_strided_load ().  */
           if (strided_load || slp_perm)

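Putting the pieces together, a hypothetical scalar emulation of one vectorized iteration of the pr43432.c loop (a sketch under the assumption of four lanes; one_vector_iteration and NUNITS are invented names): the backwards operand is loaded as a forward vector ending at the current position, its lanes are reversed as reverse_vec_elements does above, and the result then lines up with the forward operands.

enum { NUNITS = 4 };   /* assumed vector width, for illustration only  */

void
one_vector_iteration (float *dst, const float *src0, const float *src1_pos)
{
  float loaded[NUNITS], reversed[NUNITS];
  int k;

  /* Forward load of the vector that ends at src1_pos (offset -NUNITS + 1).  */
  for (k = 0; k < NUNITS; k++)
    loaded[k] = src1_pos[k - (NUNITS - 1)];

  /* Reverse the lanes, as the generated permute does.  */
  for (k = 0; k < NUNITS; k++)
    reversed[k] = loaded[NUNITS - 1 - k];

  /* Lanes now match dst[i] = src0[i] * src1[-i] for i = 0..NUNITS-1.  */
  for (k = 0; k < NUNITS; k++)
    dst[k] = src0[k] * reversed[k];
}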