Avoid unnecessary peeling for gaps with LD3
vectorizable_load forces peeling for gaps if the vectorisation factor
is not a multiple of the group size, since in that case we'd normally load
beyond the original scalar accesses but drop the excess elements as part
of a following permute:
  if (loop_vinfo
      && ! STMT_VINFO_STRIDED_P (stmt_info)
      && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
          || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
This isn't necessary for LOAD_LANES though, since it loads only the
data needed and does the permute itself.
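For concreteness, the loop from the new test below captures the situation.
The annotations are an illustrative sketch only: the vectorization factor
of 4 assumes 128-bit vectors of int, and the LD3 mapping assumes AArch64.

  /* Group size is 3 (b[3*i], b[3*i+1], b[3*i+2]).  With 128-bit vectors
     of int the vectorization factor is 4, and 4 % 3 != 0, so the check
     above forced peeling for gaps even when LOAD_LANES is available.  */
  void
  f (int *__restrict a, int *__restrict b)
  {
    for (int i = 0; i < 96; ++i)
      /* A permute-based vectorization may read past the elements the
         scalar loop accesses and drop the excess lanes afterwards,
         hence the peeling.  A LOAD_LANES sequence (ld3 on AArch64)
         reads exactly 3 * 4 contiguous ints per vector iteration and
         de-interleaves them itself, so no epilogue peeling is needed.  */
      a[i] = b[i * 3] + b[i * 3 + 1] + b[i * 3 + 2];
  }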
Tested on aarch64-linux-gnu and x86_64-linux-gnu.
gcc/
	* tree-vect-stmts.c (vectorizable_load): Reorder checks so that
	load_lanes/grouped_load classification comes first.  Don't check
	whether the vectorization factor is a multiple of the group size
	for load_lanes.

gcc/testsuite/
	* gcc.dg/vect/vect-load-lanes-peeling-1.c: New test.
From-SVN: r236632
commit d3465d72ba
parent 836dbb1a27
gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-05-24  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* tree-vect-stmts.c (vectorizable_load): Reorder checks so that
+	load_lanes/grouped_load classification comes first.  Don't check
+	whether the vectorization factor is a multiple of the group size
+	for load_lanes.
+
 2016-05-24  Richard Sandiford  <richard.sandiford@arm.com>
 
 	* tree-vect-data-refs.c (vect_analyze_group_access_1): Set

gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-05-24  Richard Sandiford  <richard.sandiford@arm.com>
+
+	* gcc.dg/vect/vect-load-lanes-peeling-1.c: New test.
+
 2016-05-24  Richard Biener  <rguenther@suse.de>
 
 	PR middle-end/70434

gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c (new file)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_load_lanes } */
+
+void
+f (int *__restrict a, int *__restrict b)
+{
+  for (int i = 0; i < 96; ++i)
+    a[i] = b[i * 3] + b[i * 3 + 1] + b[i * 3 + 2];
+}
+
+/* { dg-final { scan-tree-dump-not "Data access with gaps" "vect" } } */
+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */

gcc/tree-vect-stmts.c
@@ -6303,6 +6303,17 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
 
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+
+      if (!slp
+          && !PURE_SLP_STMT (stmt_info)
+          && !STMT_VINFO_STRIDED_P (stmt_info))
+        {
+          if (vect_load_lanes_supported (vectype, group_size))
+            load_lanes_p = true;
+          else if (!vect_grouped_load_supported (vectype, group_size))
+            return false;
+        }
 
       /* If this is single-element interleaving with an element distance
          that leaves unused vector loads around punt - we at least create

@@ -6330,7 +6341,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (loop_vinfo
           && ! STMT_VINFO_STRIDED_P (stmt_info)
           && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0
-              || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0)))
+              || (!slp && !load_lanes_p && vf % group_size != 0)))
         {
           if (dump_enabled_p ())
             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

@@ -6350,8 +6361,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
         slp_perm = true;
 
-      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
-
       /* ??? The following is overly pessimistic (as well as the loop
          case above) in the case we can statically determine the excess
          elements loaded are within the bounds of a decl that is accessed.

@@ -6364,16 +6373,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
             return false;
         }
 
-      if (!slp
-          && !PURE_SLP_STMT (stmt_info)
-          && !STMT_VINFO_STRIDED_P (stmt_info))
-        {
-          if (vect_load_lanes_supported (vectype, group_size))
-            load_lanes_p = true;
-          else if (!vect_grouped_load_supported (vectype, group_size))
-            return false;
-        }
-
       /* Invalidate assumptions made by dependence analysis when vectorization
          on the unrolled body effectively re-orders stmts.  */
       if (!PURE_SLP_STMT (stmt_info)