vect: Add target hook to prefer gather/scatter instructions
For AMD GCN, the instructions available for loading/storing vectors are
always scatter/gather operations (i.e. there are separate addresses for
each vector lane), so the current heuristic in get_group_load_store_type
that avoids gather/scatter operations with too many elements is
counterproductive.  Avoiding such operations there can subsequently lead
to a missed vectorization opportunity: later analyses in the vectorizer
try to use a very wide array type which is not available on this target,
and therefore bail out.

This patch adds a target hook to override the "single_element_p"
heuristic in that function, and activates it for GCN.  This allows much
better code to be generated for affected loops.
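
For illustration only (this is not part of the patch; the target name and
the cutoffs below are invented), a backend for which gathers are only
sometimes a win could implement the same hook conditionally instead of
returning true unconditionally as GCN does.  As with the GCN definition in
the diff, such a definition would live in the backend's .cc file next to
its other TARGET_VECTORIZE_* hook definitions:

/* Hypothetical sketch of a conditional implementation of
   TARGET_VECTORIZE_PREFER_GATHER_SCATTER.  The scale/group_size
   thresholds are made up for the example.  */

static bool
example_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
                               int scale,
                               unsigned int group_size)
{
  /* Prefer gather/scatter only for unit-scale offsets and small groups;
     otherwise let the vectorizer fall back to elementwise accesses.  */
  return scale == 1 && group_size <= 4;
}

#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER example_prefer_gather_scatter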
Co-authored-by: Julian Brown <julian@codesourcery.com>
gcc/
	* doc/tm.texi.in (TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Add
	documentation hook.
	* doc/tm.texi: Regenerate.
	* target.def (prefer_gather_scatter): Add target hook under
	vectorizer.
	* hooks.cc (hook_bool_mode_int_unsigned_false): New function.
	* hooks.h (hook_bool_mode_int_unsigned_false): New prototype.
	* tree-vect-stmts.cc (vect_use_strided_gather_scatters_p): Add
	parameters group_size and single_element_p, and rework to use
	targetm.vectorize.prefer_gather_scatter.
	(get_group_load_store_type): Move some of the condition into
	vect_use_strided_gather_scatters_p.
	* config/gcn/gcn.cc (gcn_prefer_gather_scatter): New function.
	(TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Define hook.
(cherry picked from commit 36c5a7aa9a)
commit 49cb093be7
parent 6847a222ba
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -5795,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class,
   return bsd_libc_has_function (fn_class, type);
 }
 
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
+
+static bool
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+                           int ARG_UNUSED (scale),
+                           unsigned int ARG_UNUSED (group_size))
+{
+  return true;
+}
+
 /* }}} */
 /* {{{ md_reorg pass. */
 
@@ -7994,6 +8004,8 @@ gcn_dwarf_register_span (rtx rtl)
   gcn_vectorize_builtin_vectorized_function
 #undef TARGET_VECTORIZE_GET_MASK_MODE
 #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6515,6 +6515,15 @@ The default is @code{NULL_TREE} which means to not vectorize scatter
 stores.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER (machine_mode @var{mode}, int @var{scale}, unsigned int @var{group_size})
+This hook returns TRUE if gather loads or scatter stores are cheaper on
+this target than a sequence of elementwise loads or stores. The @var{mode}
+and @var{scale} correspond to the @code{gather_load} and
+@code{scatter_store} instruction patterns. The @var{group_size} is the
+number of scalar elements in each scalar loop iteration that are to be
+combined into the vector.
+@end deftypefn
+
 @deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool})
 This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float}
 fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4311,6 +4311,8 @@ address; but often a machine-dependent strategy can generate better code.
 
 @hook TARGET_VECTORIZE_BUILTIN_SCATTER
 
+@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+
 @hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
 
 @hook TARGET_SIMD_CLONE_ADJUST
--- a/gcc/hooks.cc
+++ b/gcc/hooks.cc
@@ -117,6 +117,13 @@ hook_bool_mode_const_rtx_true (machine_mode, const_rtx)
   return true;
 }
 
+/* Generic hook that takes (machine_mode, int, unsigned) and returns false. */
+bool
+hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned)
+{
+  return false;
+}
+
 /* Generic hook that takes (machine_mode, rtx) and returns false. */
 bool
 hook_bool_mode_rtx_false (machine_mode, rtx)
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -36,6 +36,7 @@ extern bool hook_bool_mode_true (machine_mode);
 extern bool hook_bool_mode_mode_true (machine_mode, machine_mode);
 extern bool hook_bool_mode_const_rtx_false (machine_mode, const_rtx);
 extern bool hook_bool_mode_const_rtx_true (machine_mode, const_rtx);
+extern bool hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned);
 extern bool hook_bool_mode_rtx_false (machine_mode, rtx);
 extern bool hook_bool_mode_rtx_true (machine_mode, rtx);
 extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *,
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2056,6 +2056,20 @@ all zeros. GCC can then try to branch around the instruction instead.",
  (unsigned ifn),
  default_empty_mask_is_expensive)
 
+/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if\n\
+we cannot use a contiguous access. */
+DEFHOOK
+(prefer_gather_scatter,
+ "This hook returns TRUE if gather loads or scatter stores are cheaper on\n\
+this target than a sequence of elementwise loads or stores. The @var{mode}\n\
+and @var{scale} correspond to the @code{gather_load} and\n\
+@code{scatter_store} instruction patterns. The @var{group_size} is the\n\
+number of scalar elements in each scalar loop iteration that are to be\n\
+combined into the vector.",
+ bool,
+ (machine_mode mode, int scale, unsigned int group_size),
+ hook_bool_mode_int_unsigned_false)
+
 /* Target builtin that implements vector gather operation. */
 DEFHOOK
 (builtin_gather,
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1822,21 +1822,35 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
 
 static bool
 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
+                                    tree vectype,
                                     loop_vec_info loop_vinfo, bool masked_p,
                                     gather_scatter_info *gs_info,
-                                    vec<int> *elsvals)
+                                    vec<int> *elsvals,
+                                    unsigned int group_size,
+                                    bool single_element_p)
 {
   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals)
       || gs_info->ifn == IFN_LAST)
-    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
-                                                masked_p, gs_info, elsvals);
-
-  tree old_offset_type = TREE_TYPE (gs_info->offset);
-  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
+    {
+      if (!vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
+                                                masked_p, gs_info, elsvals))
+        return false;
+    }
+  else
+    {
+      tree old_offset_type = TREE_TYPE (gs_info->offset);
+      tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
 
-  gcc_assert (TYPE_PRECISION (new_offset_type)
-              >= TYPE_PRECISION (old_offset_type));
-  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+      gcc_assert (TYPE_PRECISION (new_offset_type)
+                  >= TYPE_PRECISION (old_offset_type));
+      gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
+    }
+
+  if (!single_element_p
+      && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype),
+                                                   gs_info->scale,
+                                                   group_size))
+    return false;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -2397,11 +2411,11 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
      allows us to use contiguous accesses. */
   if ((*memory_access_type == VMAT_ELEMENTWISE
        || *memory_access_type == VMAT_STRIDED_SLP)
-      && single_element_p
       && (!slp_node || SLP_TREE_LANES (slp_node) == 1)
       && loop_vinfo
-      && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
-                                             masked_p, gs_info, elsvals))
+      && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+                                             masked_p, gs_info, elsvals,
+                                             group_size, single_element_p))
     *memory_access_type = VMAT_GATHER_SCATTER;
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
@@ -2558,8 +2572,9 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     {
       gcc_assert (!slp_node);
       if (loop_vinfo
-          && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
-                                                 masked_p, gs_info, elsvals))
+          && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
+                                                 masked_p, gs_info, elsvals,
+                                                 1, false))
         *memory_access_type = VMAT_GATHER_SCATTER;
       else
         *memory_access_type = VMAT_ELEMENTWISE;