mirror of git://gcc.gnu.org/git/gcc.git
re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing with "not suitable for scatter store")
PR tree-optimization/88464
PR target/88498
* tree-vect-stmts.c (vect_build_gather_load_calls): For NARROWING
and mask with integral masktype, don't try to permute mask vectors,
instead emit VEC_UNPACK_{LO,HI}_EXPR. Fix up NOP_EXPR operand.
(vectorizable_store): Handle masked scatters with decl and integral
mask type.
(permute_vec_elements): Allow scalar_dest to be NULL.
* config/i386/i386.c (ix86_get_builtin)
<case IX86_BUILTIN_GATHER3ALTDIV16SF>: Use lowpart_subreg for masks.
<case IX86_BUILTIN_GATHER3ALTDIV8SF>: Don't assume mask and src have
to be the same.
* gcc.target/i386/avx512f-pr88462-1.c: Rename to ...
* gcc.target/i386/avx512f-pr88464-1.c: ... this. Fix up PR number.
Expect 4 vectorized loops instead of 3.
(f4): New function.
* gcc.target/i386/avx512f-pr88462-2.c: Rename to ...
* gcc.target/i386/avx512f-pr88464-2.c: ... this. Fix up PR number
and #include.
(avx512f_test): Prepare arguments for f4 and check the results.
* gcc.target/i386/avx512f-pr88464-3.c: New test.
* gcc.target/i386/avx512f-pr88464-4.c: New test.
From-SVN: r267169
This commit is contained in:
parent
ee34ebba15
commit
b1985ca02e
|
|
@ -1,3 +1,18 @@
|
|||
2018-12-15 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/88464
|
||||
PR target/88498
|
||||
* tree-vect-stmts.c (vect_build_gather_load_calls): For NARROWING
|
||||
and mask with integral masktype, don't try to permute mask vectors,
|
||||
instead emit VEC_UNPACK_{LO,HI}_EXPR. Fix up NOP_EXPR operand.
|
||||
(vectorizable_store): Handle masked scatters with decl and integral
|
||||
mask type.
|
||||
(permute_vec_elements): Allow scalar_dest to be NULL.
|
||||
* config/i386/i386.c (ix86_get_builtin)
|
||||
<case IX86_BUILTIN_GATHER3ALTDIV16SF>: Use lowpart_subreg for masks.
|
||||
<case IX86_BUILTIN_GATHER3ALTDIV8SF>: Don't assume mask and src have
|
||||
to be the same.
|
||||
|
||||
2018-12-15 Jan Hubicka <hubicka@ucw.cz>
|
||||
|
||||
* ipa.c (cgraph_build_static_cdtor_1): Add OPTIMIZATION and TARGET
|
||||
|
|
|
|||
|
|
@ -37607,13 +37607,7 @@ rdseed_step:
|
|||
op0 = copy_to_mode_reg (GET_MODE (op0), op0);
|
||||
emit_insn (gen (half, op0));
|
||||
op0 = half;
|
||||
if (GET_MODE (op3) != VOIDmode)
|
||||
{
|
||||
if (!nonimmediate_operand (op3, GET_MODE (op3)))
|
||||
op3 = copy_to_mode_reg (GET_MODE (op3), op3);
|
||||
emit_insn (gen (half, op3));
|
||||
op3 = half;
|
||||
}
|
||||
op3 = lowpart_subreg (QImode, op3, HImode);
|
||||
break;
|
||||
case IX86_BUILTIN_GATHER3ALTDIV8SF:
|
||||
case IX86_BUILTIN_GATHER3ALTDIV8SI:
|
||||
|
|
@ -37630,6 +37624,7 @@ rdseed_step:
|
|||
op0 = half;
|
||||
if (GET_MODE (op3) != VOIDmode)
|
||||
{
|
||||
half = gen_reg_rtx (mode0);
|
||||
if (!nonimmediate_operand (op3, GET_MODE (op3)))
|
||||
op3 = copy_to_mode_reg (GET_MODE (op3), op3);
|
||||
emit_insn (gen (half, op3));
|
||||
|
|
|
|||
|
|
@ -1,3 +1,18 @@
|
|||
2018-12-15 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/88464
|
||||
PR target/88498
|
||||
* gcc.target/i386/avx512f-pr88462-1.c: Rename to ...
|
||||
* gcc.target/i386/avx512f-pr88464-1.c: ... this. Fix up PR number.
|
||||
Expect 4 vectorized loops instead of 3.
|
||||
(f4): New function.
|
||||
* gcc.target/i386/avx512f-pr88462-2.c: Rename to ...
|
||||
* gcc.target/i386/avx512f-pr88464-2.c: ... this. Fix up PR number
|
||||
and #include.
|
||||
(avx512f_test): Prepare arguments for f4 and check the results.
|
||||
* gcc.target/i386/avx512f-pr88464-3.c: New test.
|
||||
* gcc.target/i386/avx512f-pr88464-4.c: New test.
|
||||
|
||||
2018-12-15 Paolo Carlini <paolo.carlini@oracle.com>
|
||||
|
||||
PR c++/84644
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* PR tree-optimization/88462 */
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 3 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
|
||||
|
||||
__attribute__((noipa)) void
|
||||
f1 (double * __restrict__ a, const double * __restrict__ b, const int * __restrict__ c, int n)
|
||||
|
|
@ -33,3 +33,13 @@ f3 (float * __restrict__ a, const float * __restrict__ b, const int * __restrict
|
|||
if (a[i] > 10.0f)
|
||||
a[i] = b[c[i]];
|
||||
}
|
||||
|
||||
/* Conditional indexed load with float data and `long' indices:
   for every i in [0, n), when a[i] is greater than 10.0f, a[i] is
   overwritten with b[c[i]].  Elements of A that fail the comparison
   are left untouched.  Same loop body as f3, but C is `const long *'
   here rather than `const int *'.
   NOTE(review): presumably this is the fourth loop the
   scan-tree-dump-times directives in this test expect to be
   vectorized -- confirm against the dg-final lines.  */
__attribute__((noipa)) void
|
||||
f4 (float * __restrict__ a, const float * __restrict__ b, const long * __restrict__ c, int n)
|
||||
{
|
||||
int i;
|
||||
/* The ivdep pragma applies to the loop that immediately follows.  */
#pragma GCC ivdep
|
||||
for (i = 0; i < n; ++i)
|
||||
if (a[i] > 10.0f)
|
||||
a[i] = b[c[i]];
|
||||
}
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
/* PR tree-optimization/88462 */
|
||||
/* PR tree-optimization/88464 */
|
||||
/* { dg-do run { target { avx512f } } } */
|
||||
/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */
|
||||
|
||||
#include "avx512f-check.h"
|
||||
|
||||
#include "avx512f-pr88462-1.c"
|
||||
#include "avx512f-pr88464-1.c"
|
||||
|
||||
static void
|
||||
avx512f_test (void)
|
||||
|
|
@ -47,5 +47,15 @@ avx512f_test (void)
|
|||
asm volatile ("" : "+g" (i));
|
||||
if (c[i] != ((i % 3) != 2 ? (1023 - i) * 4.0f : -5.0f))
|
||||
abort ();
|
||||
c[i] = (i % 3) != 0 ? 15.0f : -5.0f;
|
||||
e[i] = (i % 3) != 0 ? 1023 - i : __INT_MAX__;
|
||||
f[i] = 5 * i;
|
||||
}
|
||||
f4 (c, f, e, 1024);
|
||||
for (i = 0; i < 1024; i++)
|
||||
{
|
||||
asm volatile ("" : "+g" (i));
|
||||
if (c[i] != ((i % 3) != 0 ? (1023 - i) * 5.0f : -5.0f))
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
|
@ -2655,6 +2655,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|||
if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
|
||||
masktype = build_same_sized_truth_vector_type (srctype);
|
||||
|
||||
tree mask_halftype = masktype;
|
||||
tree perm_mask = NULL_TREE;
|
||||
tree mask_perm_mask = NULL_TREE;
|
||||
if (known_eq (nunits, gather_off_nunits))
|
||||
|
|
@ -2690,13 +2691,16 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|||
|
||||
ncopies *= 2;
|
||||
|
||||
if (mask)
|
||||
if (mask && masktype == real_masktype)
|
||||
{
|
||||
for (int i = 0; i < count; ++i)
|
||||
sel[i] = i | (count / 2);
|
||||
indices.new_vector (sel, 2, count);
|
||||
mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
|
||||
}
|
||||
else if (mask)
|
||||
mask_halftype
|
||||
= build_same_sized_truth_vector_type (gs_info->offset_vectype);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
|
@ -2761,7 +2765,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|||
{
|
||||
if (j == 0)
|
||||
vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
|
||||
else
|
||||
else if (modifier != NARROW || (j & 1) == 0)
|
||||
vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
|
||||
vec_mask);
|
||||
|
||||
|
|
@ -2779,17 +2783,27 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|||
mask_op = var;
|
||||
}
|
||||
}
|
||||
if (modifier == NARROW && masktype != real_masktype)
|
||||
{
|
||||
var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
|
||||
gassign *new_stmt
|
||||
= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
|
||||
: VEC_UNPACK_LO_EXPR,
|
||||
mask_op);
|
||||
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
mask_op = var;
|
||||
}
|
||||
src_op = mask_op;
|
||||
}
|
||||
|
||||
tree mask_arg = mask_op;
|
||||
if (masktype != real_masktype)
|
||||
{
|
||||
tree utype;
|
||||
if (TYPE_MODE (real_masktype) == TYPE_MODE (masktype))
|
||||
tree utype, optype = TREE_TYPE (mask_op);
|
||||
if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
|
||||
utype = real_masktype;
|
||||
else
|
||||
utype = lang_hooks.types.type_for_mode (TYPE_MODE (masktype), 1);
|
||||
utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
|
||||
var = vect_get_new_ssa_name (utype, vect_scalar_var);
|
||||
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
|
||||
gassign *new_stmt
|
||||
|
|
@ -2801,7 +2815,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
|
|||
gcc_assert (TYPE_PRECISION (utype)
|
||||
<= TYPE_PRECISION (real_masktype));
|
||||
var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
|
||||
new_stmt = gimple_build_assign (var, NOP_EXPR, utype);
|
||||
new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
|
||||
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
mask_arg = var;
|
||||
}
|
||||
|
|
@ -6361,7 +6375,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
return false;
|
||||
}
|
||||
else if (memory_access_type != VMAT_LOAD_STORE_LANES
|
||||
&& (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
|
||||
&& (memory_access_type != VMAT_GATHER_SCATTER
|
||||
|| (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
|
@ -6419,7 +6434,9 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
|
||||
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
|
||||
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
|
||||
tree ptr, mask, var, scale, perm_mask = NULL_TREE;
|
||||
tree ptr, var, scale, vec_mask;
|
||||
tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
|
||||
tree mask_halfvectype = mask_vectype;
|
||||
edge pe = loop_preheader_edge (loop);
|
||||
gimple_seq seq;
|
||||
basic_block new_bb;
|
||||
|
|
@ -6460,6 +6477,10 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
perm_mask = vect_gen_perm_mask_checked (vectype, indices);
|
||||
gcc_assert (perm_mask != NULL_TREE);
|
||||
ncopies *= 2;
|
||||
|
||||
if (mask)
|
||||
mask_halfvectype
|
||||
= build_same_sized_truth_vector_type (gs_info.offset_vectype);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
|
|
@ -6482,10 +6503,11 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
gcc_assert (!new_bb);
|
||||
}
|
||||
|
||||
/* Currently we support only unconditional scatter stores,
|
||||
so mask should be all ones. */
|
||||
mask = build_int_cst (masktype, -1);
|
||||
mask = vect_init_vector (stmt_info, mask, masktype, NULL);
|
||||
if (mask == NULL_TREE)
|
||||
{
|
||||
mask_arg = build_int_cst (masktype, -1);
|
||||
mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
|
||||
}
|
||||
|
||||
scale = build_int_cst (scaletype, gs_info.scale);
|
||||
|
||||
|
|
@ -6494,36 +6516,46 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
{
|
||||
if (j == 0)
|
||||
{
|
||||
src = vec_oprnd1
|
||||
= vect_get_vec_def_for_operand (op, stmt_info);
|
||||
op = vec_oprnd0
|
||||
= vect_get_vec_def_for_operand (gs_info.offset, stmt_info);
|
||||
src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
|
||||
op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
|
||||
stmt_info);
|
||||
if (mask)
|
||||
mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
|
||||
stmt_info);
|
||||
}
|
||||
else if (modifier != NONE && (j & 1))
|
||||
{
|
||||
if (modifier == WIDEN)
|
||||
{
|
||||
src = vec_oprnd1
|
||||
= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
|
||||
src
|
||||
= vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_oprnd1);
|
||||
op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
|
||||
stmt_info, gsi);
|
||||
if (mask)
|
||||
mask_op
|
||||
= vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_mask);
|
||||
}
|
||||
else if (modifier == NARROW)
|
||||
{
|
||||
src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
|
||||
stmt_info, gsi);
|
||||
op = vec_oprnd0
|
||||
= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
|
||||
op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_oprnd0);
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
{
|
||||
src = vec_oprnd1
|
||||
= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
|
||||
op = vec_oprnd0
|
||||
= vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
|
||||
src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_oprnd1);
|
||||
op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_oprnd0);
|
||||
if (mask)
|
||||
mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
|
||||
vec_mask);
|
||||
}
|
||||
|
||||
if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
|
||||
|
|
@ -6550,8 +6582,45 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
|||
op = var;
|
||||
}
|
||||
|
||||
if (mask)
|
||||
{
|
||||
tree utype;
|
||||
mask_arg = mask_op;
|
||||
if (modifier == NARROW)
|
||||
{
|
||||
var = vect_get_new_ssa_name (mask_halfvectype,
|
||||
vect_simple_var);
|
||||
gassign *new_stmt
|
||||
= gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
|
||||
: VEC_UNPACK_LO_EXPR,
|
||||
mask_op);
|
||||
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
mask_arg = var;
|
||||
}
|
||||
tree optype = TREE_TYPE (mask_arg);
|
||||
if (TYPE_MODE (masktype) == TYPE_MODE (optype))
|
||||
utype = masktype;
|
||||
else
|
||||
utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
|
||||
var = vect_get_new_ssa_name (utype, vect_scalar_var);
|
||||
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
|
||||
gassign *new_stmt
|
||||
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
|
||||
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
mask_arg = var;
|
||||
if (!useless_type_conversion_p (masktype, utype))
|
||||
{
|
||||
gcc_assert (TYPE_PRECISION (utype)
|
||||
<= TYPE_PRECISION (masktype));
|
||||
var = vect_get_new_ssa_name (masktype, vect_scalar_var);
|
||||
new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
|
||||
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
mask_arg = var;
|
||||
}
|
||||
}
|
||||
|
||||
gcall *new_stmt
|
||||
= gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
|
||||
= gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
|
||||
stmt_vec_info new_stmt_info
|
||||
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||
|
||||
|
|
@ -7284,7 +7353,7 @@ permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
|
|||
gimple *perm_stmt;
|
||||
|
||||
tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
|
||||
if (TREE_CODE (scalar_dest) == SSA_NAME)
|
||||
if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
|
||||
perm_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
else
|
||||
perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
|
||||
|
|
|
|||
Loading…
Reference in New Issue