mirror of git://gcc.gnu.org/git/gcc.git
rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
[gcc]

2017-09-25  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
	for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
	(rs6000_builtin_valid_without_lhs): New helper function.
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST.

From-SVN: r253152
parent 2678bf2fb5
commit df5cc22c7e
gcc/ChangeLog:

@@ -1,3 +1,11 @@
+2017-09-25  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
+	for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
+	(rs6000_builtin_valid_without_lhs): New helper function.
+	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST.
+
 2017-09-25  Richard Sandiford  <richard.sandiford@linaro.org>
 
 	* target.h (vec_perm_indices): Use unsigned short rather than
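For context, the intrinsic being folded is the classic altivec vec_st, and the motivation carried over from the old expansion (see the comment in the removed code below) is aliasing quality: folding the store early leaves later GIMPLE passes looking at an ordinary memory store, so qualifiers such as __restrict__ can still be honored. A minimal sketch of the kind of source that benefits, assuming a powerpc compiler with -maltivec (names are illustrative, not from the patch or its testsuite):

#include <altivec.h>

/* Because the vec_st calls are folded to plain GIMPLE stores early,
   alias analysis can still use the __restrict__ qualifiers when
   optimizing this loop.  */
void
scale_accum (vector float * __restrict__ out,
	     vector float * __restrict__ in, vector float scale)
{
  for (int i = 0; i < 16; i++)
    vec_st (vec_madd (scale, vec_ld (0, &in[i]), vec_ld (0, &out[i])),
	    0, &out[i]);
}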
gcc/config/rs6000/rs6000-c.c:

@@ -6472,78 +6472,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       return stmt;
     }
 
-  /* Expand vec_st into an expression that masks the address and
-     performs the store.  We need to expand this early to allow
-     the best aliasing, as by the time we get into RTL we no longer
-     are able to honor __restrict__, for example.  We may want to
-     consider this for all memory access built-ins.
-
-     When -maltivec=be is specified, or the wrong number of arguments
-     is provided, simply punt to existing built-in processing.  */
-
-  if (fcode == ALTIVEC_BUILTIN_VEC_ST
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 3)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-      tree arg2 = (*arglist)[2];
-
-      /* Construct the masked address.  Let existing error handling take
-	 over if we don't have a constant offset.  */
-      arg1 = fold (arg1);
-
-      if (TREE_CODE (arg1) == INTEGER_CST)
-	{
-	  if (!ptrofftype_p (TREE_TYPE (arg1)))
-	    arg1 = build1 (NOP_EXPR, sizetype, arg1);
-
-	  tree arg2_type = TREE_TYPE (arg2);
-	  if (TREE_CODE (arg2_type) == ARRAY_TYPE && c_dialect_cxx ())
-	    {
-	      /* Force array-to-pointer decay for C++.  */
-	      arg2 = default_conversion (arg2);
-	      arg2_type = TREE_TYPE (arg2);
-	    }
-
-	  /* Find the built-in to make sure a compatible one exists; if not
-	     we fall back to default handling to get the error message.  */
-	  for (desc = altivec_overloaded_builtins;
-	       desc->code && desc->code != fcode; desc++)
-	    continue;
-
-	  for (; desc->code == fcode; desc++)
-	    if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-		&& rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2)
-		&& rs6000_builtin_type_compatible (TREE_TYPE (arg2),
-						   desc->op3))
-	      {
-		tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
-					     arg2, arg1);
-		tree aligned
-		  = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type,
-				     addr, build_int_cst (arg2_type, -16));
-
-		tree arg0_type = TREE_TYPE (arg0);
-		if (TYPE_MODE (arg0_type) == V2DImode)
-		  /* Type-based aliasing analysis thinks vector long
-		     and vector long long are different and will put them
-		     in distinct alias classes.  Force our address type
-		     to be a may-alias type to avoid this.  */
-		  arg0_type
-		    = build_pointer_type_for_mode (arg0_type, Pmode,
-						   true/*can_alias_all*/);
-		else
-		  arg0_type = build_pointer_type (arg0_type);
-		aligned = build1 (NOP_EXPR, arg0_type, aligned);
-		tree stg = build_indirect_ref (loc, aligned, RO_NULL);
-		tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg,
-				      convert (TREE_TYPE (stg), arg0));
-		return retval;
-	      }
-	}
-    }
-
   for (n = 0;
        !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
        fnargs = TREE_CHAIN (fnargs), n++)
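For reference, the semantics the removed expansion implemented, and which the new GIMPLE folding below preserves, amount to masking the low four bits of the effective address before a 16-byte store, matching the stvx instruction. A hypothetical C model of that addressing (not GCC code; names are illustrative):

#include <stdint.h>
#include <string.h>

/* Models stvx-style addressing: the effective address (base + offset)
   is truncated to a 16-byte boundary before the store.  */
void
stvx_semantics (const void *value16, long offset, void *base)
{
  uintptr_t ea = (uintptr_t) base + (uintptr_t) offset;  /* effective addr */
  void *aligned = (void *) (ea & ~(uintptr_t) 15);       /* mask low 4 bits */
  memcpy (aligned, value16, 16);                         /* 16-byte store */
}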
gcc/config/rs6000/rs6000.c:

@@ -16157,6 +16157,25 @@ rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
 #endif
 }
 
+/* Helper function to sort out which built-ins may be valid without having
+   a LHS.  */
+static bool
+rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
+{
+  switch (fn_code)
+    {
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
@@ -16184,8 +16203,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   if (!rs6000_fold_gimple)
     return false;
 
-  /* Generic solution to prevent gimple folding of code without a LHS.  */
-  if (!gimple_call_lhs (stmt))
+  /* Prevent gimple folding for code that does not have a LHS, unless it is
+     allowed per the rs6000_builtin_valid_without_lhs helper function.  */
+  if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
     return false;
 
   switch (fn_code)
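The reason for the exception: vec_st resolves to one of the void STVX builtins, so the resulting GIMPLE call statement carries no LHS, and the old blanket !gimple_call_lhs check made rs6000_gimple_fold_builtin bail out before the new store case below could ever run. Illustrative GIMPLE shape (not an actual dump; SSA names are made up):

  __builtin_altivec_stvx_v4sf (vf_1, 0, p_2);   /* store: void call, no LHS */
  v_3 = __builtin_altivec_lvx_v4sf (0, q_4);    /* load: has a LHS, already foldable */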
@@ -16587,7 +16607,54 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	gsi_replace (gsi, g, true);
 	return true;
       }
+    /* Vector stores.  */
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      {
+	/* Do not fold for -maltivec=be on LE targets.  */
+	if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+	  return false;
+	arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
+	arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
+	tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
+	location_t loc = gimple_location (stmt);
+	tree arg0_type = TREE_TYPE (arg0);
+	/* Use ptr_type_node (no TBAA) for the arg2_type.
+	   FIXME: (Richard)  "A proper fix would be to transition this type as
+	   seen from the frontend to GIMPLE, for example in a similar way we
+	   do for MEM_REFs by piggy-backing that on an extra argument, a
+	   constant zero pointer of the alias pointer type to use (which would
+	   also serve as a type indicator of the store itself).  I'd use a
+	   target specific internal function for this (not sure if we can have
+	   those target specific, but I guess if it's folded away then that's
+	   fine) and get away with the overload set."  */
+	tree arg2_type = ptr_type_node;
+	/* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	   the tree using the value from arg1.  The resulting type will match
+	   the type of arg2.  */
+	gimple_seq stmts = NULL;
+	tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+	tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg2_type, arg2, temp_offset);
+	/* Mask off any lower bits from the address.  */
+	tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+					  arg2_type, temp_addr,
+					  build_int_cst (arg2_type, -16));
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	/* The desired gimple result should be similar to:
+	   MEM[(__vector floatD.1407 *)_1] = vf1D.2697;  */
+	gimple *g;
+	g = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
+					 build_int_cst (arg2_type, 0)), arg0);
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
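Putting it together, a minimal end-to-end sketch (source assumes -maltivec on a powerpc target; the GIMPLE line in the comment follows the shape given in the patch comment above, with dump numbering omitted):

#include <altivec.h>

void
store_it (vector float vf1, vector float *target)
{
  vec_st (vf1, 0, target);
}

/* The new folding replaces the void builtin call
     __builtin_altivec_stvx_v4sf (vf1, 0, target);
   with a plain aligned vector store, roughly:
     MEM[(vector float *)target] = vf1;
   which is visible in an -fdump-tree dump after the folding runs.  */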