rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).

[gcc]

2017-09-25  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
	for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
	(rs6000_builtin_valid_without_lhs): New helper function.
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST.

From-SVN: r253152
Will Schmidt <will_schmidt@vnet.ibm.com>  2017-09-25 14:35:02 +00:00 (committed by Will Schmidt)
commit df5cc22c7e (parent 2678bf2fb5)
3 changed files with 78 additions and 75 deletions
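For orientation, the built-in being folded earlier is vec_st from altivec.h, which stores a 16-byte vector to the 16-byte-aligned address formed from its pointer and byte-offset operands. Below is a minimal sketch of the kind of source this change affects; the function and variable names are illustrative, not taken from the commit or its testsuite.

    #include <altivec.h>

    /* Hypothetical example: with this change the vec_st call is folded to an
       ordinary aligned vector store early in GIMPLE, instead of surviving as
       a target built-in call until RTL expansion.  */
    void
    store_vf (vector float vf, vector float *dst)
    {
      vec_st (vf, 0, dst);  /* stvx semantics: store vf at (dst + 0) & -16.  */
    }

Folding the store this early lets the aliasing machinery see an ordinary memory assignment, which is the motivation the removed rs6000-c.c comment below spells out.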

gcc/ChangeLog

@@ -1,3 +1,11 @@
+2017-09-25  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
+	for early folding of vector stores (ALTIVEC_BUILTIN_ST_*).
+	(rs6000_builtin_valid_without_lhs): New helper function.
+	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_ST.
+
 2017-09-25  Richard Sandiford  <richard.sandiford@linaro.org>
 
 	* target.h (vec_perm_indices): Use unsigned short rather than

gcc/config/rs6000/rs6000-c.c

@@ -6472,78 +6472,6 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       return stmt;
     }
 
-  /* Expand vec_st into an expression that masks the address and
-     performs the store.  We need to expand this early to allow
-     the best aliasing, as by the time we get into RTL we no longer
-     are able to honor __restrict__, for example.  We may want to
-     consider this for all memory access built-ins.
-
-     When -maltivec=be is specified, or the wrong number of arguments
-     is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_ST
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 3)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-      tree arg2 = (*arglist)[2];
-      /* Construct the masked address.  Let existing error handling take
-         over if we don't have a constant offset.  */
-      arg1 = fold (arg1);
-      if (TREE_CODE (arg1) == INTEGER_CST)
-        {
-          if (!ptrofftype_p (TREE_TYPE (arg1)))
-            arg1 = build1 (NOP_EXPR, sizetype, arg1);
-          tree arg2_type = TREE_TYPE (arg2);
-          if (TREE_CODE (arg2_type) == ARRAY_TYPE && c_dialect_cxx ())
-            {
-              /* Force array-to-pointer decay for C++.  */
-              arg2 = default_conversion (arg2);
-              arg2_type = TREE_TYPE (arg2);
-            }
-          /* Find the built-in to make sure a compatible one exists; if not
-             we fall back to default handling to get the error message.  */
-          for (desc = altivec_overloaded_builtins;
-               desc->code && desc->code != fcode; desc++)
-            continue;
-          for (; desc->code == fcode; desc++)
-            if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-                && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2)
-                && rs6000_builtin_type_compatible (TREE_TYPE (arg2),
-                                                   desc->op3))
-              {
-                tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type,
-                                             arg2, arg1);
-                tree aligned
-                  = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type,
-                                     addr, build_int_cst (arg2_type, -16));
-                tree arg0_type = TREE_TYPE (arg0);
-                if (TYPE_MODE (arg0_type) == V2DImode)
-                  /* Type-based aliasing analysis thinks vector long
-                     and vector long long are different and will put them
-                     in distinct alias classes.  Force our address type
-                     to be a may-alias type to avoid this.  */
-                  arg0_type
-                    = build_pointer_type_for_mode (arg0_type, Pmode,
-                                                   true/*can_alias_all*/);
-                else
-                  arg0_type = build_pointer_type (arg0_type);
-                aligned = build1 (NOP_EXPR, arg0_type, aligned);
-                tree stg = build_indirect_ref (loc, aligned, RO_NULL);
-                tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg,
-                                      convert (TREE_TYPE (stg), arg0));
-                return retval;
-              }
-        }
-    }
-
   for (n = 0;
        !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
        fnargs = TREE_CHAIN (fnargs), n++)
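In source-level terms, the front-end expansion removed above rewrote vec_st into an explicit masked store. A rough, illustration-only equivalent of the tree it built is sketched here; the uintptr_t arithmetic and the names are mine, not the builder calls shown above.

    #include <altivec.h>
    #include <stdint.h>

    /* Sketch of the store the removed expansion produced: add the constant
       byte offset, clear the low four address bits (& -16), and store
       through a pointer to the vector type (a may-alias pointer when the
       stored value has V2DImode).  */
    static void
    vec_st_equivalent (vector float vf, long offset, vector float *dst)
    {
      uintptr_t addr = ((uintptr_t) dst + offset) & ~(uintptr_t) 15;
      *(vector float *) addr = vf;
    }

The new GIMPLE folding in rs6000.c below builds the same address computation with gimple_build and stores through a MEM_REF instead.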

gcc/config/rs6000/rs6000.c

@@ -16157,6 +16157,25 @@ rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
 #endif
 }
 
+/* Helper function to sort out which built-ins may be valid without having
+   a LHS.  */
+static bool
+rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
+{
+  switch (fn_code)
+    {
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Fold a machine-dependent built-in in GIMPLE.  (For folding into
    a constant, use rs6000_fold_builtin.)  */
@@ -16184,8 +16203,9 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   if (!rs6000_fold_gimple)
     return false;
 
-  /* Generic solution to prevent gimple folding of code without a LHS.  */
-  if (!gimple_call_lhs (stmt))
+  /* Prevent gimple folding for code that does not have a LHS, unless it is
+     allowed per the rs6000_builtin_valid_without_lhs helper function.  */
+  if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
     return false;
 
   switch (fn_code)
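To illustrate the LHS distinction the guard above relies on (a hedged example; the function and variable names are made up): a vec_ld call produces a value and therefore has a LHS in GIMPLE, while vec_st returns void and has none, which is why the store built-ins need rs6000_builtin_valid_without_lhs to get past this check.

    #include <altivec.h>

    /* Illustrative only: the load's result gives its GIMPLE call a LHS; the
       store call has no LHS and is foldable only because its built-in code
       is listed in rs6000_builtin_valid_without_lhs.  */
    void
    copy_one_vector (vector float *src, vector float *dst)
    {
      vector float v = vec_ld (0, src);   /* call with a LHS  */
      vec_st (v, 0, dst);                 /* void call, no LHS  */
    }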
@@ -16587,7 +16607,54 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
         gsi_replace (gsi, g, true);
         return true;
       }
+
+    /* Vector stores.  */
+    case ALTIVEC_BUILTIN_STVX_V16QI:
+    case ALTIVEC_BUILTIN_STVX_V8HI:
+    case ALTIVEC_BUILTIN_STVX_V4SI:
+    case ALTIVEC_BUILTIN_STVX_V4SF:
+    case ALTIVEC_BUILTIN_STVX_V2DI:
+    case ALTIVEC_BUILTIN_STVX_V2DF:
+      {
+        /* Do not fold for -maltivec=be on LE targets.  */
+        if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+          return false;
+        arg0 = gimple_call_arg (stmt, 0); /* Value to be stored.  */
+        arg1 = gimple_call_arg (stmt, 1); /* Offset.  */
+        tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address.  */
+        location_t loc = gimple_location (stmt);
+        tree arg0_type = TREE_TYPE (arg0);
+        /* Use ptr_type_node (no TBAA) for the arg2_type.
+           FIXME: (Richard)  "A proper fix would be to transition this type as
+           seen from the frontend to GIMPLE, for example in a similar way we
+           do for MEM_REFs by piggy-backing that on an extra argument, a
+           constant zero pointer of the alias pointer type to use (which would
+           also serve as a type indicator of the store itself).  I'd use a
+           target specific internal function for this (not sure if we can have
+           those target specific, but I guess if it's folded away then that's
+           fine) and get away with the overload set."
+        */
+        tree arg2_type = ptr_type_node;
+        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+           the tree using the value from arg0.  The resulting type will match
+           the type of arg2.  */
+        gimple_seq stmts = NULL;
+        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
+        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+                                       arg2_type, arg2, temp_offset);
+        /* Mask off any lower bits from the address.  */
+        tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+                                          arg2_type, temp_addr,
+                                          build_int_cst (arg2_type, -16));
+        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+        /* The desired gimple result should be similar to:
+           MEM[(__vector floatD.1407 *)_1] = vf1D.2697;  */
+        gimple *g;
+        g = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
+                                         build_int_cst (arg2_type, 0)), arg0);
+        gimple_set_location (g, loc);
+        gsi_replace (gsi, g, true);
+        return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
        fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
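A hedged sketch of how one might observe the new folding (the compiler flags and dump wording are approximate, and the file and function names are invented, not part of this commit): compile the snippet below with -O2 -maltivec on an AltiVec-enabled target and inspect a late GIMPLE dump such as -fdump-tree-optimized; the store should appear as a plain MEM_REF assignment rather than a call to a __builtin_altivec_stvx_* built-in.

    /* stvx-fold-sketch.c: hypothetical check of the early store folding.  */
    #include <altivec.h>

    void
    test_stvx (vector float vf, long off, vector float *p)
    {
      /* Expected to fold roughly to:  MEM[(p + off) & -16] = vf;  */
      vec_st (vf, off, p);
    }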