mirror of git://gcc.gnu.org/git/gcc.git

commit d14c60ad26 (parent 81b29ad80a)

rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding
of vector loads (ALTIVEC_BUILTIN_LVX_*).

[gcc]

2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
	for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.

From-SVN: r252975
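For context, the point of folding these built-ins early (in GIMPLE rather than at RTL expansion) is aliasing quality: as the comment migrated in rs6000-c.c below notes, by the time we get into RTL we can no longer honor __restrict__. A minimal C sketch of the kind of code that benefits; the function name is hypothetical, and it assumes a powerpc target compiled with -maltivec:

#include <altivec.h>

/* With vec_ld folded to a plain aligned load in GIMPLE, the alias
   oracle can still use the restrict qualifiers to keep the store to
   *b from interfering with the load from *a.  */
vector float
restrict_sketch (const float *__restrict__ a, float *__restrict__ b)
{
  b[0] = 0.0f;
  return vec_ld (0, a);
}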
gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
+	for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
+	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
+
 2017-09-19  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/82244
gcc/config/rs6000/rs6000-c.c
@@ -6472,85 +6472,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       return stmt;
     }
 
-  /* Expand vec_ld into an expression that masks the address and
-     performs the load.  We need to expand this early to allow
+  /* Expand vec_st into an expression that masks the address and
+     performs the store.  We need to expand this early to allow
      the best aliasing, as by the time we get into RTL we no longer
      are able to honor __restrict__, for example.  We may want to
      consider this for all memory access built-ins.
 
      When -maltivec=be is specified, or the wrong number of arguments
      is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_LD
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 2)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-
-      /* Strip qualifiers like "const" from the pointer arg.  */
-      tree arg1_type = TREE_TYPE (arg1);
-      if (TREE_CODE (arg1_type) == ARRAY_TYPE && c_dialect_cxx ())
-        {
-          /* Force array-to-pointer decay for C++.  */
-          arg1 = default_conversion (arg1);
-          arg1_type = TREE_TYPE (arg1);
-        }
-      if (!POINTER_TYPE_P (arg1_type))
-        goto bad;
-
-      tree inner_type = TREE_TYPE (arg1_type);
-      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
-        {
-          arg1_type = build_pointer_type (build_qualified_type (inner_type,
-                                                                0));
-          arg1 = fold_convert (arg1_type, arg1);
-        }
-
-      /* Construct the masked address.  Let existing error handling take
-         over if we don't have a constant offset.  */
-      arg0 = fold (arg0);
-
-      if (TREE_CODE (arg0) == INTEGER_CST)
-        {
-          if (!ptrofftype_p (TREE_TYPE (arg0)))
-            arg0 = build1 (NOP_EXPR, sizetype, arg0);
-
-          tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
-                                       arg1, arg0);
-          tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
-                                          build_int_cst (arg1_type, -16));
-
-          /* Find the built-in to get the return type so we can convert
-             the result properly (or fall back to default handling if the
-             arguments aren't compatible).  */
-          for (desc = altivec_overloaded_builtins;
-               desc->code && desc->code != fcode; desc++)
-            continue;
-
-          for (; desc->code == fcode; desc++)
-            if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-                && (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
-                                                    desc->op2)))
-              {
-                tree ret_type = rs6000_builtin_type (desc->ret_type);
-                if (TYPE_MODE (ret_type) == V2DImode)
-                  /* Type-based aliasing analysis thinks vector long
-                     and vector long long are different and will put them
-                     in distinct alias classes.  Force our return type
-                     to be a may-alias type to avoid this.  */
-                  ret_type
-                    = build_pointer_type_for_mode (ret_type, Pmode,
-                                                   true/*can_alias_all*/);
-                else
-                  ret_type = build_pointer_type (ret_type);
-                aligned = build1 (NOP_EXPR, ret_type, aligned);
-                tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
-                return ret_val;
-              }
-        }
-    }
 
-  /* Similarly for stvx.  */
   if (fcode == ALTIVEC_BUILTIN_VEC_ST
       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
       && nargs == 3)
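The V2DImode branch in the block removed above works around type-based alias analysis treating vector long and vector long long as distinct alias classes. A rough C-level analogue of that workaround, using GCC's may_alias type attribute; the typedef and function names are hypothetical, and vector long long assumes -mvsx:

#include <altivec.h>

/* A load through a may_alias type is exempt from TBAA, so it may
   legally read storage that was written as vector long.  */
typedef vector long long v2di_alias __attribute__ ((may_alias));

vector long long
load_any_v2di (const void *p)
{
  return *(const v2di_alias *) p;
}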
gcc/config/rs6000/rs6000.c
@@ -16546,6 +16546,48 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
         update_call_from_tree (gsi, res);
         return true;
       }
+    /* Vector loads.  */
+    case ALTIVEC_BUILTIN_LVX_V16QI:
+    case ALTIVEC_BUILTIN_LVX_V8HI:
+    case ALTIVEC_BUILTIN_LVX_V4SI:
+    case ALTIVEC_BUILTIN_LVX_V4SF:
+    case ALTIVEC_BUILTIN_LVX_V2DI:
+    case ALTIVEC_BUILTIN_LVX_V2DF:
+      {
+        arg0 = gimple_call_arg (stmt, 0);  // offset
+        arg1 = gimple_call_arg (stmt, 1);  // address
+        /* Do not fold for -maltivec=be on LE targets.  */
+        if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+          return false;
+        lhs = gimple_call_lhs (stmt);
+        location_t loc = gimple_location (stmt);
+        /* Since arg1 may be cast to a different type, just use ptr_type_node
+           here instead of trying to enforce TBAA on pointer types.  */
+        tree arg1_type = ptr_type_node;
+        tree lhs_type = TREE_TYPE (lhs);
+        /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+           the tree using the value from arg0.  The resulting type will match
+           the type of arg1.  */
+        gimple_seq stmts = NULL;
+        tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+        tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+                                       arg1_type, arg1, temp_offset);
+        /* Mask off any lower bits from the address.  */
+        tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+                                          arg1_type, temp_addr,
+                                          build_int_cst (arg1_type, -16));
+        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+        /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+           take an offset, but since we've already incorporated the offset
+           above, here we just pass in a zero.  */
+        gimple *g;
+        g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
+                                              build_int_cst (arg1_type, 0)));
+        gimple_set_location (g, loc);
+        gsi_replace (gsi, g, true);
+        return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
         fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
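Read back as C, the GIMPLE sequence built above (convert the offset to sizetype, POINTER_PLUS_EXPR, BIT_AND_EXPR with -16, then a MEM_REF with a zero offset) amounts to roughly the following. The helper is a hypothetical sketch for the V4SI case, not code from the patch:

#include <altivec.h>
#include <stdint.h>

/* lvx ignores the low four address bits, so the folded form masks the
   sum of base and offset down to a 16-byte boundary and then performs
   an ordinary vector load.  */
vector signed int
lvx_v4si_sketch (long offset, const void *p)
{
  uintptr_t addr = (uintptr_t) p + (uintptr_t) offset;  /* POINTER_PLUS_EXPR */
  addr &= (uintptr_t) -16;                              /* BIT_AND_EXPR */
  return *(const vector signed int *) addr;             /* MEM_REF */
}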