rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
[gcc]
2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
	for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
From-SVN: r252975
commit d14c60ad26
parent 81b29ad80a
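For context, here is a minimal sketch (not part of the commit) of the kind of
source this change affects. It assumes a PowerPC target compiled with
-maltivec, where vec_ld resolves to one of the ALTIVEC_BUILTIN_LVX_* built-ins
that rs6000_gimple_fold_builtin now folds early:

/* Illustrative only: this vec_ld maps to ALTIVEC_BUILTIN_LVX_V4SI,
   which the new code rewrites into a plain aligned load in GIMPLE.  */
#include <altivec.h>

vector signed int
load_v4si (int offset, const signed int *p)
{
  /* lvx semantics: load 16 bytes from ((char *) p + offset) & -16.  */
  return vec_ld (offset, p);
}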
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-09-19  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
+	for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
+	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+	Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
+
 2017-09-19  Richard Biener  <rguenther@suse.de>
 
 	PR tree-optimization/82244
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -6472,85 +6472,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
       return stmt;
     }
 
-  /* Expand vec_ld into an expression that masks the address and
-     performs the load.  We need to expand this early to allow
+  /* Expand vec_st into an expression that masks the address and
+     performs the store.  We need to expand this early to allow
      the best aliasing, as by the time we get into RTL we no longer
      are able to honor __restrict__, for example.  We may want to
      consider this for all memory access built-ins.
 
      When -maltivec=be is specified, or the wrong number of arguments
      is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_LD
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 2)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-
-      /* Strip qualifiers like "const" from the pointer arg.  */
-      tree arg1_type = TREE_TYPE (arg1);
-      if (TREE_CODE (arg1_type) == ARRAY_TYPE && c_dialect_cxx ())
-	{
-	  /* Force array-to-pointer decay for C++.  */
-	  arg1 = default_conversion (arg1);
-	  arg1_type = TREE_TYPE (arg1);
-	}
-      if (!POINTER_TYPE_P (arg1_type))
-	goto bad;
-
-      tree inner_type = TREE_TYPE (arg1_type);
-      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
-	{
-	  arg1_type = build_pointer_type (build_qualified_type (inner_type,
-								0));
-	  arg1 = fold_convert (arg1_type, arg1);
-	}
-
-      /* Construct the masked address.  Let existing error handling take
-	 over if we don't have a constant offset.  */
-      arg0 = fold (arg0);
-
-      if (TREE_CODE (arg0) == INTEGER_CST)
-	{
-	  if (!ptrofftype_p (TREE_TYPE (arg0)))
-	    arg0 = build1 (NOP_EXPR, sizetype, arg0);
-
-	  tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
-				       arg1, arg0);
-	  tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
-					  build_int_cst (arg1_type, -16));
-
-	  /* Find the built-in to get the return type so we can convert
-	     the result properly (or fall back to default handling if the
-	     arguments aren't compatible).  */
-	  for (desc = altivec_overloaded_builtins;
-	       desc->code && desc->code != fcode; desc++)
-	    continue;
-
-	  for (; desc->code == fcode; desc++)
-	    if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-		&& (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
-						    desc->op2)))
-	      {
-		tree ret_type = rs6000_builtin_type (desc->ret_type);
-		if (TYPE_MODE (ret_type) == V2DImode)
-		  /* Type-based aliasing analysis thinks vector long
-		     and vector long long are different and will put them
-		     in distinct alias classes.  Force our return type
-		     to be a may-alias type to avoid this.  */
-		  ret_type
-		    = build_pointer_type_for_mode (ret_type, Pmode,
-						   true/*can_alias_all*/);
-		else
-		  ret_type = build_pointer_type (ret_type);
-		aligned = build1 (NOP_EXPR, ret_type, aligned);
-		tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
-		return ret_val;
-	      }
-	}
-    }
-
-  /* Similarly for stvx.  */
   if (fcode == ALTIVEC_BUILTIN_VEC_ST
       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
       && nargs == 3)
     {
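The comment kept above notes that __restrict__ can no longer be honored once
we reach RTL, which is why these built-ins are expanded early. A hedged sketch
of the kind of code that benefits (illustrative only, assuming -maltivec; not
from the patch):

/* With the load/store built-ins exposed as ordinary GIMPLE memory
   accesses, the alias oracle can still exploit the __restrict__
   qualifiers below; by RTL that information would be lost.  */
#include <altivec.h>

void
copy_block (int off, int *__restrict__ dst, const int *__restrict__ src)
{
  vector signed int v = vec_ld (off, src);
  vec_st (v, off, dst);
}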
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16546,6 +16546,48 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	update_call_from_tree (gsi, res);
 	return true;
       }
+    /* Vector loads.  */
+    case ALTIVEC_BUILTIN_LVX_V16QI:
+    case ALTIVEC_BUILTIN_LVX_V8HI:
+    case ALTIVEC_BUILTIN_LVX_V4SI:
+    case ALTIVEC_BUILTIN_LVX_V4SF:
+    case ALTIVEC_BUILTIN_LVX_V2DI:
+    case ALTIVEC_BUILTIN_LVX_V2DF:
+      {
+	arg0 = gimple_call_arg (stmt, 0);  // offset
+	arg1 = gimple_call_arg (stmt, 1);  // address
+	/* Do not fold for -maltivec=be on LE targets.  */
+	if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+	  return false;
+	lhs = gimple_call_lhs (stmt);
+	location_t loc = gimple_location (stmt);
+	/* Since arg1 may be cast to a different type, just use ptr_type_node
+	   here instead of trying to enforce TBAA on pointer types.  */
+	tree arg1_type = ptr_type_node;
+	tree lhs_type = TREE_TYPE (lhs);
+	/* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	   the tree using the value from arg0.  The resulting type will match
+	   the type of arg1.  */
+	gimple_seq stmts = NULL;
+	tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+	tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg1_type, arg1, temp_offset);
+	/* Mask off any lower bits from the address.  */
+	tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+					  arg1_type, temp_addr,
+					  build_int_cst (arg1_type, -16));
+	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	/* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+	   take an offset, but since we've already incorporated the offset
+	   above, here we just pass in a zero.  */
+	gimple *g;
+	g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
+					      build_int_cst (arg1_type, 0)));
+	gimple_set_location (g, loc);
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
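The new case builds, in GIMPLE, the same address arithmetic the lvx
instruction performs in hardware: convert the offset to sizetype, add it to
the base pointer, mask off the low four bits, and load through a MEM_REF. In
rough C terms (an illustrative equivalence, not GCC source):

/* Illustrative only: C rendering of the folded GIMPLE sequence
     _1 = (sizetype) arg0;            // temp_offset
     _2 = arg1 p+ _1;                 // temp_addr
     _3 = _2 & -16;                   // aligned_addr
     lhs = MEM[(vector type *) _3];   */
#include <altivec.h>

vector signed int
lvx_semantics (long offset, const void *p)
{
  unsigned long addr = ((unsigned long) p + (unsigned long) offset) & -16UL;
  return *(const vector signed int *) addr;
}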