[multiple changes]

2014-11-12  Thomas Preud'homme  <thomas.preudhomme@arm.com>

    gcc/
    PR tree-optimization/63761
    * tree-ssa-math-opts.c (bswap_replace): Construct gsi from cur_stmt
    rather than taking it as a parameter. Add some comments to explain the
    gsi_move_before in case of load and why canonicalization of bswap into
    a rotation is only done for 16bit values.
    (pass_optimize_bswap::execute): Adapt for loop via gsi to make gsi
    refer to the statement just before cur_stmt. Ignore 16bit bswap that
    are already in canonical form. Adapt bswap_replace to removal of its
    gsi parameter.

    2014-11-12  Thomas Preud'homme  <thomas.preudhomme@arm.com>

    gcc/testsuite/
    PR tree-optimization/63761
    * gcc.c-torture/compile/pr63761.c: New test.

From-SVN: r217409
This commit is contained in:
Thomas Preud'homme 2014-11-12 09:50:20 +00:00 committed by Thomas Preud'homme
parent d30fc980ac
commit f351abd679
4 changed files with 74 additions and 18 deletions

View File

@ -1,3 +1,15 @@
2014-11-12 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR tree-optimization/63761
* tree-ssa-math-opts.c (bswap_replace): Construct gsi from cur_stmt
rather than taking it as a parameter. Add some comments to explain the
gsi_move_before in case of load and why canonicalization of bswap into
a rotation is only done for 16bit values.
(pass_optimize_bswap::execute): Adapt for loop via gsi to make gsi
refer to the statement just before cur_stmt. Ignore 16bit bswap that
are already in canonical form. Adapt bswap_replace to removal of its
gsi parameter.
2014-11-12 Richard Sandiford <richard.sandiford@arm.com> 2014-11-12 Richard Sandiford <richard.sandiford@arm.com>
* rtl.h (rtx_function, for_each_rtx, for_each_rtx_in_insn): Delete. * rtl.h (rtx_function, for_each_rtx, for_each_rtx_in_insn): Delete.

View File

@ -1,3 +1,8 @@
2014-11-12 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR tree-optimization/63761
* gcc.c-torture/compile/pr63761.c: New test.
2014-11-12 Jiong Wang <jiong.wang@arm.com> 2014-11-12 Jiong Wang <jiong.wang@arm.com>
* lib/gcc-dg.exp (${tool}_load): Truncate gcc output. * lib/gcc-dg.exp (${tool}_load): Truncate gcc output.

View File

@ -0,0 +1,17 @@
/* Compile-only regression test for PR tree-optimization/63761.
   The function below contains two hand-written byte swaps of 16-bit
   quantities, the second of which is fed by a load from a global and is
   separated from that load by a conditional call.  NOTE(review): this
   presumably exercises the case where the bswap pass moves the statement
   it replaces next to the load, into a different basic block -- confirm
   against the PR.  The exact shape of the expressions matters, so do not
   "simplify" them.  */
int a, b;
short c;
void fn1 ();
void
fn2 (unsigned short p1)
{
int d;
/* Swap the two bytes of P1 using shifts; once stored into the short C
   this is a rotation by 8 bits (assuming a 16-bit short).  */
c = p1 >> 8 | p1 << 8;
/* Load from the global B; D is the source of the second byte swap.  */
d = b;
/* Conditional call placed between the load above and the byte swap
   below.  */
if (d)
fn1 ();
/* Swap the two low-order bytes of D.  Shifts bind tighter than '&',
   which binds tighter than '|', so this is
   ((d >> 8) & 0x00FF) | ((d << 8) & 0xFF00).  */
a = d >> 8 & 0x00FF
| d << 8 & 0xFF00;
}

View File

@ -2172,23 +2172,28 @@ public:
}; // class pass_optimize_bswap }; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the statement CUR_STMT at /* Perform the bswap optimization: replace the expression computed in the rhs
GSI with a load of type, VUSE and set-alias as described by N if a of CUR_STMT by an equivalent bswap, load or load + bswap expression.
memory source is involved (N->base_addr is non null), followed by Which of these alternatives replaces the rhs is given by N->base_addr (non
the builtin bswap invocation in FNDECL if BSWAP is true. SRC_STMT null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
gives where should the replacement be made. It also gives the load to perform are also given in N while the builtin bswap invocation is given
source on which CUR_STMT is operating via its rhs's first tree nad in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of the
N->range gives the size of the expression involved for maintaining load statements involved to construct the rhs in CUR_STMT and N->range gives
some statistics. */ the size of the rhs expression for maintaining some statistics.
Note that if the replacement involves a load, CUR_STMT is moved just after
SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
changing basic block. */
static bool static bool
bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt, bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
tree fndecl, tree bswap_type, tree load_type, tree load_type, struct symbolic_number *n, bool bswap)
struct symbolic_number *n, bool bswap)
{ {
gimple_stmt_iterator gsi;
tree src, tmp, tgt; tree src, tmp, tgt;
gimple bswap_stmt; gimple bswap_stmt;
gsi = gsi_for_stmt (cur_stmt);
src = gimple_assign_rhs1 (src_stmt); src = gimple_assign_rhs1 (src_stmt);
tgt = gimple_assign_lhs (cur_stmt); tgt = gimple_assign_lhs (cur_stmt);
@ -2207,6 +2212,9 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
&& SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align)) && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
return false; return false;
/* Move cur_stmt just before one of the loads of the original
expression to ensure it has the same VUSE. See PR61517 for what
could go wrong. */
gsi_move_before (&gsi, &gsi_ins); gsi_move_before (&gsi, &gsi_ins);
gsi = gsi_for_stmt (cur_stmt); gsi = gsi_for_stmt (cur_stmt);
@ -2293,7 +2301,10 @@ bswap_replace (gimple cur_stmt, gimple_stmt_iterator gsi, gimple src_stmt,
tmp = src; tmp = src;
/* Canonical form for 16 bit bswap is a rotate expression. */ /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
are considered, as a rotation of 2N-bit values by N bits is generally not
equivalent to a bswap. Consider for instance 0x01020304 rotated by 16 bits,
which gives 0x03040102, while a bswap of that value is 0x04030201. */
if (bswap && n->range == 16) if (bswap && n->range == 16)
{ {
tree count = build_int_cst (NULL, BITS_PER_UNIT); tree count = build_int_cst (NULL, BITS_PER_UNIT);
@ -2393,10 +2404,10 @@ pass_optimize_bswap::execute (function *fun)
gimple_stmt_iterator gsi; gimple_stmt_iterator gsi;
/* We do a reverse scan for bswap patterns to make sure we get the /* We do a reverse scan for bswap patterns to make sure we get the
widest match. As bswap pattern matching doesn't handle widest match. As bswap pattern matching doesn't handle previously
previously inserted smaller bswap replacements as sub- inserted smaller bswap replacements as sub-patterns, the wider
patterns, the wider variant wouldn't be detected. */ variant wouldn't be detected. */
for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
{ {
gimple src_stmt, cur_stmt = gsi_stmt (gsi); gimple src_stmt, cur_stmt = gsi_stmt (gsi);
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
@ -2404,6 +2415,14 @@ pass_optimize_bswap::execute (function *fun)
struct symbolic_number n; struct symbolic_number n;
bool bswap; bool bswap;
/* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
might be moved to a different basic block by bswap_replace and gsi
must not point to it if that's the case. Moving the gsi_prev
here makes sure that gsi points to the statement previous to
cur_stmt while still making sure that all statements are
considered in this basic block. */
gsi_prev (&gsi);
if (!is_gimple_assign (cur_stmt)) if (!is_gimple_assign (cur_stmt))
continue; continue;
@ -2431,6 +2450,9 @@ pass_optimize_bswap::execute (function *fun)
switch (n.range) switch (n.range)
{ {
case 16: case 16:
/* Already in canonical form, nothing to do. */
if (code == LROTATE_EXPR || code == RROTATE_EXPR)
continue;
load_type = uint16_type_node; load_type = uint16_type_node;
if (bswap16_p) if (bswap16_p)
{ {
@ -2461,8 +2483,8 @@ pass_optimize_bswap::execute (function *fun)
if (bswap && !fndecl) if (bswap && !fndecl)
continue; continue;
if (bswap_replace (cur_stmt, gsi, src_stmt, fndecl, bswap_type, if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
load_type, &n, bswap)) &n, bswap))
changed = true; changed = true;
} }
} }