mirror of git://gcc.gnu.org/git/gcc.git
Remove inefficient branchless conditional negate optimization
2015-03-06 Wilco Dijkstra <wdijkstr@arm.com>
gcc/
* tree-ssa-phiopt.c (neg_replacement): Remove.
(tree_ssa_phiopt_worker): Remove negate optimization.
gcc/testsuite/
* gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
* gcc.target/aarch64/csneg-1.c (test_csneg_cmp): New test.
* gcc.target/i386/pr45685.c: Moved test, check for conditional move on x64.
From-SVN: r221246
This commit is contained in:
parent
c271e224c2
commit
268cb4b373
|
|
@ -1,3 +1,8 @@
|
||||||
|
2015-03-06 Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
|
||||||
|
* tree-ssa-phiopt.c (neg_replacement): Remove.
|
||||||
|
(tree_ssa_phiopt_worker): Remove negate optimization.
|
||||||
|
|
||||||
2015-03-05 Jan Hubicka <hubicka@ucw.cz>
|
2015-03-05 Jan Hubicka <hubicka@ucw.cz>
|
||||||
|
|
||||||
PR ipa/65302
|
PR ipa/65302
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,10 @@
|
||||||
|
2015-03-06 Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
|
||||||
|
* gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
|
||||||
|
* gcc.target/aarch64/csneg-1.c (test_csneg_cmp): New test.
|
||||||
|
* gcc.target/i386/pr45685.c: Moved test, check for conditional move on
|
||||||
|
x64.
|
||||||
|
|
||||||
2015-03-05 Jan Hubicka <hubicka@ucw.cz>
|
2015-03-05 Jan Hubicka <hubicka@ucw.cz>
|
||||||
|
|
||||||
* g++.dg/lto/pr65302_1.C: New testcase.
|
* g++.dg/lto/pr65302_1.C: New testcase.
|
||||||
|
|
|
||||||
|
|
@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0,
|
||||||
x4 = (x0 == x1) ? x3 : -x2;
|
x4 = (x0 == x1) ? x3 : -x2;
|
||||||
return x4;
|
return x4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int test_csneg_cmp(int x)
|
||||||
|
{
|
||||||
|
/* { dg-final { scan-assembler "csneg\tw\[0-9\]" } } */
|
||||||
|
if (x > 3)
|
||||||
|
x = -x;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/* { dg-do compile } */
|
/* { dg-do compile { target { ! { ia32 } } } } */
|
||||||
/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */
|
/* { dg-options "-O3" } */
|
||||||
|
|
||||||
typedef unsigned long int uint64_t;
|
typedef unsigned long int uint64_t;
|
||||||
typedef long int int64_t;
|
typedef long int int64_t;
|
||||||
|
|
@ -36,6 +36,4 @@ int summation_helper_2(int64_t* products, uint64_t count)
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */
|
/* { dg-final { scan-assembler-times "cmov" 4 } } */
|
||||||
/* { dg-final { cleanup-tree-dump "phiopt1" } } */
|
|
||||||
|
|
||||||
|
|
@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
|
||||||
edge, edge, gimple, tree, tree);
|
edge, edge, gimple, tree, tree);
|
||||||
static bool abs_replacement (basic_block, basic_block,
|
static bool abs_replacement (basic_block, basic_block,
|
||||||
edge, edge, gimple, tree, tree);
|
edge, edge, gimple, tree, tree);
|
||||||
static bool neg_replacement (basic_block, basic_block,
|
|
||||||
edge, edge, gimple, tree, tree);
|
|
||||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
||||||
hash_set<tree> *);
|
hash_set<tree> *);
|
||||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
||||||
|
|
@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
|
||||||
/* Calculate the set of non-trapping memory accesses. */
|
/* Calculate the set of non-trapping memory accesses. */
|
||||||
nontrap = get_non_trapping ();
|
nontrap = get_non_trapping ();
|
||||||
|
|
||||||
/* The replacement of conditional negation with a non-branching
|
|
||||||
sequence is really only a win when optimizing for speed and we
|
|
||||||
can avoid transformations by gimple if-conversion that result
|
|
||||||
in poor RTL generation.
|
|
||||||
|
|
||||||
Ideally either gimple if-conversion or the RTL expanders will
|
|
||||||
be improved and the code to emit branchless conditional negation
|
|
||||||
can be removed. */
|
|
||||||
bool replace_conditional_negation = false;
|
|
||||||
if (!do_store_elim)
|
|
||||||
replace_conditional_negation
|
|
||||||
= ((!optimize_size && optimize >= 2)
|
|
||||||
|| (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
|
|
||||||
&& flag_tree_loop_if_convert != 0)
|
|
||||||
|| flag_tree_loop_if_convert == 1
|
|
||||||
|| flag_tree_loop_if_convert_stores == 1));
|
|
||||||
|
|
||||||
/* Search every basic block for COND_EXPR we may be able to optimize.
|
/* Search every basic block for COND_EXPR we may be able to optimize.
|
||||||
|
|
||||||
We walk the blocks in order that guarantees that a block with
|
We walk the blocks in order that guarantees that a block with
|
||||||
|
|
@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
|
||||||
cfgchanged = true;
|
cfgchanged = true;
|
||||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||||
cfgchanged = true;
|
cfgchanged = true;
|
||||||
else if (replace_conditional_negation
|
|
||||||
&& neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
|
||||||
cfgchanged = true;
|
|
||||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||||
cfgchanged = true;
|
cfgchanged = true;
|
||||||
}
|
}
|
||||||
|
|
@ -1319,140 +1297,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The function neg_replacement replaces conditional negation with
|
|
||||||
equivalent straight line code. Returns TRUE if replacement is done,
|
|
||||||
otherwise returns FALSE.
|
|
||||||
|
|
||||||
COND_BB branches around negation occuring in MIDDLE_BB.
|
|
||||||
|
|
||||||
E0 and E1 are edges out of COND_BB. E0 reaches MIDDLE_BB and
|
|
||||||
E1 reaches the other successor which should contain PHI with
|
|
||||||
arguments ARG0 and ARG1.
|
|
||||||
|
|
||||||
Assuming negation is to occur when the condition is true,
|
|
||||||
then the non-branching sequence is:
|
|
||||||
|
|
||||||
result = (rhs ^ -cond) + cond
|
|
||||||
|
|
||||||
Inverting the condition or its result gives us negation
|
|
||||||
when the original condition is false. */
|
|
||||||
|
|
||||||
static bool
|
|
||||||
neg_replacement (basic_block cond_bb, basic_block middle_bb,
|
|
||||||
edge e0 ATTRIBUTE_UNUSED, edge e1,
|
|
||||||
gimple phi, tree arg0, tree arg1)
|
|
||||||
{
|
|
||||||
gimple new_stmt, cond;
|
|
||||||
gimple_stmt_iterator gsi;
|
|
||||||
gimple assign;
|
|
||||||
edge true_edge, false_edge;
|
|
||||||
tree rhs, lhs;
|
|
||||||
enum tree_code cond_code;
|
|
||||||
bool invert = false;
|
|
||||||
|
|
||||||
/* This transformation performs logical operations on the
|
|
||||||
incoming arguments. So force them to be integral types. */
|
|
||||||
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* OTHER_BLOCK must have only one executable statement which must have the
|
|
||||||
form arg0 = -arg1 or arg1 = -arg0. */
|
|
||||||
|
|
||||||
assign = last_and_only_stmt (middle_bb);
|
|
||||||
/* If we did not find the proper negation assignment, then we can not
|
|
||||||
optimize. */
|
|
||||||
if (assign == NULL)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* If we got here, then we have found the only executable statement
|
|
||||||
in OTHER_BLOCK. If it is anything other than arg0 = -arg1 or
|
|
||||||
arg1 = -arg0, then we can not optimize. */
|
|
||||||
if (gimple_code (assign) != GIMPLE_ASSIGN)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
lhs = gimple_assign_lhs (assign);
|
|
||||||
|
|
||||||
if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
rhs = gimple_assign_rhs1 (assign);
|
|
||||||
|
|
||||||
/* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
|
|
||||||
if (!(lhs == arg0 && rhs == arg1)
|
|
||||||
&& !(lhs == arg1 && rhs == arg0))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* The basic sequence assumes we negate when the condition is true.
|
|
||||||
If we need the opposite, then we will either need to invert the
|
|
||||||
condition or its result. */
|
|
||||||
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
|
||||||
invert = false_edge->dest == middle_bb;
|
|
||||||
|
|
||||||
/* Unlike abs_replacement, we can handle arbitrary conditionals here. */
|
|
||||||
cond = last_stmt (cond_bb);
|
|
||||||
cond_code = gimple_cond_code (cond);
|
|
||||||
|
|
||||||
/* If inversion is needed, first try to invert the test since
|
|
||||||
that's cheapest. */
|
|
||||||
if (invert)
|
|
||||||
{
|
|
||||||
bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
|
|
||||||
enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
|
|
||||||
|
|
||||||
/* If invert_tree_comparison was successful, then use its return
|
|
||||||
value as the new code and note that inversion is no longer
|
|
||||||
needed. */
|
|
||||||
if (new_code != ERROR_MARK)
|
|
||||||
{
|
|
||||||
cond_code = new_code;
|
|
||||||
invert = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tree cond_val = make_ssa_name (boolean_type_node);
|
|
||||||
new_stmt = gimple_build_assign (cond_val, cond_code,
|
|
||||||
gimple_cond_lhs (cond),
|
|
||||||
gimple_cond_rhs (cond));
|
|
||||||
gsi = gsi_last_bb (cond_bb);
|
|
||||||
gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
/* If we still need inversion, then invert the result of the
|
|
||||||
condition. */
|
|
||||||
if (invert)
|
|
||||||
{
|
|
||||||
tree tmp = make_ssa_name (boolean_type_node);
|
|
||||||
new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
|
|
||||||
boolean_true_node);
|
|
||||||
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
cond_val = tmp;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get the condition in the right type so that we can perform
|
|
||||||
logical and arithmetic operations on it. */
|
|
||||||
tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
|
|
||||||
new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
|
|
||||||
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
|
|
||||||
new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
|
|
||||||
cond_val_converted);
|
|
||||||
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
tree tmp = make_ssa_name (TREE_TYPE (rhs));
|
|
||||||
new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
|
|
||||||
neg_cond_val_converted);
|
|
||||||
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
|
|
||||||
new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
|
|
||||||
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
|
|
||||||
|
|
||||||
replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
|
|
||||||
|
|
||||||
/* Note that we optimized this PHI. */
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Auxiliary functions to determine the set of memory accesses which
|
/* Auxiliary functions to determine the set of memory accesses which
|
||||||
can't trap because they are preceded by accesses to the same memory
|
can't trap because they are preceded by accesses to the same memory
|
||||||
portion. We do that for MEM_REFs, so we only need to track
|
portion. We do that for MEM_REFs, so we only need to track
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue