mirror of git://gcc.gnu.org/git/gcc.git
phiopt/cselim: Improve cselim-limited to commonalize all stores [PR122083]
cselim (and the phiopt's cselim-limited) can commonalize a single store which makes this too limited in some/many cases. Instead let's commonalize all trailing stores as much as possible (only in the same order). The change is smallish, basically the restriction on being the only store is removed from single_trailing_store_in_bb (renamed too). And also looping to remove all of the trailing stores instead of just doing one for the pass. Note sink will do the same optimization so doing it earlier seems like a good idea because it improve change inlining size estimates. For an example with this change, early inlining can happen for min_cmp<long int> in g++.dg/opt/pr122083-1.C now; that avoids a -Wnonnull warning as the memcmp with the null argument is optimized early. It can also catch some min in phiopt1 in some cases. Bootstrapped and tested on x86_64-linux-gnu. Changes since v1: * v2: For !flag_expensive_optimizations, handle the only store rather than just the last store. PR tree-optimization/122083 gcc/ChangeLog: * tree-ssa-phiopt.cc (single_trailing_store_in_bb): Rename to ... (trailing_store_in_bb): This and take new argument to check for only store. (cond_if_else_store_replacement_limited): Update to use trailing_store_in_bb. (cond_if_else_store_replacement): Loop until cond_if_else_store_replacement_limited returns false. (pass_phiopt::execute): Instead of calling cond_if_else_store_replacement_limited once, also loop on it. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/ssa-pre-19.c: Disable phiopt and cselim. * g++.dg/opt/pr122083-1.C: New test. * gcc.dg/tree-ssa/cselim-1.c: New test. * gcc.dg/tree-ssa/cselim-2.c: New test. Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
This commit is contained in:
parent
9242a89d08
commit
decd4277e9
|
@ -0,0 +1,50 @@
|
|||
// { dg-do compile { target c++20 } }
|
||||
// { dg-options "-O2 -Wall" }
|
||||
|
||||
// Make sure we don't get a -Wnonnull warning here.
|
||||
|
||||
#include <compare>
|
||||
|
||||
template<typename Tp>
|
||||
constexpr auto
|
||||
min_cmp(Tp x, Tp y)
|
||||
{
|
||||
struct Res {
|
||||
Tp M_min;
|
||||
decltype(x <=> y) M_cmp;
|
||||
};
|
||||
auto c = x <=> y;
|
||||
if (c > 0)
|
||||
return Res{y, c};
|
||||
return Res{x, c};
|
||||
}
|
||||
|
||||
|
||||
template<typename InputIter1, typename InputIter2>
|
||||
auto
|
||||
lexicographical_compare_three_way(InputIter1 first1,
|
||||
InputIter1 last1,
|
||||
InputIter2 first2,
|
||||
InputIter2 last2)
|
||||
-> decltype(*first1 <=> *first2)
|
||||
{
|
||||
const auto [len, lencmp] =
|
||||
min_cmp(last1 - first1, last2 - first2);
|
||||
if (len)
|
||||
{
|
||||
const auto blen = len * sizeof(*first1);
|
||||
const auto c
|
||||
= __builtin_memcmp(&*first1, &*first2, blen) <=> 0;
|
||||
if (c != 0)
|
||||
return c;
|
||||
}
|
||||
return lencmp;
|
||||
}
|
||||
|
||||
auto
|
||||
test03()
|
||||
{
|
||||
unsigned char a[2] = { 1, 2 };
|
||||
unsigned char* p = nullptr;
|
||||
return lexicographical_compare_three_way(p, p, a, a+2);
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-phiopt1-stats" } */
|
||||
|
||||
struct Loc {
|
||||
int x[3];
|
||||
};
|
||||
|
||||
void bar (struct Loc *);
|
||||
|
||||
int foo (int i, int j, int k, int b)
|
||||
{
|
||||
struct Loc IND;
|
||||
int res;
|
||||
|
||||
if (b)
|
||||
{
|
||||
IND.x[0] = i;
|
||||
IND.x[1] = j;
|
||||
IND.x[2] = k-1;
|
||||
}
|
||||
else
|
||||
{
|
||||
IND.x[0] = i;
|
||||
IND.x[1] = j;
|
||||
IND.x[2] = k;
|
||||
}
|
||||
|
||||
/* This should be optimized to i + j + {k, k + 1}. */
|
||||
res = IND.x[0] + IND.x[1] + IND.x[2];
|
||||
|
||||
/* This is just to prevent SRA. */
|
||||
bar (&IND);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* All three stores should be commonalized. */
|
||||
/* { dg-final { scan-tree-dump "if-then-else store replacement: 3" "phiopt1" } } */
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-cselim-stats -fno-ssa-phiopt" } */
|
||||
|
||||
struct Loc {
|
||||
int x[3];
|
||||
};
|
||||
|
||||
void bar (struct Loc *);
|
||||
|
||||
int foo (int i, int j, int k, int b)
|
||||
{
|
||||
struct Loc IND;
|
||||
int res;
|
||||
|
||||
if (b)
|
||||
{
|
||||
IND.x[0] = i;
|
||||
IND.x[1] = j;
|
||||
IND.x[2] = k-1;
|
||||
}
|
||||
else
|
||||
{
|
||||
IND.x[0] = i;
|
||||
IND.x[1] = j;
|
||||
IND.x[2] = k;
|
||||
}
|
||||
|
||||
/* This should be optimized to i + j + {k, k + 1}. */
|
||||
res = IND.x[0] + IND.x[1] + IND.x[2];
|
||||
|
||||
/* This is just to prevent SRA. */
|
||||
bar (&IND);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* All three stores should be commonalized. */
|
||||
/* { dg-final { scan-tree-dump "if-then-else store replacement: 3" "cselim" } } */
|
|
@ -1,5 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-pre-stats" } */
|
||||
/* Both phiopt and cselim can commonalize the stores so disable them too. */
|
||||
/* { dg-options "-O2 -fdump-tree-pre-stats -fno-ssa-phiopt -fno-tree-cselim" } */
|
||||
|
||||
struct Loc {
|
||||
int x[3];
|
||||
|
|
|
@ -3757,12 +3757,13 @@ cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Return the single store in BB with VDEF or NULL if there are
|
||||
other stores in the BB or loads following the store. VPHI is
|
||||
where the only use of the vdef should be. */
|
||||
/* Return the last store in BB with VDEF or NULL if there are
|
||||
loads following the store. VPHI is where the only use of the
|
||||
vdef should be. If ONLYONESTORE is true, then the store is
|
||||
the only store in the BB. */
|
||||
|
||||
static gimple *
|
||||
single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
|
||||
trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi, bool onlyonestore)
|
||||
{
|
||||
if (SSA_NAME_IS_DEFAULT_DEF (vdef))
|
||||
return NULL;
|
||||
|
@ -3771,12 +3772,14 @@ single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
|
|||
|| gimple_code (store) == GIMPLE_PHI)
|
||||
return NULL;
|
||||
|
||||
/* Verify there is no other store in this BB. */
|
||||
if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
|
||||
/* Verify there is no other store in this BB if requested. */
|
||||
if (onlyonestore
|
||||
&& !SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
|
||||
&& gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
|
||||
&& gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
|
||||
return NULL;
|
||||
|
||||
|
||||
/* Verify there is no load or store after the store, the vdef of the store
|
||||
should only be used by the vphi joining the 2 bbs. */
|
||||
use_operand_p use_p;
|
||||
|
@ -3796,20 +3799,22 @@ single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
|
|||
if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
|
||||
THEN_BB:
|
||||
...
|
||||
ONLY_STORE = Y;
|
||||
STORE = Y;
|
||||
...
|
||||
goto JOIN_BB;
|
||||
ELSE_BB:
|
||||
...
|
||||
ONLY_STORE = Z;
|
||||
STORE = Z;
|
||||
...
|
||||
fallthrough (edge E0)
|
||||
JOIN_BB:
|
||||
some more
|
||||
|
||||
Handles only the case with single store in THEN_BB and ELSE_BB. That is
|
||||
Handles only the case with store in THEN_BB and ELSE_BB. That is
|
||||
cheap enough due to in phiopt and not worry about heurstics. Moving the store
|
||||
out might provide an opportunity for a phiopt to happen. */
|
||||
out might provide an opportunity for a phiopt to happen.
|
||||
At -O1 (!flag_expensive_optimizations), this only handles the only store in
|
||||
the BBs. */
|
||||
|
||||
static bool
|
||||
cond_if_else_store_replacement_limited (basic_block then_bb, basic_block else_bb,
|
||||
|
@ -3820,12 +3825,14 @@ cond_if_else_store_replacement_limited (basic_block then_bb, basic_block else_bb
|
|||
return false;
|
||||
|
||||
tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
|
||||
gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef, vphi);
|
||||
gimple *then_assign = trailing_store_in_bb (then_bb, then_vdef, vphi,
|
||||
!flag_expensive_optimizations);
|
||||
if (!then_assign)
|
||||
return false;
|
||||
|
||||
tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
|
||||
gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef, vphi);
|
||||
gimple *else_assign = trailing_store_in_bb (else_bb, else_vdef, vphi,
|
||||
!flag_expensive_optimizations);
|
||||
if (!else_assign)
|
||||
return false;
|
||||
|
||||
|
@ -3865,25 +3872,15 @@ cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
|
|||
bool found, ok = false, res;
|
||||
tree then_lhs, else_lhs;
|
||||
basic_block blocks[3];
|
||||
|
||||
/* Handle the case with single store in THEN_BB and ELSE_BB. That is
|
||||
cheap enough to always handle as it allows us to elide dependence
|
||||
checking. */
|
||||
gphi *vphi = get_virtual_phi (join_bb);
|
||||
if (!vphi)
|
||||
return false;
|
||||
tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
|
||||
gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef, vphi);
|
||||
if (then_assign)
|
||||
{
|
||||
tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
|
||||
gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef,
|
||||
vphi);
|
||||
if (else_assign)
|
||||
return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
|
||||
then_assign, else_assign,
|
||||
vphi);
|
||||
}
|
||||
|
||||
/* Handle the case with trailing stores in THEN_BB and ELSE_BB. That is
|
||||
cheap enough to always handle as it allows us to elide dependence
|
||||
checking. */
|
||||
while (cond_if_else_store_replacement_limited (then_bb, else_bb, join_bb))
|
||||
;
|
||||
|
||||
/* If either vectorization or if-conversion is disabled then do
|
||||
not sink any stores. */
|
||||
|
@ -4557,10 +4554,11 @@ pass_phiopt::execute (function *)
|
|||
&& !predictable_edge_p (EDGE_SUCC (bb, 1)))
|
||||
hoist_adjacent_loads (bb, bb1, bb2, bb3);
|
||||
|
||||
/* Try to see if there are only one store in each side of the if
|
||||
/* Try to see if there are only store in each side of the if
|
||||
and try to remove that. */
|
||||
if (EDGE_COUNT (bb3->preds) == 2)
|
||||
cond_if_else_store_replacement_limited (bb1, bb2, bb3);
|
||||
while (cond_if_else_store_replacement_limited (bb1, bb2, bb3))
|
||||
;
|
||||
}
|
||||
|
||||
gimple_stmt_iterator gsi;
|
||||
|
|
Loading…
Reference in New Issue