phiopt/cselim: Improve cselim-limited to commonalize all stores [PR122083]

cselim (and the phiopt's cselim-limited) can commonalize a single
store which makes this too limited in some/many cases. Instead let's
commonalize all trailing stores as much as possible (only in the same
order).
The change is smallish, basically the restriction on being the only store
is removed from single_trailing_store_in_bb (renamed too). And also
looping to remove all of the trailing stores instead of just doing one for
the pass.

Note sink will do the same optimization so doing it earlier seems like a good
idea because it improve change inlining size estimates.
For an example with this change, early inlining can happen for min_cmp<long int>
in g++.dg/opt/pr122083-1.C now; that avoids a -Wnonnull warning as the memcmp with
the null argument is optimized early. It can also catch some min in phiopt1 in some
cases.

Bootstrapped and tested on x86_64-linux-gnu.

Changes since v1:
* v2: For !flag_expensive_optimizations, handle the only store rather than just the last
      store.

	PR tree-optimization/122083

gcc/ChangeLog:

	* tree-ssa-phiopt.cc (single_trailing_store_in_bb): Rename to ...
	(trailing_store_in_bb): This and take new argument to check for
	only store.
	(cond_if_else_store_replacement_limited): Update to use
	trailing_store_in_bb.
	(cond_if_else_store_replacement): Loop until
	cond_if_else_store_replacement_limited returns false.
	(pass_phiopt::execute): Instead of calling cond_if_else_store_replacement_limited
	once, also loop on it.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/ssa-pre-19.c: Disable phiopt and cselim.
	* g++.dg/opt/pr122083-1.C: New test.
	* gcc.dg/tree-ssa/cselim-1.c: New test.
	* gcc.dg/tree-ssa/cselim-2.c: New test.

Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
This commit is contained in:
Andrew Pinski 2025-10-03 20:18:07 -07:00
parent 9242a89d08
commit decd4277e9
5 changed files with 156 additions and 31 deletions

View File

@ -0,0 +1,50 @@
// { dg-do compile { target c++20 } }
// { dg-options "-O2 -Wall" }
// Make sure we don't get a -Wnonnull warning here.
#include <compare>
template<typename Tp>
constexpr auto
min_cmp(Tp x, Tp y)
{
struct Res {
Tp M_min;
decltype(x <=> y) M_cmp;
};
auto c = x <=> y;
if (c > 0)
return Res{y, c};
return Res{x, c};
}
template<typename InputIter1, typename InputIter2>
auto
lexicographical_compare_three_way(InputIter1 first1,
InputIter1 last1,
InputIter2 first2,
InputIter2 last2)
-> decltype(*first1 <=> *first2)
{
const auto [len, lencmp] =
min_cmp(last1 - first1, last2 - first2);
if (len)
{
const auto blen = len * sizeof(*first1);
const auto c
= __builtin_memcmp(&*first1, &*first2, blen) <=> 0;
if (c != 0)
return c;
}
return lencmp;
}
auto
test03()
{
unsigned char a[2] = { 1, 2 };
unsigned char* p = nullptr;
return lexicographical_compare_three_way(p, p, a, a+2);
}

View File

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-phiopt1-stats" } */
struct Loc {
int x[3];
};
void bar (struct Loc *);
int foo (int i, int j, int k, int b)
{
struct Loc IND;
int res;
if (b)
{
IND.x[0] = i;
IND.x[1] = j;
IND.x[2] = k-1;
}
else
{
IND.x[0] = i;
IND.x[1] = j;
IND.x[2] = k;
}
/* This should be optimized to i + j + {k, k + 1}. */
res = IND.x[0] + IND.x[1] + IND.x[2];
/* This is just to prevent SRA. */
bar (&IND);
return res;
}
/* All three stores should be commonalized. */
/* { dg-final { scan-tree-dump "if-then-else store replacement: 3" "phiopt1" } } */

View File

@ -0,0 +1,38 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cselim-stats -fno-ssa-phiopt" } */
struct Loc {
int x[3];
};
void bar (struct Loc *);
int foo (int i, int j, int k, int b)
{
struct Loc IND;
int res;
if (b)
{
IND.x[0] = i;
IND.x[1] = j;
IND.x[2] = k-1;
}
else
{
IND.x[0] = i;
IND.x[1] = j;
IND.x[2] = k;
}
/* This should be optimized to i + j + {k, k + 1}. */
res = IND.x[0] + IND.x[1] + IND.x[2];
/* This is just to prevent SRA. */
bar (&IND);
return res;
}
/* All three stores should be commonalized. */
/* { dg-final { scan-tree-dump "if-then-else store replacement: 3" "cselim" } } */

View File

@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-pre-stats" } */
/* Both phiopt and cselim can commonalize the stores so disable them too. */
/* { dg-options "-O2 -fdump-tree-pre-stats -fno-ssa-phiopt -fno-tree-cselim" } */
struct Loc {
int x[3];

View File

@ -3757,12 +3757,13 @@ cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
return true;
}
/* Return the single store in BB with VDEF or NULL if there are
other stores in the BB or loads following the store. VPHI is
where the only use of the vdef should be. */
/* Return the last store in BB with VDEF or NULL if there are
loads following the store. VPHI is where the only use of the
vdef should be. If ONLYONESTORE is true, then the store is
the only store in the BB. */
static gimple *
single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi, bool onlyonestore)
{
if (SSA_NAME_IS_DEFAULT_DEF (vdef))
return NULL;
@ -3771,12 +3772,14 @@ single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
|| gimple_code (store) == GIMPLE_PHI)
return NULL;
/* Verify there is no other store in this BB. */
if (!SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
/* Verify there is no other store in this BB if requested. */
if (onlyonestore
&& !SSA_NAME_IS_DEFAULT_DEF (gimple_vuse (store))
&& gimple_bb (SSA_NAME_DEF_STMT (gimple_vuse (store))) == bb
&& gimple_code (SSA_NAME_DEF_STMT (gimple_vuse (store))) != GIMPLE_PHI)
return NULL;
/* Verify there is no load or store after the store, the vdef of the store
should only be used by the vphi joining the 2 bbs. */
use_operand_p use_p;
@ -3796,20 +3799,22 @@ single_trailing_store_in_bb (basic_block bb, tree vdef, gphi *vphi)
if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
THEN_BB:
...
ONLY_STORE = Y;
STORE = Y;
...
goto JOIN_BB;
ELSE_BB:
...
ONLY_STORE = Z;
STORE = Z;
...
fallthrough (edge E0)
JOIN_BB:
some more
Handles only the case with single store in THEN_BB and ELSE_BB. That is
Handles only the case with store in THEN_BB and ELSE_BB. That is
cheap enough due to in phiopt and not worry about heurstics. Moving the store
out might provide an opportunity for a phiopt to happen. */
out might provide an opportunity for a phiopt to happen.
At -O1 (!flag_expensive_optimizations), this only handles the only store in
the BBs. */
static bool
cond_if_else_store_replacement_limited (basic_block then_bb, basic_block else_bb,
@ -3820,12 +3825,14 @@ cond_if_else_store_replacement_limited (basic_block then_bb, basic_block else_bb
return false;
tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef, vphi);
gimple *then_assign = trailing_store_in_bb (then_bb, then_vdef, vphi,
!flag_expensive_optimizations);
if (!then_assign)
return false;
tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef, vphi);
gimple *else_assign = trailing_store_in_bb (else_bb, else_vdef, vphi,
!flag_expensive_optimizations);
if (!else_assign)
return false;
@ -3865,25 +3872,15 @@ cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
bool found, ok = false, res;
tree then_lhs, else_lhs;
basic_block blocks[3];
/* Handle the case with single store in THEN_BB and ELSE_BB. That is
cheap enough to always handle as it allows us to elide dependence
checking. */
gphi *vphi = get_virtual_phi (join_bb);
if (!vphi)
return false;
tree then_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (then_bb));
gimple *then_assign = single_trailing_store_in_bb (then_bb, then_vdef, vphi);
if (then_assign)
{
tree else_vdef = PHI_ARG_DEF_FROM_EDGE (vphi, single_succ_edge (else_bb));
gimple *else_assign = single_trailing_store_in_bb (else_bb, else_vdef,
vphi);
if (else_assign)
return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
then_assign, else_assign,
vphi);
}
/* Handle the case with trailing stores in THEN_BB and ELSE_BB. That is
cheap enough to always handle as it allows us to elide dependence
checking. */
while (cond_if_else_store_replacement_limited (then_bb, else_bb, join_bb))
;
/* If either vectorization or if-conversion is disabled then do
not sink any stores. */
@ -4557,10 +4554,11 @@ pass_phiopt::execute (function *)
&& !predictable_edge_p (EDGE_SUCC (bb, 1)))
hoist_adjacent_loads (bb, bb1, bb2, bb3);
/* Try to see if there are only one store in each side of the if
/* Try to see if there are only store in each side of the if
and try to remove that. */
if (EDGE_COUNT (bb3->preds) == 2)
cond_if_else_store_replacement_limited (bb1, bb2, bb3);
while (cond_if_else_store_replacement_limited (bb1, bb2, bb3))
;
}
gimple_stmt_iterator gsi;