From 5ee4349fa9c34e711ca6a0a6b21defb62dfa2c62 Mon Sep 17 00:00:00 2001
From: Arsen Arsenović
Date: Tue, 14 Nov 2023 01:02:22 +0100
Subject: [PATCH 001/169] libcpp: Regenerate config.in
The previous commit did not include regenerating files maintained by
autoheader.
libcpp/ChangeLog:
* config.in: Regenerate.
---
libcpp/config.in | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/libcpp/config.in b/libcpp/config.in
index 32397b821667..df4fd44c9ef6 100644
--- a/libcpp/config.in
+++ b/libcpp/config.in
@@ -38,9 +38,21 @@
*/
#undef HAVE_ALLOCA_H
+/* Define to 1 if you have the Mac OS X function
+ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
+#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
+
+/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
+ the CoreFoundation framework. */
+#undef HAVE_CFPREFERENCESCOPYAPPVALUE
+
/* Define to 1 if you have the `clearerr_unlocked' function. */
#undef HAVE_CLEARERR_UNLOCKED
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+ */
+#undef HAVE_DCGETTEXT
+
/* Define to 1 if you have the declaration of `abort', and to 0 if you don't.
*/
#undef HAVE_DECL_ABORT
@@ -171,7 +183,10 @@
/* Define to 1 if you have the `getc_unlocked' function. */
#undef HAVE_GETC_UNLOCKED
-/* Define if you have the iconv() function. */
+/* Define if the GNU gettext() function is already present or preinstalled. */
+#undef HAVE_GETTEXT
+
+/* Define if you have the iconv() function and it works. */
#undef HAVE_ICONV
/* Define to 1 if you have the header file. */
From 3239a80555556578da8e0be1b1dc3db9fcf4e882 Mon Sep 17 00:00:00 2001
From: Sam James
Date: Thu, 2 Nov 2023 08:39:07 +0000
Subject: [PATCH 002/169] maintainer-scripts/gcc_release: use HTTPS for links
maintainer-scripts/
* gcc_release: Use HTTPS for links.
Signed-off-by: Sam James
---
maintainer-scripts/gcc_release | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/maintainer-scripts/gcc_release b/maintainer-scripts/gcc_release
index 962b8efe99a7..9d8aa3babd6d 100755
--- a/maintainer-scripts/gcc_release
+++ b/maintainer-scripts/gcc_release
@@ -25,7 +25,7 @@
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
-# <http://www.gnu.org/licenses/>.
+# <https://www.gnu.org/licenses/>.
#
########################################################################
@@ -451,7 +451,7 @@ announce_snapshot() {
echo \
"Snapshot gcc-"${RELEASE}" is now available on
https://gcc.gnu.org/pub/gcc/snapshots/"${RELEASE}"/
-and on various mirrors, see http://gcc.gnu.org/mirrors.html for details.
+and on various mirrors, see https://gcc.gnu.org/mirrors.html for details.
This snapshot has been generated from the GCC "${BRANCH}" git branch
with the following options: "git://gcc.gnu.org/git/gcc.git branch ${GITBRANCH} revision ${GITREV}"
@@ -469,7 +469,7 @@ You'll find:
GCC "${RELEASE}" Snapshot
-The GCC Project makes
+
The GCC Project makes
periodic snapshots of the GCC source tree available to the public
for testing purposes.
From d22b87864e5d476535b75098e20f8d8fdaab4f8f Mon Sep 17 00:00:00 2001
From: Sam James
Date: Thu, 2 Nov 2023 08:39:08 +0000
Subject: [PATCH 003/169] maintainer-scripts/gcc_release: cleanup whitespace
maintainer-scripts/
* gcc_release: Cleanup whitespace.
Signed-off-by: Sam James
---
maintainer-scripts/gcc_release | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/maintainer-scripts/gcc_release b/maintainer-scripts/gcc_release
index 9d8aa3babd6d..42bb7f68f171 100755
--- a/maintainer-scripts/gcc_release
+++ b/maintainer-scripts/gcc_release
@@ -153,7 +153,7 @@ build_sources() {
# Update this ChangeLog file only if it does not yet contain the
# entry we are going to add. (This is a safety net for repeated
# runs of this script for the same release.)
- if ! grep "GCC ${RELEASE} released." ${SOURCE_DIRECTORY}/${x} > /dev/null ; then
+ if ! grep "GCC ${RELEASE} released." ${SOURCE_DIRECTORY}/${x} > /dev/null ; then
cat - ${SOURCE_DIRECTORY}/${x} > ${SOURCE_DIRECTORY}/${x}.new <The GCC Project makes
periodic snapshots of the GCC source tree available to the public
for testing purposes.
-
+
If you are planning to download and use one of our snapshots, then
we highly recommend you join the GCC developers list. Details for
how to sign up can be found on the GCC project home page.
@@ -481,7 +481,7 @@ how to sign up can be found on the GCC project home page.
with the following options: "git://gcc.gnu.org/git/gcc.git branch ${GITBRANCH} revision ${GITREV}"
" > ${SNAPSHOT_INDEX}
-
+
snapshot_print gcc-${RELEASE}.tar.xz "Complete GCC"
echo \
@@ -548,7 +548,7 @@ FTP_PATH=/var/ftp/pub/gcc
# The directory in which snapshots will be placed.
SNAPSHOTS_DIR=${FTP_PATH}/snapshots
-# The major number for the release. For release `3.0.2' this would be
+# The major number for the release. For release `3.0.2' this would be
# `3'
RELEASE_MAJOR=""
# The minor number for the release. For release `3.0.2' this would be
@@ -560,7 +560,7 @@ RELEASE_REVISION=""
# The complete name of the release.
RELEASE=""
-# The name of the branch from which the release should be made, in a
+# The name of the branch from which the release should be made, in a
# user-friendly form.
BRANCH=""
From fd1596f9962569afff6c9298a7c79686c6950bef Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Mon, 30 Oct 2023 15:43:48 +0800
Subject: [PATCH 004/169] Handle bitop with INTEGER_CST in
analyze_and_compute_bitop_with_inv_effect.
analyze_and_compute_bitop_with_inv_effect assumes the first operand is
loop invariant, which is not the case when the invariant operand is an
INTEGER_CST (constants are canonicalized into the second operand position);
swap the operands in that case.
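For example (a minimal sketch mirroring the foo test added below; the
invariant operand 11304 is an INTEGER_CST rather than an SSA name):

  unsigned int
  foo (unsigned int tmp)
  {
    for (int bit = 0; bit < 64; bit++)
      tmp &= 11304;   /* bitop with a loop-invariant constant */
    return tmp;       /* final value is tmp & 11304 */
  }

After swapping, the existing checks (loop-invariant match_op[0], SSA-name
match_op[1] defined by the header PHI) succeed and sccp can perform final
value replacement for the loop result.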
gcc/ChangeLog:
PR tree-optimization/105735
PR tree-optimization/111972
* tree-scalar-evolution.cc
(analyze_and_compute_bitop_with_inv_effect): Handle bitop with
INTEGER_CST.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr105735-3.c: New test.
---
gcc/testsuite/gcc.target/i386/pr105735-3.c | 87 ++++++++++++++++++++++
gcc/tree-scalar-evolution.cc | 3 +
2 files changed, 90 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-3.c
diff --git a/gcc/testsuite/gcc.target/i386/pr105735-3.c b/gcc/testsuite/gcc.target/i386/pr105735-3.c
new file mode 100644
index 000000000000..9e268a1a997e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105735-3.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-sccp-details" } */
+/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" } } */
+
+unsigned int
+__attribute__((noipa))
+foo (unsigned int tmp)
+{
+ for (int bit = 0; bit < 64; bit++)
+ tmp &= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo1 (unsigned int tmp)
+{
+ for (int bit = 63; bit >= 0; bit -=3)
+ tmp &= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo2 (unsigned int tmp)
+{
+ for (int bit = 0; bit < 64; bit++)
+ tmp |= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo3 (unsigned int tmp)
+{
+ for (int bit = 63; bit >= 0; bit -=3)
+ tmp |= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo4 (unsigned int tmp)
+{
+ for (int bit = 0; bit < 64; bit++)
+ tmp ^= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+foo5 (unsigned int tmp)
+{
+ for (int bit = 0; bit < 63; bit++)
+ tmp ^= 11304;
+ return tmp;
+}
+
+unsigned int
+__attribute__((noipa))
+f (unsigned int tmp, int bit)
+{
+ unsigned int res = tmp;
+ for (int i = 0; i < bit; i++)
+ res &= 11304;
+ return res;
+}
+
+unsigned int
+__attribute__((noipa))
+f1 (unsigned int tmp, int bit)
+{
+ unsigned int res = tmp;
+ for (int i = 0; i < bit; i++)
+ res |= 11304;
+ return res;
+}
+
+unsigned int
+__attribute__((noipa))
+f2 (unsigned int tmp, int bit)
+{
+ unsigned int res = tmp;
+ for (int i = 0; i < bit; i++)
+ res ^= 11304;
+ return res;
+}
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 70b17c5bca16..f61277c32df3 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3689,6 +3689,9 @@ analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
match_op[0] = gimple_assign_rhs1 (def);
match_op[1] = gimple_assign_rhs2 (def);
+ if (expr_invariant_in_loop_p (loop, match_op[1]))
+ std::swap (match_op[0], match_op[1]);
+
if (TREE_CODE (match_op[1]) != SSA_NAME
|| !expr_invariant_in_loop_p (loop, match_op[0])
|| !(header_phi = dyn_cast (SSA_NAME_DEF_STMT (match_op[1])))
From bfcb6e518371bb943b77e0ef784e1de72a99aec6 Mon Sep 17 00:00:00 2001
From: Juzhe-Zhong
Date: Tue, 14 Nov 2023 11:21:16 +0800
Subject: [PATCH 005/169] RISC-V: Fix init-2.c assembly check
Noticed that the assembly check in init-2.c is wrong.
Committed.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls/init-2.c: Fix vid.v check.
---
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/init-2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/init-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/init-2.c
index f27c395441b2..ae31e227ad1d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/init-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/init-2.c
@@ -45,4 +45,4 @@ DEF_INIT (v128uhi, uint16_t, 128, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
127)
-/* { dg-final { scan-assembler-times {vid\.vx} 494 } } */
+/* { dg-final { scan-assembler-times {vid\.v} 14 } } */
From 6043bfbd89b335dd10f093a653ee58c5b1e08ed3 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 14 Nov 2023 08:11:44 +0100
Subject: [PATCH 006/169] i386: Don't optimize vshuf{i,f}{32x4,64x2} and
vperm{i,f}128 to vblendps for %ymm16+ [PR112435]
The vblendps instruction is only VEX encoded, not EVEX, so it can't be used if
there are %ymm16+ or EGPR registers involved.
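For example (a hedged sketch mirroring the new tests, compiled with
-mavx512vl -O2): with ordinary registers the new "x" alternative still
allows the vblendps optimization, while forcing an operand into %ymm16
selects the "v" alternative and must emit vshufi32x4.

  #include <immintrin.h>

  __m256i
  low_regs (__m256i a, __m256i b)
  {
    return _mm256_shuffle_i32x4 (a, b, 2);	/* may become vblendps $240 */
  }

  __m256i
  high_regs (__m256i a, __m256i b)
  {
    register __m256i c __asm__ ("ymm16") = a;
    asm ("" : "+v" (c));
    return _mm256_shuffle_i32x4 (c, b, 2);	/* must stay vshufi32x4 */
  }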
2023-11-14 Jakub Jelinek
Hu, Lin1
PR target/112435
* config/i386/sse.md (avx512vl_shuf_32x4_1,
avx512dq_shuf_64x2_1): Add
alternative with just x instead of v constraints and xjm instead of
vm and use vblendps as optimization only with that alternative.
* gcc.target/i386/avx512vl-pr112435-1.c: New test.
* gcc.target/i386/avx512vl-pr112435-2.c: New test.
* gcc.target/i386/avx512vl-pr112435-3.c: New test.
---
gcc/config/i386/sse.md | 16 ++--
.../gcc.target/i386/avx512vl-pr112435-1.c | 13 ++++
.../gcc.target/i386/avx512vl-pr112435-2.c | 63 +++++++++++++++
.../gcc.target/i386/avx512vl-pr112435-3.c | 78 +++++++++++++++++++
4 files changed, 162 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr112435-1.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr112435-2.c
create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr112435-3.c
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c502582102e0..af482f24df4d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -19235,11 +19235,11 @@
})
(define_insn "avx512dq_shuf_64x2_1"
- [(set (match_operand:VI8F_256 0 "register_operand" "=v")
+ [(set (match_operand:VI8F_256 0 "register_operand" "=x,v")
(vec_select:VI8F_256
(vec_concat:
- (match_operand:VI8F_256 1 "register_operand" "v")
- (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
+ (match_operand:VI8F_256 1 "register_operand" "x,v")
+ (match_operand:VI8F_256 2 "nonimmediate_operand" "xjm,vm"))
(parallel [(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_4_to_7_operand")
@@ -19254,7 +19254,7 @@
mask = INTVAL (operands[3]) / 2;
mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
operands[3] = GEN_INT (mask);
- if (INTVAL (operands[3]) == 2 && !)
+ if (INTVAL (operands[3]) == 2 && ! && which_alternative == 0)
return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
return "vshuf64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
@@ -19386,11 +19386,11 @@
})
(define_insn "avx512vl_shuf_32x4_1"
- [(set (match_operand:VI4F_256 0 "register_operand" "=v")
+ [(set (match_operand:VI4F_256 0 "register_operand" "=x,v")
(vec_select:VI4F_256
(vec_concat:
- (match_operand:VI4F_256 1 "register_operand" "v")
- (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
+ (match_operand:VI4F_256 1 "register_operand" "x,v")
+ (match_operand:VI4F_256 2 "nonimmediate_operand" "xjm,vm"))
(parallel [(match_operand 3 "const_0_to_7_operand")
(match_operand 4 "const_0_to_7_operand")
(match_operand 5 "const_0_to_7_operand")
@@ -19414,7 +19414,7 @@
mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
operands[3] = GEN_INT (mask);
- if (INTVAL (operands[3]) == 2 && !)
+ if (INTVAL (operands[3]) == 2 && ! && which_alternative == 0)
return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}";
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-1.c
new file mode 100644
index 000000000000..46aae282303b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-1.c
@@ -0,0 +1,13 @@
+/* PR target/112435 */
+/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
+/* { dg-options "-mavx512vl -O2" } */
+
+#include
+
+__m256i
+foo (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm16") = a;
+ asm ("" : "+v" (c));
+ return _mm256_shuffle_i32x4 (c, b, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-2.c
new file mode 100644
index 000000000000..a856fb5887a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-2.c
@@ -0,0 +1,63 @@
+/* PR target/112435 */
+/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
+/* { dg-options "-mavx512vl -O2" } */
+
+#include
+
+/* vpermi128/vpermf128 */
+__m256i
+perm0 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_permute2x128_si256 (c, b, 50);
+}
+
+__m256i
+perm1 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_permute2x128_si256 (c, b, 18);
+}
+
+__m256i
+perm2 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_permute2x128_si256 (c, b, 48);
+}
+
+/* vshuf{i,f}{32x4,64x2} ymm .*/
+__m256i
+shuff0 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_shuffle_i32x4 (c, b, 2);
+}
+
+__m256
+shuff1 (__m256 a, __m256 b)
+{
+ register __m256 c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_shuffle_f32x4 (c, b, 2);
+}
+
+__m256i
+shuff2 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_shuffle_i64x2 (c, b, 2);
+}
+
+__m256d
+shuff3 (__m256d a, __m256d b)
+{
+ register __m256d c __asm__("ymm17") = a;
+ asm ("":"+v" (c));
+ return _mm256_shuffle_f64x2 (c, b, 2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-3.c
new file mode 100644
index 000000000000..f7538ffbbcfb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr112435-3.c
@@ -0,0 +1,78 @@
+/* PR target/112435 */
+/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
+/* { dg-options "-mavx512vl -O2" } */
+
+#include
+
+/* vpermf128 */
+__m256
+perm0 (__m256 a, __m256 b)
+{
+ register __m256 c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_ps (c, b, 50);
+}
+
+__m256
+perm1 (__m256 a, __m256 b)
+{
+ register __m256 c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_ps (c, b, 18);
+}
+
+__m256
+perm2 (__m256 a, __m256 b)
+{
+ register __m256 c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_ps (c, b, 48);
+}
+
+__m256i
+perm3 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_si256 (c, b, 50);
+}
+
+__m256i
+perm4 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_si256 (c, b, 18);
+}
+
+__m256i
+perm5 (__m256i a, __m256i b)
+{
+ register __m256i c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_si256 (c, b, 48);
+}
+
+__m256d
+perm6 (__m256d a, __m256d b)
+{
+ register __m256d c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_pd (c, b, 50);
+}
+
+__m256d
+perm7 (__m256d a, __m256d b)
+{
+ register __m256d c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_pd (c, b, 18);
+}
+
+__m256d
+perm8 (__m256d a, __m256d b)
+{
+ register __m256d c __asm__("ymm17") =a;
+ asm ("":"+v" (c));
+ return _mm256_permute2f128_pd (c, b, 48);
+}
From 6e5f318fc6480412569273c1a71c638c3d677607 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 14 Nov 2023 09:24:34 +0100
Subject: [PATCH 007/169] tree: Handle BITINT_TYPE in
type_contains_placeholder_1 [PR112511]
The following testcase ICEs because BITINT_TYPE isn't handled in
type_contains_placeholder_1. Given that Ada doesn't emit it, it doesn't
matter that much where exactly we handle it as right now it should never
contain a placeholder; I've picked the same spot as INTEGER_TYPE, but if
you prefer e.g. the one with OFFSET_TYPE above, I can move it there too.
2023-11-14 Jakub Jelinek
PR middle-end/112511
* tree.cc (type_contains_placeholder_1): Handle BITINT_TYPE like
INTEGER_TYPE.
* gcc.dg/pr112511.c: New test.
---
gcc/testsuite/gcc.dg/pr112511.c | 17 +++++++++++++++++
gcc/tree.cc | 1 +
2 files changed, 18 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr112511.c
diff --git a/gcc/testsuite/gcc.dg/pr112511.c b/gcc/testsuite/gcc.dg/pr112511.c
new file mode 100644
index 000000000000..7d0b73f4776f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr112511.c
@@ -0,0 +1,17 @@
+/* PR middle-end/112511 */
+/* { dg-do compile { target bitint } } */
+/* { dg-options "-O2" } */
+
+struct T { _BitInt(22) a; };
+
+void
+bar (struct T t)
+{
+}
+
+void
+foo (void)
+{
+ struct T t;
+ bar (t);
+}
diff --git a/gcc/tree.cc b/gcc/tree.cc
index 33ea1d2e2d07..a3d907a4ac0f 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -4210,6 +4210,7 @@ type_contains_placeholder_1 (const_tree type)
return false;
case INTEGER_TYPE:
+ case BITINT_TYPE:
case REAL_TYPE:
case FIXED_POINT_TYPE:
/* Here we just check the bounds. */
From 4a70bfbf686c2b6a1ecd83fe851de826c612c3e0 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Tue, 14 Nov 2023 05:32:38 +0800
Subject: [PATCH 008/169] LoongArch: Use finer-grained DBAR hints
LA664 defines DBAR hints 0x1 - 0x1f (except 0xf and 0x1f) as follows [1-2]:
- Bit 4: kind of constraint (0: completion, 1: ordering)
- Bit 3: barrier for previous read (0: true, 1: false)
- Bit 2: barrier for previous write (0: true, 1: false)
- Bit 1: barrier for succeeding read (0: true, 1: false)
- Bit 0: barrier for succeeding write (0: true, 1: false)
LLVM has already utilized them for different memory orders [3]:
- Bit 4 is always set to one because it's only intended to be zero for
things like MMIO devices, which are out of the scope of memory orders.
- An acquire barrier is used to implement acquire loads like
ld.d $a1, $t0, 0
dbar acquire_hint
where the load operation (ld.d) should not be reordered with any load
or store operation after the acquire load. To accomplish this
constraint, we need to prevent the load operation from being reordered
after the barrier, and also prevent any following load/store operation
from being reordered before the barrier. Thus bits 0, 1, and 3 must
be zero, and bit 2 can be one, so acquire_hint should be 0b10100.
- A release barrier is used to implement release stores like
dbar release_hint
st.d $a1, $t0, 0
where the store operation (st.d) should not be reordered with any load
or store operation before the release store. So we need to prevent
the store operation from being reordered before the barrier, and also
prevent any preceding load/store operation from being reordered after
the barrier. So bits 0, 2, 3 must be zero, and bit 1 can be one. So
release_hint should be 0b10010.
A similar mapping has been utilized for RISC-V GCC [4], LoongArch Linux
kernel [1], and LoongArch LLVM [3]. So the mapping should be correct.
And I've also bootstrapped & regtested GCC on a LA664 with this patch.
The LoongArch CPUs should treat "unknown" hints as dbar 0, so we can
unconditionally emit the new hints without a compiler switch.
[1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
[2]: https://github.com/loongson-community/docs/pull/12
[3]: https://github.com/llvm/llvm-project/pull/68787
[4]: https://gcc.gnu.org/r14-406
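As a concrete illustration (a sketch assuming the mapping above; the hint
values are the ones returned by mem_thread_fence_1 below):

  #include <stdatomic.h>

  void acq (void) { atomic_thread_fence (memory_order_acquire); } /* dbar 0b10100 */
  void rel (void) { atomic_thread_fence (memory_order_release); } /* dbar 0b10010 */
  void sc  (void) { atomic_thread_fence (memory_order_seq_cst); } /* dbar 0b10000 */

Reading 0b10100 against the bit layout: bit 4 selects an ordering (not
completion) barrier, bit 2 drops the barrier for preceding writes, and the
remaining zero bits keep the barriers for preceding reads and for all
succeeding accesses, which is exactly the acquire constraint described above.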
gcc/ChangeLog:
* config/loongarch/sync.md (mem_thread_fence): Remove redundant
check.
(mem_thread_fence_1): Emit finer-grained DBAR hints for
different memory models, instead of 0.
---
gcc/config/loongarch/sync.md | 51 +++++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 9 deletions(-)
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 9924d522bcdf..1ad0c63e0d9b 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -50,23 +50,56 @@
[(match_operand:SI 0 "const_int_operand" "")] ;; model
""
{
- if (INTVAL (operands[0]) != MEMMODEL_RELAXED)
- {
- rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
- MEM_VOLATILE_P (mem) = 1;
- emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
- }
+ rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+ MEM_VOLATILE_P (mem) = 1;
+ emit_insn (gen_mem_thread_fence_1 (mem, operands[0]));
+
DONE;
})
-;; Until the LoongArch memory model (hence its mapping from C++) is finalized,
-;; conservatively emit a full FENCE.
+;; DBAR hint encoding for LA664 and later micro-architectures, paraphrased from
+;; the Linux patch revealing it [1]:
+;;
+;; - Bit 4: kind of constraint (0: completion, 1: ordering)
+;; - Bit 3: barrier for previous read (0: true, 1: false)
+;; - Bit 2: barrier for previous write (0: true, 1: false)
+;; - Bit 1: barrier for succeeding read (0: true, 1: false)
+;; - Bit 0: barrier for succeeding write (0: true, 1: false)
+;;
+;; [1]: https://git.kernel.org/torvalds/c/e031a5f3f1ed
+;;
+;; Implementations without support for the finer-granularity hints simply treat
+;; all as the full barrier (DBAR 0), so we can unconditionally start emitting the
+;; more precise hints right away.
(define_insn "mem_thread_fence_1"
[(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))
(match_operand:SI 1 "const_int_operand" "")] ;; model
""
- "dbar\t0")
+ {
+ enum memmodel model = memmodel_base (INTVAL (operands[1]));
+
+ switch (model)
+ {
+ case MEMMODEL_ACQUIRE:
+ return "dbar\t0b10100";
+ case MEMMODEL_RELEASE:
+ return "dbar\t0b10010";
+ case MEMMODEL_ACQ_REL:
+ case MEMMODEL_SEQ_CST:
+ return "dbar\t0b10000";
+ default:
+ /* GCC internal: "For the '__ATOMIC_RELAXED' model no instructions
+ need to be issued and this expansion is not invoked."
+
+ __atomic builtins doc: "Consume is implemented using the
+ stronger acquire memory order because of a deficiency in C++11's
+ semantics." See PR 59448 and get_memmodel in builtins.cc.
+
+ Other values should not be returned by memmodel_base. */
+ gcc_unreachable ();
+ }
+ })
;; Atomic memory operations.
From f28306b4fd309b579c8a4a5bf2f1b24fa40f8f7f Mon Sep 17 00:00:00 2001
From: liuhongt
Date: Mon, 13 Nov 2023 17:56:49 +0800
Subject: [PATCH 009/169] Fix ICE in vectorizable_nonlinear_induction with
bitfield.
if (TREE_CODE (init_expr) == INTEGER_CST)
init_expr = fold_convert (TREE_TYPE (vectype), init_expr);
else
gcc_assert (tree_nop_conversion_p (TREE_TYPE (vectype),
TREE_TYPE (init_expr)));
and init_expr has a 24-bit integer type while vectype has 32-bit components.
The "fix" is to bail out instead of asserting.
gcc/ChangeLog:
PR tree-optimization/112496
* tree-vect-loop.cc (vectorizable_nonlinear_induction): Return
false when !tree_nop_conversion_p (TREE_TYPE (vectype),
TREE_TYPE (init_expr)).
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr112496.c: New test.
---
gcc/testsuite/gcc.target/i386/pr112496.c | 7 +++++++
gcc/tree-vect-loop.cc | 13 ++++++++++---
2 files changed, 17 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pr112496.c
diff --git a/gcc/testsuite/gcc.target/i386/pr112496.c b/gcc/testsuite/gcc.target/i386/pr112496.c
new file mode 100644
index 000000000000..c478fda9cceb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112496.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct T { int x : 24; } v;
+void f1(int x) {
+ while (v.x - ((v.x <<= 1) - v.x)) ;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 7d00cc9689c7..fb8d999ee6bf 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -9569,9 +9569,16 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo,
if (TREE_CODE (init_expr) == INTEGER_CST)
init_expr = fold_convert (TREE_TYPE (vectype), init_expr);
- else
- gcc_assert (tree_nop_conversion_p (TREE_TYPE (vectype),
- TREE_TYPE (init_expr)));
+ else if (!tree_nop_conversion_p (TREE_TYPE (vectype), TREE_TYPE (init_expr)))
+ {
+ /* INIT_EXPR could be a bit_field, bail out for such case. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "nonlinear induction vectorization failed:"
+ " component type of vectype is not a nop conversion"
+ " from type of init_expr.\n");
+ return false;
+ }
switch (induction_type)
{
From fe23a2ff1f5072559552be0e41ab55bf72f5c79f Mon Sep 17 00:00:00 2001
From: Xi Ruoyao
Date: Fri, 3 Nov 2023 21:19:59 +0800
Subject: [PATCH 010/169] LoongArch: Disable relaxation if the assembler doesn't
support conditional branch relaxation [PR112330]
As the commit message of r14-4674 has indicated, if the assembler does
not support conditional branch relaxation, a relocation overflow may
happen on conditional branches when relaxation is enabled because the
number of NOP instructions inserted by the assembler will be more than
the number estimated by GCC.
To work around this issue, disable relaxation by default if the
assembler is detected as incapable of performing conditional branch relaxation
at GCC build time. We also need to pass -mno-relax to the assembler to
really disable relaxation. But, if the assembler does not support
-mrelax option at all, we should not pass -mno-relax to the assembler or
it will immediately error out. Also handle this with the build time
assembler capability probing, and add a pair of options
-m[no-]pass-mrelax-to-as to allow using a different assembler from the
build-time one.
With this change, if GCC is built with GAS 2.41, relaxation will be
disabled by default. So the default value of -mexplicit-relocs= is also
changed to 'always' if -mno-relax is specified or implied by the
build-time default, because using assembler macros for symbol addresses
produces no benefit when relaxation is disabled.
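A rough sketch of how the build-time probes combine into the defaults
described above (macro names are the ones added or used by this patch; the
-mexplicit-relocs= default additionally requires HAVE_AS_EXPLICIT_RELOCS):

  #if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION
  /* Defaults: -mrelax and -mexplicit-relocs=auto.  */
  #else
  /* Defaults: -mno-relax and -mexplicit-relocs=always.  */
  #endif

  #if HAVE_AS_MRELAX_OPTION
  /* Default: -mpass-mrelax-to-as, i.e. forward -m[no-]relax to the assembler.  */
  #else
  /* Default: -mno-pass-mrelax-to-as, i.e. pass neither option.  */
  #endif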
gcc/ChangeLog:
PR target/112330
* config/loongarch/genopts/loongarch.opt.in: Add
-m[no]-pass-relax-to-as. Change the default of -m[no]-relax to
account conditional branch relaxation support status.
* config/loongarch/loongarch.opt: Regenerate.
* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
the assembler supports conditional branch relaxation.
* configure: Regenerate.
* config.in: Regenerate. Note that there are some unrelated
changes introduced by r14-5424 (which does not contain a
config.in regeneration).
* config/loongarch/loongarch-opts.h
(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
Define.
(ASM_MRELAX_SPEC): Define.
(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
* config/loongarch/loongarch.cc: Take the setting of
-m[no-]relax into account when determining the default of
-mexplicit-relocs=.
* doc/invoke.texi: Document -m[no-]relax and
-m[no-]pass-mrelax-to-as for LoongArch. Update the default
value of -mexplicit-relocs=.
---
gcc/config.in | 35 +++++++++++++++++-
gcc/config/loongarch/genopts/loongarch.opt.in | 6 +++-
gcc/config/loongarch/loongarch-driver.h | 16 ++++++++-
gcc/config/loongarch/loongarch-opts.h | 4 +++
gcc/config/loongarch/loongarch.cc | 2 +-
gcc/config/loongarch/loongarch.opt | 6 +++-
gcc/configure | 35 ++++++++++++++++++
gcc/configure.ac | 10 ++++++
gcc/doc/invoke.texi | 36 ++++++++++++++-----
9 files changed, 137 insertions(+), 13 deletions(-)
diff --git a/gcc/config.in b/gcc/config.in
index 0509ba7e6a79..866f9fff1018 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -386,6 +386,12 @@
#endif
+/* Define if your assembler supports conditional branch relaxation. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_COND_BRANCH_RELAXATION
+#endif
+
+
/* Define if your assembler supports the --debug-prefix-map option. */
#ifndef USED_FOR_TARGET
#undef HAVE_AS_DEBUG_PREFIX_MAP
@@ -820,6 +826,20 @@
#endif
+/* Define to 1 if you have the Mac OS X function
+ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
+#endif
+
+
+/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
+ the CoreFoundation framework. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_CFPREFERENCESCOPYAPPVALUE
+#endif
+
+
/* Define to 1 if you have the `clearerr_unlocked' function. */
#ifndef USED_FOR_TARGET
#undef HAVE_CLEARERR_UNLOCKED
@@ -844,6 +864,13 @@
#endif
+/* Define if the GNU dcgettext() function is already present or preinstalled.
+ */
+#ifndef USED_FOR_TARGET
+#undef HAVE_DCGETTEXT
+#endif
+
+
/* Define to 1 if we found a declaration for 'abort', otherwise define to 0.
*/
#ifndef USED_FOR_TARGET
@@ -1516,6 +1543,12 @@
#endif
+/* Define if the GNU gettext() function is already present or preinstalled. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_GETTEXT
+#endif
+
+
/* Define to 1 if you have the `gettimeofday' function. */
#ifndef USED_FOR_TARGET
#undef HAVE_GETTIMEOFDAY
@@ -1547,7 +1580,7 @@
#endif
-/* Define if you have the iconv() function. */
+/* Define if you have the iconv() function and it works. */
#ifndef USED_FOR_TARGET
#undef HAVE_ICONV
#endif
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
index e1fe0c7086ea..158701d327a7 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -223,10 +223,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
Avoid using the GOT to access external symbols.
mrelax
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
Take advantage of linker relaxations to reduce the number of instructions
required to materialize symbol addresses.
+mpass-mrelax-to-as
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
+Pass -mrelax or -mno-relax option to the assembler.
+
-param=loongarch-vect-unroll-limit=
Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
Used to limit unroll factor which indicates how much the autovectorizer may
diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
index d859afcc9fee..20d233cc938b 100644
--- a/gcc/config/loongarch/loongarch-driver.h
+++ b/gcc/config/loongarch/loongarch-driver.h
@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. If not see
"%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
"%(subtarget_cc1_spec)"
+#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION
+#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}"
+#else
+#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}"
+#endif
+
+#if HAVE_AS_MRELAX_OPTION
+#define ASM_MRELAX_SPEC \
+ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
+#else
+#define ASM_MRELAX_SPEC \
+ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
+#endif
+
#undef ASM_SPEC
#define ASM_SPEC \
- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
+ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)"
extern const char*
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
index f204828015e1..8de41bbc4f78 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -101,6 +101,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
#define HAVE_AS_MRELAX_OPTION 0
#endif
+#ifndef HAVE_AS_COND_BRANCH_RELAXATION
+#define HAVE_AS_COND_BRANCH_RELAXATION 0
+#endif
+
#ifndef HAVE_AS_TLS
#define HAVE_AS_TLS 0
#endif
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 0a2db8452a3f..2998bf740d4b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -7432,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts,
if (la_opt_explicit_relocs == M_OPT_UNSET)
la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
- ? (HAVE_AS_MRELAX_OPTION
+ ? (loongarch_mrelax
? EXPLICIT_RELOCS_AUTO
: EXPLICIT_RELOCS_ALWAYS)
: EXPLICIT_RELOCS_NONE);
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 029466083274..a5988411fbb2 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -230,10 +230,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
Avoid using the GOT to access external symbols.
mrelax
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
Take advantage of linker relaxations to reduce the number of instructions
required to materialize symbol addresses.
+mpass-mrelax-to-as
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
+Pass -mrelax or -mno-relax option to the assembler.
+
-param=loongarch-vect-unroll-limit=
Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
Used to limit unroll factor which indicates how much the autovectorizer may
diff --git a/gcc/configure b/gcc/configure
index c37cae7331a2..ee97934ac4f6 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -31016,6 +31016,41 @@ if test $gcc_cv_as_loongarch_relax = yes; then
$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for conditional branch relaxation support" >&5
+$as_echo_n "checking assembler for conditional branch relaxation support... " >&6; }
+if ${gcc_cv_as_loongarch_cond_branch_relax+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ gcc_cv_as_loongarch_cond_branch_relax=no
+ if test x$gcc_cv_as != x; then
+ $as_echo 'a:
+ .rept 32769
+ nop
+ .endr
+ beq $a0,$a1,a' > conftest.s
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ gcc_cv_as_loongarch_cond_branch_relax=yes
+ else
+ echo "configure: failed program was" >&5
+ cat conftest.s >&5
+ fi
+ rm -f conftest.o conftest.s
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_cond_branch_relax" >&5
+$as_echo "$gcc_cv_as_loongarch_cond_branch_relax" >&6; }
+if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then
+
+$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h
+
fi
;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 764a33f0b044..d0caf820648e 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5453,6 +5453,16 @@ x:
[-mrelax], [.text],,
[AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
[Define if your assembler supports -mrelax option.])])
+ gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
+ gcc_cv_as_loongarch_cond_branch_relax,
+ [],
+ [a:
+ .rept 32769
+ nop
+ .endr
+ beq $a0,$a1,a],,
+ [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1,
+ [Define if your assembler supports conditional branch relaxation.])])
;;
s390*-*-*)
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f1a5722675fa..2d30a5d47672 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1045,7 +1045,7 @@ Objective-C and Objective-C++ Dialects}.
-mmax-inline-memcpy-size=@var{n}
-mexplicit-relocs=@var{style} -mexplicit-relocs -mno-explicit-relocs
-mdirect-extern-access -mno-direct-extern-access
--mcmodel=@var{code-model}}
+-mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as}
@emph{M32R/D Options}
@gccoptlist{-m32r2 -m32rx -m32r
@@ -26356,16 +26356,14 @@ with @option{-mexplicit-relocs=always} the assembler relocation operators
are always used, with @option{-mexplicit-relocs=auto} the compiler will
use the relocation operators where the linker relaxation is impossible to
improve the code quality, and macros elsewhere. The default
-value for the option is determined during GCC build-time by detecting
-corresponding assembler support:
+value for the option is determined with the assembler capability detected
+during GCC build-time and the setting of @option{-mrelax}:
@option{-mexplicit-relocs=none} if the assembler does not support
relocation operators at all,
@option{-mexplicit-relocs=always} if the assembler supports relocation
-operators but does not support relaxation,
-@option{-mexplicit-relocs=auto} if the assembler supports both relocation
-operators and relaxation. This option is mostly useful for
-debugging, or interoperation with assemblers different from the build-time
-one.
+operators but @option{-mrelax} is not enabled,
+@option{-mexplicit-relocs=auto} if the assembler supports relocation
+operators and @option{-mrelax} is enabled.
@opindex mexplicit-relocs
@item -mexplicit-relocs
@@ -26389,6 +26387,28 @@ kernels, executables linked with @option{-static} or @option{-static-pie}.
@option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
@option{-fpic}.
+@item -mrelax
+@itemx -mno-relax
+Take (do not take) advantage of linker relaxations. If
+@option{-mpass-mrelax-to-as} is enabled, this option is also passed to
+the assembler. The default is determined during GCC build-time by
+detecting corresponding assembler support:
+@option{-mrelax} if the assembler supports both the @option{-mrelax}
+option and the conditional branch relaxation (it's required or the
+@code{.align} directives and conditional branch instructions in the
+assembly code outputted by GCC may be rejected by the assembler because
+of a relocation overflow), @option{-mno-relax} otherwise.
+
+@item -mpass-mrelax-to-as
+@itemx -mno-pass-mrelax-to-as
+Pass (do not pass) the @option{-mrelax} or @option{-mno-relax} option
+to the assembler. The default is determined during GCC build-time by
+detecting corresponding assembler support:
+@option{-mpass-mrelax-to-as} if the assembler supports the
+@option{-mrelax} option, @option{-mno-pass-mrelax-to-as} otherwise.
+This option is mostly useful for debugging, or interoperation with
+assemblers different from the build-time one.
+
@item loongarch-vect-unroll-limit
The vectorizer will use available tuning information to determine whether it
would be beneficial to unroll the main vectorized loop and by how much. This
From 7383cb56e1170789929201b0dadc156888928fdd Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 14 Nov 2023 10:38:56 +0100
Subject: [PATCH 011/169] Add type-generic clz/ctz/clrsb/ffs/parity/popcount
builtins [PR111309]
The following patch adds 6 new type-generic builtins,
__builtin_clzg
__builtin_ctzg
__builtin_clrsbg
__builtin_ffsg
__builtin_parityg
__builtin_popcountg
The g at the end stands for generic because the unsuffixed variants
of the builtins already have unsigned int or int arguments.
The main reason to add these is to support arbitrary unsigned (for
clrsb/ffs signed) bit-precise integer types and also __int128, which
weren't supported by the existing builtins, so that e.g.
type-generic functions could then support not just bit-precise unsigned
integer types whose width matches a standard or extended integer type,
but others too.
None of these new builtins promote their first argument, so the argument
can be e.g. unsigned char or unsigned short or unsigned __int20 etc.
The first 2 support either 1 or 2 arguments, if only 1 argument is supplied,
the behavior is undefined for argument 0 like for other __builtin_c[lt]z*
builtins, if 2 arguments are supplied, the second argument should be int
that will be returned if the argument is 0. All other builtins have
just one argument. For __builtin_clrsbg and __builtin_ffsg the argument
shall be any signed standard/extended or bit-precise integer, for the others
any unsigned standard/extended or bit-precise integer (bool not allowed).
One possibility would be to also allow signed integer types for
the clz/ctz/parity/popcount ones (and just cast the argument to
unsigned_type_for during folding) and similarly unsigned integer types
for the clrsb/ffs ones, dunno what is better; for stdbit.h the current
version is sufficient and diagnoses use of the inappropriate sign,
though on the other side I wonder if users won't be confused by
__builtin_clzg (1) being an error and having to write __builtin_clzg (1U).
The new builtins are lowered to corresponding builtins with other suffixes
or internal calls (plus casts and adjustments where needed) during FE
folding or during gimplification at latest, the non-suffixed builtins
handling precisions up to precision of int, l up to precision of long,
ll up to precision of long long, up to __int128 precision lowered to
double-word expansion early and the rest (which must be _BitInt) lowered
to internal fn calls - those are then lowered during bitint lowering pass.
The patch also changes representation of IFN_CLZ and IFN_CTZ calls,
previously they were in the IL only if they are directly supported optab
and depending on C[LT]Z_DEFINED_VALUE_AT_ZERO (...) == 2 they had or didn't
have defined behavior at 0, now they are in the IL either if directly
supported optab, or for the large/huge BITINT_TYPEs and they have either
1 or 2 arguments. If one, the behavior is undefined at zero, if 2, the
second argument is an int constant that should be returned for 0.
As there is no extra support during expansion, for directly supported optab
the second argument if present should still match the
C[LT]Z_DEFINED_VALUE_AT_ZERO (...) == 2 value, but for BITINT_TYPE arguments
it can be arbitrary int INTEGER_CST.
The intended uses in stdbit.h are e.g.
#ifdef __has_builtin
#if __has_builtin(__builtin_clzg) && __has_builtin(__builtin_ctzg) && __has_builtin(__builtin_popcountg)
#define stdc_leading_zeros(value) \
((unsigned int) __builtin_clzg (value, __builtin_popcountg ((__typeof (value)) ~(__typeof (value)) 0)))
#define stdc_leading_ones(value) \
((unsigned int) __builtin_clzg ((__typeof (value)) ~(value), __builtin_popcountg ((__typeof (value)) ~(__typeof (value)) 0)))
#define stdc_first_trailing_one(value) \
((unsigned int) (__builtin_ctzg (value, -1) + 1))
#define stdc_trailing_zeros(value) \
((unsigned int) __builtin_ctzg (value, __builtin_popcountg ((__typeof (value)) ~(__typeof (value)) 0)))
#endif
#endif
where __builtin_popcountg ((__typeof (x)) -1) computes the bit precision
of x's type (kind of _Bitwidthof (x) alternative).
They also allow casting of arbitrary unsigned _BitInt other than
unsigned _BitInt(1) to corresponding signed _BitInt by using
signed _BitInt(__builtin_popcountg ((__typeof (a)) -1))
and of arbitrary signed _BitInt to corresponding unsigned _BitInt
using unsigned _BitInt(__builtin_clrsbg ((__typeof (a)) -1) + 1).
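As a further illustration (a sketch based on the semantics described above;
the width computation mirrors the stdbit.h macros):

  int
  bit_query_demo (unsigned _BitInt(135) x)
  {
    /* Bit width of x's type: popcount of the all-ones value, i.e. 135.  */
    int width = __builtin_popcountg ((__typeof (x)) ~(__typeof (x)) 0);
    /* Two-argument forms: the second argument is returned when x is 0,
       so these are well defined for all inputs.  */
    int lz = __builtin_clzg (x, width);
    int tz = __builtin_ctzg (x, width);
    return width + lz + tz;
  }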
2023-11-14 Jakub Jelinek
PR c/111309
gcc/
* builtins.def (BUILT_IN_CLZG, BUILT_IN_CTZG, BUILT_IN_CLRSBG,
BUILT_IN_FFSG, BUILT_IN_PARITYG, BUILT_IN_POPCOUNTG): New
builtins.
* builtins.cc (fold_builtin_bit_query): New function.
(fold_builtin_1): Use it for
BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
(fold_builtin_2): Use it for BUILT_IN_{CLZ,CTZ}G.
* fold-const-call.cc: Fix comment typo on tm.h inclusion.
(fold_const_call_ss): Handle
CFN_BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
(fold_const_call_sss): New function.
(fold_const_call_1): Call it for 2 argument functions returning
scalar when passed 2 INTEGER_CSTs.
* genmatch.cc (cmp_operand): For function calls also compare
number of arguments.
(fns_cmp): New function.
(dt_node::gen_kids): Sort fns and generic_fns.
(dt_node::gen_kids_1): Handle fns with the same id but different
number of arguments.
* match.pd (CLZ simplifications): Drop checks for defined behavior
at zero. Add variant of simplifications for IFN_CLZ with 2 arguments.
(CTZ simplifications): Drop checks for defined behavior at zero,
don't optimize precisions above MAX_FIXED_MODE_SIZE. Add variant of
simplifications for IFN_CTZ with 2 arguments.
(a != 0 ? CLZ(a) : CST -> .CLZ(a)): Use TREE_TYPE (@3) instead of
type, add BITINT_TYPE handling, create 2 argument IFN_CLZ rather than
one argument. Add variant for matching CLZ with 2 arguments.
(a != 0 ? CTZ(a) : CST -> .CTZ(a)): Similarly.
* gimple-lower-bitint.cc (bitint_large_huge::lower_bit_query): New
method.
(bitint_large_huge::lower_call): Use it for IFN_{CLZ,CTZ,CLRSB,FFS}
and IFN_{PARITY,POPCOUNT} calls.
* gimple-range-op.cc (cfn_clz::fold_range): Don't check
CLZ_DEFINED_VALUE_AT_ZERO for m_gimple_call_internal_p, instead
assume defined value at zero if the call has 2 arguments and use
second argument value for that case.
(cfn_ctz::fold_range): Similarly.
(gimple_range_op_handler::maybe_builtin_call): Use op_cfn_clz_internal
or op_cfn_ctz_internal only if internal fn call has 2 arguments and
set m_op2 in that case.
* tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern,
vect_recog_popcount_clz_ctz_ffs_pattern): For value defined at zero
use second argument of calls if present, otherwise assume UB at zero,
create 2 argument .CLZ/.CTZ calls if needed.
* tree-vect-stmts.cc (vectorizable_call): Handle 2 argument .CLZ/.CTZ
calls.
* tree-ssa-loop-niter.cc (build_cltz_expr): Create 2 argument
.CLZ/.CTZ calls if needed.
* tree-ssa-forwprop.cc (simplify_count_trailing_zeroes): Create 2
argument .CTZ calls if needed.
* tree-ssa-phiopt.cc (cond_removal_in_builtin_zero_pattern): Handle
2 argument .CLZ/.CTZ calls, handle BITINT_TYPE, create 2 argument
.CLZ/.CTZ calls.
* doc/extend.texi (__builtin_clzg, __builtin_ctzg, __builtin_clrsbg,
__builtin_ffsg, __builtin_parityg, __builtin_popcountg): Document.
gcc/c-family/
* c-common.cc (check_builtin_function_arguments): Handle
BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
* c-gimplify.cc (c_gimplify_expr): If __builtin_c[lt]zg second
argument hasn't been folded into constant yet, transform it to one
argument call inside of a COND_EXPR which for first argument 0
returns the second argument.
gcc/c/
* c-typeck.cc (convert_arguments): Don't promote first argument
of BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
gcc/cp/
* call.cc (magic_varargs_p): Return 4 for
BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
(build_over_call): Don't promote first argument of
BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
* cp-gimplify.cc (cp_gimplify_expr): For BUILT_IN_C{L,T}ZG use
c_gimplify_expr.
gcc/testsuite/
* c-c++-common/pr111309-1.c: New test.
* c-c++-common/pr111309-2.c: New test.
* gcc.dg/torture/bitint-43.c: New test.
* gcc.dg/torture/bitint-44.c: New test.
---
gcc/builtins.cc | 277 ++++++++++++
gcc/builtins.def | 6 +
gcc/c-family/c-common.cc | 74 +++-
gcc/c-family/c-gimplify.cc | 22 +
gcc/c/c-typeck.cc | 18 +-
gcc/cp/call.cc | 14 +-
gcc/cp/cp-gimplify.cc | 4 +
gcc/doc/extend.texi | 42 ++
gcc/fold-const-call.cc | 68 ++-
gcc/genmatch.cc | 66 ++-
gcc/gimple-lower-bitint.cc | 527 +++++++++++++++++++++++
gcc/gimple-range-op.cc | 67 ++-
gcc/match.pd | 202 ++++++---
gcc/testsuite/c-c++-common/pr111309-1.c | 470 ++++++++++++++++++++
gcc/testsuite/c-c++-common/pr111309-2.c | 85 ++++
gcc/testsuite/gcc.dg/torture/bitint-43.c | 306 +++++++++++++
gcc/testsuite/gcc.dg/torture/bitint-44.c | 306 +++++++++++++
gcc/tree-ssa-forwprop.cc | 8 +-
gcc/tree-ssa-loop-niter.cc | 10 +-
gcc/tree-ssa-phiopt.cc | 66 ++-
gcc/tree-vect-patterns.cc | 65 +--
gcc/tree-vect-stmts.cc | 16 +
22 files changed, 2576 insertions(+), 143 deletions(-)
create mode 100644 gcc/testsuite/c-c++-common/pr111309-1.c
create mode 100644 gcc/testsuite/c-c++-common/pr111309-2.c
create mode 100644 gcc/testsuite/gcc.dg/torture/bitint-43.c
create mode 100644 gcc/testsuite/gcc.dg/torture/bitint-44.c
diff --git a/gcc/builtins.cc b/gcc/builtins.cc
index cb90bd03b3ea..5ece0d23eb9a 100644
--- a/gcc/builtins.cc
+++ b/gcc/builtins.cc
@@ -9573,6 +9573,271 @@ fold_builtin_arith_overflow (location_t loc, enum built_in_function fcode,
return build2_loc (loc, COMPOUND_EXPR, boolean_type_node, store, ovfres);
}
+/* Fold __builtin_{clz,ctz,clrsb,ffs,parity,popcount}g into corresponding
+ internal function. */
+
+static tree
+fold_builtin_bit_query (location_t loc, enum built_in_function fcode,
+ tree arg0, tree arg1)
+{
+ enum internal_fn ifn;
+ enum built_in_function fcodei, fcodel, fcodell;
+ tree arg0_type = TREE_TYPE (arg0);
+ tree cast_type = NULL_TREE;
+ int addend = 0;
+
+ switch (fcode)
+ {
+ case BUILT_IN_CLZG:
+ if (arg1 && TREE_CODE (arg1) != INTEGER_CST)
+ return NULL_TREE;
+ ifn = IFN_CLZ;
+ fcodei = BUILT_IN_CLZ;
+ fcodel = BUILT_IN_CLZL;
+ fcodell = BUILT_IN_CLZLL;
+ break;
+ case BUILT_IN_CTZG:
+ if (arg1 && TREE_CODE (arg1) != INTEGER_CST)
+ return NULL_TREE;
+ ifn = IFN_CTZ;
+ fcodei = BUILT_IN_CTZ;
+ fcodel = BUILT_IN_CTZL;
+ fcodell = BUILT_IN_CTZLL;
+ break;
+ case BUILT_IN_CLRSBG:
+ ifn = IFN_CLRSB;
+ fcodei = BUILT_IN_CLRSB;
+ fcodel = BUILT_IN_CLRSBL;
+ fcodell = BUILT_IN_CLRSBLL;
+ break;
+ case BUILT_IN_FFSG:
+ ifn = IFN_FFS;
+ fcodei = BUILT_IN_FFS;
+ fcodel = BUILT_IN_FFSL;
+ fcodell = BUILT_IN_FFSLL;
+ break;
+ case BUILT_IN_PARITYG:
+ ifn = IFN_PARITY;
+ fcodei = BUILT_IN_PARITY;
+ fcodel = BUILT_IN_PARITYL;
+ fcodell = BUILT_IN_PARITYLL;
+ break;
+ case BUILT_IN_POPCOUNTG:
+ ifn = IFN_POPCOUNT;
+ fcodei = BUILT_IN_POPCOUNT;
+ fcodel = BUILT_IN_POPCOUNTL;
+ fcodell = BUILT_IN_POPCOUNTLL;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (TYPE_PRECISION (arg0_type)
+ <= TYPE_PRECISION (long_long_unsigned_type_node))
+ {
+ if (TYPE_PRECISION (arg0_type) <= TYPE_PRECISION (unsigned_type_node))
+
+ cast_type = (TYPE_UNSIGNED (arg0_type)
+ ? unsigned_type_node : integer_type_node);
+ else if (TYPE_PRECISION (arg0_type)
+ <= TYPE_PRECISION (long_unsigned_type_node))
+ {
+ cast_type = (TYPE_UNSIGNED (arg0_type)
+ ? long_unsigned_type_node : long_integer_type_node);
+ fcodei = fcodel;
+ }
+ else
+ {
+ cast_type = (TYPE_UNSIGNED (arg0_type)
+ ? long_long_unsigned_type_node
+ : long_long_integer_type_node);
+ fcodei = fcodell;
+ }
+ }
+ else if (TYPE_PRECISION (arg0_type) <= MAX_FIXED_MODE_SIZE)
+ {
+ cast_type
+ = build_nonstandard_integer_type (MAX_FIXED_MODE_SIZE,
+ TYPE_UNSIGNED (arg0_type));
+ gcc_assert (TYPE_PRECISION (cast_type)
+ == 2 * TYPE_PRECISION (long_long_unsigned_type_node));
+ fcodei = END_BUILTINS;
+ }
+ else
+ fcodei = END_BUILTINS;
+ if (cast_type)
+ {
+ switch (fcode)
+ {
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CLRSBG:
+ addend = TYPE_PRECISION (arg0_type) - TYPE_PRECISION (cast_type);
+ break;
+ default:
+ break;
+ }
+ arg0 = fold_convert (cast_type, arg0);
+ arg0_type = cast_type;
+ }
+
+ if (arg1)
+ arg1 = fold_convert (integer_type_node, arg1);
+
+ tree arg2 = arg1;
+ if (fcode == BUILT_IN_CLZG && addend)
+ {
+ if (arg1)
+ arg0 = save_expr (arg0);
+ arg2 = NULL_TREE;
+ }
+ tree call = NULL_TREE, tem;
+ if (TYPE_PRECISION (arg0_type) == MAX_FIXED_MODE_SIZE
+ && (TYPE_PRECISION (arg0_type)
+ == 2 * TYPE_PRECISION (long_long_unsigned_type_node)))
+ {
+ /* __int128 expansions using up to 2 long long builtins. */
+ arg0 = save_expr (arg0);
+ tree type = (TYPE_UNSIGNED (arg0_type)
+ ? long_long_unsigned_type_node
+ : long_long_integer_type_node);
+ tree hi = fold_build2 (RSHIFT_EXPR, arg0_type, arg0,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2));
+ hi = fold_convert (type, hi);
+ tree lo = fold_convert (type, arg0);
+ switch (fcode)
+ {
+ case BUILT_IN_CLZG:
+ call = fold_builtin_bit_query (loc, fcode, lo, NULL_TREE);
+ call = fold_build2 (PLUS_EXPR, integer_type_node, call,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2));
+ if (arg2)
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ lo, build_zero_cst (type)),
+ call, arg2);
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ hi, build_zero_cst (type)),
+ fold_builtin_bit_query (loc, fcode, hi,
+ NULL_TREE),
+ call);
+ break;
+ case BUILT_IN_CTZG:
+ call = fold_builtin_bit_query (loc, fcode, hi, NULL_TREE);
+ call = fold_build2 (PLUS_EXPR, integer_type_node, call,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2));
+ if (arg2)
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ hi, build_zero_cst (type)),
+ call, arg2);
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ lo, build_zero_cst (type)),
+ fold_builtin_bit_query (loc, fcode, lo,
+ NULL_TREE),
+ call);
+ break;
+ case BUILT_IN_CLRSBG:
+ tem = fold_builtin_bit_query (loc, fcode, lo, NULL_TREE);
+ tem = fold_build2 (PLUS_EXPR, integer_type_node, tem,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2));
+ tem = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (LT_EXPR, boolean_type_node,
+ fold_build2 (BIT_XOR_EXPR, type,
+ lo, hi),
+ build_zero_cst (type)),
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2 - 1),
+ tem);
+ call = fold_builtin_bit_query (loc, fcode, hi, NULL_TREE);
+ call = save_expr (call);
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ call,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE
+ / 2 - 1)),
+ call, tem);
+ break;
+ case BUILT_IN_FFSG:
+ call = fold_builtin_bit_query (loc, fcode, hi, NULL_TREE);
+ call = fold_build2 (PLUS_EXPR, integer_type_node, call,
+ build_int_cst (integer_type_node,
+ MAX_FIXED_MODE_SIZE / 2));
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ hi, build_zero_cst (type)),
+ call, integer_zero_node);
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ lo, build_zero_cst (type)),
+ fold_builtin_bit_query (loc, fcode, lo,
+ NULL_TREE),
+ call);
+ break;
+ case BUILT_IN_PARITYG:
+ call = fold_builtin_bit_query (loc, fcode,
+ fold_build2 (BIT_XOR_EXPR, type,
+ lo, hi), NULL_TREE);
+ break;
+ case BUILT_IN_POPCOUNTG:
+ call = fold_build2 (PLUS_EXPR, integer_type_node,
+ fold_builtin_bit_query (loc, fcode, hi,
+ NULL_TREE),
+ fold_builtin_bit_query (loc, fcode, lo,
+ NULL_TREE));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ {
+ /* Only keep second argument to IFN_CLZ/IFN_CTZ if it is the
+ value defined at zero during GIMPLE, or for large/huge _BitInt
+ (which are then lowered during bitint lowering). */
+ if (arg2 && TREE_CODE (TREE_TYPE (arg0)) != BITINT_TYPE)
+ {
+ int val;
+ if (fcode == BUILT_IN_CLZG)
+ {
+ if (CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (arg0_type),
+ val) != 2
+ || wi::to_widest (arg2) != val)
+ arg2 = NULL_TREE;
+ }
+ else if (CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (arg0_type),
+ val) != 2
+ || wi::to_widest (arg2) != val)
+ arg2 = NULL_TREE;
+ if (!direct_internal_fn_supported_p (ifn, arg0_type,
+ OPTIMIZE_FOR_BOTH))
+ arg2 = NULL_TREE;
+ }
+ if (fcodei == END_BUILTINS || arg2)
+ call = build_call_expr_internal_loc (loc, ifn, integer_type_node,
+ arg2 ? 2 : 1, arg0, arg2);
+ else
+ call = build_call_expr_loc (loc, builtin_decl_explicit (fcodei), 1,
+ arg0);
+ }
+ if (addend)
+ call = fold_build2 (PLUS_EXPR, integer_type_node, call,
+ build_int_cst (integer_type_node, addend));
+ if (arg1 && arg2 == NULL_TREE)
+ call = fold_build3 (COND_EXPR, integer_type_node,
+ fold_build2 (NE_EXPR, boolean_type_node,
+ arg0, build_zero_cst (arg0_type)),
+ call, arg1);
+
+ return call;
+}
+
/* Fold __builtin_{add,sub}c{,l,ll} into pair of internal functions
that return both result of arithmetics and overflowed boolean
flag in a complex integer result. */
@@ -9824,6 +10089,14 @@ fold_builtin_1 (location_t loc, tree expr, tree fndecl, tree arg0)
return build_empty_stmt (loc);
break;
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CTZG:
+ case BUILT_IN_CLRSBG:
+ case BUILT_IN_FFSG:
+ case BUILT_IN_PARITYG:
+ case BUILT_IN_POPCOUNTG:
+ return fold_builtin_bit_query (loc, fcode, arg0, NULL_TREE);
+
default:
break;
}
@@ -9913,6 +10186,10 @@ fold_builtin_2 (location_t loc, tree expr, tree fndecl, tree arg0, tree arg1)
case BUILT_IN_ATOMIC_IS_LOCK_FREE:
return fold_builtin_atomic_is_lock_free (arg0, arg1);
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CTZG:
+ return fold_builtin_bit_query (loc, fcode, arg0, arg1);
+
default:
break;
}
diff --git a/gcc/builtins.def b/gcc/builtins.def
index a6fdb2262714..33e6cad8ce14 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -962,15 +962,18 @@ DEF_GCC_BUILTIN (BUILT_IN_CLZ, "clz", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_
DEF_GCC_BUILTIN (BUILT_IN_CLZIMAX, "clzimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CLZL, "clzl", BT_FN_INT_ULONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CLZLL, "clzll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_CLZG, "clzg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_CONSTANT_P, "constant_p", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CTZ, "ctz", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CTZIMAX, "ctzimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CTZL, "ctzl", BT_FN_INT_ULONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CTZLL, "ctzll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_CTZG, "ctzg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_CLRSB, "clrsb", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CLRSBIMAX, "clrsbimax", BT_FN_INT_INTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CLRSBL, "clrsbl", BT_FN_INT_LONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_CLRSBLL, "clrsbll", BT_FN_INT_LONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_CLRSBG, "clrsbg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_DCGETTEXT, "dcgettext", BT_FN_STRING_CONST_STRING_CONST_STRING_INT, ATTR_FORMAT_ARG_2)
DEF_EXT_LIB_BUILTIN (BUILT_IN_DGETTEXT, "dgettext", BT_FN_STRING_CONST_STRING_CONST_STRING, ATTR_FORMAT_ARG_2)
DEF_GCC_BUILTIN (BUILT_IN_DWARF_CFA, "dwarf_cfa", BT_FN_PTR, ATTR_NULL)
@@ -993,6 +996,7 @@ DEF_EXT_LIB_BUILTIN (BUILT_IN_FFS, "ffs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_L
DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSIMAX, "ffsimax", BT_FN_INT_INTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSL, "ffsl", BT_FN_INT_LONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_EXT_LIB_BUILTIN (BUILT_IN_FFSLL, "ffsll", BT_FN_INT_LONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_FFSG, "ffsg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_FORK, "fork", BT_FN_PID, ATTR_NOTHROW_LIST)
DEF_GCC_BUILTIN (BUILT_IN_FRAME_ADDRESS, "frame_address", BT_FN_PTR_UINT, ATTR_NULL)
/* [trans-mem]: Adjust BUILT_IN_TM_FREE if BUILT_IN_FREE is changed. */
@@ -1041,10 +1045,12 @@ DEF_GCC_BUILTIN (BUILT_IN_PARITY, "parity", BT_FN_INT_UINT, ATTR_CONST_NO
DEF_GCC_BUILTIN (BUILT_IN_PARITYIMAX, "parityimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_PARITYL, "parityl", BT_FN_INT_ULONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_PARITYLL, "parityll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_PARITYG, "parityg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNT, "popcount", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTIMAX, "popcountimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTLL, "popcountll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTG, "popcountg", BT_FN_INT_VAR, ATTR_CONST_NOTHROW_TYPEGENERIC_LEAF)
DEF_EXT_LIB_BUILTIN (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
DEF_GCC_BUILTIN (BUILT_IN_PREFETCH, "prefetch", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
DEF_LIB_BUILTIN (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc
index 77faf179cca8..a619429f694f 100644
--- a/gcc/c-family/c-common.cc
+++ b/gcc/c-family/c-common.cc
@@ -6475,14 +6475,14 @@ check_builtin_function_arguments (location_t loc, vec<location_t> arg_loc,
}
if (TREE_CODE (TREE_TYPE (args[2])) == ENUMERAL_TYPE)
{
- error_at (ARG_LOCATION (2), "argument 3 in call to function "
- "%qE has enumerated type", fndecl);
+ error_at (ARG_LOCATION (2), "argument %u in call to function "
+ "%qE has enumerated type", 3, fndecl);
return false;
}
else if (TREE_CODE (TREE_TYPE (args[2])) == BOOLEAN_TYPE)
{
- error_at (ARG_LOCATION (2), "argument 3 in call to function "
- "%qE has boolean type", fndecl);
+ error_at (ARG_LOCATION (2), "argument %u in call to function "
+ "%qE has boolean type", 3, fndecl);
return false;
}
return true;
@@ -6522,6 +6522,72 @@ check_builtin_function_arguments (location_t loc, vec<location_t> arg_loc,
}
return false;
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CTZG:
+ case BUILT_IN_CLRSBG:
+ case BUILT_IN_FFSG:
+ case BUILT_IN_PARITYG:
+ case BUILT_IN_POPCOUNTG:
+ if (nargs == 2
+ && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CLZG
+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CTZG))
+ {
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (args[1])))
+ {
+ error_at (ARG_LOCATION (1), "argument %u in call to function "
+ "%qE does not have integral type", 2, fndecl);
+ return false;
+ }
+ if ((TYPE_PRECISION (TREE_TYPE (args[1]))
+ > TYPE_PRECISION (integer_type_node))
+ || (TYPE_PRECISION (TREE_TYPE (args[1]))
+ == TYPE_PRECISION (integer_type_node)
+ && TYPE_UNSIGNED (TREE_TYPE (args[1]))))
+ {
+ error_at (ARG_LOCATION (1), "argument %u in call to function "
+ "%qE does not have % type", 2, fndecl);
+ return false;
+ }
+ }
+ else if (!builtin_function_validate_nargs (loc, fndecl, nargs, 1))
+ return false;
+
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (args[0])))
+ {
+ error_at (ARG_LOCATION (0), "argument %u in call to function "
+ "%qE does not have integral type", 1, fndecl);
+ return false;
+ }
+ if (TREE_CODE (TREE_TYPE (args[0])) == ENUMERAL_TYPE)
+ {
+ error_at (ARG_LOCATION (0), "argument %u in call to function "
+ "%qE has enumerated type", 1, fndecl);
+ return false;
+ }
+ if (TREE_CODE (TREE_TYPE (args[0])) == BOOLEAN_TYPE)
+ {
+ error_at (ARG_LOCATION (0), "argument %u in call to function "
+ "%qE has boolean type", 1, fndecl);
+ return false;
+ }
+ if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_FFSG
+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CLRSBG)
+ {
+ if (TYPE_UNSIGNED (TREE_TYPE (args[0])))
+ {
+ error_at (ARG_LOCATION (0), "argument 1 in call to function "
+ "%qE has unsigned type", fndecl);
+ return false;
+ }
+ }
+ else if (!TYPE_UNSIGNED (TREE_TYPE (args[0])))
+ {
+ error_at (ARG_LOCATION (0), "argument 1 in call to function "
+ "%qE has signed type", fndecl);
+ return false;
+ }
+ return true;
+
default:
return true;
}
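
For illustration, a minimal sketch (not part of the patch) of calls that the checking above accepts and rejects; the comments paraphrase the diagnostics added above and the expectations in pr111309-2.c:

void
bit_query_arg_checks (void)
{
  unsigned u = 5;
  int i = 5;
  _Bool b = 1;

  __builtin_popcountg (u);   /* OK: unsigned integer argument.  */
  __builtin_clzg (u, -1);    /* OK: optional int second argument.  */
  __builtin_ffsg (i);        /* OK: ffsg/clrsbg want a signed argument.  */
  __builtin_clzg (i);        /* error: argument 1 ... has signed type.  */
  __builtin_ffsg (u);        /* error: argument 1 ... has unsigned type.  */
  __builtin_popcountg (b);   /* error: argument 1 ... has boolean type.  */
  __builtin_clzg (u, 1.0);   /* error: argument 2 ... does not have integral type.  */
}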
diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index 17b0610a89f9..2f44e125f925 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -818,6 +818,28 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
break;
}
+ case CALL_EXPR:
+ {
+ tree fndecl = get_callee_fndecl (*expr_p);
+ if (fndecl
+ && fndecl_built_in_p (fndecl, BUILT_IN_CLZG, BUILT_IN_CTZG)
+ && call_expr_nargs (*expr_p) == 2
+ && TREE_CODE (CALL_EXPR_ARG (*expr_p, 1)) != INTEGER_CST)
+ {
+ tree a = save_expr (CALL_EXPR_ARG (*expr_p, 0));
+ tree c = build_call_expr_loc (EXPR_LOCATION (*expr_p),
+ fndecl, 1, a);
+ *expr_p = build3_loc (EXPR_LOCATION (*expr_p), COND_EXPR,
+ integer_type_node,
+ build2_loc (EXPR_LOCATION (*expr_p),
+ NE_EXPR, boolean_type_node, a,
+ build_zero_cst (TREE_TYPE (a))),
+ c, CALL_EXPR_ARG (*expr_p, 1));
+ return GS_OK;
+ }
+ break;
+ }
+
default:;
}
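
In source terms, the CALL_EXPR case added to c_gimplify_expr above turns the two-argument form with a non-constant fallback into a conditional around the single-argument call; a sketch of the equivalence:

/* Sketch: with a non-constant Y, the gimplifier rewrites
   __builtin_clzg (X, Y) as the conditional below (X evaluated once).  */
int
clzg_with_fallback (unsigned x, int y)
{
  return x != 0 ? __builtin_clzg (x) : y;   /* same as __builtin_clzg (x, y) */
}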
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 366ca88c6331..1dbb4471a88a 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -3416,6 +3416,7 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
&& lookup_attribute ("type generic", TYPE_ATTRIBUTES (TREE_TYPE (fundecl)));
bool type_generic_remove_excess_precision = false;
bool type_generic_overflow_p = false;
+ bool type_generic_bit_query = false;
tree selector;
/* Change pointer to function to the function itself for
@@ -3471,6 +3472,17 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
type_generic_overflow_p = true;
break;
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CTZG:
+ case BUILT_IN_CLRSBG:
+ case BUILT_IN_FFSG:
+ case BUILT_IN_PARITYG:
+ case BUILT_IN_POPCOUNTG:
+ /* The first argument of these type-generic builtins
+ should not be promoted. */
+ type_generic_bit_query = true;
+ break;
+
default:
break;
}
@@ -3606,11 +3618,13 @@ convert_arguments (location_t loc, vec<location_t> arg_loc, tree typelist,
}
}
else if ((excess_precision && !type_generic)
- || (type_generic_overflow_p && parmnum == 2))
+ || (type_generic_overflow_p && parmnum == 2)
+ || (type_generic_bit_query && parmnum == 0))
/* A "double" argument with excess precision being passed
without a prototype or in variable arguments.
The last argument of __builtin_*_overflow_p should not be
- promoted. */
+ promoted, similarly the first argument of
+ __builtin_{clz,ctz,clrsb,ffs,parity,popcount}g. */
parmval = convert (valtype, val);
else if ((invalid_func_diag =
targetm.calls.invalid_arg_for_unprototyped_fn (typelist, fundecl, val)))
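
Since the first argument of these builtins is now exempt from the default promotions, the count is taken relative to the argument's own width; a small illustration (a sketch assuming 8-bit char and 32-bit int):

/* Sketch: no promotion of the first argument to int.  */
void
no_promotion (void)
{
  unsigned char c = 1;
  int a = __builtin_clzg (c);            /* 7 when CHAR_BIT == 8 */
  int b = __builtin_clz ((unsigned) c);  /* 31 when int is 32 bits */
  (void) a; (void) b;
}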
diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 4516677bcabf..709fd74f55e3 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -9290,7 +9290,9 @@ convert_for_arg_passing (tree type, tree val, tsubst_flags_t complain)
This is true for some builtins which don't act like normal functions.
Return 2 if just decay_conversion and removal of excess precision should
be done, 1 if just decay_conversion. Return 3 for special treatment of
- the 3rd argument for __builtin_*_overflow_p. */
+ the 3rd argument for __builtin_*_overflow_p. Return 4 for special
+ treatment of the 1st argument for
+ __builtin_{clz,ctz,clrsb,ffs,parity,popcount}g. */
int
magic_varargs_p (tree fn)
@@ -9317,6 +9319,14 @@ magic_varargs_p (tree fn)
case BUILT_IN_FPCLASSIFY:
return 2;
+ case BUILT_IN_CLZG:
+ case BUILT_IN_CTZG:
+ case BUILT_IN_CLRSBG:
+ case BUILT_IN_FFSG:
+ case BUILT_IN_PARITYG:
+ case BUILT_IN_POPCOUNTG:
+ return 4;
+
default:
return lookup_attribute ("type generic",
TYPE_ATTRIBUTES (TREE_TYPE (fn))) != 0;
@@ -10122,7 +10132,7 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain)
for (; arg_index < vec_safe_length (args); ++arg_index)
{
tree a = (*args)[arg_index];
- if (magic == 3 && arg_index == 2)
+ if ((magic == 3 && arg_index == 2) || (magic == 4 && arg_index == 0))
{
/* Do no conversions for certain magic varargs. */
a = mark_type_use (a);
diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index 9375a116f06c..795c811471d9 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -771,6 +771,10 @@ cp_gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p)
default:
break;
}
+ else if (decl
+ && fndecl_built_in_p (decl, BUILT_IN_CLZG, BUILT_IN_CTZG))
+ ret = (enum gimplify_status) c_gimplify_expr (expr_p, pre_p,
+ post_p);
}
break;
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7cdfdf8c83b2..406ccc9bc751 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14960,6 +14960,48 @@ Similar to @code{__builtin_parity}, except the argument type is
@code{unsigned long long}.
@enddefbuiltin
+@defbuiltin{int __builtin_ffsg (...)}
+Similar to @code{__builtin_ffs}, except the argument is type-generic
+signed integer (standard, extended or bit-precise). No integral argument
+promotions are performed on the argument.
+@enddefbuiltin
+
+@defbuiltin{int __builtin_clzg (...)}
+Similar to @code{__builtin_clz}, except the argument is type-generic
+unsigned integer (standard, extended or bit-precise) and there is
+optional second argument with int type. No integral argument promotions
+are performed on the first argument. If two arguments are specified,
+and first argument is 0, the result is the second argument. If only
+one argument is specified and it is 0, the result is undefined.
+@enddefbuiltin
+
+@defbuiltin{int __builtin_ctzg (...)}
+Similar to @code{__builtin_ctz}, except the argument is type-generic
+unsigned integer (standard, extended or bit-precise) and there is
+optional second argument with int type. No integral argument promotions
+are performed on the first argument. If two arguments are specified,
+and first argument is 0, the result is the second argument. If only
+one argument is specified and it is 0, the result is undefined.
+@enddefbuiltin
+
+@defbuiltin{int __builtin_clrsbg (...)}
+Similar to @code{__builtin_clrsb}, except the argument is type-generic
+signed integer (standard, extended or bit-precise). No integral argument
+promotions are performed on the argument.
+@enddefbuiltin
+
+@defbuiltin{int __builtin_popcountg (...)}
+Similar to @code{__builtin_popcount}, except the argument is type-generic
+unsigned integer (standard, extended or bit-precise). No integral argument
+promotions are performed on the argument.
+@enddefbuiltin
+
+@defbuiltin{int __builtin_parityg (...)}
+Similar to @code{__builtin_parity}, except the argument is type-generic
+unsigned integer (standard, extended or bit-precise). No integral argument
+promotions are performed on the argument.
+@enddefbuiltin
+
@defbuiltin{double __builtin_powi (double, int)}
@defbuiltinx{float __builtin_powif (float, int)}
@defbuiltinx{{long double} __builtin_powil (long double, int)}
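
A short usage sketch for the builtins documented above (results assume 8-bit char; the _BitInt line additionally assumes the target supports bit-precise integers):

void
generic_bit_queries (void)
{
  unsigned char uc = 0x10;
  unsigned _BitInt(300) big = 1;

  int a = __builtin_clzg (uc);         /* 3: counted within the 8-bit type */
  int b = __builtin_ctzg (uc);         /* 4 */
  int c = __builtin_clzg (0u, -1);     /* -1: second argument used for 0 */
  int d = __builtin_popcountg (big);   /* 1: bit-precise operands work too */
  int e = __builtin_ffsg (-2);         /* 2 */
  (void) a; (void) b; (void) c; (void) d; (void) e;
}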
diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
index 04be3d2b3362..36f4eccb812a 100644
--- a/gcc/fold-const-call.cc
+++ b/gcc/fold-const-call.cc
@@ -27,7 +27,7 @@ along with GCC; see the file COPYING3. If not see
#include "fold-const.h"
#include "fold-const-call.h"
#include "case-cfn-macros.h"
-#include "tm.h" /* For C[LT]Z_DEFINED_AT_ZERO. */
+#include "tm.h" /* For C[LT]Z_DEFINED_VALUE_AT_ZERO. */
#include "builtins.h"
#include "gimple-expr.h"
#include "tree-vector-builder.h"
@@ -1017,14 +1017,18 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
switch (fn)
{
CASE_CFN_FFS:
+ case CFN_BUILT_IN_FFSG:
*result = wi::shwi (wi::ffs (arg), precision);
return true;
CASE_CFN_CLZ:
+ case CFN_BUILT_IN_CLZG:
{
int tmp;
if (wi::ne_p (arg, 0))
tmp = wi::clz (arg);
+ else if (TREE_CODE (arg_type) == BITINT_TYPE)
+ tmp = TYPE_PRECISION (arg_type);
else if (!CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (arg_type),
tmp))
tmp = TYPE_PRECISION (arg_type);
@@ -1033,10 +1037,13 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
}
CASE_CFN_CTZ:
+ case CFN_BUILT_IN_CTZG:
{
int tmp;
if (wi::ne_p (arg, 0))
tmp = wi::ctz (arg);
+ else if (TREE_CODE (arg_type) == BITINT_TYPE)
+ tmp = TYPE_PRECISION (arg_type);
else if (!CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (arg_type),
tmp))
tmp = TYPE_PRECISION (arg_type);
@@ -1045,14 +1052,17 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
}
CASE_CFN_CLRSB:
+ case CFN_BUILT_IN_CLRSBG:
*result = wi::shwi (wi::clrsb (arg), precision);
return true;
CASE_CFN_POPCOUNT:
+ case CFN_BUILT_IN_POPCOUNTG:
*result = wi::shwi (wi::popcount (arg), precision);
return true;
CASE_CFN_PARITY:
+ case CFN_BUILT_IN_PARITYG:
*result = wi::shwi (wi::parity (arg), precision);
return true;
@@ -1529,6 +1539,49 @@ fold_const_call_sss (real_value *result, combined_fn fn,
}
}
+/* Try to evaluate:
+
+ *RESULT = FN (ARG0, ARG1)
+
+ where ARG_TYPE is the type of ARG0 and PRECISION is the number of bits in
+ the result. Return true on success. */
+
+static bool
+fold_const_call_sss (wide_int *result, combined_fn fn,
+ const wide_int_ref &arg0, const wide_int_ref &arg1,
+ unsigned int precision, tree arg_type ATTRIBUTE_UNUSED)
+{
+ switch (fn)
+ {
+ case CFN_CLZ:
+ case CFN_BUILT_IN_CLZG:
+ {
+ int tmp;
+ if (wi::ne_p (arg0, 0))
+ tmp = wi::clz (arg0);
+ else
+ tmp = arg1.to_shwi ();
+ *result = wi::shwi (tmp, precision);
+ return true;
+ }
+
+ case CFN_CTZ:
+ case CFN_BUILT_IN_CTZG:
+ {
+ int tmp;
+ if (wi::ne_p (arg0, 0))
+ tmp = wi::ctz (arg0);
+ else
+ tmp = arg1.to_shwi ();
+ *result = wi::shwi (tmp, precision);
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
/* Try to evaluate:
RESULT = fn (ARG0, ARG1)
@@ -1565,6 +1618,19 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1)
machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0));
machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1));
+ if (integer_cst_p (arg0) && integer_cst_p (arg1))
+ {
+ if (SCALAR_INT_MODE_P (mode))
+ {
+ wide_int result;
+ if (fold_const_call_sss (&result, fn, wi::to_wide (arg0),
+ wi::to_wide (arg1), TYPE_PRECISION (type),
+ TREE_TYPE (arg0)))
+ return wide_int_to_tree (type, result);
+ }
+ return NULL_TREE;
+ }
+
if (mode == arg0_mode
&& real_cst_p (arg0)
&& real_cst_p (arg1))
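
With both arguments constant, the new fold_const_call_sss overload lets the two-argument calls fold away entirely; a sketch of the observable effect:

/* Sketch: both calls should fold to constants, including the
   zero-first-argument case handled by the new overload above.  */
int
folded_at_compile_time (void)
{
  return __builtin_clzg (0u, 42)    /* folds to 42 */
       + __builtin_ctzg (0u, -7);   /* folds to -7 */
}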
diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index 45723425f7ab..3488764ec640 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -1896,8 +1896,14 @@ cmp_operand (operand *o1, operand *o2)
{
expr *e1 = static_cast<expr *>(o1);
expr *e2 = static_cast<expr *>(o2);
- return (e1->operation == e2->operation
- && e1->is_generic == e2->is_generic);
+ if (e1->operation != e2->operation
+ || e1->is_generic != e2->is_generic)
+ return false;
+ if (e1->operation->kind == id_base::FN
+ /* For function calls also compare number of arguments. */
+ && e1->ops.length () != e2->ops.length ())
+ return false;
+ return true;
}
else
return false;
@@ -3071,6 +3077,26 @@ dt_operand::gen_generic_expr (FILE *f, int indent, const char *opname)
return 0;
}
+/* Compare 2 fns or generic_fns vector entries for vector sorting.
+ Same operation entries with different number of arguments should
+ be adjacent. */
+
+static int
+fns_cmp (const void *p1, const void *p2)
+{
+ dt_operand *op1 = *(dt_operand *const *) p1;
+ dt_operand *op2 = *(dt_operand *const *) p2;
+ expr *e1 = as_a <expr *> (op1->op);
+ expr *e2 = as_a <expr *> (op2->op);
+ id_base *b1 = e1->operation;
+ id_base *b2 = e2->operation;
+ if (b1->hashval < b2->hashval)
+ return -1;
+ if (b1->hashval > b2->hashval)
+ return 1;
+ return strcmp (b1->id, b2->id);
+}
+
/* Generate matching code for the children of the decision tree node. */
void
@@ -3144,6 +3170,8 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
Like DT_TRUE, DT_MATCH serves as a barrier as it can cause
dependent matches to get out-of-order. Generate code now
for what we have collected sofar. */
+ fns.qsort (fns_cmp);
+ generic_fns.qsort (fns_cmp);
gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
fns, generic_fns, preds, others);
/* And output the true operand itself. */
@@ -3160,6 +3188,8 @@ dt_node::gen_kids (FILE *f, int indent, bool gimple, int depth)
}
/* Generate code for the remains. */
+ fns.qsort (fns_cmp);
+ generic_fns.qsort (fns_cmp);
gen_kids_1 (f, indent, gimple, depth, gimple_exprs, generic_exprs,
fns, generic_fns, preds, others);
}
@@ -3257,14 +3287,21 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
indent += 4;
fprintf_indent (f, indent, "{\n");
+ id_base *last_op = NULL;
for (unsigned i = 0; i < fns_len; ++i)
{
expr *e = as_a <expr *> (fns[i]->op);
- if (user_id *u = dyn_cast <user_id *> (e->operation))
- for (auto id : u->substitutes)
- fprintf_indent (f, indent, "case %s:\n", id->id);
- else
- fprintf_indent (f, indent, "case %s:\n", e->operation->id);
+ if (e->operation != last_op)
+ {
+ if (i)
+ fprintf_indent (f, indent, " break;\n");
+ if (user_id *u = dyn_cast <user_id *> (e->operation))
+ for (auto id : u->substitutes)
+ fprintf_indent (f, indent, "case %s:\n", id->id);
+ else
+ fprintf_indent (f, indent, "case %s:\n", e->operation->id);
+ }
+ last_op = e->operation;
/* We need to be defensive against bogus prototypes allowing
calls with not enough arguments. */
fprintf_indent (f, indent,
@@ -3273,9 +3310,9 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
fprintf_indent (f, indent, " {\n");
fns[i]->gen (f, indent + 6, true, depth);
fprintf_indent (f, indent, " }\n");
- fprintf_indent (f, indent, " break;\n");
}
+ fprintf_indent (f, indent, " break;\n");
fprintf_indent (f, indent, "default:;\n");
fprintf_indent (f, indent, "}\n");
indent -= 4;
@@ -3335,18 +3372,25 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, int depth,
" {\n");
indent += 4;
+ id_base *last_op = NULL;
for (unsigned j = 0; j < generic_fns.length (); ++j)
{
expr *e = as_a <expr *> (generic_fns[j]->op);
gcc_assert (e->operation->kind == id_base::FN);
- fprintf_indent (f, indent, "case %s:\n", e->operation->id);
+ if (e->operation != last_op)
+ {
+ if (j)
+ fprintf_indent (f, indent, " break;\n");
+ fprintf_indent (f, indent, "case %s:\n", e->operation->id);
+ }
+ last_op = e->operation;
fprintf_indent (f, indent, " if (call_expr_nargs (%s) == %d)\n"
" {\n", kid_opname, e->ops.length ());
generic_fns[j]->gen (f, indent + 6, false, depth);
- fprintf_indent (f, indent, " }\n"
- " break;\n");
+ fprintf_indent (f, indent, " }\n");
}
+ fprintf_indent (f, indent, " break;\n");
fprintf_indent (f, indent, "default:;\n");
indent -= 4;
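
The effect of the genmatch.cc changes above is that patterns for the same function identifier now share one case label in the generated matcher, with a per-arity guard for each pattern and a single break after the group; roughly, the generated code has this shape (a hypothetical sketch with stand-in accessors, not actual generator output):

/* Hypothetical helpers standing in for the matcher's accessors.  */
int call_fn_code (void *call);
int call_num_args (void *call);

void
generated_matcher_shape (void *call)
{
  switch (call_fn_code (call))
    {
    case 42 /* e.g. CFN_CTZ */:
      if (call_num_args (call) == 1)
        {
          /* ... single-argument .CTZ patterns ... */
        }
      if (call_num_args (call) == 2)
        {
          /* ... two-argument .CTZ patterns ... */
        }
      break;
    default:;
    }
}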
diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc
index 66558594b51b..c429cb245d37 100644
--- a/gcc/gimple-lower-bitint.cc
+++ b/gcc/gimple-lower-bitint.cc
@@ -427,6 +427,7 @@ struct bitint_large_huge
void lower_mul_overflow (tree, gimple *);
void lower_cplxpart_stmt (tree, gimple *);
void lower_complexexpr_stmt (gimple *);
+ void lower_bit_query (gimple *);
void lower_call (tree, gimple *);
void lower_asm (gimple *);
void lower_stmt (gimple *);
@@ -4455,6 +4456,524 @@ bitint_large_huge::lower_complexexpr_stmt (gimple *stmt)
insert_before (g);
}
+/* Lower a .{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT} call with one large/huge _BitInt
+ argument. */
+
+void
+bitint_large_huge::lower_bit_query (gimple *stmt)
+{
+ tree arg0 = gimple_call_arg (stmt, 0);
+ tree arg1 = (gimple_call_num_args (stmt) == 2
+ ? gimple_call_arg (stmt, 1) : NULL_TREE);
+ tree lhs = gimple_call_lhs (stmt);
+ gimple *g;
+
+ if (!lhs)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_remove (&gsi, true);
+ return;
+ }
+ tree type = TREE_TYPE (arg0);
+ gcc_assert (TREE_CODE (type) == BITINT_TYPE);
+ bitint_prec_kind kind = bitint_precision_kind (type);
+ gcc_assert (kind >= bitint_prec_large);
+ enum internal_fn ifn = gimple_call_internal_fn (stmt);
+ enum built_in_function fcode = END_BUILTINS;
+ gcc_assert (TYPE_PRECISION (unsigned_type_node) == limb_prec
+ || TYPE_PRECISION (long_unsigned_type_node) == limb_prec
+ || TYPE_PRECISION (long_long_unsigned_type_node) == limb_prec);
+ switch (ifn)
+ {
+ case IFN_CLZ:
+ if (TYPE_PRECISION (unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CLZ;
+ else if (TYPE_PRECISION (long_unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CLZL;
+ else
+ fcode = BUILT_IN_CLZLL;
+ break;
+ case IFN_FFS:
+ /* .FFS (X) is .CTZ (X, -1) + 1, though under the hood
+ we don't add the addend at the end. */
+ arg1 = integer_zero_node;
+ /* FALLTHRU */
+ case IFN_CTZ:
+ if (TYPE_PRECISION (unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CTZ;
+ else if (TYPE_PRECISION (long_unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CTZL;
+ else
+ fcode = BUILT_IN_CTZLL;
+ m_upwards = true;
+ break;
+ case IFN_CLRSB:
+ if (TYPE_PRECISION (unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CLRSB;
+ else if (TYPE_PRECISION (long_unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_CLRSBL;
+ else
+ fcode = BUILT_IN_CLRSBLL;
+ break;
+ case IFN_PARITY:
+ if (TYPE_PRECISION (unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_PARITY;
+ else if (TYPE_PRECISION (long_unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_PARITYL;
+ else
+ fcode = BUILT_IN_PARITYLL;
+ m_upwards = true;
+ break;
+ case IFN_POPCOUNT:
+ if (TYPE_PRECISION (unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_POPCOUNT;
+ else if (TYPE_PRECISION (long_unsigned_type_node) == limb_prec)
+ fcode = BUILT_IN_POPCOUNTL;
+ else
+ fcode = BUILT_IN_POPCOUNTLL;
+ m_upwards = true;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ tree fndecl = builtin_decl_explicit (fcode), res = NULL_TREE;
+ unsigned cnt = 0, rem = 0, end = 0, prec = TYPE_PRECISION (type);
+ struct bq_details { edge e; tree val, addend; } *bqp = NULL;
+ basic_block edge_bb = NULL;
+ if (m_upwards)
+ {
+ tree idx = NULL_TREE, idx_first = NULL_TREE, idx_next = NULL_TREE;
+ if (kind == bitint_prec_large)
+ cnt = CEIL (prec, limb_prec);
+ else
+ {
+ rem = (prec % (2 * limb_prec));
+ end = (prec - rem) / limb_prec;
+ cnt = 2 + CEIL (rem, limb_prec);
+ idx = idx_first = create_loop (size_zero_node, &idx_next);
+ }
+
+ if (ifn == IFN_CTZ || ifn == IFN_FFS)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_prev (&gsi);
+ edge e = split_block (gsi_bb (gsi), gsi_stmt (gsi));
+ edge_bb = e->src;
+ if (kind == bitint_prec_large)
+ {
+ m_gsi = gsi_last_bb (edge_bb);
+ if (!gsi_end_p (m_gsi))
+ gsi_next (&m_gsi);
+ }
+ bqp = XALLOCAVEC (struct bq_details, cnt);
+ }
+ else
+ m_after_stmt = stmt;
+ if (kind != bitint_prec_large)
+ m_upwards_2limb = end;
+
+ for (unsigned i = 0; i < cnt; i++)
+ {
+ m_data_cnt = 0;
+ if (kind == bitint_prec_large)
+ idx = size_int (i);
+ else if (i >= 2)
+ idx = size_int (end + (i > 2));
+
+ tree rhs1 = handle_operand (arg0, idx);
+ if (!useless_type_conversion_p (m_limb_type, TREE_TYPE (rhs1)))
+ {
+ if (!TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+ rhs1 = add_cast (unsigned_type_for (TREE_TYPE (rhs1)), rhs1);
+ rhs1 = add_cast (m_limb_type, rhs1);
+ }
+
+ tree in, out, tem;
+ if (ifn == IFN_PARITY)
+ in = prepare_data_in_out (build_zero_cst (m_limb_type), idx, &out);
+ else if (ifn == IFN_FFS)
+ in = prepare_data_in_out (integer_one_node, idx, &out);
+ else
+ in = prepare_data_in_out (integer_zero_node, idx, &out);
+
+ switch (ifn)
+ {
+ case IFN_CTZ:
+ case IFN_FFS:
+ g = gimple_build_cond (NE_EXPR, rhs1,
+ build_zero_cst (m_limb_type),
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ edge e1, e2;
+ e1 = split_block (gsi_bb (m_gsi), g);
+ e1->flags = EDGE_FALSE_VALUE;
+ e2 = make_edge (e1->src, gimple_bb (stmt), EDGE_TRUE_VALUE);
+ e1->probability = profile_probability::unlikely ();
+ e2->probability = e1->probability.invert ();
+ if (i == 0)
+ set_immediate_dominator (CDI_DOMINATORS, e2->dest, e2->src);
+ m_gsi = gsi_after_labels (e1->dest);
+ bqp[i].e = e2;
+ bqp[i].val = rhs1;
+ if (tree_fits_uhwi_p (idx))
+ bqp[i].addend
+ = build_int_cst (integer_type_node,
+ tree_to_uhwi (idx) * limb_prec
+ + (ifn == IFN_FFS));
+ else
+ {
+ bqp[i].addend = in;
+ if (i == 1)
+ res = out;
+ else
+ res = make_ssa_name (integer_type_node);
+ g = gimple_build_assign (res, PLUS_EXPR, in,
+ build_int_cst (integer_type_node,
+ limb_prec));
+ insert_before (g);
+ m_data[m_data_cnt] = res;
+ }
+ break;
+ case IFN_PARITY:
+ if (!integer_zerop (in))
+ {
+ if (kind == bitint_prec_huge && i == 1)
+ res = out;
+ else
+ res = make_ssa_name (m_limb_type);
+ g = gimple_build_assign (res, BIT_XOR_EXPR, in, rhs1);
+ insert_before (g);
+ }
+ else
+ res = rhs1;
+ m_data[m_data_cnt] = res;
+ break;
+ case IFN_POPCOUNT:
+ g = gimple_build_call (fndecl, 1, rhs1);
+ tem = make_ssa_name (integer_type_node);
+ gimple_call_set_lhs (g, tem);
+ insert_before (g);
+ if (!integer_zerop (in))
+ {
+ if (kind == bitint_prec_huge && i == 1)
+ res = out;
+ else
+ res = make_ssa_name (integer_type_node);
+ g = gimple_build_assign (res, PLUS_EXPR, in, tem);
+ insert_before (g);
+ }
+ else
+ res = tem;
+ m_data[m_data_cnt] = res;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ m_first = false;
+ if (kind == bitint_prec_huge && i <= 1)
+ {
+ if (i == 0)
+ {
+ idx = make_ssa_name (sizetype);
+ g = gimple_build_assign (idx, PLUS_EXPR, idx_first,
+ size_one_node);
+ insert_before (g);
+ }
+ else
+ {
+ g = gimple_build_assign (idx_next, PLUS_EXPR, idx_first,
+ size_int (2));
+ insert_before (g);
+ g = gimple_build_cond (NE_EXPR, idx_next, size_int (end),
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ if (ifn == IFN_CTZ || ifn == IFN_FFS)
+ m_gsi = gsi_after_labels (edge_bb);
+ else
+ m_gsi = gsi_for_stmt (stmt);
+ }
+ }
+ }
+ }
+ else
+ {
+ tree idx = NULL_TREE, idx_next = NULL_TREE, first = NULL_TREE;
+ int sub_one = 0;
+ if (kind == bitint_prec_large)
+ cnt = CEIL (prec, limb_prec);
+ else
+ {
+ rem = prec % limb_prec;
+ if (rem == 0 && (!TYPE_UNSIGNED (type) || ifn == IFN_CLRSB))
+ rem = limb_prec;
+ end = (prec - rem) / limb_prec;
+ cnt = 1 + (rem != 0);
+ if (ifn == IFN_CLRSB)
+ sub_one = 1;
+ }
+
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+ gsi_prev (&gsi);
+ edge e = split_block (gsi_bb (gsi), gsi_stmt (gsi));
+ edge_bb = e->src;
+ m_gsi = gsi_last_bb (edge_bb);
+ if (!gsi_end_p (m_gsi))
+ gsi_next (&m_gsi);
+
+ if (ifn == IFN_CLZ)
+ bqp = XALLOCAVEC (struct bq_details, cnt);
+ else
+ {
+ gsi = gsi_for_stmt (stmt);
+ gsi_prev (&gsi);
+ e = split_block (gsi_bb (gsi), gsi_stmt (gsi));
+ edge_bb = e->src;
+ bqp = XALLOCAVEC (struct bq_details, 2 * cnt);
+ }
+
+ for (unsigned i = 0; i < cnt; i++)
+ {
+ m_data_cnt = 0;
+ if (kind == bitint_prec_large)
+ idx = size_int (cnt - i - 1);
+ else if (i == cnt - 1)
+ idx = create_loop (size_int (end - 1), &idx_next);
+ else
+ idx = size_int (end);
+
+ tree rhs1 = handle_operand (arg0, idx);
+ if (!useless_type_conversion_p (m_limb_type, TREE_TYPE (rhs1)))
+ {
+ if (ifn == IFN_CLZ && !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+ rhs1 = add_cast (unsigned_type_for (TREE_TYPE (rhs1)), rhs1);
+ else if (ifn == IFN_CLRSB && TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+ rhs1 = add_cast (signed_type_for (TREE_TYPE (rhs1)), rhs1);
+ rhs1 = add_cast (m_limb_type, rhs1);
+ }
+
+ if (ifn == IFN_CLZ)
+ {
+ g = gimple_build_cond (NE_EXPR, rhs1,
+ build_zero_cst (m_limb_type),
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ edge e1 = split_block (gsi_bb (m_gsi), g);
+ e1->flags = EDGE_FALSE_VALUE;
+ edge e2 = make_edge (e1->src, gimple_bb (stmt), EDGE_TRUE_VALUE);
+ e1->probability = profile_probability::unlikely ();
+ e2->probability = e1->probability.invert ();
+ if (i == 0)
+ set_immediate_dominator (CDI_DOMINATORS, e2->dest, e2->src);
+ m_gsi = gsi_after_labels (e1->dest);
+ bqp[i].e = e2;
+ bqp[i].val = rhs1;
+ }
+ else
+ {
+ if (i == 0)
+ {
+ first = rhs1;
+ g = gimple_build_assign (make_ssa_name (m_limb_type),
+ PLUS_EXPR, rhs1,
+ build_int_cst (m_limb_type, 1));
+ insert_before (g);
+ g = gimple_build_cond (GT_EXPR, gimple_assign_lhs (g),
+ build_int_cst (m_limb_type, 1),
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ }
+ else
+ {
+ g = gimple_build_assign (make_ssa_name (m_limb_type),
+ BIT_XOR_EXPR, rhs1, first);
+ insert_before (g);
+ tree stype = signed_type_for (m_limb_type);
+ g = gimple_build_cond (LT_EXPR,
+ add_cast (stype,
+ gimple_assign_lhs (g)),
+ build_zero_cst (stype),
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ edge e1 = split_block (gsi_bb (m_gsi), g);
+ e1->flags = EDGE_FALSE_VALUE;
+ edge e2 = make_edge (e1->src, gimple_bb (stmt),
+ EDGE_TRUE_VALUE);
+ e1->probability = profile_probability::unlikely ();
+ e2->probability = e1->probability.invert ();
+ if (i == 1)
+ set_immediate_dominator (CDI_DOMINATORS, e2->dest,
+ e2->src);
+ m_gsi = gsi_after_labels (e1->dest);
+ bqp[2 * i].e = e2;
+ g = gimple_build_cond (NE_EXPR, rhs1, first,
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ }
+ edge e1 = split_block (gsi_bb (m_gsi), g);
+ e1->flags = EDGE_FALSE_VALUE;
+ edge e2 = make_edge (e1->src, edge_bb, EDGE_TRUE_VALUE);
+ e1->probability = profile_probability::unlikely ();
+ e2->probability = e1->probability.invert ();
+ if (i == 0)
+ set_immediate_dominator (CDI_DOMINATORS, e2->dest, e2->src);
+ m_gsi = gsi_after_labels (e1->dest);
+ bqp[2 * i + 1].e = e2;
+ bqp[i].val = rhs1;
+ }
+ if (tree_fits_uhwi_p (idx))
+ bqp[i].addend
+ = build_int_cst (integer_type_node,
+ (int) prec
+ - (((int) tree_to_uhwi (idx) + 1)
+ * limb_prec) - sub_one);
+ else
+ {
+ tree in, out;
+ in = build_int_cst (integer_type_node, rem - sub_one);
+ m_first = true;
+ in = prepare_data_in_out (in, idx, &out);
+ out = m_data[m_data_cnt + 1];
+ bqp[i].addend = in;
+ g = gimple_build_assign (out, PLUS_EXPR, in,
+ build_int_cst (integer_type_node,
+ limb_prec));
+ insert_before (g);
+ m_data[m_data_cnt] = out;
+ }
+
+ m_first = false;
+ if (kind == bitint_prec_huge && i == cnt - 1)
+ {
+ g = gimple_build_assign (idx_next, PLUS_EXPR, idx,
+ size_int (-1));
+ insert_before (g);
+ g = gimple_build_cond (NE_EXPR, idx, size_zero_node,
+ NULL_TREE, NULL_TREE);
+ insert_before (g);
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (gsi_bb (m_gsi),
+ &true_edge, &false_edge);
+ m_gsi = gsi_after_labels (false_edge->dest);
+ }
+ }
+ }
+ switch (ifn)
+ {
+ case IFN_CLZ:
+ case IFN_CTZ:
+ case IFN_FFS:
+ gphi *phi1, *phi2, *phi3;
+ basic_block bb;
+ bb = gsi_bb (m_gsi);
+ remove_edge (find_edge (bb, gimple_bb (stmt)));
+ phi1 = create_phi_node (make_ssa_name (m_limb_type),
+ gimple_bb (stmt));
+ phi2 = create_phi_node (make_ssa_name (integer_type_node),
+ gimple_bb (stmt));
+ for (unsigned i = 0; i < cnt; i++)
+ {
+ add_phi_arg (phi1, bqp[i].val, bqp[i].e, UNKNOWN_LOCATION);
+ add_phi_arg (phi2, bqp[i].addend, bqp[i].e, UNKNOWN_LOCATION);
+ }
+ if (arg1 == NULL_TREE)
+ {
+ g = gimple_build_builtin_unreachable (m_loc);
+ insert_before (g);
+ }
+ m_gsi = gsi_for_stmt (stmt);
+ g = gimple_build_call (fndecl, 1, gimple_phi_result (phi1));
+ gimple_call_set_lhs (g, make_ssa_name (integer_type_node));
+ insert_before (g);
+ if (arg1 == NULL_TREE)
+ g = gimple_build_assign (lhs, PLUS_EXPR,
+ gimple_phi_result (phi2),
+ gimple_call_lhs (g));
+ else
+ {
+ g = gimple_build_assign (make_ssa_name (integer_type_node),
+ PLUS_EXPR, gimple_phi_result (phi2),
+ gimple_call_lhs (g));
+ insert_before (g);
+ edge e1 = split_block (gimple_bb (stmt), g);
+ edge e2 = make_edge (bb, e1->dest, EDGE_FALLTHRU);
+ e2->probability = profile_probability::always ();
+ set_immediate_dominator (CDI_DOMINATORS, e1->dest,
+ get_immediate_dominator (CDI_DOMINATORS,
+ e1->src));
+ phi3 = create_phi_node (make_ssa_name (integer_type_node), e1->dest);
+ add_phi_arg (phi3, gimple_assign_lhs (g), e1, UNKNOWN_LOCATION);
+ add_phi_arg (phi3, arg1, e2, UNKNOWN_LOCATION);
+ m_gsi = gsi_for_stmt (stmt);
+ g = gimple_build_assign (lhs, gimple_phi_result (phi3));
+ }
+ gsi_replace (&m_gsi, g, true);
+ break;
+ case IFN_CLRSB:
+ bb = gsi_bb (m_gsi);
+ remove_edge (find_edge (bb, edge_bb));
+ edge e;
+ e = make_edge (bb, gimple_bb (stmt), EDGE_FALLTHRU);
+ e->probability = profile_probability::always ();
+ set_immediate_dominator (CDI_DOMINATORS, gimple_bb (stmt),
+ get_immediate_dominator (CDI_DOMINATORS,
+ edge_bb));
+ phi1 = create_phi_node (make_ssa_name (m_limb_type),
+ edge_bb);
+ phi2 = create_phi_node (make_ssa_name (integer_type_node),
+ edge_bb);
+ phi3 = create_phi_node (make_ssa_name (integer_type_node),
+ gimple_bb (stmt));
+ for (unsigned i = 0; i < cnt; i++)
+ {
+ add_phi_arg (phi1, bqp[i].val, bqp[2 * i + 1].e, UNKNOWN_LOCATION);
+ add_phi_arg (phi2, bqp[i].addend, bqp[2 * i + 1].e,
+ UNKNOWN_LOCATION);
+ tree a = bqp[i].addend;
+ if (i && kind == bitint_prec_large)
+ a = int_const_binop (PLUS_EXPR, a, integer_minus_one_node);
+ if (i)
+ add_phi_arg (phi3, a, bqp[2 * i].e, UNKNOWN_LOCATION);
+ }
+ add_phi_arg (phi3, build_int_cst (integer_type_node, prec - 1), e,
+ UNKNOWN_LOCATION);
+ m_gsi = gsi_after_labels (edge_bb);
+ g = gimple_build_call (fndecl, 1,
+ add_cast (signed_type_for (m_limb_type),
+ gimple_phi_result (phi1)));
+ gimple_call_set_lhs (g, make_ssa_name (integer_type_node));
+ insert_before (g);
+ g = gimple_build_assign (make_ssa_name (integer_type_node),
+ PLUS_EXPR, gimple_call_lhs (g),
+ gimple_phi_result (phi2));
+ insert_before (g);
+ if (kind != bitint_prec_large)
+ {
+ g = gimple_build_assign (make_ssa_name (integer_type_node),
+ PLUS_EXPR, gimple_assign_lhs (g),
+ integer_one_node);
+ insert_before (g);
+ }
+ add_phi_arg (phi3, gimple_assign_lhs (g),
+ find_edge (edge_bb, gimple_bb (stmt)), UNKNOWN_LOCATION);
+ m_gsi = gsi_for_stmt (stmt);
+ g = gimple_build_assign (lhs, gimple_phi_result (phi3));
+ gsi_replace (&m_gsi, g, true);
+ break;
+ case IFN_PARITY:
+ g = gimple_build_call (fndecl, 1, res);
+ gimple_call_set_lhs (g, lhs);
+ gsi_replace (&m_gsi, g, true);
+ break;
+ case IFN_POPCOUNT:
+ g = gimple_build_assign (lhs, res);
+ gsi_replace (&m_gsi, g, true);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Lower a call statement with one or more large/huge _BitInt
arguments or large/huge _BitInt return value. */
@@ -4476,6 +4995,14 @@ bitint_large_huge::lower_call (tree obj, gimple *stmt)
case IFN_UBSAN_CHECK_MUL:
lower_mul_overflow (obj, stmt);
return;
+ case IFN_CLZ:
+ case IFN_CTZ:
+ case IFN_CLRSB:
+ case IFN_FFS:
+ case IFN_PARITY:
+ case IFN_POPCOUNT:
+ lower_bit_query (stmt);
+ return;
default:
break;
}
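
lower_bit_query above rewrites these queries on large/huge _BitInt operands into per-limb calls to the word-sized builtin; for popcount the accumulated result is roughly equivalent to the following (a hand-written sketch assuming 64-bit limbs, not the emitted GIMPLE):

/* Sketch: popcount of a 256-bit operand, least-significant limb first,
   assuming limb_prec == 64 so BUILT_IN_POPCOUNTLL is used per limb.  */
int
popcount_256bit_limbs (const unsigned long long limb[4])
{
  int sum = 0;
  for (int i = 0; i < 4; i++)
    sum += __builtin_popcountll (limb[i]);
  return sum;
}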
diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc
index 67b3c3d015ef..26c42da07562 100644
--- a/gcc/gimple-range-op.cc
+++ b/gcc/gimple-range-op.cc
@@ -908,39 +908,34 @@ public:
cfn_clz (bool internal) { m_gimple_call_internal_p = internal; }
using range_operator::fold_range;
virtual bool fold_range (irange &r, tree type, const irange &lh,
- const irange &, relation_trio) const;
+ const irange &rh, relation_trio) const;
private:
bool m_gimple_call_internal_p;
} op_cfn_clz (false), op_cfn_clz_internal (true);
bool
cfn_clz::fold_range (irange &r, tree type, const irange &lh,
- const irange &, relation_trio) const
+ const irange &rh, relation_trio) const
{
// __builtin_c[lt]z* return [0, prec-1], except when the
// argument is 0, but that is undefined behavior.
//
// For __builtin_c[lt]z* consider argument of 0 always undefined
- // behavior, for internal fns depending on C?Z_DEFINED_VALUE_AT_ZERO.
+ // behavior, for internal fns likewise, unless it has 2 arguments,
+ // then the second argument is the value at zero.
if (lh.undefined_p ())
return false;
int prec = TYPE_PRECISION (lh.type ());
int mini = 0;
int maxi = prec - 1;
- int zerov = 0;
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (lh.type ());
if (m_gimple_call_internal_p)
{
- if (optab_handler (clz_optab, mode) != CODE_FOR_nothing
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, zerov) == 2)
- {
- // Only handle the single common value.
- if (zerov == prec)
- maxi = prec;
- else
- // Magic value to give up, unless we can prove arg is non-zero.
- mini = -2;
- }
+ // Only handle the single common value.
+ if (rh.lower_bound () == prec)
+ maxi = prec;
+ else
+ // Magic value to give up, unless we can prove arg is non-zero.
+ mini = -2;
}
// From clz of minimum we can compute result maximum.
@@ -985,37 +980,31 @@ public:
cfn_ctz (bool internal) { m_gimple_call_internal_p = internal; }
using range_operator::fold_range;
virtual bool fold_range (irange &r, tree type, const irange &lh,
- const irange &, relation_trio) const;
+ const irange &rh, relation_trio) const;
private:
bool m_gimple_call_internal_p;
} op_cfn_ctz (false), op_cfn_ctz_internal (true);
bool
cfn_ctz::fold_range (irange &r, tree type, const irange &lh,
- const irange &, relation_trio) const
+ const irange &rh, relation_trio) const
{
if (lh.undefined_p ())
return false;
int prec = TYPE_PRECISION (lh.type ());
int mini = 0;
int maxi = prec - 1;
- int zerov = 0;
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (lh.type ());
if (m_gimple_call_internal_p)
{
- if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing
- && CTZ_DEFINED_VALUE_AT_ZERO (mode, zerov) == 2)
- {
- // Handle only the two common values.
- if (zerov == -1)
- mini = -1;
- else if (zerov == prec)
- maxi = prec;
- else
- // Magic value to give up, unless we can prove arg is non-zero.
- mini = -2;
- }
+ // Handle only the two common values.
+ if (rh.lower_bound () == -1)
+ mini = -1;
+ else if (rh.lower_bound () == prec)
+ maxi = prec;
+ else
+ // Magic value to give up, unless we can prove arg is non-zero.
+ mini = -2;
}
// If arg is non-zero, then use [0, prec - 1].
if (!range_includes_zero_p (&lh))
@@ -1288,16 +1277,24 @@ gimple_range_op_handler::maybe_builtin_call ()
CASE_CFN_CLZ:
m_op1 = gimple_call_arg (call, 0);
- if (gimple_call_internal_p (call))
- m_operator = &op_cfn_clz_internal;
+ if (gimple_call_internal_p (call)
+ && gimple_call_num_args (call) == 2)
+ {
+ m_op2 = gimple_call_arg (call, 1);
+ m_operator = &op_cfn_clz_internal;
+ }
else
m_operator = &op_cfn_clz;
break;
CASE_CFN_CTZ:
m_op1 = gimple_call_arg (call, 0);
- if (gimple_call_internal_p (call))
- m_operator = &op_cfn_ctz_internal;
+ if (gimple_call_internal_p (call)
+ && gimple_call_num_args (call) == 2)
+ {
+ m_op2 = gimple_call_arg (call, 1);
+ m_operator = &op_cfn_ctz_internal;
+ }
else
m_operator = &op_cfn_ctz;
break;
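
Carrying the value-at-zero as a second operand on the internal call lets ranger derive the result range directly from the IL instead of querying C?Z_DEFINED_VALUE_AT_ZERO; a sketch of the intended range fact (not a guaranteed optimization on every target):

/* Sketch: .CLZ (x, 32) has range [0, 32], so the comparison below is
   expected to fold to false once the call is in that form.  */
int
clz_range_fact (unsigned x)
{
  int r = __builtin_clzg (x, 32);
  return r > 32;
}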
diff --git a/gcc/match.pd b/gcc/match.pd
index 281c6c087e6a..59791f448512 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8536,31 +8536,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(op (clz:s@2 @0) INTEGER_CST@1)
(if (integer_zerop (@1) && single_use (@2))
/* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0. */
- (with { tree type0 = TREE_TYPE (@0);
- tree stype = signed_type_for (type0);
- HOST_WIDE_INT val = 0;
- /* Punt on hypothetical weird targets. */
- if (clz == CFN_CLZ
- && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (type0),
- val) == 2
- && val == 0)
- stype = NULL_TREE;
- }
- (if (stype)
- (cmp (convert:stype @0) { build_zero_cst (stype); })))
+ (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+ (cmp (convert:stype @0) { build_zero_cst (stype); }))
/* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1. */
- (with { bool ok = true;
- HOST_WIDE_INT val = 0;
- tree type0 = TREE_TYPE (@0);
- /* Punt on hypothetical weird targets. */
- if (clz == CFN_CLZ
- && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (type0),
- val) == 2
- && val == TYPE_PRECISION (type0) - 1)
- ok = false;
- }
- (if (ok && wi::to_wide (@1) == (TYPE_PRECISION (type0) - 1))
- (op @0 { build_one_cst (type0); })))))))
+ (if (wi::to_wide (@1) == TYPE_PRECISION (TREE_TYPE (@0)) - 1)
+ (op @0 { build_one_cst (TREE_TYPE (@0)); }))))))
+(for op (eq ne)
+ cmp (lt ge)
+ (simplify
+ (op (IFN_CLZ:s@2 @0 @3) INTEGER_CST@1)
+ (if (integer_zerop (@1) && single_use (@2))
+ /* clz(X) == 0 is (int)X < 0 and clz(X) != 0 is (int)X >= 0. */
+ (with { tree type0 = TREE_TYPE (@0);
+ tree stype = signed_type_for (TREE_TYPE (@0));
+ /* Punt if clz(0) == 0. */
+ if (integer_zerop (@3))
+ stype = NULL_TREE;
+ }
+ (if (stype)
+ (cmp (convert:stype @0) { build_zero_cst (stype); })))
+ /* clz(X) == (prec-1) is X == 1 and clz(X) != (prec-1) is X != 1. */
+ (with { bool ok = true;
+ tree type0 = TREE_TYPE (@0);
+ /* Punt if clz(0) == prec - 1. */
+ if (wi::to_widest (@3) == TYPE_PRECISION (type0) - 1)
+ ok = false;
+ }
+ (if (ok && wi::to_wide (@1) == (TYPE_PRECISION (type0) - 1))
+ (op @0 { build_one_cst (type0); }))))))
/* CTZ simplifications. */
(for ctz (CTZ)
@@ -8585,22 +8588,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
val++;
}
}
- bool zero_res = false;
- HOST_WIDE_INT zero_val = 0;
tree type0 = TREE_TYPE (@0);
int prec = TYPE_PRECISION (type0);
- if (ctz == CFN_CTZ
- && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (type0),
- zero_val) == 2)
- zero_res = true;
}
- (if (val <= 0)
- (if (ok && (!zero_res || zero_val >= val))
- { constant_boolean_node (cmp == EQ_EXPR ? true : false, type); })
- (if (val >= prec)
- (if (ok && (!zero_res || zero_val < val))
- { constant_boolean_node (cmp == EQ_EXPR ? false : true, type); })
- (if (ok && (!zero_res || zero_val < 0 || zero_val >= prec))
+ (if (ok && prec <= MAX_FIXED_MODE_SIZE)
+ (if (val <= 0)
+ { constant_boolean_node (cmp == EQ_EXPR ? true : false, type); }
+ (if (val >= prec)
+ { constant_boolean_node (cmp == EQ_EXPR ? false : true, type); }
(cmp (bit_and @0 { wide_int_to_tree (type0,
wi::mask (val, false, prec)); })
{ build_zero_cst (type0); })))))))
@@ -8608,19 +8603,68 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(simplify
/* __builtin_ctz (x) == C -> (x & ((1 << (C + 1)) - 1)) == (1 << C). */
(op (ctz:s @0) INTEGER_CST@1)
- (with { bool zero_res = false;
- HOST_WIDE_INT zero_val = 0;
- tree type0 = TREE_TYPE (@0);
+ (with { tree type0 = TREE_TYPE (@0);
int prec = TYPE_PRECISION (type0);
- if (ctz == CFN_CTZ
- && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_TYPE_MODE (type0),
- zero_val) == 2)
- zero_res = true;
}
+ (if (prec <= MAX_FIXED_MODE_SIZE)
+ (if (tree_int_cst_sgn (@1) < 0 || wi::to_widest (@1) >= prec)
+ { constant_boolean_node (op == EQ_EXPR ? false : true, type); }
+ (op (bit_and @0 { wide_int_to_tree (type0,
+ wi::mask (tree_to_uhwi (@1) + 1,
+ false, prec)); })
+ { wide_int_to_tree (type0,
+ wi::shifted_mask (tree_to_uhwi (@1), 1,
+ false, prec)); })))))))
+(for op (ge gt le lt)
+ cmp (eq eq ne ne)
+ (simplify
+ /* __builtin_ctz (x) >= C -> (x & ((1 << C) - 1)) == 0. */
+ (op (IFN_CTZ:s @0 @2) INTEGER_CST@1)
+ (with { bool ok = true;
+ HOST_WIDE_INT val = 0;
+ if (!tree_fits_shwi_p (@1))
+ ok = false;
+ else
+ {
+ val = tree_to_shwi (@1);
+ /* Canonicalize to >= or <. */
+ if (op == GT_EXPR || op == LE_EXPR)
+ {
+ if (val == HOST_WIDE_INT_MAX)
+ ok = false;
+ else
+ val++;
+ }
+ }
+ HOST_WIDE_INT zero_val = tree_to_shwi (@2);
+ tree type0 = TREE_TYPE (@0);
+ int prec = TYPE_PRECISION (type0);
+ if (prec > MAX_FIXED_MODE_SIZE)
+ ok = false;
+ }
+ (if (val <= 0)
+ (if (ok && zero_val >= val)
+ { constant_boolean_node (cmp == EQ_EXPR ? true : false, type); })
+ (if (val >= prec)
+ (if (ok && zero_val < val)
+ { constant_boolean_node (cmp == EQ_EXPR ? false : true, type); })
+ (if (ok && (zero_val < 0 || zero_val >= prec))
+ (cmp (bit_and @0 { wide_int_to_tree (type0,
+ wi::mask (val, false, prec)); })
+ { build_zero_cst (type0); })))))))
+(for op (eq ne)
+ (simplify
+ /* __builtin_ctz (x) == C -> (x & ((1 << (C + 1)) - 1)) == (1 << C). */
+ (op (IFN_CTZ:s @0 @2) INTEGER_CST@1)
+ (with { HOST_WIDE_INT zero_val = tree_to_shwi (@2);
+ tree type0 = TREE_TYPE (@0);
+ int prec = TYPE_PRECISION (type0);
+ }
+ (if (prec <= MAX_FIXED_MODE_SIZE)
(if (tree_int_cst_sgn (@1) < 0 || wi::to_widest (@1) >= prec)
- (if (!zero_res || zero_val != wi::to_widest (@1))
+ (if (zero_val != wi::to_widest (@1))
{ constant_boolean_node (op == EQ_EXPR ? false : true, type); })
- (if (!zero_res || zero_val < 0 || zero_val >= prec)
+ (if (zero_val < 0 || zero_val >= prec)
(op (bit_and @0 { wide_int_to_tree (type0,
wi::mask (tree_to_uhwi (@1) + 1,
false, prec)); })
@@ -8757,13 +8801,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cond (ne @0 integer_zerop@1) (func (convert?@3 @0)) INTEGER_CST@2)
(with { int val;
internal_fn ifn = IFN_LAST;
- if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
- && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
- val) == 2)
+ if (TREE_CODE (TREE_TYPE (@3)) == BITINT_TYPE)
+ {
+ if (tree_fits_shwi_p (@2))
+ {
+ HOST_WIDE_INT valw = tree_to_shwi (@2);
+ if ((int) valw == valw)
+ {
+ val = valw;
+ ifn = IFN_CLZ;
+ }
+ }
+ }
+ else if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (@3),
+ OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO
+ (SCALAR_INT_TYPE_MODE (TREE_TYPE (@3)), val) == 2)
ifn = IFN_CLZ;
}
(if (ifn == IFN_CLZ && wi::to_widest (@2) == val)
- (IFN_CLZ @3)))))
+ (IFN_CLZ @3 @2)))))
+(simplify
+ (cond (ne @0 integer_zerop@1) (IFN_CLZ (convert?@3 @0) INTEGER_CST@2) @2)
+ (with { int val;
+ internal_fn ifn = IFN_LAST;
+ if (TREE_CODE (TREE_TYPE (@3)) == BITINT_TYPE)
+ ifn = IFN_CLZ;
+ else if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (@3),
+ OPTIMIZE_FOR_BOTH))
+ ifn = IFN_CLZ;
+ }
+ (if (ifn == IFN_CLZ)
+ (IFN_CLZ @3 @2))))
/* a != 0 ? CTZ(a) : CST -> .CTZ(a) where CST is the result of the internal function for 0. */
(for func (CTZ)
@@ -8771,13 +8840,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cond (ne @0 integer_zerop@1) (func (convert?@3 @0)) INTEGER_CST@2)
(with { int val;
internal_fn ifn = IFN_LAST;
- if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
- && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
- val) == 2)
+ if (TREE_CODE (TREE_TYPE (@3)) == BITINT_TYPE)
+ {
+ if (tree_fits_shwi_p (@2))
+ {
+ HOST_WIDE_INT valw = tree_to_shwi (@2);
+ if ((int) valw == valw)
+ {
+ val = valw;
+ ifn = IFN_CTZ;
+ }
+ }
+ }
+ else if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@3),
+ OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO
+ (SCALAR_INT_TYPE_MODE (TREE_TYPE (@3)), val) == 2)
ifn = IFN_CTZ;
}
(if (ifn == IFN_CTZ && wi::to_widest (@2) == val)
- (IFN_CTZ @3)))))
+ (IFN_CTZ @3 @2)))))
+(simplify
+ (cond (ne @0 integer_zerop@1) (IFN_CTZ (convert?@3 @0) INTEGER_CST@2) @2)
+ (with { int val;
+ internal_fn ifn = IFN_LAST;
+ if (TREE_CODE (TREE_TYPE (@3)) == BITINT_TYPE)
+ ifn = IFN_CTZ;
+ else if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@3),
+ OPTIMIZE_FOR_BOTH))
+ ifn = IFN_CTZ;
+ }
+ (if (ifn == IFN_CTZ)
+ (IFN_CTZ @3 @2))))
#endif
/* Common POPCOUNT/PARITY simplifications. */
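
In source terms, the match.pd changes above make the conditional spelling and the two-argument generic builtin interchangeable; the two functions below are intended to simplify to the same two-argument .CLZ internal call where it is supported (a sketch, not a guarantee for every target):

int clz_cond (unsigned a) { return a != 0 ? __builtin_clz (a) : 32; }
int clz_twoarg (unsigned a) { return __builtin_clzg (a, 32); }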
diff --git a/gcc/testsuite/c-c++-common/pr111309-1.c b/gcc/testsuite/c-c++-common/pr111309-1.c
new file mode 100644
index 000000000000..fdf3bf11c336
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr111309-1.c
@@ -0,0 +1,470 @@
+/* PR c/111309 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) int
+clzc (unsigned char x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzc2 (unsigned char x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+clzs (unsigned short x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzs2 (unsigned short x)
+{
+ return __builtin_clzg (x, -2);
+}
+
+__attribute__((noipa)) int
+clzi (unsigned int x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzi2 (unsigned int x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+clzl (unsigned long x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzl2 (unsigned long x)
+{
+ return __builtin_clzg (x, -1);
+}
+
+__attribute__((noipa)) int
+clzL (unsigned long long x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzL2 (unsigned long long x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+clzI (unsigned __int128 x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzI2 (unsigned __int128 x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+#endif
+
+__attribute__((noipa)) int
+ctzc (unsigned char x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzc2 (unsigned char x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctzs (unsigned short x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzs2 (unsigned short x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctzi (unsigned int x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzi2 (unsigned int x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctzl (unsigned long x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzl2 (unsigned long x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctzL (unsigned long long x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzL2 (unsigned long long x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+ctzI (unsigned __int128 x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzI2 (unsigned __int128 x)
+{
+ return __builtin_ctzg (x, __SIZEOF_INT128__ * __CHAR_BIT__);
+}
+#endif
+
+__attribute__((noipa)) int
+clrsbc (signed char x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+clrsbs (signed short x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+clrsbi (signed int x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+clrsbl (signed long x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+clrsbL (signed long long x)
+{
+ return __builtin_clrsbg (x);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+clrsbI (signed __int128 x)
+{
+ return __builtin_clrsbg (x);
+}
+#endif
+
+__attribute__((noipa)) int
+ffsc (signed char x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+ffss (signed short x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+ffsi (signed int x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+ffsl (signed long x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+ffsL (signed long long x)
+{
+ return __builtin_ffsg (x);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+ffsI (signed __int128 x)
+{
+ return __builtin_ffsg (x);
+}
+#endif
+
+__attribute__((noipa)) int
+parityc (unsigned char x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+paritys (unsigned short x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+parityi (unsigned int x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+parityl (unsigned long x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+parityL (unsigned long long x)
+{
+ return __builtin_parityg (x);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+parityI (unsigned __int128 x)
+{
+ return __builtin_parityg (x);
+}
+#endif
+
+__attribute__((noipa)) int
+popcountc (unsigned char x)
+{
+ return __builtin_popcountg (x);
+}
+
+__attribute__((noipa)) int
+popcounts (unsigned short x)
+{
+ return __builtin_popcountg (x);
+}
+
+__attribute__((noipa)) int
+popcounti (unsigned int x)
+{
+ return __builtin_popcountg (x);
+}
+
+__attribute__((noipa)) int
+popcountl (unsigned long x)
+{
+ return __builtin_popcountg (x);
+}
+
+__attribute__((noipa)) int
+popcountL (unsigned long long x)
+{
+ return __builtin_popcountg (x);
+}
+
+#ifdef __SIZEOF_INT128__
+__attribute__((noipa)) int
+popcountI (unsigned __int128 x)
+{
+ return __builtin_popcountg (x);
+}
+#endif
+
+int
+main ()
+{
+ if (__builtin_clzg ((unsigned char) 1) != __CHAR_BIT__ - 1
+ || __builtin_clzg ((unsigned short) 2, -2) != __SIZEOF_SHORT__ * __CHAR_BIT__ - 2
+ || __builtin_clzg (0U, 42) != 42
+ || __builtin_clzg (0U, -1) != -1
+ || __builtin_clzg (1U) != __SIZEOF_INT__ * __CHAR_BIT__ - 1
+ || __builtin_clzg (2UL, -1) != __SIZEOF_LONG__ * __CHAR_BIT__ - 2
+ || __builtin_clzg (5ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 3
+#ifdef __SIZEOF_INT128__
+ || __builtin_clzg ((unsigned __int128) 9) != __SIZEOF_INT128__ * __CHAR_BIT__ - 4
+#endif
+ || __builtin_clzg (~0U, -5) != 0
+ || __builtin_clzg (~0ULL >> 2) != 2
+ || __builtin_ctzg ((unsigned char) 1) != 0
+ || __builtin_ctzg ((unsigned short) 28) != 2
+ || __builtin_ctzg (0U, 32) != 32
+ || __builtin_ctzg (0U, -42) != -42
+ || __builtin_ctzg (1U) != 0
+ || __builtin_ctzg (16UL, -1) != 4
+ || __builtin_ctzg (5ULL << 52, 0) != 52
+#ifdef __SIZEOF_INT128__
+ || __builtin_ctzg (((unsigned __int128) 9) << 72) != 72
+#endif
+ || __builtin_clrsbg ((signed char) 0) != __CHAR_BIT__ - 1
+ || __builtin_clrsbg ((signed short) -1) != __SIZEOF_SHORT__ * __CHAR_BIT__ - 1
+ || __builtin_clrsbg (0) != __SIZEOF_INT__ * __CHAR_BIT__ - 1
+ || __builtin_clrsbg (-1L) != __SIZEOF_LONG__ * __CHAR_BIT__ - 1
+ || __builtin_clrsbg (0LL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 1
+#ifdef __SIZEOF_INT128__
+ || __builtin_clrsbg ((__int128) -1) != __SIZEOF_INT128__ * __CHAR_BIT__ - 1
+#endif
+ || __builtin_clrsbg (0x1afb) != __SIZEOF_INT__ * __CHAR_BIT__ - 14
+ || __builtin_clrsbg (-2) != __SIZEOF_INT__ * __CHAR_BIT__ - 2
+ || __builtin_clrsbg (1L) != __SIZEOF_LONG__ * __CHAR_BIT__ - 2
+ || __builtin_clrsbg (-4LL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 3
+ || __builtin_ffsg ((signed char) 0) != 0
+ || __builtin_ffsg ((signed short) 0) != 0
+ || __builtin_ffsg (0) != 0
+ || __builtin_ffsg (0L) != 0
+ || __builtin_ffsg (0LL) != 0
+#ifdef __SIZEOF_INT128__
+ || __builtin_ffsg ((__int128) 0) != 0
+#endif
+ || __builtin_ffsg ((signed char) 4) != 3
+ || __builtin_ffsg ((signed short) 8) != 4
+ || __builtin_ffsg (1) != 1
+ || __builtin_ffsg (2L) != 2
+ || __builtin_ffsg (28LL) != 3
+ || __builtin_parityg ((unsigned char) 1) != 1
+ || __builtin_parityg ((unsigned short) 2) != 1
+ || __builtin_parityg (0U) != 0
+ || __builtin_parityg (3U) != 0
+ || __builtin_parityg (0UL) != 0
+ || __builtin_parityg (7UL) != 1
+ || __builtin_parityg (0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || __builtin_parityg ((unsigned __int128) 0) != 0
+#endif
+ || __builtin_parityg ((unsigned char) ~0U) != 0
+ || __builtin_parityg ((unsigned short) ~0U) != 0
+ || __builtin_parityg (~0U) != 0
+ || __builtin_parityg (~0UL) != 0
+ || __builtin_parityg (~0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || __builtin_parityg (~(unsigned __int128) 0) != 0
+#endif
+ || __builtin_popcountg (0U) != 0
+ || __builtin_popcountg (0UL) != 0
+ || __builtin_popcountg (0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || __builtin_popcountg ((unsigned __int128) 0) != 0
+#endif
+ || __builtin_popcountg ((unsigned char) ~0U) != __CHAR_BIT__
+ || __builtin_popcountg ((unsigned short) ~0U) != __SIZEOF_SHORT__ * __CHAR_BIT__
+ || __builtin_popcountg (~0U) != __SIZEOF_INT__ * __CHAR_BIT__
+ || __builtin_popcountg (~0UL) != __SIZEOF_LONG__ * __CHAR_BIT__
+ || __builtin_popcountg (~0ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__
+#ifdef __SIZEOF_INT128__
+ || __builtin_popcountg (~(unsigned __int128) 0) != __SIZEOF_INT128__ * __CHAR_BIT__
+#endif
+ || 0)
+ __builtin_abort ();
+ if (clzc (1) != __CHAR_BIT__ - 1
+ || clzs2 (2) != __SIZEOF_SHORT__ * __CHAR_BIT__ - 2
+ || clzi2 (0U, 42) != 42
+ || clzi2 (0U, -1) != -1
+ || clzi (1U) != __SIZEOF_INT__ * __CHAR_BIT__ - 1
+ || clzl2 (2UL) != __SIZEOF_LONG__ * __CHAR_BIT__ - 2
+ || clzL (5ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 3
+#ifdef __SIZEOF_INT128__
+ || clzI ((unsigned __int128) 9) != __SIZEOF_INT128__ * __CHAR_BIT__ - 4
+#endif
+ || clzi2 (~0U, -5) != 0
+ || clzL (~0ULL >> 2) != 2
+ || ctzc (1) != 0
+ || ctzs (28) != 2
+ || ctzi2 (0U, 32) != 32
+ || ctzi2 (0U, -42) != -42
+ || ctzi (1U) != 0
+ || ctzl2 (16UL, -1) != 4
+ || ctzL2 (5ULL << 52, 0) != 52
+#ifdef __SIZEOF_INT128__
+ || ctzI (((unsigned __int128) 9) << 72) != 72
+#endif
+ || clrsbc (0) != __CHAR_BIT__ - 1
+ || clrsbs (-1) != __SIZEOF_SHORT__ * __CHAR_BIT__ - 1
+ || clrsbi (0) != __SIZEOF_INT__ * __CHAR_BIT__ - 1
+ || clrsbl (-1L) != __SIZEOF_LONG__ * __CHAR_BIT__ - 1
+ || clrsbL (0LL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 1
+#ifdef __SIZEOF_INT128__
+ || clrsbI (-1) != __SIZEOF_INT128__ * __CHAR_BIT__ - 1
+#endif
+ || clrsbi (0x1afb) != __SIZEOF_INT__ * __CHAR_BIT__ - 14
+ || clrsbi (-2) != __SIZEOF_INT__ * __CHAR_BIT__ - 2
+ || clrsbl (1L) != __SIZEOF_LONG__ * __CHAR_BIT__ - 2
+ || clrsbL (-4LL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 3
+ || ffsc (0) != 0
+ || ffss (0) != 0
+ || ffsi (0) != 0
+ || ffsl (0L) != 0
+ || ffsL (0LL) != 0
+#ifdef __SIZEOF_INT128__
+ || ffsI (0) != 0
+#endif
+ || ffsc (4) != 3
+ || ffss (8) != 4
+ || ffsi (1) != 1
+ || ffsl (2L) != 2
+ || ffsL (28LL) != 3
+ || parityc (1) != 1
+ || paritys (2) != 1
+ || parityi (0U) != 0
+ || parityi (3U) != 0
+ || parityl (0UL) != 0
+ || parityl (7UL) != 1
+ || parityL (0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || parityI (0) != 0
+#endif
+ || parityc ((unsigned char) ~0U) != 0
+ || paritys ((unsigned short) ~0U) != 0
+ || parityi (~0U) != 0
+ || parityl (~0UL) != 0
+ || parityL (~0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || parityI (~(unsigned __int128) 0) != 0
+#endif
+ || popcounti (0U) != 0
+ || popcountl (0UL) != 0
+ || popcountL (0ULL) != 0
+#ifdef __SIZEOF_INT128__
+ || popcountI (0) != 0
+#endif
+ || popcountc ((unsigned char) ~0U) != __CHAR_BIT__
+ || popcounts ((unsigned short) ~0U) != __SIZEOF_SHORT__ * __CHAR_BIT__
+ || popcounti (~0U) != __SIZEOF_INT__ * __CHAR_BIT__
+ || popcountl (~0UL) != __SIZEOF_LONG__ * __CHAR_BIT__
+ || popcountL (~0ULL) != __SIZEOF_LONG_LONG__ * __CHAR_BIT__
+#ifdef __SIZEOF_INT128__
+ || popcountI (~(unsigned __int128) 0) != __SIZEOF_INT128__ * __CHAR_BIT__
+#endif
+ || 0)
+ __builtin_abort ();
+}
diff --git a/gcc/testsuite/c-c++-common/pr111309-2.c b/gcc/testsuite/c-c++-common/pr111309-2.c
new file mode 100644
index 000000000000..535208b1cd3b
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/pr111309-2.c
@@ -0,0 +1,85 @@
+/* PR c/111309 */
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99" { target c } } */
+
+#ifndef __cplusplus
+#define bool _Bool
+#define true ((_Bool) 1)
+#define false ((_Bool) 0)
+#endif
+
+void
+foo (void)
+{
+ enum E { E0 = 0 };
+ struct S { int s; } s;
+ __builtin_clzg (); /* { dg-error "too few arguments" } */
+ __builtin_clzg (0U, 1, 2); /* { dg-error "too many arguments" } */
+ __builtin_clzg (0); /* { dg-error "has signed type" } */
+ __builtin_clzg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (s); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (true); /* { dg-error "has boolean type" } */
+ __builtin_clzg (E0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+ __builtin_clzg (0, 0); /* { dg-error "has signed type" } */
+ __builtin_clzg (0.0, 0); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (s, 0); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (true, 0); /* { dg-error "has boolean type" } */
+ __builtin_clzg (E0, 0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+ __builtin_clzg (0U, 2.0); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (0U, s); /* { dg-error "does not have integral type" } */
+ __builtin_clzg (0U, 2LL); /* { dg-error "does not have 'int' type" } */
+ __builtin_clzg (0U, 2U); /* { dg-error "does not have 'int' type" } */
+ __builtin_clzg (0U, true);
+ __builtin_clzg (0U, E0); /* { dg-error "does not have 'int' type" "" { target c++ } } */
+ __builtin_ctzg (); /* { dg-error "too few arguments" } */
+ __builtin_ctzg (0U, 1, 2); /* { dg-error "too many arguments" } */
+ __builtin_ctzg (0); /* { dg-error "has signed type" } */
+ __builtin_ctzg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_ctzg (s); /* { dg-error "does not have integral type" } */
+ __builtin_ctzg (true); /* { dg-error "has boolean type" } */
+ __builtin_ctzg (E0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+ __builtin_ctzg (0, 0); /* { dg-error "has signed type" } */
+ __builtin_ctzg (0.0, 0); /* { dg-error "does not have integral type" } */
+ __builtin_ctzg (s, 0); /* { dg-error "does not have integral type" } */
+ __builtin_ctzg (true, 0); /* { dg-error "has boolean type" } */
+ __builtin_ctzg (E0, 0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+ __builtin_ctzg (0U, 2.0); /* { dg-error "does not have integral type" } */
+ __builtin_ctzg (0U, 2LL); /* { dg-error "does not have 'int' type" } */
+ __builtin_ctzg (0U, 2U); /* { dg-error "does not have 'int' type" } */
+ __builtin_ctzg (0U, true);
+ __builtin_ctzg (0U, E0); /* { dg-error "does not have 'int' type" "" { target c++ } } */
+ __builtin_clrsbg (); /* { dg-error "too few arguments" } */
+ __builtin_clrsbg (0, 1); /* { dg-error "too many arguments" } */
+ __builtin_clrsbg (0U); /* { dg-error "has unsigned type" } */
+ __builtin_clrsbg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_clrsbg (s); /* { dg-error "does not have integral type" } */
+ __builtin_clrsbg (true); /* { dg-error "has boolean type" } */
+ __builtin_clrsbg (E0); /* { dg-error "has enumerated type" "" { target c++ } } */
+ __builtin_ffsg (); /* { dg-error "too few arguments" } */
+ __builtin_ffsg (0, 1); /* { dg-error "too many arguments" } */
+ __builtin_ffsg (0U); /* { dg-error "has unsigned type" } */
+ __builtin_ffsg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_ffsg (s); /* { dg-error "does not have integral type" } */
+ __builtin_ffsg (true); /* { dg-error "has boolean type" } */
+ __builtin_ffsg (E0); /* { dg-error "has enumerated type" "" { target c++ } } */
+ __builtin_parityg (); /* { dg-error "too few arguments" } */
+ __builtin_parityg (0U, 1); /* { dg-error "too many arguments" } */
+ __builtin_parityg (0); /* { dg-error "has signed type" } */
+ __builtin_parityg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_parityg (s); /* { dg-error "does not have integral type" } */
+ __builtin_parityg (true); /* { dg-error "has boolean type" } */
+ __builtin_parityg (E0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+ __builtin_popcountg (); /* { dg-error "too few arguments" } */
+ __builtin_popcountg (0U, 1); /* { dg-error "too many arguments" } */
+ __builtin_popcountg (0); /* { dg-error "has signed type" } */
+ __builtin_popcountg (0.0); /* { dg-error "does not have integral type" } */
+ __builtin_popcountg (s); /* { dg-error "does not have integral type" } */
+ __builtin_popcountg (true); /* { dg-error "has boolean type" } */
+ __builtin_popcountg (E0); /* { dg-error "has signed type" "" { target c } } */
+ /* { dg-error "has enumerated type" "" { target c++ } .-1 } */
+}
diff --git a/gcc/testsuite/gcc.dg/torture/bitint-43.c b/gcc/testsuite/gcc.dg/torture/bitint-43.c
new file mode 100644
index 000000000000..4265bffef706
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/bitint-43.c
@@ -0,0 +1,306 @@
+/* PR c/111309 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 156
+__attribute__((noipa)) int
+clz156 (unsigned _BitInt(156) x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzd156 (unsigned _BitInt(156) x)
+{
+ return __builtin_clzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+clzD156 (unsigned _BitInt(156) x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctz156 (unsigned _BitInt(156) x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzd156 (unsigned _BitInt(156) x)
+{
+ return __builtin_ctzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+ctzD156 (unsigned _BitInt(156) x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+clrsb156 (_BitInt(156) x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+ffs156 (_BitInt(156) x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+parity156 (unsigned _BitInt(156) x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+popcount156 (unsigned _BitInt(156) x)
+{
+ return __builtin_popcountg (x);
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 192
+__attribute__((noipa)) int
+clz192 (unsigned _BitInt(192) x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzd192 (unsigned _BitInt(192) x)
+{
+ return __builtin_clzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+clzD192 (unsigned _BitInt(192) x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctz192 (unsigned _BitInt(192) x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzd192 (unsigned _BitInt(192) x)
+{
+ return __builtin_ctzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+ctzD192 (unsigned _BitInt(192) x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+clrsb192 (_BitInt(192) x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+ffs192 (_BitInt(192) x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+parity192 (unsigned _BitInt(192) x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+popcount192 (unsigned _BitInt(192) x)
+{
+ return __builtin_popcountg (x);
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 156
+ if (clzd156 (0) != 156
+ || clzD156 (0, -1) != -1
+ || ctzd156 (0) != 156
+ || ctzD156 (0, 42) != 42
+ || clrsb156 (0) != 156 - 1
+ || ffs156 (0) != 0
+ || parity156 (0) != 0
+ || popcount156 (0) != 0
+ || __builtin_clzg ((unsigned _BitInt(156)) 0, 156 + 32) != 156 + 32
+ || __builtin_ctzg ((unsigned _BitInt(156)) 0, 156) != 156
+ || __builtin_clrsbg ((_BitInt(156)) 0) != 156 - 1
+ || __builtin_ffsg ((_BitInt(156)) 0) != 0
+ || __builtin_parityg ((unsigned _BitInt(156)) 0) != 0
+ || __builtin_popcountg ((unsigned _BitInt(156)) 0) != 0)
+ __builtin_abort ();
+ if (clz156 (-1) != 0
+ || clzd156 (-1) != 0
+ || clzD156 (-1, 0) != 0
+ || ctz156 (-1) != 0
+ || ctzd156 (-1) != 0
+ || ctzD156 (-1, 17) != 0
+ || clrsb156 (-1) != 156 - 1
+ || ffs156 (-1) != 1
+ || parity156 (-1) != 0
+ || popcount156 (-1) != 156
+ || __builtin_clzg ((unsigned _BitInt(156)) -1) != 0
+ || __builtin_clzg ((unsigned _BitInt(156)) -1, 156 + 32) != 0
+ || __builtin_ctzg ((unsigned _BitInt(156)) -1) != 0
+ || __builtin_ctzg ((unsigned _BitInt(156)) -1, 156) != 0
+ || __builtin_clrsbg ((_BitInt(156)) -1) != 156 - 1
+ || __builtin_ffsg ((_BitInt(156)) -1) != 1
+ || __builtin_parityg ((unsigned _BitInt(156)) -1) != 0
+ || __builtin_popcountg ((unsigned _BitInt(156)) -1) != 156)
+ __builtin_abort ();
+ if (clz156 (((unsigned _BitInt(156)) -1) >> 24) != 24
+ || clz156 (((unsigned _BitInt(156)) -1) >> 79) != 79
+ || clz156 (1) != 156 - 1
+ || clzd156 (((unsigned _BitInt(156)) -1) >> 139) != 139
+ || clzd156 (2) != 156 - 2
+ || ctz156 (((unsigned _BitInt(156)) -1) << 42) != 42
+ || ctz156 (((unsigned _BitInt(156)) -1) << 57) != 57
+ || ctz156 (0x4000000000000000000000uwb) != 86
+ || ctzd156 (((unsigned _BitInt(156)) -1) << 149) != 149
+ || ctzd156 (2) != 1
+ || clrsb156 ((unsigned _BitInt(156 - 4)) -1) != 3
+ || clrsb156 ((unsigned _BitInt(156 - 28)) -1) != 27
+ || clrsb156 ((unsigned _BitInt(156 - 29)) -1) != 28
+ || clrsb156 (~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 68)) -1) != 67
+ || clrsb156 (~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 92)) -1) != 91
+ || clrsb156 (~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 93)) -1) != 92
+ || ffs156 (((unsigned _BitInt(156)) -1) << 42) != 43
+ || ffs156 (((unsigned _BitInt(156)) -1) << 57) != 58
+ || ffs156 (0x4000000000000000000000uwb) != 87
+ || ffs156 (((unsigned _BitInt(156)) -1) << 149) != 150
+ || ffs156 (2) != 2
+ || __builtin_clzg (((unsigned _BitInt(156)) -1) >> 24) != 24
+ || __builtin_clzg (((unsigned _BitInt(156)) -1) >> 79) != 79
+ || __builtin_clzg ((unsigned _BitInt(156)) 1) != 156 - 1
+ || __builtin_clzg (((unsigned _BitInt(156)) -1) >> 139, 156) != 139
+ || __builtin_clzg ((unsigned _BitInt(156)) 2, 156) != 156 - 2
+ || __builtin_ctzg (((unsigned _BitInt(156)) -1) << 42) != 42
+ || __builtin_ctzg (((unsigned _BitInt(156)) -1) << 57) != 57
+ || __builtin_ctzg ((unsigned _BitInt(156)) 0x4000000000000000000000uwb) != 86
+ || __builtin_ctzg (((unsigned _BitInt(156)) -1) << 149, 156) != 149
+ || __builtin_ctzg ((unsigned _BitInt(156)) 2, 156) != 1
+ || __builtin_clrsbg ((_BitInt(156)) (unsigned _BitInt(156 - 4)) -1) != 3
+ || __builtin_clrsbg ((_BitInt(156)) (unsigned _BitInt(156 - 28)) -1) != 27
+ || __builtin_clrsbg ((_BitInt(156)) (unsigned _BitInt(156 - 29)) -1) != 28
+ || __builtin_clrsbg ((_BitInt(156)) ~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 68)) -1) != 67
+ || __builtin_clrsbg ((_BitInt(156)) ~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 92)) -1) != 91
+ || __builtin_clrsbg ((_BitInt(156)) ~(unsigned _BitInt(156)) (unsigned _BitInt(156 - 93)) -1) != 92
+ || __builtin_ffsg ((_BitInt(156)) (((unsigned _BitInt(156)) -1) << 42)) != 43
+ || __builtin_ffsg ((_BitInt(156)) (((unsigned _BitInt(156)) -1) << 57)) != 58
+ || __builtin_ffsg ((_BitInt(156)) 0x4000000000000000000000uwb) != 87
+ || __builtin_ffsg ((_BitInt(156)) (((unsigned _BitInt(156)) -1) << 149)) != 150
+ || __builtin_ffsg ((_BitInt(156)) 2) != 2)
+ __builtin_abort ();
+ if (parity156 (23008250258685373142923325827291949461178444434uwb) != __builtin_parityg (23008250258685373142923325827291949461178444434uwb)
+ || parity156 (41771568792516301628132437740665810252917251244uwb) != __builtin_parityg (41771568792516301628132437740665810252917251244uwb)
+ || parity156 (5107402473866766219120283991834936835726115452uwb) != __builtin_parityg (5107402473866766219120283991834936835726115452uwb)
+ || popcount156 (50353291748276374580944955711958129678996395562uwb) != __builtin_popcountg (50353291748276374580944955711958129678996395562uwb)
+ || popcount156 (29091263616891212550063067166307725491211684496uwb) != __builtin_popcountg (29091263616891212550063067166307725491211684496uwb)
+ || popcount156 (64973284306583205619384799873110935608793072026uwb) != __builtin_popcountg (64973284306583205619384799873110935608793072026uwb))
+ __builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 192
+ if (clzd192 (0) != 192
+ || clzD192 (0, 42) != 42
+ || ctzd192 (0) != 192
+ || ctzD192 (0, -1) != -1
+ || clrsb192 (0) != 192 - 1
+ || ffs192 (0) != 0
+ || parity192 (0) != 0
+ || popcount192 (0) != 0
+ || __builtin_clzg ((unsigned _BitInt(192)) 0, 192 + 32) != 192 + 32
+ || __builtin_ctzg ((unsigned _BitInt(192)) 0, 192) != 192
+ || __builtin_clrsbg ((_BitInt(192)) 0) != 192 - 1
+ || __builtin_ffsg ((_BitInt(192)) 0) != 0
+ || __builtin_parityg ((unsigned _BitInt(192)) 0) != 0
+ || __builtin_popcountg ((unsigned _BitInt(192)) 0) != 0)
+ __builtin_abort ();
+ if (clz192 (-1) != 0
+ || clzd192 (-1) != 0
+ || clzD192 (-1, 15) != 0
+ || ctz192 (-1) != 0
+ || ctzd192 (-1) != 0
+ || ctzD192 (-1, -57) != 0
+ || clrsb192 (-1) != 192 - 1
+ || ffs192 (-1) != 1
+ || parity192 (-1) != 0
+ || popcount192 (-1) != 192
+ || __builtin_clzg ((unsigned _BitInt(192)) -1) != 0
+ || __builtin_clzg ((unsigned _BitInt(192)) -1, 192 + 32) != 0
+ || __builtin_ctzg ((unsigned _BitInt(192)) -1) != 0
+ || __builtin_ctzg ((unsigned _BitInt(192)) -1, 192) != 0
+ || __builtin_clrsbg ((_BitInt(192)) -1) != 192 - 1
+ || __builtin_ffsg ((_BitInt(192)) -1) != 1
+ || __builtin_parityg ((unsigned _BitInt(192)) -1) != 0
+ || __builtin_popcountg ((unsigned _BitInt(192)) -1) != 192)
+ __builtin_abort ();
+ if (clz192 (((unsigned _BitInt(192)) -1) >> 24) != 24
+ || clz192 (((unsigned _BitInt(192)) -1) >> 79) != 79
+ || clz192 (1) != 192 - 1
+ || clzd192 (((unsigned _BitInt(192)) -1) >> 139) != 139
+ || clzd192 (2) != 192 - 2
+ || ctz192 (((unsigned _BitInt(192)) -1) << 42) != 42
+ || ctz192 (((unsigned _BitInt(192)) -1) << 57) != 57
+ || ctz192 (0x4000000000000000000000uwb) != 86
+ || ctzd192 (((unsigned _BitInt(192)) -1) << 149) != 149
+ || ctzd192 (2) != 1
+ || clrsb192 ((unsigned _BitInt(192 - 4)) -1) != 3
+ || clrsb192 ((unsigned _BitInt(192 - 28)) -1) != 27
+ || clrsb192 ((unsigned _BitInt(192 - 29)) -1) != 28
+ || clrsb192 (~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 68)) -1) != 67
+ || clrsb192 (~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 92)) -1) != 91
+ || clrsb192 (~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 93)) -1) != 92
+ || ffs192 (((unsigned _BitInt(192)) -1) << 42) != 43
+ || ffs192 (((unsigned _BitInt(192)) -1) << 57) != 58
+ || ffs192 (0x4000000000000000000000uwb) != 87
+ || ffs192 (((unsigned _BitInt(192)) -1) << 149) != 150
+ || ffs192 (2) != 2
+ || __builtin_clzg (((unsigned _BitInt(192)) -1) >> 24) != 24
+ || __builtin_clzg (((unsigned _BitInt(192)) -1) >> 79) != 79
+ || __builtin_clzg ((unsigned _BitInt(192)) 1) != 192 - 1
+ || __builtin_clzg (((unsigned _BitInt(192)) -1) >> 139, 192) != 139
+ || __builtin_clzg ((unsigned _BitInt(192)) 2, 192) != 192 - 2
+ || __builtin_ctzg (((unsigned _BitInt(192)) -1) << 42) != 42
+ || __builtin_ctzg (((unsigned _BitInt(192)) -1) << 57) != 57
+ || __builtin_ctzg ((unsigned _BitInt(192)) 0x4000000000000000000000uwb) != 86
+ || __builtin_ctzg (((unsigned _BitInt(192)) -1) << 149, 192) != 149
+ || __builtin_ctzg ((unsigned _BitInt(192)) 2, 192) != 1
+ || __builtin_clrsbg ((_BitInt(192)) (unsigned _BitInt(192 - 4)) -1) != 3
+ || __builtin_clrsbg ((_BitInt(192)) (unsigned _BitInt(192 - 28)) -1) != 27
+ || __builtin_clrsbg ((_BitInt(192)) (unsigned _BitInt(192 - 29)) -1) != 28
+ || __builtin_clrsbg ((_BitInt(192)) ~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 68)) -1) != 67
+ || __builtin_clrsbg ((_BitInt(192)) ~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 92)) -1) != 91
+ || __builtin_clrsbg ((_BitInt(192)) ~(unsigned _BitInt(192)) (unsigned _BitInt(192 - 93)) -1) != 92
+ || __builtin_ffsg ((_BitInt(192)) (((unsigned _BitInt(192)) -1) << 42)) != 43
+ || __builtin_ffsg ((_BitInt(192)) (((unsigned _BitInt(192)) -1) << 57)) != 58
+ || __builtin_ffsg ((_BitInt(192)) 0x4000000000000000000000uwb) != 87
+ || __builtin_ffsg ((_BitInt(192)) (((unsigned _BitInt(192)) -1) << 149)) != 150
+ || __builtin_ffsg ((_BitInt(192)) 2) != 2)
+ __builtin_abort ();
+ if (parity192 (4692147078159863499615754634965484598760535154638668598762uwb) != __builtin_parityg (4692147078159863499615754634965484598760535154638668598762uwb)
+ || parity192 (1669461228546917627909935444501097256112222796898845183538uwb) != __builtin_parityg (1669461228546917627909935444501097256112222796898845183538uwb)
+ || parity192 (5107402473866766219120283991834936835726115452uwb) != __builtin_parityg (5107402473866766219120283991834936835726115452uwb)
+ || popcount192 (4033871057575185619108386380181511734118888391160164588976uwb) != __builtin_popcountg (4033871057575185619108386380181511734118888391160164588976uwb)
+ || popcount192 (58124766715713711628758119849579188845074973856704521119uwb) != __builtin_popcountg (58124766715713711628758119849579188845074973856704521119uwb)
+ || popcount192 (289948065236269174335700831610076764076947650072787325852uwb) != __builtin_popcountg (289948065236269174335700831610076764076947650072787325852uwb))
+ __builtin_abort ();
+#endif
+}
diff --git a/gcc/testsuite/gcc.dg/torture/bitint-44.c b/gcc/testsuite/gcc.dg/torture/bitint-44.c
new file mode 100644
index 000000000000..938c0e917f34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/bitint-44.c
@@ -0,0 +1,306 @@
+/* PR c/111309 */
+/* { dg-do run { target bitint } } */
+/* { dg-options "-std=c2x -pedantic-errors" } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */
+/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */
+
+#if __BITINT_MAXWIDTH__ >= 512
+__attribute__((noipa)) int
+clz512 (unsigned _BitInt(512) x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzd512 (unsigned _BitInt(512) x)
+{
+ return __builtin_clzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+clzD512 (unsigned _BitInt(512) x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctz512 (unsigned _BitInt(512) x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzd512 (unsigned _BitInt(512) x)
+{
+ return __builtin_ctzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+ctzD512 (unsigned _BitInt(512) x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+clrsb512 (_BitInt(512) x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+ffs512 (_BitInt(512) x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+parity512 (unsigned _BitInt(512) x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+popcount512 (unsigned _BitInt(512) x)
+{
+ return __builtin_popcountg (x);
+}
+#endif
+
+#if __BITINT_MAXWIDTH__ >= 523
+__attribute__((noipa)) int
+clz523 (unsigned _BitInt(523) x)
+{
+ return __builtin_clzg (x);
+}
+
+__attribute__((noipa)) int
+clzd523 (unsigned _BitInt(523) x)
+{
+ return __builtin_clzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+clzD523 (unsigned _BitInt(523) x, int y)
+{
+ return __builtin_clzg (x, y);
+}
+
+__attribute__((noipa)) int
+ctz523 (unsigned _BitInt(523) x)
+{
+ return __builtin_ctzg (x);
+}
+
+__attribute__((noipa)) int
+ctzd523 (unsigned _BitInt(523) x)
+{
+ return __builtin_ctzg (x, __builtin_popcountg ((typeof (x)) ~(typeof (x)) 0));
+}
+
+__attribute__((noipa)) int
+ctzD523 (unsigned _BitInt(523) x, int y)
+{
+ return __builtin_ctzg (x, y);
+}
+
+__attribute__((noipa)) int
+clrsb523 (_BitInt(523) x)
+{
+ return __builtin_clrsbg (x);
+}
+
+__attribute__((noipa)) int
+ffs523 (_BitInt(523) x)
+{
+ return __builtin_ffsg (x);
+}
+
+__attribute__((noipa)) int
+parity523 (unsigned _BitInt(523) x)
+{
+ return __builtin_parityg (x);
+}
+
+__attribute__((noipa)) int
+popcount523 (unsigned _BitInt(523) x)
+{
+ return __builtin_popcountg (x);
+}
+#endif
+
+int
+main ()
+{
+#if __BITINT_MAXWIDTH__ >= 512
+ if (clzd512 (0) != 512
+ || clzD512 (0, -1) != -1
+ || ctzd512 (0) != 512
+ || ctzD512 (0, 42) != 42
+ || clrsb512 (0) != 512 - 1
+ || ffs512 (0) != 0
+ || parity512 (0) != 0
+ || popcount512 (0) != 0
+ || __builtin_clzg ((unsigned _BitInt(512)) 0, 512 + 32) != 512 + 32
+ || __builtin_ctzg ((unsigned _BitInt(512)) 0, 512) != 512
+ || __builtin_clrsbg ((_BitInt(512)) 0) != 512 - 1
+ || __builtin_ffsg ((_BitInt(512)) 0) != 0
+ || __builtin_parityg ((unsigned _BitInt(512)) 0) != 0
+ || __builtin_popcountg ((unsigned _BitInt(512)) 0) != 0)
+ __builtin_abort ();
+ if (clz512 (-1) != 0
+ || clzd512 (-1) != 0
+ || clzD512 (-1, 0) != 0
+ || ctz512 (-1) != 0
+ || ctzd512 (-1) != 0
+ || ctzD512 (-1, 17) != 0
+ || clrsb512 (-1) != 512 - 1
+ || ffs512 (-1) != 1
+ || parity512 (-1) != 0
+ || popcount512 (-1) != 512
+ || __builtin_clzg ((unsigned _BitInt(512)) -1) != 0
+ || __builtin_clzg ((unsigned _BitInt(512)) -1, 512 + 32) != 0
+ || __builtin_ctzg ((unsigned _BitInt(512)) -1) != 0
+ || __builtin_ctzg ((unsigned _BitInt(512)) -1, 512) != 0
+ || __builtin_clrsbg ((_BitInt(512)) -1) != 512 - 1
+ || __builtin_ffsg ((_BitInt(512)) -1) != 1
+ || __builtin_parityg ((unsigned _BitInt(512)) -1) != 0
+ || __builtin_popcountg ((unsigned _BitInt(512)) -1) != 512)
+ __builtin_abort ();
+ if (clz512 (((unsigned _BitInt(512)) -1) >> 24) != 24
+ || clz512 (((unsigned _BitInt(512)) -1) >> 79) != 79
+ || clz512 (1) != 512 - 1
+ || clzd512 (((unsigned _BitInt(512)) -1) >> 139) != 139
+ || clzd512 (2) != 512 - 2
+ || ctz512 (((unsigned _BitInt(512)) -1) << 42) != 42
+ || ctz512 (((unsigned _BitInt(512)) -1) << 57) != 57
+ || ctz512 (0x4000000000000000000000uwb) != 86
+ || ctzd512 (((unsigned _BitInt(512)) -1) << 149) != 149
+ || ctzd512 (2) != 1
+ || clrsb512 ((unsigned _BitInt(512 - 4)) -1) != 3
+ || clrsb512 ((unsigned _BitInt(512 - 28)) -1) != 27
+ || clrsb512 ((unsigned _BitInt(512 - 29)) -1) != 28
+ || clrsb512 (~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 68)) -1) != 67
+ || clrsb512 (~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 92)) -1) != 91
+ || clrsb512 (~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 93)) -1) != 92
+ || ffs512 (((unsigned _BitInt(512)) -1) << 42) != 43
+ || ffs512 (((unsigned _BitInt(512)) -1) << 57) != 58
+ || ffs512 (0x4000000000000000000000uwb) != 87
+ || ffs512 (((unsigned _BitInt(512)) -1) << 149) != 150
+ || ffs512 (2) != 2
+ || __builtin_clzg (((unsigned _BitInt(512)) -1) >> 24) != 24
+ || __builtin_clzg (((unsigned _BitInt(512)) -1) >> 79) != 79
+ || __builtin_clzg ((unsigned _BitInt(512)) 1) != 512 - 1
+ || __builtin_clzg (((unsigned _BitInt(512)) -1) >> 139, 512) != 139
+ || __builtin_clzg ((unsigned _BitInt(512)) 2, 512) != 512 - 2
+ || __builtin_ctzg (((unsigned _BitInt(512)) -1) << 42) != 42
+ || __builtin_ctzg (((unsigned _BitInt(512)) -1) << 57) != 57
+ || __builtin_ctzg ((unsigned _BitInt(512)) 0x4000000000000000000000uwb) != 86
+ || __builtin_ctzg (((unsigned _BitInt(512)) -1) << 149, 512) != 149
+ || __builtin_ctzg ((unsigned _BitInt(512)) 2, 512) != 1
+ || __builtin_clrsbg ((_BitInt(512)) (unsigned _BitInt(512 - 4)) -1) != 3
+ || __builtin_clrsbg ((_BitInt(512)) (unsigned _BitInt(512 - 28)) -1) != 27
+ || __builtin_clrsbg ((_BitInt(512)) (unsigned _BitInt(512 - 29)) -1) != 28
+ || __builtin_clrsbg ((_BitInt(512)) ~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 68)) -1) != 67
+ || __builtin_clrsbg ((_BitInt(512)) ~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 92)) -1) != 91
+ || __builtin_clrsbg ((_BitInt(512)) ~(unsigned _BitInt(512)) (unsigned _BitInt(512 - 93)) -1) != 92
+ || __builtin_ffsg ((_BitInt(512)) (((unsigned _BitInt(512)) -1) << 42)) != 43
+ || __builtin_ffsg ((_BitInt(512)) (((unsigned _BitInt(512)) -1) << 57)) != 58
+ || __builtin_ffsg ((_BitInt(512)) 0x4000000000000000000000uwb) != 87
+ || __builtin_ffsg ((_BitInt(512)) (((unsigned _BitInt(512)) -1) << 149)) != 150
+ || __builtin_ffsg ((_BitInt(512)) 2) != 2)
+ __builtin_abort ();
+ if (parity512 (8278593062772967967574644592392030907507244457324713380127157444008480135136016412791369421272159911061801023217823646324038055629840240503699995274750141uwb) != __builtin_parityg (8278593062772967967574644592392030907507244457324713380127157444008480135136016412791369421272159911061801023217823646324038055629840240503699995274750141uwb)
+ || parity512 (663951521760319802637316646127146913163123967584512032007606686578544864655291546789196279408181546344880831465704154822174055168766759305688225967189384uwb) != __builtin_parityg (663951521760319802637316646127146913163123967584512032007606686578544864655291546789196279408181546344880831465704154822174055168766759305688225967189384uwb)
+ || parity512 (8114152627481936575035564712656624361256533214211179387274127464949371919139038942819974113641465089580051998523156404968195970853124179018281296621919217uwb) != __builtin_parityg (8114152627481936575035564712656624361256533214211179387274127464949371919139038942819974113641465089580051998523156404968195970853124179018281296621919217uwb)
+ || popcount512 (697171368046392901434470580443928282938585745214587494987284546386421344865289735592202298494880955572094546861862007016154025065165834164941207378563932uwb) != __builtin_popcountg (697171368046392901434470580443928282938585745214587494987284546386421344865289735592202298494880955572094546861862007016154025065165834164941207378563932uwb)
+ || popcount512 (12625357869391866487124235043239209385173615631331705015179232007319637649427586947822360147798041278948617160703315666047585702906648747835331939389354450uwb) != __builtin_popcountg (12625357869391866487124235043239209385173615631331705015179232007319637649427586947822360147798041278948617160703315666047585702906648747835331939389354450uwb)
+ || popcount512 (12989863959706456104163426941303698078341934896544520782734564901708926112239778316241786242633862403309192697330635825122310265805838908726925342761646021uwb) != __builtin_popcountg (12989863959706456104163426941303698078341934896544520782734564901708926112239778316241786242633862403309192697330635825122310265805838908726925342761646021uwb))
+ __builtin_abort ();
+#endif
+#if __BITINT_MAXWIDTH__ >= 523
+ if (clzd523 (0) != 523
+ || clzD523 (0, 42) != 42
+ || ctzd523 (0) != 523
+ || ctzD523 (0, -1) != -1
+ || clrsb523 (0) != 523 - 1
+ || ffs523 (0) != 0
+ || parity523 (0) != 0
+ || popcount523 (0) != 0
+ || __builtin_clzg ((unsigned _BitInt(523)) 0, 523 + 32) != 523 + 32
+ || __builtin_ctzg ((unsigned _BitInt(523)) 0, 523) != 523
+ || __builtin_clrsbg ((_BitInt(523)) 0) != 523 - 1
+ || __builtin_ffsg ((_BitInt(523)) 0) != 0
+ || __builtin_parityg ((unsigned _BitInt(523)) 0) != 0
+ || __builtin_popcountg ((unsigned _BitInt(523)) 0) != 0)
+ __builtin_abort ();
+ if (clz523 (-1) != 0
+ || clzd523 (-1) != 0
+ || clzD523 (-1, 15) != 0
+ || ctz523 (-1) != 0
+ || ctzd523 (-1) != 0
+ || ctzD523 (-1, -57) != 0
+ || clrsb523 (-1) != 523 - 1
+ || ffs523 (-1) != 1
+ || parity523 (-1) != 1
+ || popcount523 (-1) != 523
+ || __builtin_clzg ((unsigned _BitInt(523)) -1) != 0
+ || __builtin_clzg ((unsigned _BitInt(523)) -1, 523 + 32) != 0
+ || __builtin_ctzg ((unsigned _BitInt(523)) -1) != 0
+ || __builtin_ctzg ((unsigned _BitInt(523)) -1, 523) != 0
+ || __builtin_clrsbg ((_BitInt(523)) -1) != 523 - 1
+ || __builtin_ffsg ((_BitInt(523)) -1) != 1
+ || __builtin_parityg ((unsigned _BitInt(523)) -1) != 1
+ || __builtin_popcountg ((unsigned _BitInt(523)) -1) != 523)
+ __builtin_abort ();
+ if (clz523 (((unsigned _BitInt(523)) -1) >> 24) != 24
+ || clz523 (((unsigned _BitInt(523)) -1) >> 79) != 79
+ || clz523 (1) != 523 - 1
+ || clzd523 (((unsigned _BitInt(523)) -1) >> 139) != 139
+ || clzd523 (2) != 523 - 2
+ || ctz523 (((unsigned _BitInt(523)) -1) << 42) != 42
+ || ctz523 (((unsigned _BitInt(523)) -1) << 57) != 57
+ || ctz523 (0x4000000000000000000000uwb) != 86
+ || ctzd523 (((unsigned _BitInt(523)) -1) << 149) != 149
+ || ctzd523 (2) != 1
+ || clrsb523 ((unsigned _BitInt(523 - 4)) -1) != 3
+ || clrsb523 ((unsigned _BitInt(523 - 28)) -1) != 27
+ || clrsb523 ((unsigned _BitInt(523 - 29)) -1) != 28
+ || clrsb523 (~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 68)) -1) != 67
+ || clrsb523 (~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 92)) -1) != 91
+ || clrsb523 (~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 93)) -1) != 92
+ || ffs523 (((unsigned _BitInt(523)) -1) << 42) != 43
+ || ffs523 (((unsigned _BitInt(523)) -1) << 57) != 58
+ || ffs523 (0x4000000000000000000000uwb) != 87
+ || ffs523 (((unsigned _BitInt(523)) -1) << 149) != 150
+ || ffs523 (2) != 2
+ || __builtin_clzg (((unsigned _BitInt(523)) -1) >> 24) != 24
+ || __builtin_clzg (((unsigned _BitInt(523)) -1) >> 79) != 79
+ || __builtin_clzg ((unsigned _BitInt(523)) 1) != 523 - 1
+ || __builtin_clzg (((unsigned _BitInt(523)) -1) >> 139, 523) != 139
+ || __builtin_clzg ((unsigned _BitInt(523)) 2, 523) != 523 - 2
+ || __builtin_ctzg (((unsigned _BitInt(523)) -1) << 42) != 42
+ || __builtin_ctzg (((unsigned _BitInt(523)) -1) << 57) != 57
+ || __builtin_ctzg ((unsigned _BitInt(523)) 0x4000000000000000000000uwb) != 86
+ || __builtin_ctzg (((unsigned _BitInt(523)) -1) << 149, 523) != 149
+ || __builtin_ctzg ((unsigned _BitInt(523)) 2, 523) != 1
+ || __builtin_clrsbg ((_BitInt(523)) (unsigned _BitInt(523 - 4)) -1) != 3
+ || __builtin_clrsbg ((_BitInt(523)) (unsigned _BitInt(523 - 28)) -1) != 27
+ || __builtin_clrsbg ((_BitInt(523)) (unsigned _BitInt(523 - 29)) -1) != 28
+ || __builtin_clrsbg ((_BitInt(523)) ~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 68)) -1) != 67
+ || __builtin_clrsbg ((_BitInt(523)) ~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 92)) -1) != 91
+ || __builtin_clrsbg ((_BitInt(523)) ~(unsigned _BitInt(523)) (unsigned _BitInt(523 - 93)) -1) != 92
+ || __builtin_ffsg ((_BitInt(523)) (((unsigned _BitInt(523)) -1) << 42)) != 43
+ || __builtin_ffsg ((_BitInt(523)) (((unsigned _BitInt(523)) -1) << 57)) != 58
+ || __builtin_ffsg ((_BitInt(523)) 0x4000000000000000000000uwb) != 87
+ || __builtin_ffsg ((_BitInt(523)) (((unsigned _BitInt(523)) -1) << 149)) != 150
+ || __builtin_ffsg ((_BitInt(523)) 2) != 2)
+ __builtin_abort ();
+ if (parity523 (14226628251091586975416900831427560438504550751597528218770815297642064445318137709184907300499591292677456563377096100346699421879373024906380724757049700104uwb) != __builtin_parityg (14226628251091586975416900831427560438504550751597528218770815297642064445318137709184907300499591292677456563377096100346699421879373024906380724757049700104uwb)
+ || parity523 (20688958227123188226117538663818621034852702121556301239818743230005799574164516085541310491875153692467123662601853835357822935286851364843928714141587045255uwb) != __builtin_parityg (20688958227123188226117538663818621034852702121556301239818743230005799574164516085541310491875153692467123662601853835357822935286851364843928714141587045255uwb)
+ || parity523 (8927708174664018648856542263215989788443763271738485875573765922613438023117960552135374015673598803453205044464280019640319125968982118836809392169156450404uwb) != __builtin_parityg (8927708174664018648856542263215989788443763271738485875573765922613438023117960552135374015673598803453205044464280019640319125968982118836809392169156450404uwb)
+ || popcount523 (27178327344587654457581274852432957423537947348354896748701960885269035920194935311522194372418922852798513401240689173265979378157685169921449935364246334672uwb) != __builtin_popcountg (27178327344587654457581274852432957423537947348354896748701960885269035920194935311522194372418922852798513401240689173265979378157685169921449935364246334672uwb)
+ || popcount523 (5307736750284212829931201546806718535860789684371772688568780952567669490917265125893664418036905110148872995350655890585853451175740907670080602411287166989uwb) != __builtin_popcountg (5307736750284212829931201546806718535860789684371772688568780952567669490917265125893664418036905110148872995350655890585853451175740907670080602411287166989uwb)
+ || popcount523 (21261096432069432668470452941790780841888331284195411465624030283325239673941548816191698556934198698768393659379577567450765073013688585051560340496749593370uwb) != __builtin_popcountg (21261096432069432668470452941790780841888331284195411465624030283325239673941548816191698556934198698768393659379577567450765073013688585051560340496749593370uwb))
+ __builtin_abort ();
+#endif
+}
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index d4e9202a2d46..d39dfc1065f2 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -2381,6 +2381,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
HOST_WIDE_INT type_size = tree_to_shwi (TYPE_SIZE (type));
bool zero_ok
= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ctz_val) == 2;
+ int nargs = 2;
/* If the input value can't be zero, don't special case ctz (0). */
if (tree_expr_nonzero_p (res_ops[0]))
@@ -2388,6 +2389,7 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
zero_ok = true;
zero_val = 0;
ctz_val = 0;
+ nargs = 1;
}
/* Skip if there is no value defined at zero, or if we can't easily
@@ -2399,7 +2401,11 @@ simplify_count_trailing_zeroes (gimple_stmt_iterator *gsi)
gimple_seq seq = NULL;
gimple *g;
- gcall *call = gimple_build_call_internal (IFN_CTZ, 1, res_ops[0]);
+ gcall *call
+ = gimple_build_call_internal (IFN_CTZ, nargs, res_ops[0],
+ nargs == 1 ? NULL_TREE
+ : build_int_cst (integer_type_node,
+ ctz_val));
gimple_set_location (call, gimple_location (stmt));
gimple_set_lhs (call, make_ssa_name (integer_type_node));
gimple_seq_add_stmt (&seq, call);
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 718582a7790d..2098bef9a970 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -2235,14 +2235,18 @@ build_cltz_expr (tree src, bool leading, bool define_at_zero)
tree call;
if (use_ifn)
{
- call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
- integer_type_node, 1, src);
int val;
int optab_defined_at_zero
= (leading
? CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val)
: CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (utype), val));
- if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
+ tree arg2 = NULL_TREE;
+ if (define_at_zero && optab_defined_at_zero == 2 && val == prec)
+ arg2 = build_int_cst (integer_type_node, val);
+ call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
+ integer_type_node, arg2 ? 2 : 1,
+ src, arg2);
+ if (define_at_zero && arg2 == NULL_TREE)
{
tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
build_zero_cst (TREE_TYPE (src)));
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index bb55a4fba339..ac805173453d 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -2863,18 +2863,26 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
}
/* Check that we have a popcount/clz/ctz builtin. */
- if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
+ if (!is_gimple_call (call))
return false;
- arg = gimple_call_arg (call, 0);
lhs = gimple_get_lhs (call);
if (lhs == NULL_TREE)
return false;
combined_fn cfn = gimple_call_combined_fn (call);
+ if (gimple_call_num_args (call) != 1
+ && (gimple_call_num_args (call) != 2
+ || cfn == CFN_CLZ
+ || cfn == CFN_CTZ))
+ return false;
+
+ arg = gimple_call_arg (call, 0);
+
internal_fn ifn = IFN_LAST;
int val = 0;
+ bool any_val = false;
switch (cfn)
{
case CFN_BUILT_IN_BSWAP16:
@@ -2889,6 +2897,23 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
tree type = TREE_TYPE (arg);
+ if (TREE_CODE (type) == BITINT_TYPE)
+ {
+ if (gimple_call_num_args (call) == 1)
+ {
+ any_val = true;
+ ifn = IFN_CLZ;
+ break;
+ }
+ if (!tree_fits_shwi_p (gimple_call_arg (call, 1)))
+ return false;
+ HOST_WIDE_INT at_zero = tree_to_shwi (gimple_call_arg (call, 1));
+ if ((int) at_zero != at_zero)
+ return false;
+ ifn = IFN_CLZ;
+ val = at_zero;
+ break;
+ }
if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
&& CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
val) == 2)
@@ -2902,6 +2927,23 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
tree type = TREE_TYPE (arg);
+ if (TREE_CODE (type) == BITINT_TYPE)
+ {
+ if (gimple_call_num_args (call) == 1)
+ {
+ any_val = true;
+ ifn = IFN_CTZ;
+ break;
+ }
+ if (!tree_fits_shwi_p (gimple_call_arg (call, 1)))
+ return false;
+ HOST_WIDE_INT at_zero = tree_to_shwi (gimple_call_arg (call, 1));
+ if ((int) at_zero != at_zero)
+ return false;
+ ifn = IFN_CTZ;
+ val = at_zero;
+ break;
+ }
if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
&& CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
val) == 2)
@@ -2960,8 +3002,18 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
/* Check PHI arguments. */
if (lhs != arg0
- || TREE_CODE (arg1) != INTEGER_CST
- || wi::to_wide (arg1) != val)
+ || TREE_CODE (arg1) != INTEGER_CST)
+ return false;
+ if (any_val)
+ {
+ if (!tree_fits_shwi_p (arg1))
+ return false;
+ HOST_WIDE_INT at_zero = tree_to_shwi (arg1);
+ if ((int) at_zero != at_zero)
+ return false;
+ val = at_zero;
+ }
+ else if (wi::to_wide (arg1) != val)
return false;
/* And insert the popcount/clz/ctz builtin and cast stmt before the
@@ -2974,13 +3026,15 @@ cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
reset_flow_sensitive_info (gimple_get_lhs (cast));
}
gsi_from = gsi_for_stmt (call);
- if (ifn == IFN_LAST || gimple_call_internal_p (call))
+ if (ifn == IFN_LAST
+ || (gimple_call_internal_p (call) && gimple_call_num_args (call) == 2))
gsi_move_before (&gsi_from, &gsi);
else
{
/* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
the latter is well defined at zero. */
- call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+ call = gimple_build_call_internal (ifn, 2, gimple_call_arg (call, 0),
+ build_int_cst (integer_type_node, val));
gimple_call_set_lhs (call, lhs);
gsi_insert_before (&gsi, call, GSI_SAME_STMT);
gsi_remove (&gsi_from, true);
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 6b6b41252bbe..7debe7f07316 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1818,7 +1818,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
tree new_var;
internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST;
bool defined_at_zero = true, defined_at_zero_new = false;
- int val = 0, val_new = 0;
+ int val = 0, val_new = 0, val_cmp = 0;
int prec;
int sub = 0, add = 0;
location_t loc;
@@ -1826,7 +1826,8 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
if (!is_gimple_call (call_stmt))
return NULL;
- if (gimple_call_num_args (call_stmt) != 1)
+ if (gimple_call_num_args (call_stmt) != 1
+ && gimple_call_num_args (call_stmt) != 2)
return NULL;
rhs_oprnd = gimple_call_arg (call_stmt, 0);
@@ -1846,9 +1847,10 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
CASE_CFN_CTZ:
ifn = IFN_CTZ;
if (!gimple_call_internal_p (call_stmt)
- || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type),
- val) != 2)
+ || gimple_call_num_args (call_stmt) != 2)
defined_at_zero = false;
+ else
+ val = tree_to_shwi (gimple_call_arg (call_stmt, 1));
break;
CASE_CFN_FFS:
ifn = IFN_FFS;
@@ -1907,6 +1909,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt);
+ val_cmp = val_new;
if ((ifnnew == IFN_CLZ
&& defined_at_zero
&& defined_at_zero_new
@@ -1918,7 +1921,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
.CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
if (ifnnew == IFN_CLZ)
sub = prec;
- val_new = prec;
+ val_cmp = prec;
if (!TYPE_UNSIGNED (rhs_type))
{
@@ -1955,7 +1958,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
/* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
.FFS (X) = PREC - .CLZ (X & -X). */
sub = prec - (ifn == IFN_CTZ);
- val_new = sub - val_new;
+ val_cmp = sub - val_new;
tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
@@ -1974,7 +1977,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
/* .CTZ (X) = PREC - .POPCOUNT (X | -X)
.FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
sub = prec + (ifn == IFN_FFS);
- val_new = sub;
+ val_cmp = sub;
tree neg = vect_recog_temp_ssa_var (rhs_type, NULL);
pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd);
@@ -1992,12 +1995,18 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
{
/* .FFS (X) = .CTZ (X) + 1. */
add = 1;
- val_new++;
+ val_cmp++;
}
/* Create B = .IFNNEW (A). */
new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
- pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
+ if ((ifnnew == IFN_CLZ || ifnnew == IFN_CTZ) && defined_at_zero_new)
+ pattern_stmt
+ = gimple_build_call_internal (ifnnew, 2, rhs_oprnd,
+ build_int_cst (integer_type_node,
+ val_new));
+ else
+ pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd);
gimple_call_set_lhs (pattern_stmt, new_var);
gimple_set_location (pattern_stmt, loc);
*type_out = vec_type;
@@ -2023,7 +2032,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
}
if (defined_at_zero
- && (!defined_at_zero_new || val != val_new))
+ && (!defined_at_zero_new || val != val_cmp))
{
append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type);
tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL);
@@ -2143,7 +2152,8 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
return NULL;
}
- if (gimple_call_num_args (call_stmt) != 1)
+ if (gimple_call_num_args (call_stmt) != 1
+ && gimple_call_num_args (call_stmt) != 2)
return NULL;
rhs_oprnd = gimple_call_arg (call_stmt, 0);
@@ -2181,17 +2191,14 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
return NULL;
addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd))
- TYPE_PRECISION (lhs_type));
- if (gimple_call_internal_p (call_stmt))
+ if (gimple_call_internal_p (call_stmt)
+ && gimple_call_num_args (call_stmt) == 2)
{
int val1, val2;
- int d1
- = CLZ_DEFINED_VALUE_AT_ZERO
- (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
+ val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
int d2
= CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
val2);
- if (d1 != 2)
- break;
if (d2 != 2 || val1 != val2 + addend)
return NULL;
}
@@ -2200,17 +2207,14 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
/* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
if it is undefined at zero or if it matches also for the
defined value there. */
- if (gimple_call_internal_p (call_stmt))
+ if (gimple_call_internal_p (call_stmt)
+ && gimple_call_num_args (call_stmt) == 2)
{
int val1, val2;
- int d1
- = CTZ_DEFINED_VALUE_AT_ZERO
- (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1);
+ val1 = tree_to_shwi (gimple_call_arg (call_stmt, 1));
int d2
= CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
val2);
- if (d1 != 2)
- break;
if (d2 != 2 || val1 != val2)
return NULL;
}
@@ -2260,7 +2264,20 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo,
/* Create B = .POPCOUNT (A). */
new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
- pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
+ tree arg2 = NULL_TREE;
+ int val;
+ if (ifn == IFN_CLZ
+ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
+ val) == 2)
+ arg2 = build_int_cst (integer_type_node, val);
+ else if (ifn == IFN_CTZ
+ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type),
+ val) == 2)
+ arg2 = build_int_cst (integer_type_node, val);
+ if (arg2)
+ pattern_stmt = gimple_build_call_internal (ifn, 2, unprom_diff.op, arg2);
+ else
+ pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op);
gimple_call_set_lhs (pattern_stmt, new_var);
gimple_set_location (pattern_stmt, gimple_location (last_stmt));
*type_out = vec_type;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ee89f47c468b..96e4a6cffade 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3115,6 +3115,7 @@ vectorizable_call (vec_info *vinfo,
enum { NARROW, NONE, WIDEN } modifier;
size_t i, nargs;
tree lhs;
+ tree clz_ctz_arg1 = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;
@@ -3160,6 +3161,14 @@ vectorizable_call (vec_info *vinfo,
nargs = 0;
rhs_type = unsigned_type_node;
}
+ /* Similarly pretend IFN_CLZ and IFN_CTZ only has one argument, the second
+ argument just says whether it is well-defined at zero or not and what
+ value should be returned for it. */
+ if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
+ {
+ nargs = 1;
+ clz_ctz_arg1 = gimple_call_arg (stmt, 1);
+ }
int mask_opno = -1;
if (internal_fn_p (cfn))
@@ -3425,6 +3434,8 @@ vectorizable_call (vec_info *vinfo,
ifn = cond_fn;
vect_nargs += 2;
}
+ if (clz_ctz_arg1)
+ ++vect_nargs;
if (modifier == NONE || ifn != IFN_LAST)
{
@@ -3462,6 +3473,9 @@ vectorizable_call (vec_info *vinfo,
}
if (masked_loop_p && reduc_idx >= 0)
vargs[varg++] = vargs[reduc_idx + 1];
+ if (clz_ctz_arg1)
+ vargs[varg++] = clz_ctz_arg1;
+
gimple *new_stmt;
if (modifier == NARROW)
{
@@ -3548,6 +3562,8 @@ vectorizable_call (vec_info *vinfo,
}
if (masked_loop_p && reduc_idx >= 0)
vargs[varg++] = vargs[reduc_idx + 1];
+ if (clz_ctz_arg1)
+ vargs[varg++] = clz_ctz_arg1;
if (len_opno >= 0 && len_loop_p)
{
From 69d69865a792a93cce2905617c53913769d0f260 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely
Date: Mon, 13 Nov 2023 12:03:31 +0000
Subject: [PATCH 012/169] c++: Link extended FP conversion pedwarns to
-Wnarrowing [PR111842]
Several users have been confused by these warnings, which can be
misunderstood as meaning "this might not be what you want" rather than
as diagnostics required by the C++ standard. Add the text "ISO C++
does not allow" to make this clear.
Also link them to -Wnarrowing so that they can be disabled or promoted
to errors independently of other pedwarns.
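As an illustration (not part of the patch), the affected conversions look
roughly like the sketch below, assuming a C++23 target where <stdfloat> is
available and _Float64 is supported; the file name and driver invocations
are hypothetical:
    // narrowing-rank.C (hypothetical example)
    #include <stdfloat>
    // Converting from a type with greater conversion rank is ill-formed
    // in ISO C++, so GCC issues a pedwarn, now controlled by -Wnarrowing.
    std::float32_t f32 = 1.0F64;
    // g++ -std=c++23 narrowing-rank.C                    -> warning by default
    // g++ -std=c++23 -Wno-narrowing narrowing-rank.C     -> diagnostic disabled
    // g++ -std=c++23 -Werror=narrowing narrowing-rank.C  -> promoted to an error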
PR c++/111842
PR c++/112498
gcc/cp/ChangeLog:
* call.cc (convert_like_internal): Use OPT_Wnarrowing for
pedwarns about ill-formed conversions involving extended
floating-point types. Clarify that ISO C++ requires these
diagnostics.
gcc/testsuite/ChangeLog:
* g++.dg/cpp23/ext-floating16.C: New test.
* g++.dg/cpp23/ext-floating17.C: New test.
---
gcc/cp/call.cc | 10 +++--
gcc/testsuite/g++.dg/cpp23/ext-floating16.C | 40 ++++++++++++++++++++
gcc/testsuite/g++.dg/cpp23/ext-floating17.C | 42 +++++++++++++++++++++
3 files changed, 88 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/cpp23/ext-floating16.C
create mode 100644 gcc/testsuite/g++.dg/cpp23/ext-floating17.C
diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc
index 709fd74f55e3..86feff55dd1e 100644
--- a/gcc/cp/call.cc
+++ b/gcc/cp/call.cc
@@ -8303,15 +8303,17 @@ convert_like_internal (conversion *convs, tree expr, tree fn, int argnum,
totype))
{
case 2:
- if (pedwarn (loc, 0, "converting to %qH from %qI with greater "
- "conversion rank", totype, TREE_TYPE (expr)))
+ if (pedwarn (loc, OPT_Wnarrowing, "ISO C++ does not allow "
+ "converting to %qH from %qI with greater "
+ "conversion rank", totype, TREE_TYPE (expr)))
complained = 1;
else if (!complained)
complained = -1;
break;
case 3:
- if (pedwarn (loc, 0, "converting to %qH from %qI with unordered "
- "conversion ranks", totype, TREE_TYPE (expr)))
+ if (pedwarn (loc, OPT_Wnarrowing, "ISO C++ does not allow "
+ "converting to %qH from %qI with unordered "
+ "conversion rank", totype, TREE_TYPE (expr)))
complained = 1;
else if (!complained)
complained = -1;
diff --git a/gcc/testsuite/g++.dg/cpp23/ext-floating16.C b/gcc/testsuite/g++.dg/cpp23/ext-floating16.C
new file mode 100644
index 000000000000..d6a562d5cb06
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/ext-floating16.C
@@ -0,0 +1,40 @@
+// P1467R9 - Extended floating-point types and standard names.
+// { dg-do compile { target c++23 } }
+// { dg-options "-pedantic-errors -Wno-narrowing" }
+// { dg-add-options float16 }
+// { dg-add-options float32 }
+// { dg-add-options float64 }
+// { dg-add-options float128 }
+
+#include "ext-floating.h"
+
+#ifdef __STRICT_ANSI__
+#undef __SIZEOF_FLOAT128__
+#endif
+
+using namespace std;
+
+#ifdef __STDCPP_FLOAT16_T__
+#ifdef __STDCPP_FLOAT32_T__
+float16_t f16c = 1.0F32; // { dg-bogus "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float32' with greater conversion rank" "" { target { float16 && float32 } } }
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+float16_t f16e = 1.0F64; // { dg-bogus "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float64' with greater conversion rank" "" { target { float16 && float64 } } }
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+float16_t f16g = 1.0F128; // { dg-bogus "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float128' with greater conversion rank" "" { target { float16 && float128 } } }
+#endif
+#endif
+#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_FLOAT64_T__
+float32_t f32e = 1.0F64; // { dg-bogus "converting to 'std::float32_t' \\\{aka '_Float32'\\\} from '_Float64' with greater conversion rank" "" { target { float32 && float64 } } }
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+float32_t f32g = 1.0F128; // { dg-bogus "converting to 'std::float32_t' \\\{aka '_Float32'\\\} from '_Float128' with greater conversion rank" "" { target { float32 && float128 } } }
+#endif
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+#ifdef __STDCPP_FLOAT128_T__
+float64_t f64g = 1.0F128; // { dg-bogus "converting to 'std::float64_t' \\\{aka '_Float64'\\\} from '_Float128' with greater conversion rank" "" { target { float64 && float128 } } }
+#endif
+#endif
diff --git a/gcc/testsuite/g++.dg/cpp23/ext-floating17.C b/gcc/testsuite/g++.dg/cpp23/ext-floating17.C
new file mode 100644
index 000000000000..796e045537a6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/ext-floating17.C
@@ -0,0 +1,42 @@
+// P1467R9 - Extended floating-point types and standard names.
+// { dg-do compile { target c++23 } }
+// { dg-options "-Werror=narrowing" }
+// { dg-add-options float16 }
+// { dg-add-options float32 }
+// { dg-add-options float64 }
+// { dg-add-options float128 }
+// { dg-prune-output "some warnings being treated as errors" }
+
+#include "ext-floating.h"
+
+#ifdef __STRICT_ANSI__
+#undef __SIZEOF_FLOAT128__
+#endif
+
+using namespace std;
+
+#ifdef __STDCPP_FLOAT16_T__
+#ifdef __STDCPP_FLOAT32_T__
+float16_t f16c = 1.0F32; // { dg-error "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float32' with greater conversion rank" "" { target { float16 && float32 } } }
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+float16_t f16e = 1.0F64; // { dg-error "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float64' with greater conversion rank" "" { target { float16 && float64 } } }
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+float16_t f16g = 1.0F128; // { dg-error "converting to 'std::float16_t' \\\{aka '_Float16'\\\} from '_Float128' with greater conversion rank" "" { target { float16 && float128 } } }
+#endif
+#endif
+#ifdef __STDCPP_FLOAT32_T__
+#ifdef __STDCPP_FLOAT64_T__
+float32_t f32e = 1.0F64; // { dg-error "converting to 'std::float32_t' \\\{aka '_Float32'\\\} from '_Float64' with greater conversion rank" "" { target { float32 && float64 } } }
+#endif
+#ifdef __STDCPP_FLOAT128_T__
+float32_t f32g = 1.0F128; // { dg-error "converting to 'std::float32_t' \\\{aka '_Float32'\\\} from '_Float128' with greater conversion rank" "" { target { float32 && float128 } } }
+#endif
+#endif
+#ifdef __STDCPP_FLOAT64_T__
+#ifdef __STDCPP_FLOAT128_T__
+float64_t f64g = 1.0F128; // { dg-error "converting to 'std::float64_t' \\\{aka '_Float64'\\\} from '_Float128' with greater conversion rank" "" { target { float64 && float128 } } }
+#endif
+#endif
+
From a745d2064f3e8a368995c67e80146e563d6a8fec Mon Sep 17 00:00:00 2001
From: Andreas Krebbel
Date: Tue, 14 Nov 2023 11:33:44 +0100
Subject: [PATCH 013/169] IBM Z: Fix ICE with overloading and checking enabled
s390_resolve_overloaded_builtin, when called on a NON_DEPENDENT_EXPR,
ICEs because the type it determines for that argument ends up as
error_mark_node. This particular instance of the problem no longer
occurs since NON_DEPENDENT_EXPR has been removed. Nevertheless, that
case still needs to be handled here.
gcc/ChangeLog:
* config/s390/s390-c.cc (s390_fn_types_compatible): Add a check
for error_mark_node.
gcc/testsuite/ChangeLog:
* g++.target/s390/zvec-templ-1.C: New test.
---
gcc/config/s390/s390-c.cc | 3 +++
gcc/testsuite/g++.target/s390/zvec-templ-1.C | 24 ++++++++++++++++++++
2 files changed, 27 insertions(+)
create mode 100644 gcc/testsuite/g++.target/s390/zvec-templ-1.C
diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 269f4f8e978d..fce569342f30 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -781,6 +781,9 @@ s390_fn_types_compatible (enum s390_builtin_ov_type_index typeindex,
tree in_arg = (*arglist)[i];
tree in_type = TREE_TYPE (in_arg);
+ if (in_type == error_mark_node)
+ goto mismatch;
+
if (VECTOR_TYPE_P (b_arg_type))
{
/* Vector types have to match precisely. */
diff --git a/gcc/testsuite/g++.target/s390/zvec-templ-1.C b/gcc/testsuite/g++.target/s390/zvec-templ-1.C
new file mode 100644
index 000000000000..07bb65f199b4
--- /dev/null
+++ b/gcc/testsuite/g++.target/s390/zvec-templ-1.C
@@ -0,0 +1,24 @@
+// { dg-do compile }
+// { dg-options "-O0 -mzvector -march=arch14 -mzarch" }
+// { dg-bogus "internal compiler error" "ICE" { target s390*-*-* } 23 }
+// { dg-excess-errors "" }
+
+/* This used to ICE with checking enabled because
+ s390_resolve_overloaded_builtin gets called on NON_DEPENDENT_EXPR
+   arguments. We then try to determine its type, get an error node,
+   and consequently ICE when using it.
+
+ This particular instance of the problem disappeared when
+ NON_DEPENDENT_EXPRs got removed with:
+
+ commit dad311874ac3b3cf4eca1c04f67cae80c953f7b8
+ Author: Patrick Palka
+ Date: Fri Oct 20 10:45:00 2023 -0400
+
+ c++: remove NON_DEPENDENT_EXPR, part 1
+
+ Nevertheless we should check for error mark nodes in that code. */
+
+template void foo() {
+ __builtin_s390_vec_perm( , , );
+}
From 426e92643285a081d2e32221268b2052d740aa35 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel
Date: Tue, 14 Nov 2023 11:33:45 +0100
Subject: [PATCH 014/169] IBM Z: Add GTY marker to builtin data structures
This adds GTY markers to s390_builtin_types, s390_builtin_fn_types,
and s390_builtin_decls. These were missing, causing problems in
particular when using builtins after including a precompiled header.
Unfortunately, the declarations of these data structures use enum values
from s390-builtins.h. This file, however, is not included everywhere
and is rather large. In order to include it only for the purpose of
gtype-desc.cc, we place a preprocessed copy of it in the build
directory and include only that copy.
This is going to be backported to GCC 12 and 13.
gcc/ChangeLog:
* config.gcc: Add s390-gen-builtins.h to target_gtfiles.
* config/s390/s390-builtins.h (s390_builtin_types)
(s390_builtin_fn_types, s390_builtin_decls): Add GTY marker.
* config/s390/t-s390 (EXTRA_GTYPE_DEPS): Add s390-gen-builtins.h.
Add build rule for s390-gen-builtins.h.
---
gcc/config.gcc | 1 +
gcc/config/s390/s390-builtins.h | 10 +++++-----
gcc/config/s390/t-s390 | 4 ++++
3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ba6d63e33ac6..c1460ca354e8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -571,6 +571,7 @@ s390*-*-*)
d_target_objs="s390-d.o"
extra_options="${extra_options} fused-madd.opt"
extra_headers="s390intrin.h htmintrin.h htmxlintrin.h vecintrin.h"
+ target_gtfiles="./s390-gen-builtins.h"
;;
# Note the 'l'; we need to be able to match e.g. "shle" or "shl".
sh[123456789lbe]*-*-* | sh-*-*)
diff --git a/gcc/config/s390/s390-builtins.h b/gcc/config/s390/s390-builtins.h
index 45bba876828b..84676fe5b3f2 100644
--- a/gcc/config/s390/s390-builtins.h
+++ b/gcc/config/s390/s390-builtins.h
@@ -88,8 +88,8 @@ enum s390_builtin_ov_type_index
#define MAX_OV_OPERANDS 6
-extern tree s390_builtin_types[BT_MAX];
-extern tree s390_builtin_fn_types[BT_FN_MAX];
+extern GTY(()) tree s390_builtin_types[BT_MAX];
+extern GTY(()) tree s390_builtin_fn_types[BT_FN_MAX];
/* Builtins. */
@@ -172,6 +172,6 @@ opflags_for_builtin (int fcode)
return opflags_builtin[fcode];
}
-extern tree s390_builtin_decls[S390_BUILTIN_MAX +
- S390_OVERLOADED_BUILTIN_MAX +
- S390_OVERLOADED_BUILTIN_VAR_MAX];
+extern GTY(()) tree s390_builtin_decls[S390_BUILTIN_MAX +
+ S390_OVERLOADED_BUILTIN_MAX +
+ S390_OVERLOADED_BUILTIN_VAR_MAX];
diff --git a/gcc/config/s390/t-s390 b/gcc/config/s390/t-s390
index 828818bed2d4..4ab9718f6e27 100644
--- a/gcc/config/s390/t-s390
+++ b/gcc/config/s390/t-s390
@@ -19,6 +19,7 @@
TM_H += $(srcdir)/config/s390/s390-builtins.def
TM_H += $(srcdir)/config/s390/s390-builtin-types.def
PASSES_EXTRA += $(srcdir)/config/s390/s390-passes.def
+EXTRA_GTYPE_DEPS += ./s390-gen-builtins.h
s390-c.o: $(srcdir)/config/s390/s390-c.cc \
$(srcdir)/config/s390/s390-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \
@@ -30,3 +31,6 @@ s390-c.o: $(srcdir)/config/s390/s390-c.cc \
s390-d.o: $(srcdir)/config/s390/s390-d.cc
$(COMPILE) $<
$(POSTCOMPILE)
+
+s390-gen-builtins.h: $(srcdir)/config/s390/s390-builtins.h
+ $(COMPILER) -E $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< > $@
From e47e836f3138f89350e2fcd42ee1ec6741008bcb Mon Sep 17 00:00:00 2001
From: Lehua Ding
Date: Tue, 14 Nov 2023 16:42:19 +0800
Subject: [PATCH 015/169] x86: Make testcase apx-spill_to_egprs-1.c more robust
Hi,
This little patch adjusts the assertion in the apx-spill_to_egprs-1.c testcase.
The -mapxf compilation option allows more registers to be used, which in
turn eliminates the need for local variables to be stored in stack memory.
Therefore, the assertion is changed to check that no memory is loaded through
the %rsp register.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-spill_to_egprs-1.c: Make sure that no local
variables are stored on the stack.
---
.../gcc.target/i386/apx-spill_to_egprs-1.c | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
diff --git a/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c b/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
index 290863d63a78..d7952b4c5505 100644
--- a/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
+++ b/gcc/testsuite/gcc.target/i386/apx-spill_to_egprs-1.c
@@ -3,22 +3,9 @@
#include "spill_to_mask-1.c"
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r16d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r17d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r18d" } } */
-/* { dg-final { scan-assembler "movq\[ \t]+\[^\\n\\r\]*, %r19" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r20d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r21d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r22d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r23d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r24d" } } */
-/* { dg-final { scan-assembler "addl\[ \t]+\[^\\n\\r\]*, %r25d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r26d" } } */
-/* { dg-final { scan-assembler "movl\[ \t]+\[^\\n\\r\]*, %r27d" } } */
-/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r28d" } } */
-/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r29d" } } */
-/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r30d" } } */
-/* { dg-final { scan-assembler "movbel\[ \t]+\[^\\n\\r\]*, %r31d" } } */
+/* Make sure that no local variables are stored on the stack. */
+/* { dg-final { scan-assembler-not "\\(%rsp\\)" } } */
+
/* { dg-final { scan-assembler-not "knot" } } */
/* { dg-final { scan-assembler-not "kxor" } } */
/* { dg-final { scan-assembler-not "kor" } } */
From 3232e73c446a3a70fd2fcce4eabc0564b08312a7 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay
Date: Tue, 14 Nov 2023 12:05:19 +0100
Subject: [PATCH 016/169] LibF7: sinh: Fix loss of precision due to
cancellation for small values.
libgcc/config/avr/libf7/
* libf7-const.def [F7MOD_sinh_]: Add MiniMax polynomial.
* libf7.c (f7_sinh): Use it instead of (exp(x) - exp(-x)) / 2
when |x| < 0.5 to avoid loss of precision due to cancellation.
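By way of illustration only, a small self-contained C++ sketch of the same
idea in double precision. It uses plain Taylor coefficients rather than the
MiniMax set from the patch, and none of the names below are LibF7's:

#include <cmath>
#include <cstdio>

// Cancellation-prone form: for small a, exp(a) and exp(-a) are both close
// to 1, so their difference loses leading significant digits.
static double sinh_naive (double a)
{
  return (std::exp (a) - std::exp (-a)) / 2.0;
}

// Polynomial form: sinh(a) = a * P(a*a) with Taylor coefficients
// 1, 1/3!, 1/5!, ...  (the patch uses a MiniMax polynomial instead).
static double sinh_poly (double a)
{
  const double c[] = { 1.0, 1.0 / 6, 1.0 / 120, 1.0 / 5040, 1.0 / 362880 };
  double x = a * a, p = 0.0;
  for (int i = 4; i >= 0; --i)   // Horner evaluation, highest degree first
    p = p * x + c[i];
  return a * p;
}

int main ()
{
  double a = 1e-9;   // tiny argument: the naive form loses ~8 decimal digits
  std::printf ("naive: %.17g\npoly:  %.17g\nlibm:  %.17g\n",
               sinh_naive (a), sinh_poly (a), std::sinh (a));
}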
---
libgcc/config/avr/libf7/libf7-const.def | 10 ++++++++++
libgcc/config/avr/libf7/libf7.c | 17 +++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/libgcc/config/avr/libf7/libf7-const.def b/libgcc/config/avr/libf7/libf7-const.def
index 0e4c4d8701eb..f772adb1262b 100644
--- a/libgcc/config/avr/libf7/libf7-const.def
+++ b/libgcc/config/avr/libf7/libf7-const.def
@@ -194,5 +194,15 @@ F7_CONST_DEF (X, 1, 0xc7,0xb5,0x6a,0xf8,0x0e,0x32,0x07, -37)
F7_CONST_DEF (pi_low,0, 0xd3,0x13,0x19,0x8a,0x2e,0x03,0x70, 1 - F7_MANT_BITS-2)
#endif
+#elif defined (F7MOD_sinh_)
+// MiniMax for sinh(q)/q, q = sqrt(x) for q in [0, 0.2505]. Quality pQ10 > 70.
+// 0.99999999999999998094379 + 0.16666666666667217765428 x + 0.0083333333330755574996361 x^2 + 1.9841270281701916844502e-4 x^3 + 2.7556979384534689658282e-6 x^4 + 2.5172859336028750964929e-8 x^5
+F7_CONST_DEF (X, 0, 0xff,0xff,0xff,0xff,0xff,0xff,0xff, -1)
+F7_CONST_DEF (X, 0, 0xaa,0xaa,0xaa,0xaa,0xaa,0xb0,0xdf, -3)
+F7_CONST_DEF (X, 0, 0x88,0x88,0x88,0x88,0x76,0x64,0xdb, -7)
+F7_CONST_DEF (X, 0, 0xd0,0x0d,0x01,0x1d,0x88,0x4c,0xed, -13)
+F7_CONST_DEF (X, 0, 0xb8,0xee,0x87,0xb4,0x30,0xf0,0xa1, -19)
+F7_CONST_DEF (X, 0, 0xd8,0x3b,0xb3,0xfd,0x9e,0x6c,0xcf, -26)
+
#endif
#endif /* ! IN_LIBF7_H && ! F7MOD_const_ */
diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c
index da2a4b61b746..bf1cd140bd4a 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -2022,9 +2022,26 @@ void f7_sinhcosh (f7_t *cc, const f7_t *aa, bool do_sinh)
#ifdef F7MOD_sinh_
+
+#define ARRAY_NAME coeff_sinh
+#include "libf7-array.def"
+#undef ARRAY_NAME
+
F7_WEAK
void f7_sinh (f7_t *cc, const f7_t *aa)
{
+ if (aa->expo <= -2)
+ {
+ // For small values, exp(A) - exp(-A) suffers from cancellation, hence
+ // use a MiniMax polynomial for |A| < 0.5.
+ f7_t xx7, *xx = &xx7;
+ f7_t hh7, *hh = &hh7;
+ f7_square (xx, aa);
+ f7_horner (hh, xx, n_coeff_sinh, coeff_sinh, NULL);
+ f7_mul (cc, aa, hh);
+ return;
+ }
+
f7_sinhcosh (cc, aa, true);
}
#endif // F7MOD_sinh_
From 0a1dd8b551e5b34b16394ff92e83c66275a85b9a Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 14 Nov 2023 13:09:43 +0100
Subject: [PATCH 017/169] contrib: Ignore the
r14-5312-g040e5b0edbca861196d9e2ea2af5e805769c8d5d bogus commit
The r14-5312-g040e5b0edbca861196d9e2ea2af5e805769c8d5d commit log contains
a line from git revert with the correct hash, but it was unfortunately
hand-amended with an explanation, so it got through the pre-commit hook but
failed during the git_update_version.py ChangeLog generation. Please don't
do this.
2023-11-14 Jakub Jelinek
contrib/ChangeLog:
* gcc-changelog/git_update_version.py: Add
040e5b0edbca861196d9e2ea2af5e805769c8d5d to ignored commits.
---
contrib/gcc-changelog/git_update_version.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/contrib/gcc-changelog/git_update_version.py b/contrib/gcc-changelog/git_update_version.py
index 3c438bba8920..84767b07f0d1 100755
--- a/contrib/gcc-changelog/git_update_version.py
+++ b/contrib/gcc-changelog/git_update_version.py
@@ -37,7 +37,8 @@ IGNORED_COMMITS = (
'3ab5c8cd03d92bf4ec41e351820349d92fbc40c4',
'86d8e0c0652ef5236a460b75c25e4f7093cc0651',
'e4cba49413ca429dc82f6aa2e88129ecb3fdd943',
- '1957bedf29a1b2cc231972aba680fe80199d5498')
+ '1957bedf29a1b2cc231972aba680fe80199d5498',
+ '040e5b0edbca861196d9e2ea2af5e805769c8d5d')
FORMAT = '%(asctime)s:%(levelname)s:%(name)s:%(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT,
From aad65285a1c681feb9fc5b041c86d841b24c3d2a Mon Sep 17 00:00:00 2001
From: Jakub Jelinek
Date: Tue, 14 Nov 2023 13:19:48 +0100
Subject: [PATCH 018/169] i386: Fix up 3_doubleword_lowpart
[PR112523]
On Sun, Nov 12, 2023 at 09:03:42PM -0000, Roger Sayle wrote:
> This patch improves register pressure during reload, inspired by PR 97756.
> Normally, a double-word right-shift by a constant produces a double-word
> result, the highpart of which is dead when followed by a truncation.
> The dead code calculating the high part gets cleaned up post-reload, so
> the issue isn't normally visible, except for the increased register
> pressure during reload, sometimes leading to odd register assignments.
> Providing a post-reload splitter, which clobbers a single wordmode
> result register instead of a doubleword result register, helps (a bit).
Unfortunately this broke bootstrap on i686-linux, broke all ACATS tests
on x86_64-linux, and miscompiled e.g. __floattisf in libgcc there as well.
The bug is that the shrd{l,q} instruction expects the low part of the input
to be in the same register as the output, rather than the high part, as the
patch implemented.
split_double_mode (mode, &operands[1], 1, &operands[1], &operands[3]);
sets operands[1] to the lo_half and operands[3] to the hi_half, so if
operands[0] is not the same register as operands[1] (rather than [3]) after
RA, we should move operands[1] into operands[0] during splitting.
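For intuition, a small self-contained sketch (plain C++ using GCC's
unsigned __int128 extension, not the i386.md pattern itself) of what the
lowpart of a doubleword right shift computes, and why the result is built
starting from the low half of the input:

#include <cstdint>
#include <cstdio>

// Low 64 bits of a 128-bit value shifted right by a constant 0 < n < 64.
// This mirrors shrd semantics: the destination starts out holding the LOW
// half of the input, and bits from the HIGH half are shifted in from above.
static uint64_t shrd_lowpart (uint64_t lo, uint64_t hi, unsigned n)
{
  return (lo >> n) | (hi << (64 - n));
}

int main ()
{
  unsigned __int128 v = ((unsigned __int128) 0x0123456789abcdefULL << 64)
                        | 0xfedcba9876543210ULL;
  uint64_t lo = (uint64_t) v, hi = (uint64_t) (v >> 64);
  // Prints 1: the composed lowpart matches the truncated full-width shift.
  std::printf ("%d\n", shrd_lowpart (lo, hi, 60) == (uint64_t) (v >> 60));
}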
Your testcase:
> #define MASK60 ((1ul << 60) - 1)
> unsigned long foo (__uint128_t n)
> {
> unsigned long a = n & MASK60;
> unsigned long b = (n >> 60);
> b = b & MASK60;
> unsigned long c = (n >> 120);
> return a+b+c;
> }
still has the same number of instructions.
Bootstrapped/regtested on x86_64-linux (where it e.g. turns
=== acats Summary ===
-# of unexpected failures 2328
+# of expected passes 2328
+# of unexpected failures 0
and fixes gcc.dg/torture/fp-int-convert-*timode.c FAILs as well)
and i686-linux (where it previously didn't bootstrap, but compared to
Friday evening's bootstrap the test results are OK).
2023-11-14 Jakub Jelinek
PR target/112523
PR ada/112514
* config/i386/i386.md (3_doubleword_lowpart): Move
operands[1] aka low part of input rather than operands[3] aka high
part of input to output if not the same register.
---
gcc/config/i386/i386.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 29289f48e9c0..84cc50c7bb21 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14825,8 +14825,8 @@
{
split_double_mode (mode, &operands[1], 1, &operands[1], &operands[3]);
operands[4] = GEN_INT (( * BITS_PER_UNIT) - INTVAL (operands[2]));
- if (!rtx_equal_p (operands[0], operands[3]))
- emit_move_insn (operands[0], operands[3]);
+ if (!rtx_equal_p (operands[0], operands[1]))
+ emit_move_insn (operands[0], operands[1]);
})
(define_insn "x86_64_shrd"
From b9fd8399ec027ce42fd35a070ce38de8d69521b2 Mon Sep 17 00:00:00 2001
From: GCC Administrator
Date: Tue, 14 Nov 2023 12:23:39 +0000
Subject: [PATCH 019/169] Daily bump.
---
ChangeLog | 29 +
config/ChangeLog | 8 +
contrib/ChangeLog | 11 +
gcc/ChangeLog | 903 +++++++++++
gcc/DATESTAMP | 2 +-
gcc/ada/ChangeLog | 15 +
gcc/c-family/ChangeLog | 21 +
gcc/c/ChangeLog | 18 +
gcc/cp/ChangeLog | 34 +
gcc/jit/ChangeLog | 4 +
gcc/testsuite/ChangeLog | 2502 +++++++++++++++++++++++++++++
libatomic/ChangeLog | 5 +
libcpp/ChangeLog | 26 +
libgcc/ChangeLog | 30 +
libgcc/config/avr/libf7/ChangeLog | 12 +
libgm2/ChangeLog | 16 +
libgomp/ChangeLog | 5 +
libiberty/ChangeLog | 8 +
libstdc++-v3/ChangeLog | 184 +++
maintainer-scripts/ChangeLog | 8 +
20 files changed, 3840 insertions(+), 1 deletion(-)
diff --git a/ChangeLog b/ChangeLog
index 56cf1ff10d55..3fd01d64af62 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,32 @@
+2023-11-13 Arsen Arsenović
+
+ PR bootstrap/12596
+ * .gitignore: Add '/gettext*'.
+ * configure.ac (host_libs): Replace intl with gettext.
+ (hbaseargs, bbaseargs, baseargs): Split baseargs into
+ {h,b}baseargs.
+ (skip_barg): New flag. Skips appending current flag to
+ bbaseargs.
+ : Exempt --with-libintl-{type,prefix} from
+ target and build machine argument passing.
+ * configure: Regenerate.
+ * Makefile.def (host_modules): Replace intl module with gettext
+ module.
+ (configure-ld): Depend on configure-gettext.
+ * Makefile.in: Regenerate.
+
+2023-11-13 Arsen Arsenović
+
+ * intl/*: Remove.
+
+2023-11-09 Jivan Hakobyan
+
+ * MAINTAINERS: Add myself.
+
+2023-11-09 YunQiang Su
+
+ * MAINTAINERS: Update my email address.
+
2023-11-06 Carl Love
* MAINTAINERS: Update my email address.
diff --git a/config/ChangeLog b/config/ChangeLog
index 1c5e8b66a252..0c19f5d89368 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,11 @@
+2023-11-13 Arsen Arsenović
+
+ * intlmacosx.m4: Import from gettext-0.22 (serial 8).
+ * gettext.m4: Sync with gettext-0.22 (serial 77).
+ * gettext-sister.m4 (ZW_GNU_GETTEXT_SISTER_DIR): Load gettext's
+ uninstalled-config.sh, or call AM_GNU_GETTEXT if missing.
+ * iconv.m4: Sync with gettext-0.22 (serial 26).
+
2023-09-15 Yang Yujie
* mt-loongarch-mlib: New file. Pass -fmultiflags when building
diff --git a/contrib/ChangeLog b/contrib/ChangeLog
index 4e068d57b4d7..6e338cdf0e5c 100644
--- a/contrib/ChangeLog
+++ b/contrib/ChangeLog
@@ -1,3 +1,14 @@
+2023-11-14 Jakub Jelinek
+
+ * gcc-changelog/git_update_version.py: Add
+ 040e5b0edbca861196d9e2ea2af5e805769c8d5d to ignored commits.
+
+2023-11-13 Arsen Arsenović
+
+ * prerequisites.sha512: Add gettext.
+ * prerequisites.md5: Add gettext.
+ * download_prerequisites: Add gettext.
+
2023-10-05 Andrea Corallo
* mdcompact/mdcompact-testsuite.el: New file.
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6e70999e3d60..a90a43daec49 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,906 @@
+2023-11-14 Jakub Jelinek
+
+ PR target/112523
+ PR ada/112514
+ * config/i386/i386.md (3_doubleword_lowpart): Move
+ operands[1] aka low part of input rather than operands[3] aka high
+ part of input to output if not the same register.
+
+2023-11-14 Andreas Krebbel
+
+ * config.gcc: Add s390-gen-builtins.h to target_gtfiles.
+ * config/s390/s390-builtins.h (s390_builtin_types)
+ (s390_builtin_fn_types, s390_builtin_decls): Add GTY marker.
+ * config/s390/t-s390 (EXTRA_GTYPE_DEPS): Add s390-gen-builtins.h.
+ Add build rule for s390-gen-builtins.h.
+
+2023-11-14 Andreas Krebbel
+
+ * config/s390/s390-c.cc (s390_fn_types_compatible): Add a check
+ for error_mark_node.
+
+2023-11-14 Jakub Jelinek
+
+ PR c/111309
+ * builtins.def (BUILT_IN_CLZG, BUILT_IN_CTZG, BUILT_IN_CLRSBG,
+ BUILT_IN_FFSG, BUILT_IN_PARITYG, BUILT_IN_POPCOUNTG): New
+ builtins.
+ * builtins.cc (fold_builtin_bit_query): New function.
+ (fold_builtin_1): Use it for
+ BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
+ (fold_builtin_2): Use it for BUILT_IN_{CLZ,CTZ}G.
+ * fold-const-call.cc: Fix comment typo on tm.h inclusion.
+ (fold_const_call_ss): Handle
+ CFN_BUILT_IN_{CLZ,CTZ,CLRSB,FFS,PARITY,POPCOUNT}G.
+ (fold_const_call_sss): New function.
+ (fold_const_call_1): Call it for 2 argument functions returning
+ scalar when passed 2 INTEGER_CSTs.
+ * genmatch.cc (cmp_operand): For function calls also compare
+ number of arguments.
+ (fns_cmp): New function.
+ (dt_node::gen_kids): Sort fns and generic_fns.
+ (dt_node::gen_kids_1): Handle fns with the same id but different
+ number of arguments.
+ * match.pd (CLZ simplifications): Drop checks for defined behavior
+ at zero. Add variant of simplifications for IFN_CLZ with 2 arguments.
+ (CTZ simplifications): Drop checks for defined behavior at zero,
+ don't optimize precisions above MAX_FIXED_MODE_SIZE. Add variant of
+ simplifications for IFN_CTZ with 2 arguments.
+ (a != 0 ? CLZ(a) : CST -> .CLZ(a)): Use TREE_TYPE (@3) instead of
+ type, add BITINT_TYPE handling, create 2 argument IFN_CLZ rather than
+ one argument. Add variant for matching CLZ with 2 arguments.
+ (a != 0 ? CTZ(a) : CST -> .CTZ(a)): Similarly.
+ * gimple-lower-bitint.cc (bitint_large_huge::lower_bit_query): New
+ method.
+ (bitint_large_huge::lower_call): Use it for IFN_{CLZ,CTZ,CLRSB,FFS}
+ and IFN_{PARITY,POPCOUNT} calls.
+ * gimple-range-op.cc (cfn_clz::fold_range): Don't check
+ CLZ_DEFINED_VALUE_AT_ZERO for m_gimple_call_internal_p, instead
+ assume defined value at zero if the call has 2 arguments and use
+ second argument value for that case.
+ (cfn_ctz::fold_range): Similarly.
+ (gimple_range_op_handler::maybe_builtin_call): Use op_cfn_clz_internal
+ or op_cfn_ctz_internal only if internal fn call has 2 arguments and
+ set m_op2 in that case.
+ * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern,
+ vect_recog_popcount_clz_ctz_ffs_pattern): For value defined at zero
+ use second argument of calls if present, otherwise assume UB at zero,
+ create 2 argument .CLZ/.CTZ calls if needed.
+ * tree-vect-stmts.cc (vectorizable_call): Handle 2 argument .CLZ/.CTZ
+ calls.
+ * tree-ssa-loop-niter.cc (build_cltz_expr): Create 2 argument
+ .CLZ/.CTZ calls if needed.
+ * tree-ssa-forwprop.cc (simplify_count_trailing_zeroes): Create 2
+ argument .CTZ calls if needed.
+ * tree-ssa-phiopt.cc (cond_removal_in_builtin_zero_pattern): Handle
+ 2 argument .CLZ/.CTZ calls, handle BITINT_TYPE, create 2 argument
+ .CLZ/.CTZ calls.
+ * doc/extend.texi (__builtin_clzg, __builtin_ctzg, __builtin_clrsbg,
+ __builtin_ffsg, __builtin_parityg, __builtin_popcountg): Document.
+
+2023-11-14 Xi Ruoyao
+
+ PR target/112330
+ * config/loongarch/genopts/loongarch.opt.in: Add
+ -m[no]-pass-relax-to-as. Change the default of -m[no]-relax to
+ account conditional branch relaxation support status.
+ * config/loongarch/loongarch.opt: Regenerate.
+ * configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
+ the assembler supports conditional branch relaxation.
+ * configure: Regenerate.
+ * config.in: Regenerate. Note that there are some unrelated
+ changes introduced by r14-5424 (which does not contain a
+ config.in regeneration).
+ * config/loongarch/loongarch-opts.h
+ (HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
+ * config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
+ Define.
+ (ASM_MRELAX_SPEC): Define.
+ (ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
+ * config/loongarch/loongarch.cc: Take the setting of
+ -m[no-]relax into account when determining the default of
+ -mexplicit-relocs=.
+ * doc/invoke.texi: Document -m[no-]relax and
+ -m[no-]pass-mrelax-to-as for LoongArch. Update the default
+ value of -mexplicit-relocs=.
+
+2023-11-14 liuhongt
+
+ PR tree-optimization/112496
+ * tree-vect-loop.cc (vectorizable_nonlinear_induction): Return
+ false when !tree_nop_conversion_p (TREE_TYPE (vectype),
+ TREE_TYPE (init_expr)).
+
+2023-11-14 Xi Ruoyao
+
+ * config/loongarch/sync.md (mem_thread_fence): Remove redundant
+ check.
+ (mem_thread_fence_1): Emit finer-grained DBAR hints for
+ different memory models, instead of 0.
+
+2023-11-14 Jakub Jelinek
+
+ PR middle-end/112511
+ * tree.cc (type_contains_placeholder_1): Handle BITINT_TYPE like
+ INTEGER_TYPE.
+
+2023-11-14 Jakub Jelinek
+ Hu, Lin1
+
+ PR target/112435
+ * config/i386/sse.md (avx512vl_shuf_32x4_1,
+ avx512dq_shuf_64x2_1): Add
+ alternative with just x instead of v constraints and xjm instead of
+ vm and use vblendps as optimization only with that alternative.
+
+2023-11-14 liuhongt
+
+ PR tree-optimization/105735
+ PR tree-optimization/111972
+ * tree-scalar-evolution.cc
+ (analyze_and_compute_bitop_with_inv_effect): Handle bitop with
+ INTEGER_CST.
+
+2023-11-13 Arsen Arsenović
+
+ * configure: Regenerate.
+ * aclocal.m4: Regenerate.
+ * Makefile.in (LIBDEPS): Remove (potential) ./ prefix from
+ LIBINTL_DEP.
+ * doc/install.texi: Document new (notable) flags added by the
+ optional gettext tree and by AM_GNU_GETTEXT. Document libintl/libc
+ with gettext dependency.
+
+2023-11-13 Uros Bizjak
+
+ * config/i386/i386-expand.h (gen_pushfl): New prototype.
+ (gen_popfl): Ditto.
+ * config/i386/i386-expand.cc (ix86_expand_builtin)
+ [case IX86_BUILTIN_READ_FLAGS]: Use gen_pushfl.
+ [case IX86_BUILTIN_WRITE_FLAGS]: Use gen_popfl.
+ * config/i386/i386.cc (gen_pushfl): New function.
+ (gen_popfl): Ditto.
+ * config/i386/i386.md (unspec): Add UNSPEC_PUSHFL and UNSPEC_POPFL.
+ (@pushfl2): Rename from *pushfl2.
+ Rewrite as unspec using UNSPEC_PUSHFL.
+ (@popfl1): Rename from *popfl1.
+ Rewrite as unspec using UNSPEC_POPFL.
+
+2023-11-13 Uros Bizjak
+
+ PR target/112494
+ * config/i386/i386.cc (ix86_cc_mode) [default]: Return CCmode.
+
+2023-11-13 Robin Dapp
+
+ * config/riscv/riscv-vsetvl.cc (source_equal_p): Use pointer
+ equality for REG_EQUAL.
+
+2023-11-13 Richard Biener
+
+ PR tree-optimization/112495
+ * tree-data-ref.cc (runtime_alias_check_p): Reject checks
+ between different address spaces.
+
+2023-11-13 Richard Biener
+
+ PR middle-end/112487
+ * tree-inline.cc (setup_one_parameter): When the parameter
+ is unused only insert a debug bind when there's not a gross
+ mismatch in value and declared parameter type. Do not assert
+ there effectively isn't.
+
+2023-11-13 Juzhe-Zhong
+
+ * config/riscv/riscv-v.cc
+ (rvv_builder::combine_sequence_use_merge_profitable_p): New function.
+ (expand_vector_init_merge_combine_sequence): Ditto.
+ (expand_vec_init): Adapt for new optimization.
+
+2023-11-13 liuhongt
+
+ * config/i386/i386-expand.cc
+ (ix86_expand_vector_init_duplicate): Handle V4HF/V4BF and
+ V2HF/V2BF.
+ (ix86_expand_vector_init_one_nonzero): Ditto.
+ (ix86_expand_vector_init_one_var): Ditto.
+ (ix86_expand_vector_init_general): Ditto.
+ (ix86_expand_vector_set_var): Ditto.
+ (ix86_expand_vector_set): Ditto.
+ (ix86_expand_vector_extract): Ditto.
+ * config/i386/mmx.md
+ (mmxdoublevecmode): Extend to V4HF/V4BF/V2HF/V2BF.
+ (*mmx_pinsrw): Extend to V4FI_64, add a new alternative (&x,
+ x, x), add a new define_split after the pattern.
+ (*mmx_pextrw): New define_insn.
+ (mmx_pshufw_1): Rename to ..
+ (mmx_pshufw_1): .. this, extend to V4FI_64.
+ (*mmx_pblendw64): Extend to V4FI_64.
+ (*vec_dup): New define_insn.
+ (vec_setv4hi): Rename to ..
+ (vec_set): .. this, and extend to V4FI_64
+ (vec_extractv4hihi): Rename to ..
+ (vec_extract): .. this, and extend
+ to V4FI_64.
+ (vec_init): New define_insn.
+ (*pinsrw): Extend to V2FI_32, add a new alternative (&x,
+ x, x), and add a new define_split after it.
+ (*pextrw): New define_insn.
+ (vec_setv2hi): Rename to ..
+ (vec_set): .. this, extend to V2FI_32.
+ (vec_extractv2hihi): Rename to ..
+ (vec_extract): .. this, extend to
+ V2FI_32.
+ (*punpckwd): Extend to V2FI_32.
+ (*pshufw_1): Rename to ..
+ (*pshufw_1): .. this, extend to V2FI_32.
+ (vec_initv2hihi): Rename to ..
+ (vec_init): .. this, and extend to
+ V2FI_32.
+ (*vec_dup): New define_insn.
+ * config/i386/sse.md (*vec_extract): Refine constraint
+ from v to Yw.
+
+2023-11-13 Roger Sayle
+
+ * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
+ represents the carry flag being set if the operand is non-zero.
+ (adc_f): New define_insn representing adc with updated flags.
+ (ashrdi3): New define_expand that only handles shifts by 1.
+ (ashrdi3_cnt1): New pre-reload define_insn_and_split.
+ (lshrdi3): New define_expand that only handles shifts by 1.
+ (lshrdi3_cnt1): New pre-reload define_insn_and_split.
+ (rrcsi2): New define_insn for rrc (SImode rotate right through carry).
+ (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
+ (rotldi3): New define_expand that only handles rotates by 1.
+ (rotldi3_cnt1): New pre-reload define_insn_and_split.
+ (rotrdi3): New define_expand that only handles rotates by 1.
+ (rotrdi3_cnt1): New pre-reload define_insn_and_split.
+ (lshrsi3_cnt1_carry): New define_insn for lsr.f.
+ (ashrsi3_cnt1_carry): New define_insn for asr.f.
+ (btst_0_carry): New define_insn for asr.f without result.
+
+2023-11-13 Roger Sayle
+
+ * config/arc/arc.cc (TARGET_FOLD_BUILTIN): Define to
+ arc_fold_builtin.
+ (arc_fold_builtin): New function. Convert ARC_BUILTIN_SWAP
+ into a rotate. Evaluate ARC_BUILTIN_NORM and
+ ARC_BUILTIN_NORMW of constant arguments.
+ * config/arc/arc.md (UNSPEC_ARC_SWAP): Delete.
+ (normw): Make output template/assembler whitespace consistent.
+ (swap): Remove define_insn, only use of SWAP UNSPEC.
+ * config/arc/builtins.def: Tweak indentation.
+ (SWAP): Expand using rotlsi2_cnt16 instead of using swap.
+
+2023-11-13 Roger Sayle
+
+ * config/i386/i386.md (3_doubleword_lowpart): New
+ define_insn_and_split to optimize register usage of doubleword
+ right shifts followed by truncation.
+
+2023-11-13 Jakub Jelinek
+
+ * config/i386/constraints.md: Remove j constraint letter from list of
+ unused letters.
+
+2023-11-13 Xi Ruoyao
+
+ PR rtl-optimization/112483
+ * simplify-rtx.cc (simplify_binary_operation_1) :
+ Fix the simplification of (fcopysign x, NEGATIVE_CONST).
+
+2023-11-13 Jakub Jelinek
+
+ PR tree-optimization/111967
+ * gimple-range-cache.cc (block_range_cache::set_bb_range): Grow
+ m_ssa_ranges to num_ssa_names rather than num_ssa_names + 1.
+ (block_range_cache::dump): Iterate from 1 rather than 0. Don't use
+ ssa_name (x) unless m_ssa_ranges[x] is non-NULL. Iterate to
+ m_ssa_ranges.length () rather than num_ssa_names.
+
+2023-11-13 Xi Ruoyao
+
+ * config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
+ iterator.
+ (ST_ANY): New mode iterator.
+ (define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
+ ST_ANY instead of QHWD for applicable patterns.
+
+2023-11-13 Xi Ruoyao
+
+ PR target/112476
+ * config/loongarch/loongarch.cc
+ (loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
+ instead of gen_rtx_SUBREG.
+
+2023-11-13 Pan Li
+
+ * config/riscv/autovec.md: Add bridge mode to lrint and lround
+ pattern.
+ * config/riscv/riscv-protos.h (expand_vec_lrint): Add new arg
+ bridge machine mode.
+ (expand_vec_lround): Ditto.
+ * config/riscv/riscv-v.cc (emit_vec_widden_cvt_f_f): New helper
+ func impl to emit vfwcvt.f.f.
+ (emit_vec_rounding_to_integer): Handle the HF to DI rounding
+ with the bridge mode.
+ (expand_vec_lrint): Reorder the args.
+ (expand_vec_lround): Ditto.
+ (expand_vec_lceil): Ditto.
+ (expand_vec_lfloor): Ditto.
+ * config/riscv/vector-iterators.md: Add vector HFmode and bridge
+ mode for converting to DI.
+
+2023-11-12 Jeff Law
+
+ Revert:
+ 2023-11-11 Jin Ma
+
+ * haifa-sched.cc (use_or_clobber_starts_range_p): New.
+ (prune_ready_list): USE or CLOBBER should delay execution
+ if it starts a new live range.
+
+2023-11-12 Uros Bizjak
+
+ * config/i386/i386.md (*stack_protect_set_4s__di):
+ Remove alternative 0.
+
+2023-11-11 Eric Botcazou
+
+ * ipa-cp.cc (print_ipcp_constant_value): Move to...
+ (values_equal_for_ipcp_p): Deal with VAR_DECLs from the
+ constant pool.
+ * ipa-prop.cc (ipa_print_constant_value): ...here. Likewise.
+ (ipa_print_node_jump_functions_for_edge): Call the function
+ ipa_print_constant_value to print IPA_JF_CONST elements.
+
+2023-11-11 Jin Ma
+
+ * haifa-sched.cc (use_or_clobber_starts_range_p): New.
+ (prune_ready_list): USE or CLOBBER should delay execution
+ if it starts a new live range.
+
+2023-11-11 Jakub Jelinek
+
+ PR middle-end/112430
+ * tree-ssa-math-opts.cc (match_uaddc_usubc): Remove temp_stmts in the
+ order they were pushed rather than in reverse order. Call
+ release_defs after gsi_remove.
+
+2023-11-11 Richard Sandiford
+
+ * target.def (mode_switching.backprop): New hook.
+ * doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook.
+ * doc/tm.texi: Regenerate.
+ * mode-switching.cc (struct bb_info): Add single_succ.
+ (confluence_info): Add transp field.
+ (single_succ_confluence_n, single_succ_transfer): New functions.
+ (backprop_confluence_n, backprop_transfer): Likewise.
+ (optimize_mode_switching): Use them. Push mode transitions onto
+ a block's incoming edges, if the backprop hook requires it.
+
+2023-11-11 Richard Sandiford
+
+ * target.def (mode_switching.confluence): New hook.
+ * doc/tm.texi (TARGET_MODE_CONFLUENCE): New @hook.
+ * doc/tm.texi.in: Regenerate.
+ * mode-switching.cc (confluence_info): New variable.
+ (mode_confluence, forward_confluence_n, forward_transfer): New
+ functions.
+ (optimize_mode_switching): Use them to calculate mode_in when
+ TARGET_MODE_CONFLUENCE is defined.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (commit_mode_sets): Use 1-based edge aux values.
+
+2023-11-11 Richard Sandiford
+
+ * target.def (mode_switching.after): Add a regs_live parameter.
+ * doc/tm.texi: Regenerate.
+ * config/epiphany/epiphany-protos.h (epiphany_mode_after): Update
+ accordingly.
+ * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
+ (epiphany_mode_after): Likewise.
+ * config/i386/i386.cc (ix86_mode_after): Likewise.
+ * config/riscv/riscv.cc (riscv_mode_after): Likewise.
+ * config/sh/sh.cc (sh_mode_after): Likewise.
+ * mode-switching.cc (optimize_mode_switching): Likewise.
+
+2023-11-11 Richard Sandiford
+
+ * target.def (mode_switching.needed): Add a regs_live parameter.
+ * doc/tm.texi: Regenerate.
+ * config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update
+ accordingly.
+ * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
+ * config/epiphany/mode-switch-use.cc (insert_uses): Likewise.
+ * config/i386/i386.cc (ix86_mode_needed): Likewise.
+ * config/riscv/riscv.cc (riscv_mode_needed): Likewise.
+ * config/sh/sh.cc (sh_mode_needed): Likewise.
+ * mode-switching.cc (optimize_mode_switching): Likewise.
+ (create_pre_exit): Likewise, using the DF simulate functions
+ to calculate the required information.
+
+2023-11-11 Richard Sandiford
+
+ * target.def (mode_switching.eh_handler): New hook.
+ * doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook.
+ * doc/tm.texi: Regenerate.
+ * mode-switching.cc (optimize_mode_switching): Use eh_handler
+ to get the mode on entry to an exception handler.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (optimize_mode_switching): Mark the exit
+ block as nontransparent if it requires a specific mode.
+ Handle the entry and exit mode as sibling rather than nested
+ concepts. Remove outdated comment.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (optimize_mode_switching): Initially
+ compute transparency in a bit-per-block bitmap.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (seginfo): Add a prev_mode field.
+ (new_seginfo): Take and initialize the prev_mode.
+ (optimize_mode_switching): Update calls accordingly.
+ Use the recorded modes during the emit phase, rather than
+ computing one on the fly.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (add_seginfo): Replace head pointer with
+ a pointer to the tail pointer.
+ (optimize_mode_switching): Update calls accordingly.
+
+2023-11-11 Richard Sandiford
+
+ * mode-switching.cc (optimize_mode_switching): Call
+ df_note_add_problem.
+
+2023-11-11 Richard Sandiford
+
+ * target.def: Tweak documentation of mode-switching hooks.
+ * doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation.
+ (NUM_MODES_FOR_MODE_SWITCHING): Likewise.
+ * doc/tm.texi: Regenerate.
+
+2023-11-11 Martin Uecker
+
+ PR c/110815
+ PR c/112428
+ * gimple-ssa-warn-access.cc (pass_waccess::maybe_check_access_sizes):
+ remove warning for parameters declared with `static`.
+
+2023-11-11 Joern Rennecke
+
+ * doc/sourcebuild.texi (Scan the assembly output): Document change.
+
+2023-11-10 Mao
+
+ PR middle-end/110983
+ * doc/invoke.texi (Option Summary): Add -fpatchable-function-entry.
+
+2023-11-10 Maciej W. Rozycki
+
+ * config/riscv/riscv.md (length): Fix indentation for branch and
+ jump length calculation expressions.
+
+2023-11-10 Eric Botcazou
+
+ * fold-const.cc (operand_compare::operand_equal_p) :
+ Deal with nonempty constant CONSTRUCTORs.
+ (operand_compare::hash_operand) : Hash DECL_FIELD_OFFSET
+ and DECL_FIELD_BIT_OFFSET for FIELD_DECLs.
+
+2023-11-10 Vladimir N. Makarov
+
+ PR target/112337
+ * ira-costs.cc: (validate_autoinc_and_mem_addr_p): New function.
+ (equiv_can_be_consumed_p): Use it.
+
+2023-11-10 Richard Sandiford
+
+ * read-rtl.cc (md_reader::read_mapping): Allow iterators to
+ include other iterators.
+ * doc/md.texi: Document the change.
+ * config/aarch64/iterators.md (DREG2, VQ2, TX2, DX2, SX2): Include
+ the iterator that is being duplicated, rather than reproducing it.
+ (VSTRUCT_D): Redefine using VSTRUCT_[234]D.
+ (VSTRUCT_Q): Likewise VSTRUCT_[234]Q.
+ (VSTRUCT_2QD, VSTRUCT_3QD, VSTRUCT_4QD, VSTRUCT_QD): Redefine using
+ the individual D and Q iterators.
+
+2023-11-10 Uros Bizjak
+
+ * config/i386/i386.md (stack_protect_set_1 peephole2):
+ Explicitly check operand 2 for word_mode.
+ (stack_protect_set_1 peephole2 #2): Ditto.
+ (stack_protect_set_2 peephole2): Ditto.
+ (stack_protect_set_3 peephole2): Ditto.
+ (*stack_protect_set_4z__di): New insn pattern.
+ (*stack_protect_set_4s__di): Ditto.
+ (stack_protect_set_4 peephole2): New peephole2 pattern to
+ substitute stack protector scratch register clear with unrelated
+ register initialization involving zero/sign-extend instruction.
+
+2023-11-10 Uros Bizjak
+
+ * config/i386/i386.md (shift): Use SAL instead of SLL
+ for ashift insn mnemonic.
+
+2023-11-10 Juzhe-Zhong
+
+ PR tree-optimization/112438
+ * tree-vect-loop.cc (vectorizable_induction): Bugfix when
+ LOOP_VINFO_USING_SELECT_VL_P.
+
+2023-11-10 Juzhe-Zhong
+
+ * config/riscv/riscv-protos.h (enum insn_type): New enum.
+ * config/riscv/riscv-v.cc
+ (rvv_builder::combine_sequence_use_slideup_profitable_p): New function.
+ (expand_vector_init_slideup_combine_sequence): Ditto.
+ (expand_vec_init): Add slideup combine optimization.
+
+2023-11-10 Robin Dapp
+
+ PR tree-optimization/112464
+ * tree-vect-loop.cc (vectorize_fold_left_reduction): Use
+ vect_orig_stmt on scalar_dest_def_info.
+
+2023-11-10 Jin Ma
+
+ * config/riscv/riscv.cc (riscv_for_each_saved_reg): Place the interrupt
+ operation before the XTheadMemPair.
+
+2023-11-10 Richard Biener
+
+ PR tree-optimization/110221
+ * tree-vect-slp.cc (vect_schedule_slp_node): When loop
+ masking / len is applied make sure to not schedule
+ intenal defs outside of the loop.
+
+2023-11-10 Andrew Stubbs
+
+ * expr.cc (store_constructor): Add "and" operation to uniform mask
+ generation.
+
+2023-11-10 Andrew Stubbs
+
+ PR target/112308
+ * config/gcn/gcn-valu.md (add3): Fix B constraint
+ and switch to the new format.
+ (add3_dup): Likewise.
+ (add3_vcc): Likewise.
+ (add3_vcc_dup): Likewise.
+ (add3_vcc_zext_dup): Likewise.
+ (add3_vcc_zext_dup_exec): Likewise.
+ (add3_vcc_zext_dup2): Likewise.
+ (add3_vcc_zext_dup2_exec): Likewise.
+
+2023-11-10 Richard Biener
+
+ PR middle-end/112469
+ * match.pd (cond ? op a : b -> .COND_op (cond, a, b)): Add
+ missing view_converts.
+
+2023-11-10 Andrew Stubbs
+
+ * config/gcn/gcn.cc (gcn_expand_reduc_scalar): Add clobber to DImode
+ min/max instructions.
+
+2023-11-10 Chenghui Pan
+
+ * config/loongarch/lsx.md: Fix instruction name typo in
+ lsx_vreplgr2vr_ template.
+
+2023-11-10 Juzhe-Zhong
+
+ * config/riscv/autovec.md (vec_init): Split patterns.
+
+2023-11-10 Pan Li
+
+ * config/riscv/riscv-v.cc (expand_vector_init_trailing_same_elem):
+ New fun impl to expand the insn when trailing same elements.
+ (expand_vec_init): Try trailing same elements when vec_init.
+
+2023-11-10 Juzhe-Zhong
+
+ * config/riscv/autovec-opt.md (*cond_copysign): Remove.
+ * config/riscv/autovec.md (cond_copysign): New pattern.
+
+2023-11-10 Pan Li
+
+ PR target/112432
+ * internal-fn.def (LRINT): Add FLOATN support.
+ (LROUND): Ditto.
+ (LLRINT): Ditto.
+ (LLROUND): Ditto.
+
+2023-11-10 Jeff Law
+
+ * config/h8300/combiner.md (single bit sign_extract): Avoid recently
+ added patterns for H8/SX.
+ (single bit zero_extract): New patterns.
+
+2023-11-10 liuhongt
+
+ PR target/112443
+ * config/i386/sse.md (*avx2_pcmp3_4): Fix swap condition
+ from LT to GT since there's not in the pattern.
+ (*avx2_pcmp3_5): Ditto.
+
+2023-11-10 Jose E. Marchesi
+
+ * config/bpf/bpf.cc (bpf_print_register): Accept modifier code 'W'
+ to force emitting register names using the wN form.
+ * config/bpf/bpf.md (*mulsidi3_zeroextend): Force operands to
+ always use wN written form in pseudo-C assembly syntax.
+
+2023-11-09 David Malcolm
+
+ * diagnostic-show-locus.cc (layout::m_line_table): New field.
+ (compatible_locations_p): Convert to...
+ (layout::compatible_locations_p): ...this, replacing uses of
+ line_table global with m_line_table.
+ (layout::layout): Convert "richloc" param from a pointer to a
+ const reference. Initialize m_line_table member.
+ (layout::maybe_add_location_range): Replace uses of line_table
+ global with m_line_table. Pass the latter to
+ linemap_client_expand_location_to_spelling_point.
+ (layout::print_leading_fixits): Pass m_line_table to
+ affects_line_p.
+ (layout::print_trailing_fixits): Likewise.
+ (gcc_rich_location::add_location_if_nearby): Update for change
+ to layout ctor params.
+ (diagnostic_show_locus): Convert to...
+ (diagnostic_context::maybe_show_locus): ...this, converting
+ richloc param from a pointer to a const reference. Make "loc"
+ const. Split out printing part of function to...
+ (diagnostic_context::show_locus): ...this.
+ (selftest::test_offset_impl): Update for change to layout ctor
+ params.
+ (selftest::test_layout_x_offset_display_utf8): Likewise.
+ (selftest::test_layout_x_offset_display_tab): Likewise.
+ (selftest::test_tab_expansion): Likewise.
+ * diagnostic.h (diagnostic_context::maybe_show_locus): New decl.
+ (diagnostic_context::show_locus): New decl.
+ (diagnostic_show_locus): Convert from a decl to an inline function.
+ * gdbinit.in (break-on-diagnostic): Update from a breakpoint
+ on diagnostic_show_locus to one on
+ diagnostic_context::maybe_show_locus.
+ * genmatch.cc (linemap_client_expand_location_to_spelling_point):
+ Add "set" param and use it in place of line_table global.
+ * input.cc (expand_location_1): Likewise.
+ (expand_location): Update for new param of expand_location_1.
+ (expand_location_to_spelling_point): Likewise.
+ (linemap_client_expand_location_to_spelling_point): Add "set"
+ param and use it in place of line_table global.
+ * tree-diagnostic-path.cc (event_range::print): Pass line_table
+ for new param of linemap_client_expand_location_to_spelling_point.
+
+2023-11-09 Uros Bizjak
+
+ * config/i386/i386.md (@stack_protect_set_1__):
+ Use W mode iterator instead of SWI48. Output MOV instead of XOR
+ for TARGET_USE_MOV0.
+ (stack_protect_set_1 peephole2): Use integer modes with
+ mode size <= word mode size for operand 3.
+ (stack_protect_set_1 peephole2 #2): New peephole2 pattern to
+ substitute stack protector scratch register clear with unrelated
+ register initialization, originally in front of stack
+ protector sequence.
+ (*stack_protect_set_3__): New insn pattern.
+ (stack_protect_set_1 peephole2): New peephole2 pattern to
+ substitute stack protector scratch register clear with unrelated
+ register initialization involving LEA instruction.
+
+2023-11-09 Vladimir N. Makarov
+
+ PR rtl-optimization/110215
+ * ira-lives.cc: (add_conflict_from_region_landing_pads): New
+ function.
+ (process_bb_node_lives): Use it.
+
+2023-11-09 Alexandre Oliva
+
+ * config/i386/i386.cc (symbolic_base_address_p,
+ base_address_p): New, factored out from...
+ (extract_base_offset_in_addr): ... here and extended to
+ recognize REG+GOTOFF, as in gcc.target/i386/sse2-load-multi.c
+ and sse2-store-multi.c with PIE enabled by default.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * config/aarch64/aarch64-sve.md (cond_copysign): New.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * config/aarch64/aarch64.md (copysign3): Handle
+ copysign (x, -1).
+ * config/aarch64/aarch64-simd.md (copysign3): Likewise.
+ * config/aarch64/aarch64-sve.md (copysign3): Likewise.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * config/aarch64/aarch64.md (3): Add SVE split case.
+ * config/aarch64/aarch64-simd.md (ior3): Likewise.
+ * config/aarch64/predicates.md(aarch64_orr_imm_sve_advsimd): New.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * config/aarch64/aarch64.md (*mov_aarch64, *movsi_aarch64,
+ *movdi_aarch64): Add new w -> Z case.
+ * config/aarch64/iterators.md (Vbtype): Add QI and HI.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * config/aarch64/aarch64-protos.h (aarch64_simd_special_constant_p,
+ aarch64_maybe_generate_simd_constant): New.
+ * config/aarch64/aarch64-simd.md (*aarch64_simd_mov,
+ *aarch64_simd_mov): Add new codegen for special constants.
+ * config/aarch64/aarch64.cc (aarch64_extract_vec_duplicate_wide_int):
+ Take optional mode.
+ (aarch64_simd_special_constant_p,
+ aarch64_maybe_generate_simd_constant): New.
+ * config/aarch64/aarch64.md (*movdi_aarch64): Add new codegen for
+ special constants.
+ * config/aarch64/constraints.md (Dx): new.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * internal-fn.def (COPYSIGN): New.
+ * match.pd (UNCOND_BINARY, COND_BINARY): Map IFN_COPYSIGN to
+ IFN_COND_COPYSIGN.
+ * optabs.def (cond_copysign_optab, cond_len_copysign_optab): New.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * match.pd: Add new neg+abs rule, remove inverse copysign rule.
+
+2023-11-09 Tamar Christina
+
+ PR tree-optimization/109154
+ * match.pd: expand existing copysign optimizations.
+
+2023-11-09 Tatsuyuki Ishi
+
+ PR driver/111605
+ * collect2.cc (main): Do not prepend target triple to
+ -fuse-ld=lld,mold.
+
+2023-11-09 Richard Biener
+
+ PR tree-optimization/111133
+ * tree-vect-stmts.cc (vect_build_scatter_store_calls):
+ Remove and refactor to ...
+ (vect_build_one_scatter_store_call): ... this new function.
+ (vectorizable_store): Use vect_check_scalar_mask to record
+ the SLP node for the mask operand. Code generate scatters
+ with builtin decls from the main scatter vectorization
+ path and prepare that for SLP.
+ * tree-vect-slp.cc (vect_get_operand_map): Do not look
+ at the VDEF to decide between scatter or gather since that
+ doesn't work for patterns. Use the LHS being an SSA_NAME
+ or not instead.
+
+2023-11-09 Pan Li
+
+ * config/riscv/riscv.cc (riscv_frm_emit_after_bb_end): Only
+ perform once emit when at least one succ edge is abnormal.
+
+2023-11-09 Richard Biener
+
+ * tree-vect-loop.cc (vect_verify_full_masking_avx512):
+ Check we have integer mode masks as required by
+ vect_get_loop_mask.
+
+2023-11-09 Richard Biener
+
+ PR tree-optimization/112444
+ * tree-ssa-sccvn.cc (visit_phi): Avoid using not visited
+ defs as undefined vals.
+
+2023-11-09 YunQiang Su
+
+ * config/mips/mips.cc(mips_option_override): Set mips_abs to
+ 2008, if mips_abs is default and mips_nan is 2008.
+
+2023-11-09 Florian Weimer
+
+ * doc/invoke.texi (Warning Options): Document
+ -Wreturn-mismatch. Update -Wreturn-type documentation.
+
+2023-11-09 Stefan Schulze Frielinghaus
+
+ * config/s390/s390.md: Remove UNSPEC_VEC_ELTSWAP.
+ * config/s390/vector.md (eltswapv16qi): New expander.
+ (*eltswapv16qi): New insn and splitter.
+ (eltswapv8hi): New insn and splitter.
+ (eltswap): New insn and splitter for modes V_HW_4 as well
+ as V_HW_2.
+ * config/s390/vx-builtins.md (eltswap): Remove.
+ (*eltswapv16qi): Remove.
+ (*eltswap): Remove.
+ (*eltswap_emu): Remove.
+
+2023-11-09 Stefan Schulze Frielinghaus
+
+ * config/s390/s390.cc (expand_perm_with_rot): Remove.
+ (expand_perm_reverse_elements): New.
+ (expand_perm_with_vster): Remove.
+ (expand_perm_with_vstbrq): Remove.
+ (vectorize_vec_perm_const_1): Replace removed functions with new
+ one.
+
+2023-11-09 Stefan Schulze Frielinghaus
+
+ * config/s390/s390.cc (expand_perm_with_merge): Deal with cases
+ where vmr{l,h} are still applicable if the operands are swapped.
+ (expand_perm_with_vpdi): Likewise for vpdi.
+
+2023-11-09 Stefan Schulze Frielinghaus