From 3bd8241a1f19827586cbed7832a24f44ff3e22ac Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 6 Dec 2023 12:35:44 -0500 Subject: [PATCH 001/311] diagnostics: prettify JSON output formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously our JSON output emitted the JSON all on one line, with no indentation to show the structure of the values. Although it's easy to reformat such output (e.g. with "python -m json.tool"), I've found it's a pain to need to do so, e.g. my text editor sometimes hangs when opening a multimegabyte json file all on one line. Similarly diff-ing is easier if the json is already formatted. This patch adds whitespace to json output to show the structure. It turned out to be fairly easy to implement using pretty_printer's existing indentation machinery. The patch uses this formatting for the various JSON-based diagnostic output formats. For example, with this patch, the output from -fdiagnostics-format=json-stderr looks like: [{"kind": "warning", "message": "stack-based buffer overflow", "option": "-Wanalyzer-out-of-bounds", "option_url": "https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html#index-Wanalyzer-out-of-bounds", "children": [{"kind": "note", "message": "write of 350 bytes to beyond the end of ‘buf’", "locations": [{"caret": {"file": "../../src/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c", "line": 20, "display-column": 3, "byte-column": 3, "column": 3}, "finish": {"file": "../../src/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c", "line": 20, "display-column": 27, "byte-column": 27, "column": 27}}], "escape-source": false}, {"kind": "note", "message": "valid subscripts for ‘buf’ are ‘[0]’ to ‘[99]’", "locations": [{"caret": {"file": "../../src/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c", "line": 20, "display-column": 3, "byte-column": 3, "column": 3}, "finish": {"file":
"../../src/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-diagram-19.c", "line": 20, "display-column": 27, "byte-column": 27, "column": 27}}], "escape-source": false}], "column-origin": 1, ...snip...] I was able to update almost all of our DejaGnu test cases for JSON to handle this format tweak, and IMHO it improved the readability of these test cases, but a couple were more awkward. Hence I added -fno-diagnostics-json-formatting as an option to disable this formatting. The formatting does not affect the output of -fsave-optimization-record or the JSON output from gcov (but this could be enabled if desirable). gcc/analyzer/ChangeLog: * engine.cc (dump_analyzer_json): Use flag_diagnostics_json_formatting. gcc/ChangeLog: * common.opt (fdiagnostics-json-formatting): New. * diagnostic-format-json.cc: Add "formatted" boolean to json_output_format and subclasses, and to the diagnostic_output_format_init_json_* functions. Use it when printing JSON. * diagnostic-format-sarif.cc: Likewise for sarif_builder, sarif_output_format, and the various diagnostic_output_format_init_sarif_* functions. * diagnostic.cc (diagnostic_output_format_init): Add "json_formatting" boolean and pass on to the various cases. * diagnostic.h (diagnostic_output_format_init): Add "json_formatting" param. (diagnostic_output_format_init_json_stderr): Add "formatted" param. (diagnostic_output_format_init_json_file): Likewise. (diagnostic_output_format_init_sarif_stderr): Likewise. (diagnostic_output_format_init_sarif_file): Likewise. (diagnostic_output_format_init_sarif_stream): Likewise. * doc/invoke.texi (-fdiagnostics-format=json): Remove discussion about JSON output needing formatting. (-fno-diagnostics-json-formatting): Add. * gcc.cc (driver_handle_option): Use opts->x_flag_diagnostics_json_formatting. * gcov.cc (generate_results): Pass "false" for new formatting option when printing json. * json.cc (value::dump): Add new "formatted" param.
(object::print): Likewise, using it to add whitespace to format the JSON output. (array::print): Likewise. (float_number::print): Add new "formatted" param. (integer_number::print): Likewise. (string::print): Likewise. (literal::print): Likewise. (selftest::assert_print_eq): Add "formatted" param. (ASSERT_PRINT_EQ): Add "FORMATTED" param. (selftest::test_writing_objects): Test both formatted and unformatted printing. (selftest::test_writing_arrays): Likewise. (selftest::test_writing_float_numbers): Update for new param of ASSERT_PRINT_EQ. (selftest::test_writing_integer_numbers): Likewise. (selftest::test_writing_strings): Likewise. (selftest::test_writing_literals): Likewise. (selftest::test_formatting): New. (selftest::json_cc_tests): Call it. * json.h (value::print): Add "formatted" param. (value::dump): Likewise. (object::print): Likewise. (array::print): Likewise. (float_number::print): Likewise. (integer_number::print): Likewise. (string::print): Likewise. (literal::print): Likewise. * optinfo-emit-json.cc (optrecord_json_writer::write): Pass "false" for new formatting option when printing json. (selftest::test_building_json_from_dump_calls): Likewise. * opts.cc (common_handle_option): Use opts->x_flag_diagnostics_json_formatting. gcc/testsuite/ChangeLog: * c-c++-common/diagnostic-format-json-1.c: Update expected JSON output to reflect whitespace. * c-c++-common/diagnostic-format-json-2.c: Likewise. * c-c++-common/diagnostic-format-json-3.c: Likewise. * c-c++-common/diagnostic-format-json-4.c: Likewise. * c-c++-common/diagnostic-format-json-5.c: Likewise. * c-c++-common/diagnostic-format-json-stderr-1.c: Likewise. * g++.dg/pr90462.C: Add -fno-diagnostics-json-formatting. * gcc.dg/analyzer/malloc-sarif-1.c: Likewise. * gcc.dg/plugin/diagnostic-test-paths-3.c: Update expected JSON output to reflect whitespace. * gfortran.dg/diagnostic-format-json-1.F90: Likewise. * gfortran.dg/diagnostic-format-json-2.F90: Likewise. 
* gfortran.dg/diagnostic-format-json-3.F90: Likewise. Signed-off-by: David Malcolm --- gcc/analyzer/engine.cc | 2 +- gcc/common.opt | 4 + gcc/diagnostic-format-json.cc | 26 ++- gcc/diagnostic-format-sarif.cc | 37 ++-- gcc/diagnostic.cc | 17 +- gcc/diagnostic.h | 12 +- gcc/doc/invoke.texi | 22 ++- gcc/gcc.cc | 3 +- gcc/gcov.cc | 4 +- gcc/json.cc | 163 +++++++++++++----- gcc/json.h | 16 +- gcc/optinfo-emit-json.cc | 4 +- gcc/opts.cc | 3 +- .../c-c++-common/diagnostic-format-json-1.c | 42 ++--- .../c-c++-common/diagnostic-format-json-2.c | 48 +++--- .../c-c++-common/diagnostic-format-json-3.c | 48 +++--- .../c-c++-common/diagnostic-format-json-4.c | 93 ++++------ .../c-c++-common/diagnostic-format-json-5.c | 86 +++------ .../diagnostic-format-json-stderr-1.c | 42 ++--- gcc/testsuite/g++.dg/pr90462.C | 2 +- .../gcc.dg/analyzer/malloc-sarif-1.c | 2 +- .../gcc.dg/plugin/diagnostic-test-paths-3.c | 45 ++++- .../gfortran.dg/diagnostic-format-json-1.F90 | 45 ++--- .../gfortran.dg/diagnostic-format-json-2.F90 | 49 +++--- .../gfortran.dg/diagnostic-format-json-3.F90 | 49 +++--- 25 files changed, 470 insertions(+), 394 deletions(-) diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 1f930a21eb37..825b3af43fce 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -6068,7 +6068,7 @@ dump_analyzer_json (const supergraph &sg, toplev_obj->set ("egraph", eg.to_json ()); pretty_printer pp; - toplev_obj->print (&pp); + toplev_obj->print (&pp, flag_diagnostics_json_formatting); pp_formatted_text (&pp); delete toplev_obj; diff --git a/gcc/common.opt b/gcc/common.opt index f070aff8cbc1..5eb5ecff04bd 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1391,6 +1391,10 @@ Enum(diagnostic_color_rule) String(always) Value(DIAGNOSTICS_COLOR_YES) EnumValue Enum(diagnostic_color_rule) String(auto) Value(DIAGNOSTICS_COLOR_AUTO) +fdiagnostics-json-formatting +Common Var(flag_diagnostics_json_formatting) Init(1) +Enable formatting of JSON output. 
+ fdiagnostics-urls= Driver Common Joined RejectNegative Var(flag_diagnostics_show_urls) Enum(diagnostic_url_rule) Init(DIAGNOSTICS_URL_AUTO) -fdiagnostics-urls=[never|always|auto] Embed URLs in diagnostics. diff --git a/gcc/diagnostic-format-json.cc b/gcc/diagnostic-format-json.cc index 418db74522d8..c013192de06e 100644 --- a/gcc/diagnostic-format-json.cc +++ b/gcc/diagnostic-format-json.cc @@ -56,11 +56,13 @@ public: } protected: - json_output_format (diagnostic_context &context) + json_output_format (diagnostic_context &context, + bool formatted) : diagnostic_output_format (context), m_toplevel_array (new json::array ()), m_cur_group (nullptr), - m_cur_children_array (nullptr) + m_cur_children_array (nullptr), + m_formatted (formatted) { } @@ -68,7 +70,7 @@ protected: void flush_to_file (FILE *outf) { - m_toplevel_array->dump (outf); + m_toplevel_array->dump (outf, m_formatted); fprintf (outf, "\n"); delete m_toplevel_array; m_toplevel_array = nullptr; @@ -84,6 +86,8 @@ private: /* The JSON array for the "children" array within the current diagnostic group. */ json::array *m_cur_children_array; + + bool m_formatted; }; /* Generate a JSON object for LOC. 
*/ @@ -301,8 +305,9 @@ json_output_format::on_end_diagnostic (const diagnostic_info &diagnostic, class json_stderr_output_format : public json_output_format { public: - json_stderr_output_format (diagnostic_context &context) - : json_output_format (context) + json_stderr_output_format (diagnostic_context &context, + bool formatted) + : json_output_format (context, formatted) { } ~json_stderr_output_format () @@ -315,8 +320,9 @@ class json_file_output_format : public json_output_format { public: json_file_output_format (diagnostic_context &context, + bool formatted, const char *base_file_name) - : json_output_format (context), + : json_output_format (context, formatted), m_base_file_name (xstrdup (base_file_name)) { } @@ -367,10 +373,12 @@ diagnostic_output_format_init_json (diagnostic_context *context) /* Populate CONTEXT in preparation for JSON output to stderr. */ void -diagnostic_output_format_init_json_stderr (diagnostic_context *context) +diagnostic_output_format_init_json_stderr (diagnostic_context *context, + bool formatted) { diagnostic_output_format_init_json (context); - context->set_output_format (new json_stderr_output_format (*context)); + context->set_output_format (new json_stderr_output_format (*context, + formatted)); } /* Populate CONTEXT in preparation for JSON output to a file named @@ -378,10 +386,12 @@ diagnostic_output_format_init_json_stderr (diagnostic_context *context) void diagnostic_output_format_init_json_file (diagnostic_context *context, + bool formatted, const char *base_file_name) { diagnostic_output_format_init_json (context); context->set_output_format (new json_file_output_format (*context, + formatted, base_file_name)); } diff --git a/gcc/diagnostic-format-sarif.cc b/gcc/diagnostic-format-sarif.cc index 941fd0f5f747..05b2c6df2e27 100644 --- a/gcc/diagnostic-format-sarif.cc +++ b/gcc/diagnostic-format-sarif.cc @@ -157,7 +157,8 @@ private: class sarif_builder { public: - sarif_builder (diagnostic_context *context); + sarif_builder 
(diagnostic_context *context, + bool formatted); void end_diagnostic (diagnostic_context *context, const diagnostic_info &diagnostic, @@ -250,6 +251,8 @@ private: hash_set > m_cwe_id_set; int m_tabstop; + + bool m_formatted; }; /* class sarif_object : public json::object. */ @@ -401,7 +404,8 @@ sarif_thread_flow::sarif_thread_flow (const diagnostic_thread &thread) /* sarif_builder's ctor. */ -sarif_builder::sarif_builder (diagnostic_context *context) +sarif_builder::sarif_builder (diagnostic_context *context, + bool formatted) : m_context (context), m_invocation_obj (new sarif_invocation ()), m_results_array (new json::array ()), @@ -409,7 +413,8 @@ sarif_builder::sarif_builder (diagnostic_context *context) m_seen_any_relative_paths (false), m_rule_id_set (), m_rules_arr (new json::array ()), - m_tabstop (context->m_tabstop) + m_tabstop (context->m_tabstop), + m_formatted (formatted) { } @@ -472,7 +477,7 @@ sarif_builder::flush_to_file (FILE *outf) { m_invocation_obj->prepare_to_flush (m_context); json::object *top = make_top_level_object (m_invocation_obj, m_results_array); - top->dump (outf); + top->dump (outf, m_formatted); m_invocation_obj = NULL; m_results_array = NULL; fprintf (outf, "\n"); @@ -1721,9 +1726,10 @@ public: } protected: - sarif_output_format (diagnostic_context &context) + sarif_output_format (diagnostic_context &context, + bool formatted) : diagnostic_output_format (context), - m_builder (&context) + m_builder (&context, formatted) {} sarif_builder m_builder; @@ -1732,8 +1738,10 @@ protected: class sarif_stream_output_format : public sarif_output_format { public: - sarif_stream_output_format (diagnostic_context &context, FILE *stream) - : sarif_output_format (context), + sarif_stream_output_format (diagnostic_context &context, + bool formatted, + FILE *stream) + : sarif_output_format (context, formatted), m_stream (stream) { } @@ -1749,8 +1757,9 @@ class sarif_file_output_format : public sarif_output_format { public: sarif_file_output_format 
(diagnostic_context &context, - const char *base_file_name) - : sarif_output_format (context), + bool formatted, + const char *base_file_name) + : sarif_output_format (context, formatted), m_base_file_name (xstrdup (base_file_name)) { } @@ -1801,10 +1810,12 @@ diagnostic_output_format_init_sarif (diagnostic_context *context) /* Populate CONTEXT in preparation for SARIF output to stderr. */ void -diagnostic_output_format_init_sarif_stderr (diagnostic_context *context) +diagnostic_output_format_init_sarif_stderr (diagnostic_context *context, + bool formatted) { diagnostic_output_format_init_sarif (context); context->set_output_format (new sarif_stream_output_format (*context, + formatted, stderr)); } @@ -1813,10 +1824,12 @@ diagnostic_output_format_init_sarif_stderr (diagnostic_context *context) void diagnostic_output_format_init_sarif_file (diagnostic_context *context, + bool formatted, const char *base_file_name) { diagnostic_output_format_init_sarif (context); context->set_output_format (new sarif_file_output_format (*context, + formatted, base_file_name)); } @@ -1824,9 +1837,11 @@ diagnostic_output_format_init_sarif_file (diagnostic_context *context, void diagnostic_output_format_init_sarif_stream (diagnostic_context *context, + bool formatted, FILE *stream) { diagnostic_output_format_init_sarif (context); context->set_output_format (new sarif_stream_output_format (*context, + formatted, stream)); } diff --git a/gcc/diagnostic.cc b/gcc/diagnostic.cc index 5854c89a387f..b5b6a760ccf9 100644 --- a/gcc/diagnostic.cc +++ b/gcc/diagnostic.cc @@ -2429,7 +2429,8 @@ diagnostic_text_output_format::on_diagram (const diagnostic_diagram &diagram) void diagnostic_output_format_init (diagnostic_context *context, const char *base_file_name, - enum diagnostics_output_format format) + enum diagnostics_output_format format, + bool json_formatting) { switch (format) { @@ -2440,19 +2441,25 @@ diagnostic_output_format_init (diagnostic_context *context, break; case 
DIAGNOSTICS_OUTPUT_FORMAT_JSON_STDERR: - diagnostic_output_format_init_json_stderr (context); + diagnostic_output_format_init_json_stderr (context, + json_formatting); break; case DIAGNOSTICS_OUTPUT_FORMAT_JSON_FILE: - diagnostic_output_format_init_json_file (context, base_file_name); + diagnostic_output_format_init_json_file (context, + json_formatting, + base_file_name); break; case DIAGNOSTICS_OUTPUT_FORMAT_SARIF_STDERR: - diagnostic_output_format_init_sarif_stderr (context); + diagnostic_output_format_init_sarif_stderr (context, + json_formatting); break; case DIAGNOSTICS_OUTPUT_FORMAT_SARIF_FILE: - diagnostic_output_format_init_sarif_file (context, base_file_name); + diagnostic_output_format_init_sarif_file (context, + json_formatting, + base_file_name); break; } } diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h index 4fc31438b165..80e53ec92b06 100644 --- a/gcc/diagnostic.h +++ b/gcc/diagnostic.h @@ -1062,14 +1062,20 @@ extern char *build_message_string (const char *, ...) ATTRIBUTE_PRINTF_1; extern void diagnostic_output_format_init (diagnostic_context *, const char *base_file_name, - enum diagnostics_output_format); -extern void diagnostic_output_format_init_json_stderr (diagnostic_context *context); + enum diagnostics_output_format, + bool json_formatting); +extern void diagnostic_output_format_init_json_stderr (diagnostic_context *context, + bool formatted); extern void diagnostic_output_format_init_json_file (diagnostic_context *context, + bool formatted, const char *base_file_name); -extern void diagnostic_output_format_init_sarif_stderr (diagnostic_context *context); +extern void diagnostic_output_format_init_sarif_stderr (diagnostic_context *context, + bool formatted); extern void diagnostic_output_format_init_sarif_file (diagnostic_context *context, + bool formatted, const char *base_file_name); extern void diagnostic_output_format_init_sarif_stream (diagnostic_context *context, + bool formatted, FILE *stream); /* Compute the number of digits in the 
decimal representation of an integer. */ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 32f535e1ed46..8e9204302d19 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -306,6 +306,7 @@ Objective-C and Objective-C++ Dialects}. -fdiagnostics-color=@r{[}auto@r{|}never@r{|}always@r{]} -fdiagnostics-urls=@r{[}auto@r{|}never@r{|}always@r{]} -fdiagnostics-format=@r{[}text@r{|}sarif-stderr@r{|}sarif-file@r{|}json@r{|}json-stderr@r{|}json-file@r{]} +-fno-diagnostics-json-formatting -fno-diagnostics-show-option -fno-diagnostics-show-caret -fno-diagnostics-show-labels -fno-diagnostics-show-line-numbers -fno-diagnostics-show-cwe @@ -5754,8 +5755,7 @@ where the JSON is emitted to - with the former, the JSON is emitted to stderr, whereas with @samp{json-file} it is written to @file{@var{source}.gcc.json}. The emitted JSON consists of a top-level JSON array containing JSON objects -representing the diagnostics. The JSON is emitted as one line, without -formatting; the examples below have been formatted for clarity. +representing the diagnostics. Diagnostics can have child diagnostics. For example, this error and note: @@ -6018,6 +6018,24 @@ Diagnostics have a boolean attribute @code{escape-source}, hinting whether non-ASCII bytes should be escaped when printing the pertinent lines of source code (@code{true} for diagnostics involving source encoding issues). +@opindex fno-diagnostics-json-formatting +@opindex fdiagnostics-json-formatting +@item -fno-diagnostics-json-formatting +By default, when JSON is emitted for diagnostics (via +@option{-fdiagnostics-format=sarif-stderr}, +@option{-fdiagnostics-format=sarif-file}, +@option{-fdiagnostics-format=json}, +@option{-fdiagnostics-format=json-stderr}, +@option{-fdiagnostics-format=json-file}), +GCC will add newlines and indentation to visually emphasize the +hierarchical structure of the JSON. + +Use @option{-fno-diagnostics-json-formatting} to suppress this whitespace. 
+It must be passed before the option it is to affect. + +This is intended for compatibility with tools that do not expect the output +to contain newlines, such as that emitted by older GCC releases. + @end table @node Warning Options diff --git a/gcc/gcc.cc b/gcc/gcc.cc index 03ec6e1cb2fa..d73fb0414e98 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -4357,7 +4357,8 @@ driver_handle_option (struct gcc_options *opts, const char *basename = (opts->x_dump_base_name ? opts->x_dump_base_name : opts->x_main_input_basename); diagnostic_output_format_init (dc, basename, - (enum diagnostics_output_format)value); + (enum diagnostics_output_format)value, + opts->x_flag_diagnostics_json_formatting); break; } diff --git a/gcc/gcov.cc b/gcc/gcov.cc index 8b4748d3a1a8..6d4a7bde53ab 100644 --- a/gcc/gcov.cc +++ b/gcc/gcov.cc @@ -1600,13 +1600,13 @@ generate_results (const char *file_name) { if (flag_use_stdout) { - root->dump (stdout); + root->dump (stdout, false); printf ("\n"); } else { pretty_printer pp; - root->print (&pp); + root->print (&pp, false); pp_formatted_text (&pp); fnotice (stdout, "Creating '%s'\n", diff --git a/gcc/json.cc b/gcc/json.cc index d0f157f0dfe7..90ddd7ab3b15 100644 --- a/gcc/json.cc +++ b/gcc/json.cc @@ -32,17 +32,15 @@ using namespace json; /* Dump this json::value tree to OUTF. - No formatting is done. - The key/value pairs of json::objects are printed in the order in which the keys were originally inserted. */ void -value::dump (FILE *outf) const +value::dump (FILE *outf, bool formatted) const { pretty_printer pp; pp_buffer (&pp)->stream = outf; - print (&pp); + print (&pp, formatted); pp_flush (&pp); } @@ -63,9 +61,11 @@ object::~object () /* Implementation of json::value::print for json::object. */ void -object::print (pretty_printer *pp) const +object::print (pretty_printer *pp, bool formatted) const { pp_character (pp, '{'); + if (formatted) + pp_indentation (pp) += 1; /* Iterate in the order that the keys were inserted. 
*/ unsigned i; @@ -73,15 +73,31 @@ object::print (pretty_printer *pp) const FOR_EACH_VEC_ELT (m_keys, i, key) { if (i > 0) - pp_string (pp, ", "); + { + pp_string (pp, ","); + if (formatted) + { + pp_newline (pp); + pp_indent (pp); + } + else + pp_space (pp); + } map_t &mut_map = const_cast (m_map); value *value = *mut_map.get (key); pp_doublequote (pp); pp_string (pp, key); // FIXME: escaping? pp_doublequote (pp); pp_string (pp, ": "); - value->print (pp); + const int indent = strlen (key) + 4; + if (formatted) + pp_indentation (pp) += indent; + value->print (pp, formatted); + if (formatted) + pp_indentation (pp) -= indent; } + if (formatted) + pp_indentation (pp) -= 1; pp_character (pp, '}'); } @@ -180,17 +196,30 @@ array::~array () /* Implementation of json::value::print for json::array. */ void -array::print (pretty_printer *pp) const +array::print (pretty_printer *pp, bool formatted) const { pp_character (pp, '['); + if (formatted) + pp_indentation (pp) += 1; unsigned i; value *v; FOR_EACH_VEC_ELT (m_elements, i, v) { if (i) - pp_string (pp, ", "); - v->print (pp); + { + pp_string (pp, ","); + if (formatted) + { + pp_newline (pp); + pp_indent (pp); + } + else + pp_space (pp); + } + v->print (pp, formatted); } + if (formatted) + pp_indentation (pp) -= 1; pp_character (pp, ']'); } @@ -208,7 +237,8 @@ array::append (value *v) /* Implementation of json::value::print for json::float_number. */ void -float_number::print (pretty_printer *pp) const +float_number::print (pretty_printer *pp, + bool formatted ATTRIBUTE_UNUSED) const { char tmp[1024]; snprintf (tmp, sizeof (tmp), "%g", m_value); @@ -220,7 +250,8 @@ float_number::print (pretty_printer *pp) const /* Implementation of json::value::print for json::integer_number. 
*/ void -integer_number::print (pretty_printer *pp) const +integer_number::print (pretty_printer *pp, + bool formatted ATTRIBUTE_UNUSED) const { char tmp[1024]; snprintf (tmp, sizeof (tmp), "%ld", m_value); @@ -250,7 +281,8 @@ string::string (const char *utf8, size_t len) /* Implementation of json::value::print for json::string. */ void -string::print (pretty_printer *pp) const +string::print (pretty_printer *pp, + bool formatted ATTRIBUTE_UNUSED) const { pp_character (pp, '"'); for (size_t i = 0; i != m_len; ++i) @@ -294,7 +326,8 @@ string::print (pretty_printer *pp) const /* Implementation of json::value::print for json::literal. */ void -literal::print (pretty_printer *pp) const +literal::print (pretty_printer *pp, + bool formatted ATTRIBUTE_UNUSED) const { switch (m_kind) { @@ -322,15 +355,18 @@ namespace selftest { /* Verify that JV->print () prints EXPECTED_JSON. */ static void -assert_print_eq (const location &loc, const json::value &jv, const char *expected_json) +assert_print_eq (const location &loc, + const json::value &jv, + bool formatted, + const char *expected_json) { pretty_printer pp; - jv.print (&pp); + jv.print (&pp, formatted); ASSERT_STREQ_AT (loc, expected_json, pp_formatted_text (&pp)); } -#define ASSERT_PRINT_EQ(JV, EXPECTED_JSON) \ - assert_print_eq (SELFTEST_LOCATION, JV, EXPECTED_JSON) +#define ASSERT_PRINT_EQ(JV, FORMATTED, EXPECTED_JSON) \ + assert_print_eq (SELFTEST_LOCATION, JV, FORMATTED, EXPECTED_JSON) /* Verify that object::get works as expected. */ @@ -354,7 +390,11 @@ test_writing_objects () obj.set_string ("baz", "quux"); /* This test relies on json::object writing out key/value pairs in key-insertion order. */ - ASSERT_PRINT_EQ (obj, "{\"foo\": \"bar\", \"baz\": \"quux\"}"); + ASSERT_PRINT_EQ (obj, true, + "{\"foo\": \"bar\",\n" + " \"baz\": \"quux\"}"); + ASSERT_PRINT_EQ (obj, false, + "{\"foo\": \"bar\", \"baz\": \"quux\"}"); } /* Verify that JSON arrays are written correctly. 
*/ @@ -363,13 +403,17 @@ static void test_writing_arrays () { array arr; - ASSERT_PRINT_EQ (arr, "[]"); + ASSERT_PRINT_EQ (arr, true, "[]"); arr.append (new json::string ("foo")); - ASSERT_PRINT_EQ (arr, "[\"foo\"]"); + ASSERT_PRINT_EQ (arr, true, "[\"foo\"]"); arr.append (new json::string ("bar")); - ASSERT_PRINT_EQ (arr, "[\"foo\", \"bar\"]"); + ASSERT_PRINT_EQ (arr, true, + "[\"foo\",\n" + " \"bar\"]"); + ASSERT_PRINT_EQ (arr, false, + "[\"foo\", \"bar\"]"); } /* Verify that JSON numbers are written correctly. */ @@ -377,20 +421,20 @@ test_writing_arrays () static void test_writing_float_numbers () { - ASSERT_PRINT_EQ (float_number (0), "0"); - ASSERT_PRINT_EQ (float_number (42), "42"); - ASSERT_PRINT_EQ (float_number (-100), "-100"); - ASSERT_PRINT_EQ (float_number (123456789), "1.23457e+08"); + ASSERT_PRINT_EQ (float_number (0), true, "0"); + ASSERT_PRINT_EQ (float_number (42), true, "42"); + ASSERT_PRINT_EQ (float_number (-100), true, "-100"); + ASSERT_PRINT_EQ (float_number (123456789), true, "1.23457e+08"); } static void test_writing_integer_numbers () { - ASSERT_PRINT_EQ (integer_number (0), "0"); - ASSERT_PRINT_EQ (integer_number (42), "42"); - ASSERT_PRINT_EQ (integer_number (-100), "-100"); - ASSERT_PRINT_EQ (integer_number (123456789), "123456789"); - ASSERT_PRINT_EQ (integer_number (-123456789), "-123456789"); + ASSERT_PRINT_EQ (integer_number (0), true, "0"); + ASSERT_PRINT_EQ (integer_number (42), true, "42"); + ASSERT_PRINT_EQ (integer_number (-100), true, "-100"); + ASSERT_PRINT_EQ (integer_number (123456789), true, "123456789"); + ASSERT_PRINT_EQ (integer_number (-123456789), true, "-123456789"); } /* Verify that JSON strings are written correctly. 
*/ @@ -399,16 +443,16 @@ static void test_writing_strings () { string foo ("foo"); - ASSERT_PRINT_EQ (foo, "\"foo\""); + ASSERT_PRINT_EQ (foo, true, "\"foo\""); string contains_quotes ("before \"quoted\" after"); - ASSERT_PRINT_EQ (contains_quotes, "\"before \\\"quoted\\\" after\""); + ASSERT_PRINT_EQ (contains_quotes, true, "\"before \\\"quoted\\\" after\""); const char data[] = {'a', 'b', 'c', 'd', '\0', 'e', 'f'}; string not_terminated (data, 3); - ASSERT_PRINT_EQ (not_terminated, "\"abc\""); + ASSERT_PRINT_EQ (not_terminated, true, "\"abc\""); string embedded_null (data, sizeof data); - ASSERT_PRINT_EQ (embedded_null, "\"abcd\\0ef\""); + ASSERT_PRINT_EQ (embedded_null, true, "\"abcd\\0ef\""); } /* Verify that JSON literals are written correctly. */ @@ -416,12 +460,50 @@ test_writing_strings () static void test_writing_literals () { - ASSERT_PRINT_EQ (literal (JSON_TRUE), "true"); - ASSERT_PRINT_EQ (literal (JSON_FALSE), "false"); - ASSERT_PRINT_EQ (literal (JSON_NULL), "null"); + ASSERT_PRINT_EQ (literal (JSON_TRUE), true, "true"); + ASSERT_PRINT_EQ (literal (JSON_FALSE), true, "false"); + ASSERT_PRINT_EQ (literal (JSON_NULL), true, "null"); - ASSERT_PRINT_EQ (literal (true), "true"); - ASSERT_PRINT_EQ (literal (false), "false"); + ASSERT_PRINT_EQ (literal (true), true, "true"); + ASSERT_PRINT_EQ (literal (false), true, "false"); +} + +/* Verify that nested values are formatted correctly when written. */ + +static void +test_formatting () +{ + object obj; + object *child = new object; + object *grandchild = new object; + + obj.set_string ("str", "bar"); + obj.set ("child", child); + obj.set_integer ("int", 42); + + child->set ("grandchild", grandchild); + child->set_integer ("int", 1776); + + array *arr = new array; + for (int i = 0; i < 3; i++) + arr->append (new integer_number (i)); + grandchild->set ("arr", arr); + grandchild->set_integer ("int", 1066); + + /* This test relies on json::object writing out key/value pairs + in key-insertion order. 
*/ + ASSERT_PRINT_EQ (obj, true, + ("{\"str\": \"bar\",\n" + " \"child\": {\"grandchild\": {\"arr\": [0,\n" + " 1,\n" + " 2],\n" + " \"int\": 1066},\n" + " \"int\": 1776},\n" + " \"int\": 42}")); + ASSERT_PRINT_EQ (obj, false, + ("{\"str\": \"bar\", \"child\": {\"grandchild\":" + " {\"arr\": [0, 1, 2], \"int\": 1066}," + " \"int\": 1776}, \"int\": 42}")); } /* Run all of the selftests within this file. */ @@ -436,6 +518,7 @@ json_cc_tests () test_writing_integer_numbers (); test_writing_strings (); test_writing_literals (); + test_formatting (); } } // namespace selftest diff --git a/gcc/json.h b/gcc/json.h index 6fadd119ba57..862e5676a636 100644 --- a/gcc/json.h +++ b/gcc/json.h @@ -80,9 +80,9 @@ class value public: virtual ~value () {} virtual enum kind get_kind () const = 0; - virtual void print (pretty_printer *pp) const = 0; + virtual void print (pretty_printer *pp, bool formatted) const = 0; - void dump (FILE *) const; + void dump (FILE *, bool formatted) const; }; /* Subclass of value for objects: a collection of key/value pairs @@ -97,7 +97,7 @@ class object : public value ~object (); enum kind get_kind () const final override { return JSON_OBJECT; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; void set (const char *key, value *v); value *get (const char *key) const; @@ -126,7 +126,7 @@ class array : public value ~array (); enum kind get_kind () const final override { return JSON_ARRAY; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; void append (value *v); @@ -142,7 +142,7 @@ class float_number : public value float_number (double value) : m_value (value) {} enum kind get_kind () const final override { return JSON_FLOAT; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; double get () const { return m_value; } @@ 
-158,7 +158,7 @@ class integer_number : public value integer_number (long value) : m_value (value) {} enum kind get_kind () const final override { return JSON_INTEGER; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; long get () const { return m_value; } @@ -177,7 +177,7 @@ class string : public value ~string () { free (m_utf8); } enum kind get_kind () const final override { return JSON_STRING; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; const char *get_string () const { return m_utf8; } size_t get_length () const { return m_len; } @@ -199,7 +199,7 @@ class literal : public value literal (bool value): m_kind (value ? JSON_TRUE : JSON_FALSE) {} enum kind get_kind () const final override { return m_kind; } - void print (pretty_printer *pp) const final override; + void print (pretty_printer *pp, bool formatted) const final override; private: enum kind m_kind; diff --git a/gcc/optinfo-emit-json.cc b/gcc/optinfo-emit-json.cc index 11cad42a4330..b181d6fb15d2 100644 --- a/gcc/optinfo-emit-json.cc +++ b/gcc/optinfo-emit-json.cc @@ -103,7 +103,7 @@ void optrecord_json_writer::write () const { pretty_printer pp; - m_root_tuple->print (&pp); + m_root_tuple->print (&pp, false); bool emitted_error = false; char *filename = concat (dump_base_name, ".opt-record.json.gz", NULL); @@ -466,7 +466,7 @@ test_building_json_from_dump_calls () /* Verify that the json is sane. 
*/ pretty_printer pp; - json_obj->print (&pp); + json_obj->print (&pp, false); const char *json_str = pp_formatted_text (&pp); ASSERT_STR_CONTAINS (json_str, "impl_location"); ASSERT_STR_CONTAINS (json_str, "\"kind\": \"note\""); diff --git a/gcc/opts.cc b/gcc/opts.cc index 5d5efaf1b9eb..7a3830caaa31 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -2949,7 +2949,8 @@ common_handle_option (struct gcc_options *opts, const char *basename = (opts->x_dump_base_name ? opts->x_dump_base_name : opts->x_main_input_basename); diagnostic_output_format_init (dc, basename, - (enum diagnostics_output_format)value); + (enum diagnostics_output_format)value, + opts->x_flag_diagnostics_json_formatting); break; } diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-1.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-1.c index 6bab30e3e6cb..c95218c3cfe3 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-1.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-1.c @@ -3,28 +3,20 @@ #error message -/* Use dg-regexp to consume the JSON output starting with - the innermost values, and working outwards. 
*/ - -/* { dg-regexp "\"kind\": \"error\"" } */ -/* { dg-regexp "\"column-origin\": 1" } */ -/* { dg-regexp "\"escape-source\": false" } */ -/* { dg-regexp "\"message\": \"#error message\"" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-1.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 2" } */ -/* { dg-regexp "\"display-column\": 2" } */ -/* { dg-regexp "\"byte-column\": 2" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-1.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 6" } */ -/* { dg-regexp "\"display-column\": 6" } */ -/* { dg-regexp "\"byte-column\": 6" } */ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ -/* { dg-regexp "\"children\": \[\[\]\[\]\]" } */ -/* { dg-regexp "\[\[\{\}, \]*\]" } */ +/* { dg-begin-multiline-output "" } +[{"kind": "error", + "message": "#error message", + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 6, + "byte-column": 6, + "column": 6}}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-2.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-2.c index 3c12103c9f84..a8828b7b2df7 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-2.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-2.c @@ -3,30 +3,24 @@ #warning message -/* Use dg-regexp to consume the JSON output starting with - the innermost values, and working outwards. 
*/ - -/* { dg-regexp "\"kind\": \"warning\"" } */ -/* { dg-regexp "\"column-origin\": 1" } */ -/* { dg-regexp "\"escape-source\": false" } */ -/* { dg-regexp "\"message\": \"#warning message\"" } */ -/* { dg-regexp "\"option\": \"-Wcpp\"" } */ -/* { dg-regexp "\"option_url\": \"https:\[^\n\r\"\]*#index-Wcpp\"" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-2.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 2" } */ -/* { dg-regexp "\"display-column\": 2" } */ -/* { dg-regexp "\"byte-column\": 2" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-2.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 8" } */ -/* { dg-regexp "\"display-column\": 8" } */ -/* { dg-regexp "\"byte-column\": 8" } */ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ -/* { dg-regexp "\"children\": \[\[\]\[\]\]" } */ -/* { dg-regexp "\[\[\{\}, \]*\]" } */ +/* { dg-begin-multiline-output "" } +[{"kind": "warning", + "message": "#warning message", + "option": "-Wcpp", + { dg-end-multiline-output "" } */ +/* { dg-regexp " \"option_url\": \"https:\[^\n\r\"\]*#index-Wcpp\",\n" } */ +/* { dg-begin-multiline-output "" } + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 8, + "byte-column": 8, + "column": 8}}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-3.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-3.c index 11d74624ff16..178bbf94b5b2 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-3.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-3.c @@ -3,30 +3,24 @@ #warning message -/* Use dg-regexp to consume the JSON output starting with - the innermost values, and working 
outwards. */ - -/* { dg-regexp "\"kind\": \"error\"" } */ -/* { dg-regexp "\"column-origin\": 1" } */ -/* { dg-regexp "\"escape-source\": false" } */ -/* { dg-regexp "\"message\": \"#warning message\"" } */ -/* { dg-regexp "\"option\": \"-Werror=cpp\"" } */ -/* { dg-regexp "\"option_url\": \"https:\[^\n\r\"\]*#index-Wcpp\"" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-3.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 2" } */ -/* { dg-regexp "\"display-column\": 2" } */ -/* { dg-regexp "\"byte-column\": 2" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-3.c\"" } */ -/* { dg-regexp "\"line\": 4" } */ -/* { dg-regexp "\"column\": 8" } */ -/* { dg-regexp "\"display-column\": 8" } */ -/* { dg-regexp "\"byte-column\": 8" } */ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ -/* { dg-regexp "\"children\": \[\[\]\[\]\]" } */ -/* { dg-regexp "\[\[\{\}, \]*\]" } */ +/* { dg-begin-multiline-output "" } +[{"kind": "error", + "message": "#warning message", + "option": "-Werror=cpp", + { dg-end-multiline-output "" } */ +/* { dg-regexp " \"option_url\": \"https:\[^\n\r\"\]*#index-Wcpp\",\n" } */ +/* { dg-begin-multiline-output "" } + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 8, + "byte-column": 8, + "column": 8}}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-4.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-4.c index cec1cf924b4f..899a03f0e5e5 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-4.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-4.c @@ -9,63 +9,36 @@ int test (void) return 5; } -/* Use dg-regexp to consume the JSON output starting with - the 
innermost values, and working outwards. */ - -/* Verify nested diagnostics. */ - -/* The nested diagnostic. */ - -/* { dg-regexp "\"kind\": \"note\"" } */ -/* { dg-regexp "\"message\": \"...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'\"" } */ -/* { dg-regexp "\"escape-source\": false" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-4.c\"" } */ -/* { dg-regexp "\"line\": 8" } */ -/* { dg-regexp "\"column\": 5" } */ -/* { dg-regexp "\"display-column\": 5" } */ -/* { dg-regexp "\"byte-column\": 5" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-4.c\"" } */ -/* { dg-regexp "\"line\": 8" } */ -/* { dg-regexp "\"column\": 10" } */ -/* { dg-regexp "\"display-column\": 10" } */ -/* { dg-regexp "\"byte-column\": 10" } */ - -/* The outer diagnostic. */ - -/* { dg-regexp "\"kind\": \"warning\"" } */ -/* { dg-regexp "\"column-origin\": 1" } */ -/* { dg-regexp "\"message\": \"this 'if' clause does not guard...\"" } */ -/* { dg-regexp "\"escape-source\": false" } */ -/* { dg-regexp "\"option\": \"-Wmisleading-indentation\"" } */ -/* { dg-regexp "\"option_url\": \"https:\[^\n\r\"\]*#index-Wmisleading-indentation\"" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-4.c\"" } */ -/* { dg-regexp "\"line\": 6" } */ -/* { dg-regexp "\"column\": 3" } */ -/* { dg-regexp "\"display-column\": 3" } */ -/* { dg-regexp "\"byte-column\": 3" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-4.c\"" } */ -/* { dg-regexp "\"line\": 6" } */ -/* { dg-regexp "\"column\": 4" } */ -/* { dg-regexp "\"display-column\": 4" } */ -/* { dg-regexp "\"byte-column\": 4" } */ - -/* More from the nested diagnostic (we can't guarantee what order the - "file" keys are consumed). 
*/ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ - -/* More from the outer diagnostic. */ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ - -/* { dg-regexp "\"children\": \[\[\{\}, \]*\]" } */ -/* { dg-regexp "\[\[\{\}, \]*\]" } */ - +/* { dg-begin-multiline-output "" } +[{"kind": "warning", + "message": "this 'if' clause does not guard...", + "option": "-Wmisleading-indentation", + { dg-end-multiline-output "" } */ +/* { dg-regexp " \"option_url\": \"https:\[^\n\r\"\]*#index-Wmisleading-indentation\",\n" } */ +/* { dg-begin-multiline-output "" } + "children": [{"kind": "note", + "message": "...this statement, but the latter is misleadingly indented as if it were guarded by the 'if'", + "locations": [{"caret": {"file": + "line": 8, + "display-column": 5, + "byte-column": 5, + "column": 5}, + "finish": {"file": + "line": 8, + "display-column": 10, + "byte-column": 10, + "column": 10}}], + "escape-source": false}], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 6, + "display-column": 3, + "byte-column": 3, + "column": 3}, + "finish": {"file": + "line": 6, + "display-column": 4, + "byte-column": 4, + "column": 4}}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-5.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-5.c index 86f8c5fb3746..ed3139c7f1b2 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-5.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-5.c @@ -8,61 +8,31 @@ int test (struct s *ptr) return ptr->colour; } -/* Verify fix-it hints. - - Use dg-regexp to consume the JSON output from start to - finish, relying on the ordering of the keys. - The following uses indentation to visualize the structure - of the JSON (although the actual output is all on one line). 
- - { dg-regexp {\[} } - { dg-regexp {\{} } - { dg-regexp {"kind": "error"} } - { dg-regexp {, "message": "'struct s' has no member named 'colour'; did you mean 'color'\?"} } - { dg-regexp {, "children": \[\]} } - { dg-regexp {, "column-origin": 1} } - { dg-regexp {, "locations": } } - { dg-regexp {\[} } - { dg-regexp {\{} } - { dg-regexp {"caret": } } - { dg-regexp {\{} } - { dg-regexp {"file": "[^\n\r"]*diagnostic-format-json-5.c"} } - { dg-regexp {, "line": 8} } - { dg-regexp {, "display-column": 15} } - { dg-regexp {, "byte-column": 15} } - { dg-regexp {, "column": 15} } - { dg-regexp {\}} } - { dg-regexp {, "finish": } } - { dg-regexp {\{} } - { dg-regexp {"file": "[^\n\r"]*diagnostic-format-json-5.c"} } - { dg-regexp {, "line": 8} } - { dg-regexp {, "display-column": 20} } - { dg-regexp {, "byte-column": 20} } - { dg-regexp {, "column": 20} } - { dg-regexp {\}} } - { dg-regexp {\}} } - { dg-regexp {\]} } - { dg-regexp {, "fixits": } } - { dg-regexp {\[} } - { dg-regexp {\{} } - { dg-regexp {"start": } } - { dg-regexp {\{} } - { dg-regexp {"file": "[^\n\r"]*diagnostic-format-json-5.c"} } - { dg-regexp {, "line": 8} } - { dg-regexp {, "display-column": 15} } - { dg-regexp {, "byte-column": 15} } - { dg-regexp {, "column": 15} } - { dg-regexp {\}} } - { dg-regexp {, "next": } } - { dg-regexp {\{} } - { dg-regexp {"file": "[^\n\r"]*diagnostic-format-json-5.c"} } - { dg-regexp {, "line": 8} } - { dg-regexp {, "display-column": 21} } - { dg-regexp {, "byte-column": 21} } - { dg-regexp {, "column": 21} } - { dg-regexp {\}} } - { dg-regexp {, "string": "color"} } - { dg-regexp {\}} } - { dg-regexp {\]} } - { dg-regexp {, "escape-source": false\}} } - { dg-regexp {\]} } */ +/* { dg-begin-multiline-output "" } +[{"kind": "error", + "message": "'struct s' has no member named 'colour'; did you mean 'color'?", + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 8, + "display-column": 15, + "byte-column": 15, + "column": 15}, + "finish": 
{"file": + "line": 8, + "display-column": 20, + "byte-column": 20, + "column": 20}}], + "fixits": [{"start": {"file": + "line": 8, + "display-column": 15, + "byte-column": 15, + "column": 15}, + "next": {"file": + "line": 8, + "display-column": 21, + "byte-column": 21, + "column": 21}, + "string": "color"}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/c-c++-common/diagnostic-format-json-stderr-1.c b/gcc/testsuite/c-c++-common/diagnostic-format-json-stderr-1.c index bcfa92110f54..e798c6b21e1e 100644 --- a/gcc/testsuite/c-c++-common/diagnostic-format-json-stderr-1.c +++ b/gcc/testsuite/c-c++-common/diagnostic-format-json-stderr-1.c @@ -5,28 +5,20 @@ #error message -/* Use dg-regexp to consume the JSON output starting with - the innermost values, and working outwards. */ - -/* { dg-regexp "\"kind\": \"error\"" } */ -/* { dg-regexp "\"column-origin\": 1" } */ -/* { dg-regexp "\"escape-source\": false" } */ -/* { dg-regexp "\"message\": \"#error message\"" } */ - -/* { dg-regexp "\"caret\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-stderr-1.c\"" } */ -/* { dg-regexp "\"line\": 6" } */ -/* { dg-regexp "\"column\": 2" } */ -/* { dg-regexp "\"display-column\": 2" } */ -/* { dg-regexp "\"byte-column\": 2" } */ - -/* { dg-regexp "\"finish\": \{" } */ -/* { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-stderr-1.c\"" } */ -/* { dg-regexp "\"line\": 6" } */ -/* { dg-regexp "\"column\": 6" } */ -/* { dg-regexp "\"display-column\": 6" } */ -/* { dg-regexp "\"byte-column\": 6" } */ - -/* { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } */ -/* { dg-regexp "\"children\": \[\[\]\[\]\]" } */ -/* { dg-regexp "\[\[\{\}, \]*\]" } */ +/* { dg-begin-multiline-output "" } +[{"kind": "error", + "message": "#error message", + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 6, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 6, + 
"display-column": 6, + "byte-column": 6, + "column": 6}}], + "escape-source": false}] + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/g++.dg/pr90462.C b/gcc/testsuite/g++.dg/pr90462.C index 2585ba0dcdba..b35e41921a60 100644 --- a/gcc/testsuite/g++.dg/pr90462.C +++ b/gcc/testsuite/g++.dg/pr90462.C @@ -1,4 +1,4 @@ -/* { dg-options "-Wdeprecated-copy -fdiagnostics-format=json" } */ +/* { dg-options "-Wdeprecated-copy -fno-diagnostics-json-formatting -fdiagnostics-format=json" } */ template class b; struct B { diff --git a/gcc/testsuite/gcc.dg/analyzer/malloc-sarif-1.c b/gcc/testsuite/gcc.dg/analyzer/malloc-sarif-1.c index 3d798e687e6f..19ac89f2b67c 100644 --- a/gcc/testsuite/gcc.dg/analyzer/malloc-sarif-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/malloc-sarif-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-fdiagnostics-format=sarif-file" } */ +/* { dg-additional-options " -fno-diagnostics-json-formatting -fdiagnostics-format=sarif-file" } */ #include diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-3.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-3.c index 6971d7cb38b6..a315d208cab7 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-3.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-3.c @@ -32,7 +32,44 @@ make_a_list_of_random_ints_badly(PyObject *self, return list; } -/* FIXME: test the events within a path. 
*/ -/* { dg-regexp "\"kind\": \"error\"" } */ -/* { dg-regexp "\"path\": " } */ -/* { dg-regexp ".*" } */ +/* { dg-begin-multiline-output "" } +[{"kind": "error", + "message": "passing NULL as argument 1 to 'PyList_Append' which requires a non-NULL parameter", + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": " + "line": 29, + "display-column": 5, + "byte-column": 5, + "column": 5}, + "finish": {"file": " + "line": 29, + "display-column": 29, + "byte-column": 29, + "column": 29}}], + "path": [{"location": {"file": " + "line": 25, + "display-column": 10, + "byte-column": 10, + "column": 10}, + "description": "when 'PyList_New' fails, returning NULL", + "function": "make_a_list_of_random_ints_badly", + "depth": 0}, + {"location": {"file": " + "line": 27, + "display-column": 17, + "byte-column": 17, + "column": 17}, + "description": "when 'i < count'", + "function": "make_a_list_of_random_ints_badly", + "depth": 0}, + {"location": {"file": " + "line": 29, + "display-column": 5, + "byte-column": 5, + "column": 5}, + "description": "when calling 'PyList_Append', passing NULL from (1) as argument 1", + "function": "make_a_list_of_random_ints_badly", + "depth": 0}], + "escape-source": false}] +{ dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gfortran.dg/diagnostic-format-json-1.F90 b/gcc/testsuite/gfortran.dg/diagnostic-format-json-1.F90 index 2993f7c852bf..b8cd61cff23a 100644 --- a/gcc/testsuite/gfortran.dg/diagnostic-format-json-1.F90 +++ b/gcc/testsuite/gfortran.dg/diagnostic-format-json-1.F90 @@ -3,29 +3,22 @@ #error message -! Use dg-regexp to consume the JSON output starting with -! the innermost values, and working outwards. -! We can't rely on any ordering of the keys. - -! { dg-regexp "\"kind\": \"error\"" } -! { dg-regexp "\"column-origin\": 1" } -! { dg-regexp "\"escape-source\": false" } -! { dg-regexp "\"message\": \"#error message\"" } - -! { dg-regexp "\"caret\": \{" } -! 
{ dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-1.F90\"" } -! { dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 2" } -! { dg-regexp "\"display-column\": 2" } -! { dg-regexp "\"byte-column\": 2" } - -! { dg-regexp "\"finish\": \{" } -! { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-1.F90\"" } -! { dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 6" } -! { dg-regexp "\"display-column\": 6" } -! { dg-regexp "\"byte-column\": 6" } - -! { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } -! { dg-regexp "\"children\": \[\[\]\[\]\]" } -! { dg-regexp "\[\[\{\}, \]*\]" } +#if 0 +{ dg-begin-multiline-output "" } +[{"kind": "error", + "message": "#error message", + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 6, + "byte-column": 6, + "column": 6}}], + "escape-source": false}] +{ dg-end-multiline-output "" } +#endif diff --git a/gcc/testsuite/gfortran.dg/diagnostic-format-json-2.F90 b/gcc/testsuite/gfortran.dg/diagnostic-format-json-2.F90 index 1681462fa086..9ff1ef59b343 100644 --- a/gcc/testsuite/gfortran.dg/diagnostic-format-json-2.F90 +++ b/gcc/testsuite/gfortran.dg/diagnostic-format-json-2.F90 @@ -3,31 +3,24 @@ #warning message -! Use dg-regexp to consume the JSON output starting with -! the innermost values, and working outwards. -! We can't rely on any ordering of the keys. - -! { dg-regexp "\"kind\": \"warning\"" } -! { dg-regexp "\"column-origin\": 1" } -! { dg-regexp "\"escape-source\": false" } -! { dg-regexp "\"message\": \"#warning message\"" } -! { dg-regexp "\"option\": \"-Wcpp\"" } -! { dg-regexp "\"option_url\": \"\[^\n\r\"\]*#index-Wcpp\"" } - -! { dg-regexp "\"caret\": \{" } -! { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-2.F90\"" } -! { dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 2" } -! { dg-regexp "\"display-column\": 2" } -! 
{ dg-regexp "\"byte-column\": 2" } - -! { dg-regexp "\"finish\": \{" } -! { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-2.F90\"" } -! { dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 8" } -! { dg-regexp "\"display-column\": 8" } -! { dg-regexp "\"byte-column\": 8" } - -! { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } -! { dg-regexp "\"children\": \[\[\]\[\]\]" } -! { dg-regexp "\[\[\{\}, \]*\]" } +#if 0 +{ dg-begin-multiline-output "" } +[{"kind": "warning", + "message": "#warning message", + "option": "-Wcpp", + "option_url": + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 8, + "byte-column": 8, + "column": 8}}], + "escape-source": false}] +{ dg-end-multiline-output "" } +#endif diff --git a/gcc/testsuite/gfortran.dg/diagnostic-format-json-3.F90 b/gcc/testsuite/gfortran.dg/diagnostic-format-json-3.F90 index f0a67de76b08..750e186c8acf 100644 --- a/gcc/testsuite/gfortran.dg/diagnostic-format-json-3.F90 +++ b/gcc/testsuite/gfortran.dg/diagnostic-format-json-3.F90 @@ -3,31 +3,24 @@ #warning message -! Use dg-regexp to consume the JSON output starting with -! the innermost values, and working outwards. -! We can't rely on any ordering of the keys. - -! { dg-regexp "\"kind\": \"error\"" } -! { dg-regexp "\"column-origin\": 1" } -! { dg-regexp "\"escape-source\": false" } -! { dg-regexp "\"message\": \"#warning message\"" } -! { dg-regexp "\"option\": \"-Werror=cpp\"" } -! { dg-regexp "\"option_url\": \"\[^\n\r\"\]*#index-Wcpp\"" } - -! { dg-regexp "\"caret\": \{" } -! { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-3.F90\"" } -! { dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 2" } -! { dg-regexp "\"display-column\": 2" } -! { dg-regexp "\"byte-column\": 2" } - -! { dg-regexp "\"finish\": \{" } -! { dg-regexp "\"file\": \"\[^\n\r\"\]*diagnostic-format-json-3.F90\"" } -! 
{ dg-regexp "\"line\": 4" } -! { dg-regexp "\"column\": 8" } -! { dg-regexp "\"display-column\": 8" } -! { dg-regexp "\"byte-column\": 8" } - -! { dg-regexp "\"locations\": \[\[\{\}, \]*\]" } -! { dg-regexp "\"children\": \[\[\]\[\]\]" } -! { dg-regexp "\[\[\{\}, \]*\]" } +#if 0 +{ dg-begin-multiline-output "" } +[{"kind": "error", + "message": "#warning message", + "option": "-Werror=cpp", + "option_url": + "children": [], + "column-origin": 1, + "locations": [{"caret": {"file": + "line": 4, + "display-column": 2, + "byte-column": 2, + "column": 2}, + "finish": {"file": + "line": 4, + "display-column": 8, + "byte-column": 8, + "column": 8}}], + "escape-source": false}] +{ dg-end-multiline-output "" } +#endif From 72bfb4a2d0c1650f842d4c98f03bee488e204068 Mon Sep 17 00:00:00 2001 From: Yang Yujie Date: Wed, 6 Dec 2023 10:47:16 -0700 Subject: [PATCH 002/311] [PATCH] testsuite: Adjust for the new permerror -Wincompatible-pointer-types r14-6037 turned -Wincompatible-pointer-types into a permerror, which causes the following tests to fail. gcc/testsuite/ChangeLog: * gcc.dg/fixed-point/composite-type.c: Replace dg-warning with dg-error. 
--- .../gcc.dg/fixed-point/composite-type.c | 64 +++++++++---------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/gcc/testsuite/gcc.dg/fixed-point/composite-type.c b/gcc/testsuite/gcc.dg/fixed-point/composite-type.c index 59351ff09b39..f91e480bcbfe 100644 --- a/gcc/testsuite/gcc.dg/fixed-point/composite-type.c +++ b/gcc/testsuite/gcc.dg/fixed-point/composite-type.c @@ -68,39 +68,39 @@ FIXED_POINT_COMPOSITE_DECL(_Sat unsigned long long _Accum, Sullk); /* { dg-erro int main() { - FIXED_POINT_COMPOSITE_TEST(short _Fract, sf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Fract, f); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(long _Fract, lf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(long long _Fract, llf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned short _Fract, usf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned _Fract, uf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned long _Fract, ulf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned long long _Fract, ullf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat short _Fract, Ssf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat _Fract, Sf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat long _Fract, Slf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat long long _Fract, Sllf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned short _Fract, Susf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned _Fract, Suf); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long _Fract, Sulf); 
/* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long long _Fract, Sullf); /* { dg-warning "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(short _Fract, sf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Fract, f); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(long _Fract, lf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(long long _Fract, llf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned short _Fract, usf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned _Fract, uf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned long _Fract, ulf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned long long _Fract, ullf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat short _Fract, Ssf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat _Fract, Sf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat long _Fract, Slf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat long long _Fract, Sllf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned short _Fract, Susf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned _Fract, Suf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long _Fract, Sulf); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long long _Fract, Sullf); /* { dg-error "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(short _Accum, sk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Accum, k); /* { dg-warning "incompatible pointer type" } */ - 
FIXED_POINT_COMPOSITE_TEST(long _Accum, lk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(long long _Accum, llk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned short _Accum, usk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned _Accum, uk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned long _Accum, ulk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(unsigned long long _Accum, ullk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat short _Accum, Ssk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat _Accum, Sk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat long _Accum, Slk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat long long _Accum, Sllk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned short _Accum, Susk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned _Accum, Suk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long _Accum, Sulk); /* { dg-warning "incompatible pointer type" } */ - FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long long _Accum, Sullk); /* { dg-warning "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(short _Accum, sk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Accum, k); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(long _Accum, lk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(long long _Accum, llk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned short _Accum, usk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned 
_Accum, uk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned long _Accum, ulk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(unsigned long long _Accum, ullk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat short _Accum, Ssk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat _Accum, Sk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat long _Accum, Slk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat long long _Accum, Sllk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned short _Accum, Susk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned _Accum, Suk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long _Accum, Sulk); /* { dg-error "incompatible pointer type" } */ + FIXED_POINT_COMPOSITE_TEST(_Sat unsigned long long _Accum, Sullk); /* { dg-error "incompatible pointer type" } */ return 0; } From ec266cbb859160aa207b6b292cfd974280ca8ff9 Mon Sep 17 00:00:00 2001 From: Eric Gallager Date: Mon, 4 Dec 2023 10:13:55 -0500 Subject: [PATCH 003/311] remove qmtest-related Makefile targets On GitHub, Joseph Myers (@jsm28 there) says in MentorEmbedded/qmtest#1 that the qmtest-related targets should have been removed long ago. This patch does so. Ref: https://github.com/MentorEmbedded/qmtest/issues/1 gcc/ChangeLog: * Makefile.in: Remove qmtest-related targets. --- gcc/Makefile.in | 53 ------------------------------------------------- 1 file changed, 53 deletions(-) diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 68410a86af5a..f284c1387e27 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3735,7 +3735,6 @@ distclean: clean lang.distclean -rm -f testsuite/*.log testsuite/*.sum -cd testsuite && rm -f x *.x *.x? 
*.exe *.rpo *.o *.s *.S *.cc -cd testsuite && rm -f *.out *.gcov *$(coverageexts) - -rm -rf ${QMTEST_DIR} stamp-qmtest -rm -f .gdbinit configargs.h -rm -f gcov.pod # Delete po/*.gmo only if we are not building in the source directory. @@ -4417,58 +4416,6 @@ check-parallel-% : site.exp fi ; \ fi ) -# QMTest targets - -# The path to qmtest. -QMTEST_PATH=qmtest - -# The flags to pass to qmtest. -QMTESTFLAGS= - -# The flags to pass to "qmtest run". -QMTESTRUNFLAGS=-f none --result-stream dejagnu_stream.DejaGNUStream - -# The command to use to invoke qmtest. -QMTEST=${QMTEST_PATH} ${QMTESTFLAGS} - -# The tests (or suites) to run. -QMTEST_GPP_TESTS=g++ - -# The subdirectory of the OBJDIR that will be used to store the QMTest -# test database configuration and that will be used for temporary -# scratch space during QMTest's execution. -QMTEST_DIR=qmtestsuite - -# Create the QMTest database configuration. -${QMTEST_DIR} stamp-qmtest: - ${QMTEST} -D ${QMTEST_DIR} create-tdb \ - -c gcc_database.GCCDatabase \ - -a srcdir=`cd ${srcdir}/testsuite && ${PWD_COMMAND}` && \ - $(STAMP) stamp-qmtest - -# Create the QMTest context file. -${QMTEST_DIR}/context: stamp-qmtest - rm -f $@ - echo "CompilerTable.languages=c cplusplus" >> $@ - echo "CompilerTable.c_kind=GCC" >> $@ - echo "CompilerTable.c_path=${objdir}/xgcc" >> $@ - echo "CompilerTable.c_options=-B${objdir}/" >> $@ - echo "CompilerTable.cplusplus_kind=GCC" >> $@ - echo "CompilerTable.cplusplus_path=${objdir}/xg++" >> $@ - echo "CompilerTable.cplusplus_options=-B${objdir}/" >> $@ - echo "DejaGNUTest.target=${target_noncanonical}" >> $@ - -# Run the G++ testsuite using QMTest. -qmtest-g++: ${QMTEST_DIR}/context - cd ${QMTEST_DIR} && ${QMTEST} run ${QMTESTRUNFLAGS} -C context \ - -o g++.qmr ${QMTEST_GPP_TESTS} - -# Use the QMTest GUI. -qmtest-gui: ${QMTEST_DIR}/context - cd ${QMTEST_DIR} && ${QMTEST} gui -C context - -.PHONY: qmtest-g++ - # Run Paranoia on real.cc. 
paranoia.o: $(srcdir)/../contrib/paranoia.cc $(CONFIG_H) $(SYSTEM_H) $(TREE_H) From 1bd15d87031e8bf8fe9585fbc166b315303f676c Mon Sep 17 00:00:00 2001 From: Edwin Lu Date: Tue, 5 Dec 2023 16:15:10 -0800 Subject: [PATCH 004/311] RISC-V: Remove xfail from ssa-fre-3.c testcase Ran the test case at 122e7b4f9d0c2d54d865272463a1d812002d0a5c where the xfail was introduced. The test did pass at that hash and has continued to pass since then. Remove the xfail. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/ssa-fre-3.c: Remove xfail Signed-off-by: Edwin Lu --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c index 224dd4f72ef8..b2924837a22d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c @@ -18,4 +18,4 @@ foo (int a, int b) return aa + bb; } -/* { dg-final { scan-tree-dump "Replaced \\\(int\\\) aa_.*with a_" "fre1" { xfail { riscv*-*-* && lp64 } } } } */ +/* { dg-final { scan-tree-dump "Replaced \\\(int\\\) aa_.*with a_" "fre1" } } */ From 16a05fac33ddde7a50da9cb937a9b83ea7c111f6 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Wed, 3 May 2023 12:02:54 +0100 Subject: [PATCH 005/311] aarch64: Add march flags for +the and +d128 arch extensions Given the introduction of optional 128-bit page table descriptor and translation hardening extension support with the Armv9.4-A architecture, this patch introduces the relevant flags to enable the reading and writing of 128-bit system registers. The `+d128' -march modifier enables the use of the following ACLE builtin functions: * __uint128_t __arm_rsr128(const char *special_register); * void __arm_wsr128(const char *special_register, __uint128_t value); and defines the __ARM_FEATURE_SYSREG128 macro to 1. 
Finally, the `rcwmask_el1' and `rcwsmask_el1' 128-bit system register implementations are also reliant on the enablement of the `+the' flag, which is thus also implemented in this patch. gcc/ChangeLog: * config/aarch64/aarch64-c.cc (__ARM_FEATURE_SYSREG128): New. * config/aarch64/aarch64-arches.def (armv8.9-a): New. (armv9.4-a): Likewise. * config/aarch64/aarch64-option-extensions.def (d128): Likewise. (the): Likewise. * config/aarch64/aarch64.h (AARCH64_ISA_V9_4A): Likewise. (AARCH64_ISA_V8_9A): Likewise. (TARGET_ARMV9_4): Likewise. (AARCH64_ISA_D128): Likewise. (AARCH64_ISA_THE): Likewise. (TARGET_D128): Likewise. * doc/invoke.texi (AArch64 Options): Document new -march flags and extensions. --- gcc/config/aarch64/aarch64-arches.def | 2 ++ gcc/config/aarch64/aarch64-c.cc | 1 + gcc/config/aarch64/aarch64-option-extensions.def | 4 ++++ gcc/config/aarch64/aarch64.h | 15 +++++++++++++++ gcc/doc/invoke.texi | 7 +++++++ 5 files changed, 29 insertions(+) diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def index 6b9a19c490ba..1fe6b796001d 100644 --- a/gcc/config/aarch64/aarch64-arches.def +++ b/gcc/config/aarch64/aarch64-arches.def @@ -39,10 +39,12 @@ AARCH64_ARCH("armv8.5-a", generic_armv8_a, V8_5A, 8, (V8_4A, SB, SSBS AARCH64_ARCH("armv8.6-a", generic_armv8_a, V8_6A, 8, (V8_5A, I8MM, BF16)) AARCH64_ARCH("armv8.7-a", generic_armv8_a, V8_7A, 8, (V8_6A, LS64)) AARCH64_ARCH("armv8.8-a", generic_armv8_a, V8_8A, 8, (V8_7A, MOPS)) +AARCH64_ARCH("armv8.9-a", generic_armv8_a, V8_9A, 8, (V8_8A)) AARCH64_ARCH("armv8-r", generic_armv8_a, V8R , 8, (V8_4A)) AARCH64_ARCH("armv9-a", generic_armv9_a, V9A , 9, (V8_5A, SVE2)) AARCH64_ARCH("armv9.1-a", generic_armv9_a, V9_1A, 9, (V8_6A, V9A)) AARCH64_ARCH("armv9.2-a", generic_armv9_a, V9_2A, 9, (V8_7A, V9_1A)) AARCH64_ARCH("armv9.3-a", generic_armv9_a, V9_3A, 9, (V8_8A, V9_2A)) +AARCH64_ARCH("armv9.4-a", generic_armv9_a, V9_4A, 9, (V8_9A, V9_3A)) #undef AARCH64_ARCH diff --git 
a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 18422bb5663e..115a2a8b7568 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -254,6 +254,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); + aarch64_def_or_undef (TARGET_D128, "__ARM_FEATURE_SYSREG128", pfile); aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile); aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index c156d2ee76a1..97f03395ffca 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -159,4 +159,8 @@ AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "") AARCH64_OPT_EXTENSION("sme2", SME2, (SME), (), (), "sme2") +AARCH64_OPT_EXTENSION("d128", D128, (), (), (), "d128") + +AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") + #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index f277e784fc52..f0af0b43358e 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -254,13 +254,17 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) #define AARCH64_ISA_V8_7A (aarch64_isa_flags & AARCH64_FL_V8_7A) #define AARCH64_ISA_V8_8A (aarch64_isa_flags & AARCH64_FL_V8_8A) +#define AARCH64_ISA_V8_9A (aarch64_isa_flags & AARCH64_FL_V8_9A) #define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A) #define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A) #define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2A) #define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A) +#define AARCH64_ISA_V9_4A (aarch64_isa_flags & 
AARCH64_FL_V9_4A) #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) #define AARCH64_ISA_CSSC (aarch64_isa_flags & AARCH64_FL_CSSC) +#define AARCH64_ISA_D128 (aarch64_isa_flags & AARCH64_FL_D128) +#define AARCH64_ISA_THE (aarch64_isa_flags & AARCH64_FL_THE) /* The current function is a normal non-streaming function. */ #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) @@ -450,6 +454,17 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; /* ARMv8.1-A Adv.SIMD support. */ #define TARGET_SIMD_RDMA (TARGET_SIMD && AARCH64_ISA_RDMA) +/* Armv9.4-A features. */ +#define TARGET_ARMV9_4 (AARCH64_ISA_V9_4A) + +/* 128-bit System Registers and Instructions from Armv9.4-a are enabled + through +d128. */ +#define TARGET_D128 (AARCH64_ISA_D128) + +/* Armv8.9-A/9.4-A Translation Hardening Extension system registers are + enabled through +the. */ +#define TARGET_THE (AARCH64_ISA_THE) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8e9204302d19..f5e5546080da 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21045,10 +21045,12 @@ and the features that they enable by default: @item @samp{armv8.6-a} @tab Armv8.6-A @tab @samp{armv8.5-a}, @samp{+bf16}, @samp{+i8mm} @item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64} @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} +@item @samp{armv8.9-a} @tab Armv8.9-a @tab @samp{armv8.8-a} @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} @item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm} @item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64} @item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops} +@item @samp{armv9.4-a} @tab Armv9.4-A @tab @samp{armv9.3-a} @item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r} 
@end multitable @@ -21359,6 +21361,11 @@ Enable the FEAT_SME_I16I64 extension to SME. Enable the FEAT_SME_F64F64 extension to SME. +@item sme2 Enable the Scalable Matrix Extension 2. This also enables SME instructions. +@item d128 +Enable support for 128-bit system register read/write instructions. +@item the +Enable support for Armv8.9-a/9.4-a translation hardening extension. + @end table Feature @option{crypto} implies @option{aes}, @option{sha2}, and @option{simd}, From 3aba045882d1f589d36eaedd947a014ac6eb5ec3 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Fri, 3 Nov 2023 16:44:56 +0000 Subject: [PATCH 006/311] aarch64: Add support for GCS system registers with the +gcs modifier Given the introduction of system registers associated with the Guarded Control Stack extension to Armv9.4-a in Binutils and their reliance on the `+gcs' modifier, we implement the necessary changes in GCC to allow for them to be recognized by the compiler. gcc/ChangeLog: * config/aarch64/aarch64-option-extensions.def (gcs): New. * config/aarch64/aarch64.h (AARCH64_ISA_GCS): New. (TARGET_GCS): Likewise. * doc/invoke.texi (AArch64 Options): Describe GCS.
--- gcc/config/aarch64/aarch64-option-extensions.def | 2 ++ gcc/config/aarch64/aarch64.h | 6 ++++++ gcc/doc/invoke.texi | 2 ++ 3 files changed, 10 insertions(+) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 97f03395ffca..02fc895338e7 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -163,4 +163,6 @@ AARCH64_OPT_EXTENSION("d128", D128, (), (), (), "d128") AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") +AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") + #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index f0af0b43358e..5a776dfdff0f 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -265,6 +265,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; #define AARCH64_ISA_CSSC (aarch64_isa_flags & AARCH64_FL_CSSC) #define AARCH64_ISA_D128 (aarch64_isa_flags & AARCH64_FL_D128) #define AARCH64_ISA_THE (aarch64_isa_flags & AARCH64_FL_THE) +#define AARCH64_ISA_GCS (aarch64_isa_flags & AARCH64_FL_GCS) /* The current function is a normal non-streaming function. */ #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) @@ -465,6 +466,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; enabled through +the. */ #define TARGET_THE (AARCH64_ISA_THE) +/* Armv9.4-A Guarded Control Stack extension system registers are + enabled through +gcs. */ +#define TARGET_GCS (AARCH64_ISA_GCS) + + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f5e5546080da..bff3645eedcf 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21363,6 +21363,8 @@ Enable the FEAT_SME_F64F64 extension to SME. Enable the Scalable Matrix Extension 2. This also enables SME instructions. @item d128 Enable support for 128-bit system register read/write instructions. 
+@item gcs +Enable support for Armv9.4-a Guarded Control Stack extension. @item the Enable support for Armv8.9-a/9.4-a translation hardening extension. From eac59af05aa5d7b420408b12865437dbc8a54218 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Fri, 3 Nov 2023 02:42:22 +0000 Subject: [PATCH 007/311] aarch64: Sync `aarch64-sys-regs.def' with Binutils. This patch updates `aarch64-sys-regs.def', bringing it into sync with the Binutils source. gcc/ChangeLog: * config/aarch64/aarch64-sys-regs.def: Copy from Binutils. --- gcc/config/aarch64/aarch64-sys-regs.def | 30 +++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sys-regs.def b/gcc/config/aarch64/aarch64-sys-regs.def index d24a24555034..96bdadb0b0fe 100644 --- a/gcc/config/aarch64/aarch64-sys-regs.def +++ b/gcc/config/aarch64/aarch64-sys-regs.def @@ -419,6 +419,16 @@ SYSREG ("fpcr", CPENC (3,3,4,4,0), 0, AARCH64_NO_FEATURES) SYSREG ("fpexc32_el2", CPENC (3,4,5,3,0), 0, AARCH64_NO_FEATURES) SYSREG ("fpsr", CPENC (3,3,4,4,1), 0, AARCH64_NO_FEATURES) + SYSREG ("gcspr_el0", CPENC (3,3,2,5,1), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcspr_el1", CPENC (3,0,2,5,1), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcspr_el2", CPENC (3,4,2,5,1), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcspr_el12", CPENC (3,5,2,5,1), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcspr_el3", CPENC (3,6,2,5,1), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcscre0_el1", CPENC (3,0,2,5,2), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcscr_el1", CPENC (3,0,2,5,0), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcscr_el2", CPENC (3,4,2,5,0), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcscr_el12", CPENC (3,5,2,5,0), F_ARCHEXT, AARCH64_FEATURE (GCS)) + SYSREG ("gcscr_el3", CPENC (3,6,2,5,0), F_ARCHEXT, AARCH64_FEATURE (GCS)) SYSREG ("gcr_el1", CPENC (3,0,1,0,6), F_ARCHEXT, AARCH64_FEATURE (MEMTAG)) SYSREG ("gmid_el1", CPENC (3,1,0,0,4), F_REG_READ|F_ARCHEXT, 
AARCH64_FEATURE (MEMTAG)) SYSREG ("gpccr_el3", CPENC (3,6,2,1,6), 0, AARCH64_NO_FEATURES) @@ -584,7 +594,7 @@ SYSREG ("oslar_el1", CPENC (2,0,1,0,4), F_REG_WRITE, AARCH64_NO_FEATURES) SYSREG ("oslsr_el1", CPENC (2,0,1,1,4), F_REG_READ, AARCH64_NO_FEATURES) SYSREG ("pan", CPENC (3,0,4,2,3), F_ARCHEXT, AARCH64_FEATURE (PAN)) - SYSREG ("par_el1", CPENC (3,0,7,4,0), 0, AARCH64_NO_FEATURES) + SYSREG ("par_el1", CPENC (3,0,7,4,0), F_REG_128, AARCH64_NO_FEATURES) SYSREG ("pmbidr_el1", CPENC (3,0,9,10,7), F_REG_READ|F_ARCHEXT, AARCH64_FEATURE (PROFILE)) SYSREG ("pmblimitr_el1", CPENC (3,0,9,10,0), F_ARCHEXT, AARCH64_FEATURE (PROFILE)) SYSREG ("pmbptr_el1", CPENC (3,0,9,10,1), F_ARCHEXT, AARCH64_FEATURE (PROFILE)) @@ -746,6 +756,8 @@ SYSREG ("prlar_el2", CPENC (3,4,6,8,1), F_ARCHEXT, AARCH64_FEATURE (V8R)) SYSREG ("prselr_el1", CPENC (3,0,6,2,1), F_ARCHEXT, AARCH64_FEATURE (V8R)) SYSREG ("prselr_el2", CPENC (3,4,6,2,1), F_ARCHEXT, AARCH64_FEATURE (V8R)) + SYSREG ("rcwmask_el1", CPENC (3,0,13,0,6), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (THE)) + SYSREG ("rcwsmask_el1", CPENC (3,0,13,0,3), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (THE)) SYSREG ("revidr_el1", CPENC (3,0,0,0,6), F_REG_READ, AARCH64_NO_FEATURES) SYSREG ("rgsr_el1", CPENC (3,0,1,0,5), F_ARCHEXT, AARCH64_FEATURE (MEMTAG)) SYSREG ("rmr_el1", CPENC (3,0,12,0,2), 0, AARCH64_NO_FEATURES) @@ -1034,13 +1046,13 @@ SYSREG ("trfcr_el1", CPENC (3,0,1,2,1), F_ARCHEXT, AARCH64_FEATURE (V8_4A)) SYSREG ("trfcr_el12", CPENC (3,5,1,2,1), F_ARCHEXT, AARCH64_FEATURE (V8_4A)) SYSREG ("trfcr_el2", CPENC (3,4,1,2,1), F_ARCHEXT, AARCH64_FEATURE (V8_4A)) - SYSREG ("ttbr0_el1", CPENC (3,0,2,0,0), 0, AARCH64_NO_FEATURES) - SYSREG ("ttbr0_el12", CPENC (3,5,2,0,0), F_ARCHEXT, AARCH64_FEATURE (V8_1A)) - SYSREG ("ttbr0_el2", CPENC (3,4,2,0,0), F_ARCHEXT, AARCH64_FEATURE (V8A)) + SYSREG ("ttbr0_el1", CPENC (3,0,2,0,0), F_REG_128, AARCH64_NO_FEATURES) + SYSREG ("ttbr0_el12", CPENC (3,5,2,0,0), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (V8_1A)) + SYSREG 
("ttbr0_el2", CPENC (3,4,2,0,0), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (V8A)) SYSREG ("ttbr0_el3", CPENC (3,6,2,0,0), 0, AARCH64_NO_FEATURES) - SYSREG ("ttbr1_el1", CPENC (3,0,2,0,1), 0, AARCH64_NO_FEATURES) - SYSREG ("ttbr1_el12", CPENC (3,5,2,0,1), F_ARCHEXT, AARCH64_FEATURE (V8_1A)) - SYSREG ("ttbr1_el2", CPENC (3,4,2,0,1), F_ARCHEXT, AARCH64_FEATURES (2, V8A, V8_1A)) + SYSREG ("ttbr1_el1", CPENC (3,0,2,0,1), F_REG_128, AARCH64_NO_FEATURES) + SYSREG ("ttbr1_el12", CPENC (3,5,2,0,1), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (V8_1A)) + SYSREG ("ttbr1_el2", CPENC (3,4,2,0,1), F_ARCHEXT|F_REG_128, AARCH64_FEATURES (2, V8A, V8_1A)) SYSREG ("uao", CPENC (3,0,4,2,4), F_ARCHEXT, AARCH64_FEATURE (V8_2A)) SYSREG ("vbar_el1", CPENC (3,0,12,0,0), 0, AARCH64_NO_FEATURES) SYSREG ("vbar_el12", CPENC (3,5,12,0,0), F_ARCHEXT, AARCH64_FEATURE (V8_1A)) @@ -1057,8 +1069,8 @@ SYSREG ("vstcr_el2", CPENC (3,4,2,6,2), F_ARCHEXT, AARCH64_FEATURE (V8_4A)) SYSREG ("vsttbr_el2", CPENC (3,4,2,6,0), F_ARCHEXT, AARCH64_FEATURES (2, V8A, V8_4A)) SYSREG ("vtcr_el2", CPENC (3,4,2,1,2), 0, AARCH64_NO_FEATURES) - SYSREG ("vttbr_el2", CPENC (3,4,2,1,0), F_ARCHEXT, AARCH64_FEATURE (V8A)) + SYSREG ("vttbr_el2", CPENC (3,4,2,1,0), F_ARCHEXT|F_REG_128, AARCH64_FEATURE (V8A)) SYSREG ("zcr_el1", CPENC (3,0,1,2,0), F_ARCHEXT, AARCH64_FEATURE (SVE)) SYSREG ("zcr_el12", CPENC (3,5,1,2,0), F_ARCHEXT, AARCH64_FEATURE (SVE)) SYSREG ("zcr_el2", CPENC (3,4,1,2,0), F_ARCHEXT, AARCH64_FEATURE (SVE)) - SYSREG ("zcr_el3", CPENC (3,6,1,2,0), F_ARCHEXT, AARCH64_FEATURE (SVE)) \ No newline at end of file + SYSREG ("zcr_el3", CPENC (3,6,1,2,0), F_ARCHEXT, AARCH64_FEATURE (SVE)) From 88157c88172b06f1afb6136e9bd8fce1de5ba823 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Sun, 29 Oct 2023 00:38:59 +0100 Subject: [PATCH 008/311] aarch64: Implement 128-bit extension to ACLE sysreg r/w builtins Implement the ACLE builtins for 128-bit system register manipulation: * __uint128_t __arm_rsr128(const char *special_register); 
* void __arm_wsr128(const char *special_register, __uint128_t value); gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (AARCH64_RSR128): New `enum aarch64_builtins' value. (AARCH64_WSR128): Likewise. (aarch64_init_rwsr_builtins): Init `__builtin_aarch64_rsr128' and `__builtin_aarch64_wsr128' builtins. (aarch64_expand_rwsr_builtin): Extend function to handle `__builtin_aarch64_{rsr|wsr}128'. * config/aarch64/aarch64-protos.h (aarch64_retrieve_sysreg): Update function signature. * config/aarch64/aarch64.cc (F_REG_128): New. (aarch64_retrieve_sysreg): Add 128-bit register mode check. * config/aarch64/aarch64.md (UNSPEC_SYSREG_RTI): New. (UNSPEC_SYSREG_WTI): Likewise. (aarch64_read_sysregti): Likewise. (aarch64_write_sysregti): Likewise. * config/aarch64/arm_acle.h (__arm_rsr128): New. (__arm_wsr128): Likewise. --- gcc/config/aarch64/aarch64-builtins.cc | 51 ++++++++++++++++++++++---- gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64.cc | 9 ++++- gcc/config/aarch64/aarch64.md | 18 +++++++++ gcc/config/aarch64/arm_acle.h | 11 ++++++ 5 files changed, 80 insertions(+), 11 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index f780b1094f38..452ff19f89b7 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -815,11 +815,13 @@ enum aarch64_builtins AARCH64_RSR64, AARCH64_RSRF, AARCH64_RSRF64, + AARCH64_RSR128, AARCH64_WSR, AARCH64_WSRP, AARCH64_WSR64, AARCH64_WSRF, AARCH64_WSRF64, + AARCH64_WSR128, AARCH64_BUILTIN_MAX }; @@ -1842,6 +1844,10 @@ aarch64_init_rwsr_builtins (void) = build_function_type_list (double_type_node, const_char_ptr_type, NULL); AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype); + fntype + = build_function_type_list (uint128_type_node, const_char_ptr_type, NULL); + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR128, rsr128, fntype); + fntype = build_function_type_list (void_type_node, const_char_ptr_type, uint32_type_node, NULL); @@ 
-1867,6 +1873,12 @@ aarch64_init_rwsr_builtins (void) = build_function_type_list (void_type_node, const_char_ptr_type, double_type_node, NULL); AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype); + + fntype + = build_function_type_list (void_type_node, const_char_ptr_type, + uint128_type_node, NULL); + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype); + } /* Initialize the memory tagging extension (MTE) builtins. */ @@ -2710,6 +2722,7 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) tree arg0, arg1; rtx const_str, input_val, subreg; enum machine_mode mode; + enum insn_code icode; class expand_operand ops[2]; arg0 = CALL_EXPR_ARG (exp, 0); @@ -2718,7 +2731,18 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) || fcode == AARCH64_WSRP || fcode == AARCH64_WSR64 || fcode == AARCH64_WSRF - || fcode == AARCH64_WSRF64); + || fcode == AARCH64_WSRF64 + || fcode == AARCH64_WSR128); + + bool op128 = (fcode == AARCH64_RSR128 || fcode == AARCH64_WSR128); + enum machine_mode sysreg_mode = op128 ? TImode : DImode; + + if (op128 && !TARGET_D128) + { + error_at (EXPR_LOCATION (exp), "128-bit system register support requires" + " the %<d128%> extension"); + return const0_rtx; + } /* Argument 0 (system register name) must be a string literal. */ gcc_assert (TREE_CODE (arg0) == ADDR_EXPR @@ -2740,7 +2764,8 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) for (unsigned pos = 0; pos <= len; pos++) sysreg_name[pos] = TOLOWER (sysreg_name[pos]); - const char *name_output = aarch64_retrieve_sysreg (sysreg_name, write_op); + const char* name_output = aarch64_retrieve_sysreg ((const char *) sysreg_name, + write_op, op128); if (name_output == NULL) { error_at (EXPR_LOCATION (exp), "invalid system register name %qs", @@ -2760,13 +2785,17 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) mode = TYPE_MODE (TREE_TYPE (arg1)); input_val = copy_to_mode_reg (mode, expand_normal (arg1)); + icode = (op128 ?
CODE_FOR_aarch64_write_sysregti + : CODE_FOR_aarch64_write_sysregdi); + switch (fcode) { case AARCH64_WSR: case AARCH64_WSRP: case AARCH64_WSR64: case AARCH64_WSRF64: - subreg = lowpart_subreg (DImode, input_val, mode); + case AARCH64_WSR128: + subreg = lowpart_subreg (sysreg_mode, input_val, mode); break; case AARCH64_WSRF: subreg = gen_lowpart_SUBREG (SImode, input_val); @@ -2775,8 +2804,8 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) } create_fixed_operand (&ops[0], const_str); - create_input_operand (&ops[1], subreg, DImode); - expand_insn (CODE_FOR_aarch64_write_sysregdi, 2, ops); + create_input_operand (&ops[1], subreg, sysreg_mode); + expand_insn (icode, 2, ops); return target; } @@ -2784,10 +2813,13 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) /* Read operations are implied by !write_op. */ gcc_assert (call_expr_nargs (exp) == 1); + icode = (op128 ? CODE_FOR_aarch64_read_sysregti + : CODE_FOR_aarch64_read_sysregdi); + /* Emit the initial read_sysregdi rtx. */ - create_output_operand (&ops[0], target, DImode); + create_output_operand (&ops[0], target, sysreg_mode); create_fixed_operand (&ops[1], const_str); - expand_insn (CODE_FOR_aarch64_read_sysregdi, 2, ops); + expand_insn (icode, 2, ops); target = ops[0].value; /* Do any necessary post-processing on the result. 
*/ @@ -2797,7 +2829,8 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) case AARCH64_RSRP: case AARCH64_RSR64: case AARCH64_RSRF64: - return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, DImode); + case AARCH64_RSR128: + return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, sysreg_mode); case AARCH64_RSRF: subreg = gen_lowpart_SUBREG (SImode, target); return gen_lowpart_SUBREG (SFmode, subreg); @@ -3044,11 +3077,13 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, case AARCH64_RSR64: case AARCH64_RSRF: case AARCH64_RSRF64: + case AARCH64_RSR128: case AARCH64_WSR: case AARCH64_WSRP: case AARCH64_WSR64: case AARCH64_WSRF: case AARCH64_WSRF64: + case AARCH64_WSR128: return aarch64_expand_rwsr_builtin (exp, target, fcode); } diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 60ff61f6d547..f2075a177326 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -840,7 +840,7 @@ bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *); bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *, enum simd_immediate_check w = AARCH64_CHECK_MOV); bool aarch64_valid_sysreg_name_p (const char *); -const char *aarch64_retrieve_sysreg (const char *, bool); +const char *aarch64_retrieve_sysreg (const char *, bool, bool); rtx aarch64_check_zero_based_sve_index_immediate (rtx); bool aarch64_maybe_generate_simd_constant (rtx, rtx, machine_mode); bool aarch64_simd_special_constant_p (rtx, machine_mode); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fd44e5922a20..8f50a70083d7 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -498,6 +498,8 @@ typedef struct { #define F_ARCHEXT (1 << 4) /* Flag indicating register name is alias for another system register. */ #define F_REG_ALIAS (1 << 5) +/* Flag indicating registers which may be implemented with 128 bits.
*/ +#define F_REG_128 (1 << 6) /* Database of system registers, their encodings and architectural requirements. */ @@ -29083,9 +29085,10 @@ aarch64_valid_sysreg_name_p (const char *regname) /* Return the generic sysreg specification for a valid system register name, otherwise NULL. WRITE_P is true iff the register is being - written to. */ + written to. IS128OP indicates the requested system register should + be checked for a 128-bit implementation. */ const char * -aarch64_retrieve_sysreg (const char *regname, bool write_p) +aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op) { const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname); if (sysreg == NULL) @@ -29095,6 +29098,8 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p) else return NULL; } + if (is128op && !(sysreg->properties & F_REG_128)) + return NULL; if ((write_p && (sysreg->properties & F_REG_READ)) || (!write_p && (sysreg->properties & F_REG_WRITE))) return NULL; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a6d5e8c2a1af..38da76a1ee20 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -339,7 +339,9 @@ UNSPEC_RDFFR UNSPEC_WRFFR UNSPEC_SYSREG_RDI + UNSPEC_SYSREG_RTI UNSPEC_SYSREG_WDI + UNSPEC_SYSREG_WTI ;; Represents an SVE-style lane index, in which the indexing applies ;; within the containing 128-bit block. 
UNSPEC_SVE_LANE_SELECT @@ -558,6 +560,14 @@ "mrs\t%x0, %1" ) +(define_insn "aarch64_read_sysregti" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec_volatile:TI [(match_operand 1 "aarch64_sysreg_string" "")] + UNSPEC_SYSREG_RTI))] + "TARGET_D128" + "mrrs\t%x0, %H0, %x1" +) + (define_insn "aarch64_write_sysregdi" [(unspec_volatile:DI [(match_operand 0 "aarch64_sysreg_string" "") (match_operand:DI 1 "register_operand" "rZ")] @@ -566,6 +576,14 @@ "msr\t%0, %x1" ) +(define_insn "aarch64_write_sysregti" + [(unspec_volatile:TI [(match_operand 0 "aarch64_sysreg_string" "") + (match_operand:TI 1 "register_operand" "r")] + UNSPEC_SYSREG_WTI)] + "TARGET_D128" + "msrr\t%x0, %x1, %H1" +) + (define_insn "indirect_jump" [(set (pc) (match_operand:DI 0 "register_operand" "r"))] "" diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 71ada8782996..80282b361a4f 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -344,6 +344,17 @@ __rndrrs (uint64_t *__res) #define __arm_wsrf64(__regname, __value) \ __builtin_aarch64_wsrf64 (__regname, __value) +#pragma GCC push_options +#pragma GCC target ("+nothing+d128") + +#define __arm_rsr128(__regname) \ + __builtin_aarch64_rsr128 (__regname) + +#define __arm_wsr128(__regname, __value) \ + __builtin_aarch64_wsr128 (__regname, __value) + +#pragma GCC pop_options + #ifdef __cplusplus } #endif From 09a08df71939cc0035ebae85220ff0214a38fb7c Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Sun, 29 Oct 2023 01:49:45 +0100 Subject: [PATCH 009/311] aarch64: Add rsr128 and wsr128 ACLE tests Extend existing unit tests for the ACLE system register manipulation functions to include 128-bit tests. gcc/testsuite/ChangeLog: * gcc.target/aarch64/acle/rwsr.c (get_rsr128): New. (set_wsr128): Likewise. 
--- gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c index 93c48c4caf0a..6feb0bef2d6f 100644 --- a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c @@ -6,6 +6,38 @@ #include <arm_acle.h> +#pragma GCC push_options +#pragma GCC target ("arch=armv9.4-a+d128") + +#ifndef __ARM_FEATURE_SYSREG128 +#error "__ARM_FEATURE_SYSREG128 feature macro not defined." +#endif + +/* +** get_rsr128: +** mrrs x0, x1, s3_0_c7_c4_0 +** ... +*/ +__uint128_t +get_rsr128 () +{ + __arm_rsr128 ("par_el1"); +} + +/* +** set_wsr128: +** ... +** msrr s3_0_c7_c4_0, x0, x1 +** ... +*/ +void +set_wsr128 (__uint128_t c) +{ + __arm_wsr128 ("par_el1", c); +} + +#pragma GCC pop_options + /* ** get_rsr: ** ... From db642d60ee81cd99d55f4e228d01de704b5b7cfa Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Thu, 7 Dec 2023 06:35:02 +0800 Subject: [PATCH 010/311] RISC-V: Fix PR112888 ICE Committed as it is obvious. gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (extract_single_source): new function. (pre_vsetvl::compute_lcm_local_properties): Fix ICE.
--- gcc/config/riscv/riscv-vsetvl.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 68f0be7e81d1..90477f331d7c 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -596,6 +596,14 @@ extract_single_source (set_info *set) return first_insn; } +static insn_info * +extract_single_source (def_info *def) +{ + if (!def) + return nullptr; + return extract_single_source (dyn_cast<set_info *> (def)); +} + static bool same_equiv_note_p (set_info *set1, set_info *set2) { @@ -2692,9 +2700,7 @@ pre_vsetvl::compute_lcm_local_properties () def_lookup dl = crtl->ssa->find_def (resource, insn); def_info *def = dl.matching_set_or_last_def_of_prev_group (); - gcc_assert (def); - insn_info *def_insn = extract_single_source ( - dyn_cast<set_info *> (def)); + insn_info *def_insn = extract_single_source (def); if (def_insn && vsetvl_insn_p (def_insn->rtl ())) { vsetvl_info def_info = vsetvl_info (def_insn); From ae9e48e5c0acaecf181117bdf3632b6eabf907ec Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 7 Dec 2023 00:17:06 +0000 Subject: [PATCH 011/311] Daily bump. --- gcc/ChangeLog | 303 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 5 + gcc/c-family/ChangeLog | 5 + gcc/cp/ChangeLog | 14 ++ gcc/fortran/ChangeLog | 6 + gcc/jit/ChangeLog | 11 ++ gcc/m2/ChangeLog | 4 + gcc/testsuite/ChangeLog | 94 +++++++++++++ libcc1/ChangeLog | 5 + libgcc/ChangeLog | 7 + libgomp/ChangeLog | 75 ++++++++++ libstdc++-v3/ChangeLog | 16 +++ 13 files changed, 546 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 23c65f471eb6..1f38282e0c96 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,306 @@ +2023-12-06 Juzhe-Zhong + + * config/riscv/riscv-vsetvl.cc (extract_single_source): new function. + (pre_vsetvl::compute_lcm_local_properties): Fix ICE.
+ +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-builtins.cc (AARCH64_RSR128): New + `enum aarch64_builtins' value. + (AARCH64_WSR128): Likewise. + (aarch64_init_rwsr_builtins): Init `__builtin_aarch64_rsr128' + and `__builtin_aarch64_wsr128' builtins. + (aarch64_expand_rwsr_builtin): Extend function to handle + `__builtin_aarch64_{rsr|wsr}128'. + * config/aarch64/aarch64-protos.h (aarch64_retrieve_sysreg): + Update function signature. + * config/aarch64/aarch64.cc (F_REG_128): New. + (aarch64_retrieve_sysreg): Add 128-bit register mode check. + * config/aarch64/aarch64.md (UNSPEC_SYSREG_RTI): New. + (UNSPEC_SYSREG_WTI): Likewise. + (aarch64_read_sysregti): Likewise. + (aarch64_write_sysregti): Likewise. + * config/aarch64/arm_acle.h (__arm_rsr128): New. + (__arm_wsr128): Likewise. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-sys-regs.def: Copy from Binutils. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-option-extensions.def (gcs): New. + * config/aarch64/aarch64.h (AARCH64_ISA_GCS): New. + (TARGET_THE): Likewise. + * doc/invoke.texi (AArch64 Options): Describe GCS. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-c.cc (__ARM_FEATURE_SYSREG128): New. + * config/aarch64/aarch64-arches.def (armv8.9-a): New. + (armv9.4-a): Likewise. + * config/aarch64/aarch64-option-extensions.def (d128): Likewise. + (the): Likewise. + * config/aarch64/aarch64.h (AARCH64_ISA_V9_4A): Likewise. + (AARCH64_ISA_V8_9A): Likewise. + (TARGET_ARMV9_4): Likewise. + (AARCH64_ISA_D128): Likewise. + (AARCH64_ISA_THE): Likewise. + (TARGET_D128): Likewise. + * doc/invoke.texi (AArch64 Options): Document new -march flags + and extensions. + +2023-12-06 Eric Gallager + + * Makefile.in: Remove qmtest-related targets. + +2023-12-06 David Malcolm + + * common.opt (fdiagnostics-json-formatting): New. 
+ * diagnostic-format-json.cc: Add "formatted" boolean + to json_output_format and subclasses, and to the + diagnostic_output_format_init_json_* functions. Use it when + printing JSON. + * diagnostic-format-sarif.cc: Likewise for sarif_builder, + sarif_output_format, and the various + diagnostic_output_format_init_sarif_* functions. + * diagnostic.cc (diagnostic_output_format_init): Add + "json_formatting" boolean and pass on to the various cases. + * diagnostic.h (diagnostic_output_format_init): Add + "json_formatted" param. + (diagnostic_output_format_init_json_stderr): Add "formatted" param + (diagnostic_output_format_init_json_file): Likewise. + (diagnostic_output_format_init_sarif_stderr): Likewise. + (diagnostic_output_format_init_sarif_file): Likewise. + (diagnostic_output_format_init_sarif_stream): Likewise. + * doc/invoke.texi (-fdiagnostics-format=json): Remove discussion + about JSON output needing formatting. + (-fno-diagnostics-json-formatting): Add. + * gcc.cc (driver_handle_option): Use + opts->x_flag_diagnostics_json_formatting. + * gcov.cc (generate_results): Pass "false" for new formatting + option when printing json. + * json.cc (value::dump): Add new "formatted" param. + (object::print): Likewise, using it to add whitespace to format + the JSON output. + (array::print): Likewise. + (float_number::print): Add new "formatted" param. + (integer_number::print): Likewise. + (string::print): Likewise. + (literal::print): Likewise. + (selftest::assert_print_eq): Add "formatted" param. + (ASSERT_PRINT_EQ): Add "FORMATTED" param. + (selftest::test_writing_objects): Test both formatted and + unformatted printing. + (selftest::test_writing_arrays): Likewise. + (selftest::test_writing_float_numbers): Update for new param of + ASSERT_PRINT_EQ. + (selftest::test_writing_integer_numbers): Likewise. + (selftest::test_writing_strings): Likewise. + (selftest::test_writing_literals): Likewise. + (selftest::test_formatting): New. 
+ (selftest::json_cc_tests): Call it. + * json.h (value::print): Add "formatted" param. + (value::dump): Likewise. + (object::print): Likewise. + (array::print): Likewise. + (float_number::print): Likewise. + (integer_number::print): Likewise. + (string::print): Likewise. + (literal::print): Likewise. + * optinfo-emit-json.cc (optrecord_json_writer::write): Pass + "false" for new formatting option when printing json. + (selftest::test_building_json_from_dump_calls): Likewise. + * opts.cc (common_handle_option): Use + opts->x_flag_diagnostics_json_formatting. + +2023-12-06 David Malcolm + + * diagnostic-format-json.cc (on_begin_diagnostic): Convert param + to const reference. + (on_end_diagnostic): Likewise. + (json_output_format::on_end_diagnostic): Likewise. + * diagnostic-format-sarif.cc + (sarif_invocation::add_notification_for_ice): Likewise. + (sarif_result::on_nested_diagnostic): Likewise. + (sarif_ice_notification::sarif_ice_notification): Likewise. + (sarif_builder::end_diagnostic): Likewise. + (sarif_builder::make_result_object): Likewise. + (make_reporting_descriptor_object_for_warning): Likewise. + (sarif_builder::make_locations_arr): Likewise. + (sarif_output_format::on_begin_diagnostic): Likewise. + (sarif_output_format::on_end_diagnostic): Likewise. + * diagnostic.cc (default_diagnostic_starter): Make diagnostic_info + param const. + (default_diagnostic_finalizer): Likewise. + (diagnostic_context::report_diagnostic): Pass diagnostic by + reference to on_{begin,end}_diagnostic. + (diagnostic_text_output_format::on_begin_diagnostic): Convert + param to const reference. + (diagnostic_text_output_format::on_end_diagnostic): Likewise. + * diagnostic.h (diagnostic_starter_fn): Make diagnostic_info param + const. + (diagnostic_finalizer_fn): Likewise. + (diagnostic_output_format::on_begin_diagnostic): Convert param to + const reference. + (diagnostic_output_format::on_end_diagnostic): Likewise.
+ (diagnostic_text_output_format::on_begin_diagnostic): Likewise. + (diagnostic_text_output_format::on_end_diagnostic): Likewise. + (default_diagnostic_starter): Make diagnostic_info param const. + (default_diagnostic_finalizer): Likewise. + * langhooks-def.h (lhd_print_error_function): Make diagnostic_info + param const. + * langhooks.cc (lhd_print_error_function): Likewise. + * langhooks.h (lang_hooks::print_error_function): Likewise. + * tree-diagnostic.cc (diagnostic_report_current_function): + Likewise. + (default_tree_diagnostic_starter): Likewise. + (virt_loc_aware_diagnostic_finalizer): Likewise. + * tree-diagnostic.h (diagnostic_report_current_function): + Likewise. + (virt_loc_aware_diagnostic_finalizer): Likewise. + +2023-12-06 Andrew Stubbs + + * config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in. + * config/gcn/gcn.cc (gcn_init_machine_status): Disable global + addressing. + (gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR. + +2023-12-06 Juzhe-Zhong + + PR target/112855 + * config/riscv/riscv-vsetvl.cc + (pre_vsetvl::compute_lcm_local_properties): Fix transparent LCM data. + (pre_vsetvl::earliest_fuse_vsetvl_info): Disable earliest fusion for unrelated edge. + +2023-12-06 Marek Polacek + + PR target/112762 + * config/linux.h: Redefine TARGET_FORTIFY_SOURCE_DEFAULT_LEVEL for + glibc only. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64.cc + (aarch64_test_sysreg_encoding_clashes): New. + (aarch64_run_selftests): Add call to + aarch64_test_sysreg_encoding_clashes selftest. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-builtins.cc (aarch64_general_check_builtin_call): + New. + * config/aarch64/aarch64-c.cc (aarch64_check_builtin_call): + Add `aarch64_general_check_builtin_call' call. + * config/aarch64/aarch64-protos.h (aarch64_general_check_builtin_call): + New. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-builtins.cc (enum aarch64_builtins): + Add enums for new builtins.
+ (aarch64_init_rwsr_builtins): New. + (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins. + (aarch64_expand_rwsr_builtin): New. + (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin. + * config/aarch64/aarch64.md (read_sysregdi): New insn_and_split. + (write_sysregdi): Likewise. + * config/aarch64/arm_acle.h (__arm_rsr): New. + (__arm_rsrp): Likewise. + (__arm_rsr64): Likewise. + (__arm_rsrf): Likewise. + (__arm_rsrf64): Likewise. + (__arm_wsr): Likewise. + (__arm_wsrp): Likewise. + (__arm_wsr64): Likewise. + (__arm_wsrf): Likewise. + (__arm_wsrf64): Likewise. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-protos.h (aarch64_valid_sysreg_name_p): New. + (aarch64_retrieve_sysreg): Likewise. + * config/aarch64/aarch64.cc (is_implem_def_reg): Likewise. + (aarch64_valid_sysreg_name_p): Likewise. + (aarch64_retrieve_sysreg): Likewise. + (aarch64_register_sysreg): Likewise. + (aarch64_init_sysregs): Likewise. + (aarch64_lookup_sysreg_map): Likewise. + * config/aarch64/predicates.md (aarch64_sysreg_string): New. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64.cc (sysreg_t): New. + (aarch64_sysregs): Likewise. + (AARCH64_FEATURE): Likewise. + (AARCH64_FEATURES): Likewise. + (AARCH64_NO_FEATURES): Likewise. + * config/aarch64/aarch64.h (AARCH64_ISA_V8A): Add missing + ISA flag. + (AARCH64_ISA_V8_1A): Likewise. + (AARCH64_ISA_V8_7A): Likewise. + (AARCH64_ISA_V8_8A): Likewise. + (AARCH64_NO_FEATURES): Likewise. + (AARCH64_FL_RAS): New ISA flag alias. + (AARCH64_FL_LOR): Likewise. + (AARCH64_FL_PAN): Likewise. + (AARCH64_FL_AMU): Likewise. + (AARCH64_FL_SCXTNUM): Likewise. + (AARCH64_FL_ID_PFR2): Likewise. + (F_DEPRECATED): New. + (F_REG_READ): Likewise. + (F_REG_WRITE): Likewise. + (F_ARCHEXT): Likewise. + (F_REG_ALIAS): Likewise. + +2023-12-06 Victor Do Nascimento + + * config/aarch64/aarch64-sys-regs.def: New. 
+ +2023-12-06 Robin Dapp + + PR target/112854 + PR target/112872 + * config/riscv/autovec.md (vec_initqi): New expander. + +2023-12-06 Jakub Jelinek + + PR rtl-optimization/112760 + * config/i386/i386-passes.def (pass_insert_vzeroupper): Insert + after pass_postreload_cse rather than pass_reload. + * config/i386/i386-features.cc (rest_of_handle_insert_vzeroupper): + Adjust comment for it. + +2023-12-06 Jakub Jelinek + + PR tree-optimization/112809 + * gimple-lower-bitint.cc (bitint_large_huge::lower_mergeable_stmt): For + separate_ext in kind == bitint_prec_huge mode if rem == 0, create for + i == cnt - 1 the loop rather than using size_int (end). + +2023-12-06 Jakub Jelinek + + * gcc.cc (driver_handle_option): Add /* FALLTHROUGH */ comment + between OPT_pie and OPT_r cases. + +2023-12-06 Tobias Burnus + + * tsystem.h (calloc, realloc): Declare when inhibit_libc. + +2023-12-06 Richard Biener + + PR tree-optimization/112843 + * tree-ssa-operands.cc (update_stmt_operands): Do not call + update_stmt from ranger. + * value-query.h (range_query::update_stmt): Remove. + * gimple-range.h (gimple_ranger::update_stmt): Likewise. + * gimple-range.cc (gimple_ranger::update_stmt): Likewise. + +2023-12-06 xuli + + * config/riscv/riscv.md: Remove. + 2023-12-06 Alexandre Oliva * Makefile.in (OBJS): Add ipa-strub.o. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 76ff872532c3..af1f8a0d7194 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231206 +20231207 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index b6e8d51c31dc..8dd3709b7aae 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,8 @@ +2023-12-06 David Malcolm + + * engine.cc (dump_analyzer_json): Use + flag_diagnostics_json_formatting. + 2023-12-01 David Malcolm * analyzer.h (class saved_diagnostic): New forward decl. 
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 3dc8cbc7da0e..fa9576ffab33 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,8 @@ +2023-12-06 David Malcolm + + * c-opts.cc (c_diagnostic_finalizer): Make "diagnostic" param + const. + 2023-12-06 Alexandre Oliva * c-attribs.cc: Include ipa-strub.h. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 308d02b95aeb..53c8dce26ffd 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,17 @@ +2023-12-06 David Malcolm + + * cp-tree.h (cxx_print_error_function): Make diagnostic_info param + const. + * error.cc (cxx_print_error_function): Likewise. + (cp_diagnostic_starter): Likewise. + (cp_print_error_function): Likewise. + +2023-12-06 Jason Merrill + + DR 532 + PR c++/53499 + * pt.cc (more_specialized_fn): Fix object parameter handling. + 2023-12-05 Jakub Jelinek PR c++/112795 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index be4fe9afa699..5a823eefcd87 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2023-12-06 David Malcolm + + * error.cc (gfc_diagnostic_starter): Make diagnostic_info param + const. + (gfc_diagnostic_finalizer): Likewise. + 2023-12-05 Harald Anlauf Tobias Burnus diff --git a/gcc/jit/ChangeLog b/gcc/jit/ChangeLog index 11a0cb1fb02f..5c83dcebceb2 100644 --- a/gcc/jit/ChangeLog +++ b/gcc/jit/ChangeLog @@ -1,3 +1,14 @@ +2023-12-06 David Malcolm + + * dummy-frontend.cc (jit_begin_diagnostic): Make diagnostic_info + param const. + (jit_end_diagnostic): Likewise. Pass to add_diagnostic by + reference. + * jit-playback.cc (jit::playback::context::add_diagnostic): + Convert diagnostic_info to const reference. + * jit-playback.h (jit::playback::context::add_diagnostic): + Likewise. 
+ 2023-12-05 Richard Sandiford * dummy-frontend.cc (jit_gnu_attribute_table): Add extra braces diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index a1b339208ca3..486ca569145e 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,7 @@ +2023-12-06 Thomas Schwinge + + * lang.opt (-isysroot): New. + 2023-12-05 Gaius Mulley PR modula2/112865 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b5162970262e..3b1c0c7f9661 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,97 @@ +2023-12-06 Victor Do Nascimento + + * gcc.target/aarch64/acle/rwsr.c (get_rsr128): New. + (set_wsr128): Likewise. + +2023-12-06 Edwin Lu + + * gcc.dg/tree-ssa/ssa-fre-3.c: Remove xfail + +2023-12-06 Yang Yujie + + * gcc.dg/fixed-point/composite-type.c: Replace dg-warning with dg-error. + +2023-12-06 David Malcolm + + * c-c++-common/diagnostic-format-json-1.c: Update expected JSON + output to reflect whitespace. + * c-c++-common/diagnostic-format-json-2.c: Likewise. + * c-c++-common/diagnostic-format-json-3.c: Likewise. + * c-c++-common/diagnostic-format-json-4.c: Likewise. + * c-c++-common/diagnostic-format-json-5.c: Likewise. + * c-c++-common/diagnostic-format-json-stderr-1.c: Likewise. + * g++.dg/pr90462.C: Add -fno-diagnostics-json-formatting. + * gcc.dg/analyzer/malloc-sarif-1.c: Likewise. + * gcc.dg/plugin/diagnostic-test-paths-3.c: Update expected JSON + output to reflect whitespace. + * gfortran.dg/diagnostic-format-json-1.F90: Likewise. + * gfortran.dg/diagnostic-format-json-2.F90: Likewise. + * gfortran.dg/diagnostic-format-json-3.F90: Likewise. + +2023-12-06 David Malcolm + + * g++.dg/plugin/show_template_tree_color_plugin.c + (noop_starter_fn): Make diagnostic_info param const. + * gcc.dg/plugin/diagnostic_group_plugin.c + (test_diagnostic_starter): Likewise. + * gcc.dg/plugin/diagnostic_plugin_test_show_locus.c + (custom_diagnostic_finalizer): Likewise. 
+ * gcc.dg/plugin/location_overflow_plugin.c + (verify_unpacked_ranges): Likewise. + (verify_no_columns): Likewise. + +2023-12-06 John David Anglin + + * c-c++-common/fhardened-1.c: Ignore __SSP_STRONG__ define + if __hppa__ is defined. + * c-c++-common/fhardened-2.c: Ignore __SSP__ define + if __hppa__ is defined. + +2023-12-06 Juzhe-Zhong + + PR target/112855 + * gcc.target/riscv/rvv/autovec/pr112855.c: New test. + +2023-12-06 Jason Merrill + + DR 532 + PR c++/53499 + * g++.dg/template/partial-order4.C: New test. + * g++.dg/template/spec26.C: Adjust for CWG532. + +2023-12-06 Victor Do Nascimento + + * gcc.target/aarch64/acle/rwsr-3.c: New. + +2023-12-06 Victor Do Nascimento + + * gcc.target/aarch64/acle/rwsr.c: New. + * gcc.target/aarch64/acle/rwsr-1.c: Likewise. + * gcc.target/aarch64/acle/rwsr-2.c: Likewise. + * gcc.dg/pch/rwsr-pch.c: Likewise. + * gcc.dg/pch/rwsr-pch.hs: Likewise. + +2023-12-06 Robin Dapp + + * gcc.target/riscv/rvv/autovec/pr112854.c: New test. + * gcc.target/riscv/rvv/autovec/pr112872.c: New test. + +2023-12-06 Jakub Jelinek + + PR rtl-optimization/112760 + * gcc.dg/pr112760.c: New test. + +2023-12-06 Jakub Jelinek + + PR tree-optimization/112809 + * gcc.dg/bitint-48.c: New test. + +2023-12-06 Hans-Peter Nilsson + + PR testsuite/112419 + * gcc.dg/Wnonnull-4.c (test_fda_n_5): Expect warning for exceeding + maximum object size for 32-bit targets. + 2023-12-06 Alexandre Oliva * c-c++-common/strub-O0.c: New. diff --git a/libcc1/ChangeLog b/libcc1/ChangeLog index 3582cf620500..6cdab1188b2b 100644 --- a/libcc1/ChangeLog +++ b/libcc1/ChangeLog @@ -1,3 +1,8 @@ +2023-12-06 David Malcolm + + * context.cc (plugin_print_error_function): Make diagnostic_info + param const. 
+ 2023-11-16 Tom Tromey * libcc1plugin.cc (plugin_build_enum_type): Set diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 542cfe5aa4ea..74bad754cc82 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,10 @@ +2023-12-06 Thomas Schwinge + Jakub Jelinek + + PR libgcc/109289 + * emutls.c: Add GCC diagnostic ignored "-Wbuiltin-declaration-mismatch" + pragma. + 2023-12-06 Alexandre Oliva * Makefile.in (LIB2ADD): Add strub.c. diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index bcb1712b41ad..9e7453787560 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,78 @@ +2023-12-06 Andrew Stubbs + + * config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here. + (TEAM_ARENA_FREE): Likewise. + (TEAM_ARENA_END): Likewise. + (GCN_LOWLAT_HEAP): New. + * config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h. + (__gcn_lowlat_init): New prototype. + (gomp_gcn_enter_kernel): Initialize the low-latency heap. + * libgomp.h (TEAM_ARENA_START): Move to libgomp.h. + (TEAM_ARENA_FREE): Likewise. + (TEAM_ARENA_END): Likewise. + * plugin/plugin-gcn.c (lowlat_size): New variable. + (print_kernel_dispatch): Label the group_segment_size purpose. + (init_environment_variables): Read GOMP_GCN_LOWLAT_POOL. + (create_kernel_dispatch): Pass low-latency head allocation to kernel. + (run_kernel): Use shadow; don't assume values. + * testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn. + * config/gcn/allocator.c: New file. + * libgomp.texi: Document low-latency implementation details. + +2023-12-06 Andrew Stubbs + + * allocator.c (MEMSPACE_VALIDATE): New macro. + (omp_init_allocator): Use MEMSPACE_VALIDATE. + (omp_aligned_alloc): Use OMP_LOW_LAT_MEM_ALLOC_INVALID. + (omp_aligned_calloc): Likewise. + (omp_realloc): Likewise. + * config/nvptx/allocator.c (nvptx_memspace_validate): New function. + (MEMSPACE_VALIDATE): New macro. + (OMP_LOW_LAT_MEM_ALLOC_INVALID): New define. + * libgomp.texi: Document low-latency implementation details. 
+ * testsuite/libgomp.c/omp_alloc-1.c (main): Add gnu_lowlat. + * testsuite/libgomp.c/omp_alloc-2.c (main): Add gnu_lowlat. + * testsuite/libgomp.c/omp_alloc-3.c (main): Add gnu_lowlat. + * testsuite/libgomp.c/omp_alloc-4.c (main): Add access trait. + * testsuite/libgomp.c/omp_alloc-5.c (main): Add gnu_lowlat. + * testsuite/libgomp.c/omp_alloc-6.c (main): Add access trait. + * testsuite/libgomp.c/omp_alloc-traits.c: New test. + +2023-12-06 Andrew Stubbs + Kwok Cheung Yeung + Thomas Schwinge + + * allocator.c (MEMSPACE_ALLOC): New macro. + (MEMSPACE_CALLOC): New macro. + (MEMSPACE_REALLOC): New macro. + (MEMSPACE_FREE): New macro. + (predefined_alloc_mapping): New array. Add _Static_assert to match. + (ARRAY_SIZE): New macro. + (omp_aligned_alloc): Use MEMSPACE_ALLOC. + Implement fall-backs for predefined allocators. Simplify existing + fall-backs. + (omp_free): Use MEMSPACE_FREE. + (omp_calloc): Use MEMSPACE_CALLOC. Implement fall-backs for + predefined allocators. Simplify existing fall-backs. + (omp_realloc): Use MEMSPACE_REALLOC, MEMSPACE_ALLOC, and MEMSPACE_FREE. + Implement fall-backs for predefined allocators. Simplify existing + fall-backs. + * config/nvptx/team.c (__nvptx_lowlat_pool): New asm variable. + (__nvptx_lowlat_init): New prototype. + (gomp_nvptx_main): Call __nvptx_lowlat_init. + * libgomp.texi: Update memory space table. + * plugin/plugin-nvptx.c (lowlat_pool_size): New variable. + (GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar. + (GOMP_OFFLOAD_run): Apply lowlat_pool_size. + * basic-allocator.c: New file. + * config/nvptx/allocator.c: New file. + * testsuite/libgomp.c/omp_alloc-1.c: New test. + * testsuite/libgomp.c/omp_alloc-2.c: New test. + * testsuite/libgomp.c/omp_alloc-3.c: New test. + * testsuite/libgomp.c/omp_alloc-4.c: New test. + * testsuite/libgomp.c/omp_alloc-5.c: New test. + * testsuite/libgomp.c/omp_alloc-6.c: New test. + 2023-11-30 Thomas Schwinge * testsuite/libgomp.c/declare-variant-4-fiji.c: Adjust. 
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index c2e220ee6c38..43b161ccb3df 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,19 @@ +2023-12-06 Jason Merrill + + DR 532 + PR c++/53499 + * testsuite/23_containers/vector/ext_pointer/types/1.cc + * testsuite/23_containers/vector/ext_pointer/types/2.cc + (N::operator-): Make less specialized. + +2023-12-06 Alexandre Oliva + + Revert: + 2023-12-02 Alexandre Oliva + + * libsupc++/atexit_thread.cc [__GXX_WEAK__]: Add dynamic + detection of __cxa_thread_atexit_impl. + 2023-12-05 Jonathan Wakely PR libstdc++/111948 From 08b7462d3ad8e5acd941b7c777c5b26b4064d686 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 6 Dec 2023 19:25:26 -0500 Subject: [PATCH 012/311] analyzer: fix taint false positives with UNKNOWN [PR112850] PR analyzer/112850 reports a false positive from -Wanalyzer-tainted-allocation-size on the Linux kernel [1] where -fanalyzer complains that an allocation size is attacker-controlled despite the value being correctly sanitized against upper and lower limits. The root cause is that the expression is sufficiently complex to exceed the -param=analyzer-max-svalue-depth= threshold, currently at 12, with depth 13, and so it is treated as UNKNOWN. Hence the sanitizations are seen as comparisons of an UNKNOWN symbolic value against constants, and these were being ignored by the taint state machine. The expression in question is relatively typical for those seen in Linux kernel ioctl handlers, and I was surprised that it had exceeded the analyzer's default expression complexity limit. This patch addresses this problem in three ways: (a) the default value of the threshold parameter is increased, from 12 to 18, so that such expressions are precisely handled (b) adding a new -Wanalyzer-symbol-too-complex to warn when the symbol complexity limit is reached. This is off by default for users, and on by default in the test suite. 
(c) the taint state machine handles comparisons against UNKNOWN svalues by dropping all taint information on that execution path, so that if the complexity limit has been exceeded we don't generate false positives. As well as fixing the taint false positive (PR analyzer/112850), the patch also fixes a couple of leak false positives seen on flex-generated scanners (PR analyzer/103546). [1] specifically, in sound/core/rawmidi.c's handler for SNDRV_RAWMIDI_STREAM_OUTPUT. gcc/ChangeLog: PR analyzer/103546 PR analyzer/112850 * doc/invoke.texi: Add -Wanalyzer-symbol-too-complex. gcc/analyzer/ChangeLog: PR analyzer/103546 PR analyzer/112850 * analyzer.opt (-param=analyzer-max-svalue-depth=): Increase from 12 to 18. (Wanalyzer-symbol-too-complex): New. * diagnostic-manager.cc (null_assignment_sm_context::clear_all_per_svalue_state): New. * engine.cc (impl_sm_context::clear_all_per_svalue_state): New. * program-state.cc (sm_state_map::clear_all_per_svalue_state): New. * program-state.h (sm_state_map::clear_all_per_svalue_state): New decl. * region-model-manager.cc (region_model_manager::reject_if_too_complex): Add -Wanalyzer-symbol-too-complex. * sm-taint.cc (taint_state_machine::on_condition): Handle comparisons against UNKNOWN. * sm.h (sm_context::clear_all_per_svalue_state): New. gcc/testsuite/ChangeLog: PR analyzer/103546 PR analyzer/112850 * c-c++-common/analyzer/call-summaries-pr107158-2.c: Add -Wno-analyzer-symbol-too-complex. * c-c++-common/analyzer/call-summaries-pr107158.c: Likewise. * c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c: Likewise. * c-c++-common/analyzer/feasibility-3.c: Add -Wno-analyzer-too-complex and -Wno-analyzer-symbol-too-complex. * c-c++-common/analyzer/flex-with-call-summaries.c: Add -Wno-analyzer-symbol-too-complex. Remove xfail for PR analyzer/103546 leak false positive. * c-c++-common/analyzer/flex-without-call-summaries.c: Remove xfail for PR analyzer/103546 leak false positive.
* c-c++-common/analyzer/infinite-recursion-3.c: Add -Wno-analyzer-symbol-too-complex. * c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c: Likewise. * c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c: Likewise. * c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c: Likewise. * c-c++-common/analyzer/null-deref-pr108806-qemu.c: Likewise. * c-c++-common/analyzer/null-deref-pr108830.c: Likewise. * c-c++-common/analyzer/pr94596.c: Likewise. * c-c++-common/analyzer/strtok-2.c: Likewise. * c-c++-common/analyzer/strtok-4.c: Add -Wno-analyzer-too-complex and -Wno-analyzer-symbol-too-complex. * c-c++-common/analyzer/strtok-cppreference.c: Likewise. * gcc.dg/analyzer/analyzer.exp: Add -Wanalyzer-symbol-too-complex to DEFAULT_CFLAGS. * gcc.dg/analyzer/attr-const-3.c: Add -Wno-analyzer-symbol-too-complex. * gcc.dg/analyzer/call-summaries-pr107072.c: Likewise. * gcc.dg/analyzer/doom-s_sound-pr108867.c: Likewise. * gcc.dg/analyzer/explode-4.c: Likewise. * gcc.dg/analyzer/null-deref-pr102671-1.c: Likewise. * gcc.dg/analyzer/null-deref-pr105755.c: Likewise. * gcc.dg/analyzer/out-of-bounds-curl.c: Likewise. * gcc.dg/analyzer/pr101503.c: Likewise. * gcc.dg/analyzer/pr103892.c: Add -Wno-analyzer-too-complex and -Wno-analyzer-symbol-too-complex. * gcc.dg/analyzer/pr94851-4.c: Add -Wno-analyzer-symbol-too-complex. * gcc.dg/analyzer/pr96860-1.c: Likewise. * gcc.dg/analyzer/pr96860-2.c: Likewise. * gcc.dg/analyzer/pr98918.c: Likewise. * gcc.dg/analyzer/pr99044-2.c: Likewise. * gcc.dg/analyzer/uninit-pr108806-qemu.c: Likewise. * gcc.dg/analyzer/use-after-free.c: Add -Wno-analyzer-too-complex and -Wno-analyzer-symbol-too-complex. * gcc.dg/plugin/plugin.exp: Add new tests for analyzer_kernel_plugin.c. * gcc.dg/plugin/taint-CVE-2011-0521-4.c: Update expected results. * gcc.dg/plugin/taint-CVE-2011-0521-5.c: Likewise. * gcc.dg/plugin/taint-CVE-2011-0521-6.c: Likewise. * gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c: Remove xfail. 
* gcc.dg/plugin/taint-pr112850-precise.c: New test. * gcc.dg/plugin/taint-pr112850-too-complex.c: New test. * gcc.dg/plugin/taint-pr112850-unsanitized.c: New test. * gcc.dg/plugin/taint-pr112850.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/analyzer.opt | 6 ++- gcc/analyzer/diagnostic-manager.cc | 5 ++ gcc/analyzer/engine.cc | 5 ++ gcc/analyzer/program-state.cc | 8 +++ gcc/analyzer/program-state.h | 1 + gcc/analyzer/region-model-manager.cc | 10 ++++ gcc/analyzer/sm-taint.cc | 14 +++++ gcc/analyzer/sm.h | 2 + gcc/doc/invoke.texi | 14 +++++ .../analyzer/call-summaries-pr107158-2.c | 2 +- .../analyzer/call-summaries-pr107158.c | 2 +- ...f-before-check-pr109060-haproxy-cfgparse.c | 2 + .../c-c++-common/analyzer/feasibility-3.c | 2 + .../analyzer/flex-with-call-summaries.c | 4 +- .../analyzer/flex-without-call-summaries.c | 3 +- .../analyzer/infinite-recursion-3.c | 2 +- ...f-pr108251-smp_fetch_ssl_fc_has_early-O2.c | 2 +- ...eref-pr108251-smp_fetch_ssl_fc_has_early.c | 1 + .../null-deref-pr108400-SoftEtherVPN-WebUi.c | 2 + .../analyzer/null-deref-pr108806-qemu.c | 2 + .../analyzer/null-deref-pr108830.c | 2 +- gcc/testsuite/c-c++-common/analyzer/pr94596.c | 3 ++ .../c-c++-common/analyzer/strtok-2.c | 2 + .../c-c++-common/analyzer/strtok-4.c | 2 + .../analyzer/strtok-cppreference.c | 2 + gcc/testsuite/gcc.dg/analyzer/analyzer.exp | 2 +- gcc/testsuite/gcc.dg/analyzer/attr-const-3.c | 2 +- .../gcc.dg/analyzer/call-summaries-pr107072.c | 2 +- .../gcc.dg/analyzer/doom-s_sound-pr108867.c | 2 +- gcc/testsuite/gcc.dg/analyzer/explode-4.c | 2 +- .../gcc.dg/analyzer/null-deref-pr102671-1.c | 2 +- .../gcc.dg/analyzer/null-deref-pr105755.c | 2 +- .../gcc.dg/analyzer/out-of-bounds-curl.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr101503.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr103892.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr94851-4.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr96860-1.c | 2 +- gcc/testsuite/gcc.dg/analyzer/pr96860-2.c | 2 +- 
gcc/testsuite/gcc.dg/analyzer/pr98918.c | 2 + gcc/testsuite/gcc.dg/analyzer/pr99044-2.c | 2 + .../gcc.dg/analyzer/uninit-pr108806-qemu.c | 2 + .../gcc.dg/analyzer/use-after-free.c | 2 + gcc/testsuite/gcc.dg/plugin/plugin.exp | 6 ++- .../gcc.dg/plugin/taint-CVE-2011-0521-4.c | 4 +- .../plugin/taint-CVE-2011-0521-5-fixed.c | 3 +- .../gcc.dg/plugin/taint-CVE-2011-0521-5.c | 4 +- .../gcc.dg/plugin/taint-CVE-2011-0521-6.c | 4 +- .../gcc.dg/plugin/taint-pr112850-precise.c | 50 ++++++++++++++++++ .../plugin/taint-pr112850-too-complex.c | 51 +++++++++++++++++++ .../plugin/taint-pr112850-unsanitized.c | 50 ++++++++++++++++++ gcc/testsuite/gcc.dg/plugin/taint-pr112850.c | 47 +++++++++++++++++ 51 files changed, 321 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-pr112850-precise.c create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-pr112850-too-complex.c create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-pr112850-unsanitized.c create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-pr112850.c diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index a3c30caf2abe..d0fe5a437880 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -43,7 +43,7 @@ Common Joined UInteger Var(param_analyzer_max_recursion_depth) Init(2) Param The maximum number of times a callsite can appear in a call stack within the analyzer, before terminating analysis of a call that would recurse deeper. -param=analyzer-max-svalue-depth= -Common Joined UInteger Var(param_analyzer_max_svalue_depth) Init(12) Param +Common Joined UInteger Var(param_analyzer_max_svalue_depth) Init(18) Param The maximum depth of a symbolic value, before approximating the value as unknown. -param=analyzer-min-snodes-for-call-summary= @@ -262,6 +262,10 @@ Wanalyzer-use-of-uninitialized-value Common Var(warn_analyzer_use_of_uninitialized_value) Init(1) Warning Warn about code paths in which an uninitialized value is used. 
+Wanalyzer-symbol-too-complex +Common Var(warn_analyzer_symbol_too_complex) Init(0) Warning +Warn if expressions are too complicated for the analyzer to fully track. + Wanalyzer-too-complex Common Var(warn_analyzer_too_complex) Init(0) Warning Warn if the code is too complicated for the analyzer to fully explore. diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index ecd57376b549..38bd308a9a48 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -2043,6 +2043,11 @@ struct null_assignment_sm_context : public sm_context /* No-op. */ } + void clear_all_per_svalue_state () final override + { + /* No-op. */ + } + void on_custom_transition (custom_transition *) final override { } diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 825b3af43fce..d2524e34f586 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -474,6 +474,11 @@ public: m_new_state->m_checker_states[m_sm_idx]->set_global_state (state); } + void clear_all_per_svalue_state () final override + { + m_new_state->m_checker_states[m_sm_idx]->clear_all_per_svalue_state (); + } + void on_custom_transition (custom_transition *transition) final override { transition->impl_transition (&m_eg, diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 9bb81e6ddddc..78f739ef5efa 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -526,6 +526,14 @@ sm_state_map::clear_any_state (const svalue *sval) m_map.remove (sval); } +/* Clear all per-svalue state within this state map. */ + +void +sm_state_map::clear_all_per_svalue_state () +{ + m_map.empty (); +} + /* Set the "global" state within this state map to STATE. 
*/ void diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h index c9b3aa0cbfce..ef1a2ad54a98 100644 --- a/gcc/analyzer/program-state.h +++ b/gcc/analyzer/program-state.h @@ -146,6 +146,7 @@ public: const svalue *origin, const extrinsic_state &ext_state); void clear_any_state (const svalue *sval); + void clear_all_per_svalue_state (); void set_global_state (state_machine::state_t state); state_machine::state_t get_global_state () const; diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index 921edc558681..b631bcb04d04 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -185,6 +185,16 @@ region_model_manager::reject_if_too_complex (svalue *sval) return false; } + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + sval->dump_to_pp (&pp, true); + if (warning_at (input_location, OPT_Wanalyzer_symbol_too_complex, + "symbol too complicated: %qs", + pp_formatted_text (&pp))) + inform (input_location, + "max_depth %i exceeds --param=analyzer-max-svalue-depth=%i", + c.m_max_depth, param_analyzer_max_svalue_depth); + delete sval; return true; } diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index d01e3f03951d..6b5d51c62af9 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -1038,6 +1038,20 @@ taint_state_machine::on_condition (sm_context *sm_ctxt, if (stmt == NULL) return; + if (lhs->get_kind () == SK_UNKNOWN + || rhs->get_kind () == SK_UNKNOWN) + { + /* If we have a comparison against UNKNOWN, then + we've presumably hit the svalue complexity limit, + and we don't know what is being sanitized. + Give up on any taint already found on this execution path. 
*/ + // TODO: warn about this + if (get_logger ()) + get_logger ()->log ("comparison against UNKNOWN; removing all taint"); + sm_ctxt->clear_all_per_svalue_state (); + return; + } + // TODO switch (op) { diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h index 3ff9c2607807..ef63d73a5415 100644 --- a/gcc/analyzer/sm.h +++ b/gcc/analyzer/sm.h @@ -299,6 +299,8 @@ public: virtual state_machine::state_t get_global_state () const = 0; virtual void set_global_state (state_machine::state_t) = 0; + virtual void clear_all_per_svalue_state () = 0; + /* A vfunc for handling custom transitions, such as when registering a signal handler. */ virtual void on_custom_transition (custom_transition *transition) = 0; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index bff3645eedcf..f8d6f799e11a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -491,6 +491,7 @@ Objective-C and Objective-C++ Dialects}. -Wno-analyzer-tainted-divisor -Wno-analyzer-tainted-offset -Wno-analyzer-tainted-size +-Wanalyzer-symbol-too-complex -Wanalyzer-too-complex -Wno-analyzer-undefined-behavior-strtok -Wno-analyzer-unsafe-call-within-signal-handler @@ -10562,6 +10563,19 @@ Enabling this option effectively enables the following warnings: This option is only available if GCC was configured with analyzer support enabled. +@opindex Wanalyzer-symbol-too-complex +@opindex Wno-analyzer-symbol-too-complex +@item -Wanalyzer-symbol-too-complex +If @option{-fanalyzer} is enabled, the analyzer uses various heuristics +to attempt to track the state of memory, but these can be defeated by +sufficiently complicated code. + +By default, the analysis silently stops tracking values of expressions +if they exceed the threshold defined by +@option{--param analyzer-max-svalue-depth=@var{value}}, and falls back +to an imprecise representation for such expressions. +The @option{-Wanalyzer-symbol-too-complex} option warns if this occurs. 
+ @opindex Wanalyzer-too-complex @opindex Wno-analyzer-too-complex @item -Wanalyzer-too-complex diff --git a/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158-2.c b/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158-2.c index 4561e10cafd0..b395623cccac 100644 --- a/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158-2.c +++ b/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158-2.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fanalyzer-call-summaries -Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-fanalyzer-call-summaries -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ /* { dg-skip-if "c++98 has no noreturn attribute" { c++98_only } } */ #ifdef __cplusplus diff --git a/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158.c b/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158.c index d4cf079cef84..de7058362491 100644 --- a/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158.c +++ b/gcc/testsuite/c-c++-common/analyzer/call-summaries-pr107158.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fanalyzer-call-summaries" } */ +/* { dg-additional-options "-fanalyzer-call-summaries -Wno-analyzer-symbol-too-complex" } */ typedef __SIZE_TYPE__ size_t; enum { _ISspace = ((5) < 8 ? ((1 << (5)) << 8) : ((1 << (5)) >> 8)) }; diff --git a/gcc/testsuite/c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c b/gcc/testsuite/c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c index 1d28e10747cd..c4561fcd8a7e 100644 --- a/gcc/testsuite/c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c +++ b/gcc/testsuite/c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c @@ -1,5 +1,7 @@ /* Reduced from haproxy-2.7.1's cfgparse.c. 
*/ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ + typedef __SIZE_TYPE__ size_t; extern int diff --git a/gcc/testsuite/c-c++-common/analyzer/feasibility-3.c b/gcc/testsuite/c-c++-common/analyzer/feasibility-3.c index 2fcd064e801f..06194f85069c 100644 --- a/gcc/testsuite/c-c++-common/analyzer/feasibility-3.c +++ b/gcc/testsuite/c-c++-common/analyzer/feasibility-3.c @@ -1,6 +1,8 @@ /* Reduced and adapted from Linux: fs/proc/inode.c: proc_reg_open (GPL v2.0). */ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ + /* Types. */ typedef unsigned char u8; diff --git a/gcc/testsuite/c-c++-common/analyzer/flex-with-call-summaries.c b/gcc/testsuite/c-c++-common/analyzer/flex-with-call-summaries.c index 45edacf0e53d..963a84bc9ab8 100644 --- a/gcc/testsuite/c-c++-common/analyzer/flex-with-call-summaries.c +++ b/gcc/testsuite/c-c++-common/analyzer/flex-with-call-summaries.c @@ -5,6 +5,7 @@ /* { dg-skip-if "" { "avr-*-*" } } */ /* { dg-additional-options "-fanalyzer-call-summaries" } */ /* { dg-additional-options "-Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ /* A lexical scanner generated by flex */ @@ -885,8 +886,7 @@ static int yy_get_next_buffer (void) } else /* Can't grow it, we don't own it. */ - b->yy_ch_buf = NULL; /* { dg-bogus "leak" "" { xfail *-*-* } } */ - /* TODO: leak false positive: PR analyzer/103546. */ + b->yy_ch_buf = NULL; /* { dg-bogus "leak" "PR analyzer/103546" } */ if ( ! 
b->yy_ch_buf ) YY_FATAL_ERROR( diff --git a/gcc/testsuite/c-c++-common/analyzer/flex-without-call-summaries.c b/gcc/testsuite/c-c++-common/analyzer/flex-without-call-summaries.c index 5369f7685c28..b1c233121378 100644 --- a/gcc/testsuite/c-c++-common/analyzer/flex-without-call-summaries.c +++ b/gcc/testsuite/c-c++-common/analyzer/flex-without-call-summaries.c @@ -886,8 +886,7 @@ static int yy_get_next_buffer (void) } else /* Can't grow it, we don't own it. */ - b->yy_ch_buf = NULL; /* { dg-bogus "leak" "" { xfail *-*-* } } */ - /* TODO: leak false positive: PR analyzer/103546. */ + b->yy_ch_buf = NULL; /* { dg-bogus "leak" "PR analyzer/103546" } */ if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( diff --git a/gcc/testsuite/c-c++-common/analyzer/infinite-recursion-3.c b/gcc/testsuite/c-c++-common/analyzer/infinite-recursion-3.c index 68c4fa396caa..2ae20a1108a7 100644 --- a/gcc/testsuite/c-c++-common/analyzer/infinite-recursion-3.c +++ b/gcc/testsuite/c-c++-common/analyzer/infinite-recursion-3.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-fno-analyzer-call-summaries -Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-fno-analyzer-call-summaries -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ struct node { diff --git a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c index c46ffe91a6b4..c1c8e6f6a39a 100644 --- a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c +++ b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c @@ -1,7 +1,7 @@ /* Reduced from haproxy's src/ssl_sample.c */ /* { dg-require-effective-target ptr_eq_long } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -Wno-analyzer-symbol-too-complex" } */ union sample_value { long long int sint; diff --git
a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c index ef34a76c50d6..c5f1fa42e6f1 100644 --- a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c +++ b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c @@ -1,6 +1,7 @@ /* Reduced from haproxy's src/ssl_sample.c */ /* { dg-require-effective-target ptr_eq_long } */ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ union sample_value { long long int sint; diff --git a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c index 1151d622519b..9dcf7aa31f10 100644 --- a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c +++ b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c @@ -1,4 +1,6 @@ /* Reduced from SoftEtherVPN's src/Cedar/WebUI.c. 
*/ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + #include "../../gcc.dg/analyzer/analyzer-decls.h" typedef int (COMPARE)(void *p1, void *p2); typedef unsigned int UINT; diff --git a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108806-qemu.c b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108806-qemu.c index f7f6923927fc..16ef6574d2f4 100644 --- a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108806-qemu.c +++ b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108806-qemu.c @@ -1,5 +1,7 @@ /* Reduced from qemu-7.2.0's hw/intc/omap_intc.c */ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + #include "../../gcc.dg/analyzer/analyzer-decls.h" typedef unsigned char __uint8_t; diff --git a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108830.c b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108830.c index 0c95148ebd57..1cb1ebe65f96 100644 --- a/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108830.c +++ b/gcc/testsuite/c-c++-common/analyzer/null-deref-pr108830.c @@ -1,6 +1,6 @@ /* Reduced from apr-1.7.0/tables/apr_hash.c: 'apr_hash_merge' */ -/* { dg-additional-options "-Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ #include "../../gcc.dg/analyzer/analyzer-decls.h" diff --git a/gcc/testsuite/c-c++-common/analyzer/pr94596.c b/gcc/testsuite/c-c++-common/analyzer/pr94596.c index 10ea549924e0..0d6240941d7b 100644 --- a/gcc/testsuite/c-c++-common/analyzer/pr94596.c +++ b/gcc/testsuite/c-c++-common/analyzer/pr94596.c @@ -17,6 +17,9 @@ */ #include "../../gcc.dg/analyzer/analyzer-decls.h" + +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + typedef __SIZE_TYPE__ size_t; #ifndef __cplusplus diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-2.c b/gcc/testsuite/c-c++-common/analyzer/strtok-2.c index 0336bf0cfe9d..f34b4a7198a8 100644 --- a/gcc/testsuite/c-c++-common/analyzer/strtok-2.c +++ 
b/gcc/testsuite/c-c++-common/analyzer/strtok-2.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + #include "../../gcc.dg/analyzer/analyzer-decls.h" extern char *strtok (char *str, const char *delim) diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-4.c b/gcc/testsuite/c-c++-common/analyzer/strtok-4.c index b6b7d49e3c3c..793c7fcb7f40 100644 --- a/gcc/testsuite/c-c++-common/analyzer/strtok-4.c +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-4.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ + #include "../../gcc.dg/analyzer/analyzer-decls.h" extern char *strtok (char *str, const char *delim); diff --git a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c index a2e912341d68..a396c643f116 100644 --- a/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c +++ b/gcc/testsuite/c-c++-common/analyzer/strtok-cppreference.c @@ -11,6 +11,8 @@ should be released under an equivalent license so that everyone could benefit from the modified versions. " */ +/* { dg-additional-options " -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ + #define __STDC_WANT_LIB_EXT1__ 0 #include #include diff --git a/gcc/testsuite/gcc.dg/analyzer/analyzer.exp b/gcc/testsuite/gcc.dg/analyzer/analyzer.exp index cedf3c0466fd..ba5aa680fc7a 100644 --- a/gcc/testsuite/gcc.dg/analyzer/analyzer.exp +++ b/gcc/testsuite/gcc.dg/analyzer/analyzer.exp @@ -30,7 +30,7 @@ if [info exists DEFAULT_CFLAGS] then { } # If a testcase doesn't have special options, use these. 
-set DEFAULT_CFLAGS "-fanalyzer -Wanalyzer-too-complex -fanalyzer-call-summaries" +set DEFAULT_CFLAGS "-fanalyzer -Wanalyzer-too-complex -Wanalyzer-symbol-too-complex -fanalyzer-call-summaries" if { [istarget "*-*-darwin*" ] } { # On macOS, system headers redefine by default some macros (memcpy, diff --git a/gcc/testsuite/gcc.dg/analyzer/attr-const-3.c b/gcc/testsuite/gcc.dg/analyzer/attr-const-3.c index fc8527a5d0e7..11238a77a658 100644 --- a/gcc/testsuite/gcc.dg/analyzer/attr-const-3.c +++ b/gcc/testsuite/gcc.dg/analyzer/attr-const-3.c @@ -1,7 +1,7 @@ /* Verify that we handle unknown values passed to __attribute__ ((const)) (by imposing a complexity limit). */ -/* { dg-additional-options "--param analyzer-max-svalue-depth=4" } */ +/* { dg-additional-options "--param analyzer-max-svalue-depth=4 -Wno-analyzer-symbol-too-complex" } */ #include "analyzer-decls.h" diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c index 6e583d0228ff..f59318b428f4 100644 --- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c +++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-pr107072.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target int32plus } */ -/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0" } */ +/* { dg-additional-options "-fanalyzer-call-summaries --param analyzer-min-snodes-for-call-summary=0 -Wno-analyzer-symbol-too-complex" } */ /* There need to be at least two calls to a function for the call-summarization code to be used. diff --git a/gcc/testsuite/gcc.dg/analyzer/doom-s_sound-pr108867.c b/gcc/testsuite/gcc.dg/analyzer/doom-s_sound-pr108867.c index ae58f03d3b32..fdc21a2bec3d 100644 --- a/gcc/testsuite/gcc.dg/analyzer/doom-s_sound-pr108867.c +++ b/gcc/testsuite/gcc.dg/analyzer/doom-s_sound-pr108867.c @@ -1,6 +1,6 @@ /* Reduced from Doom's linuxdoom-1.10/s_sound.c, which is GPLv2 or later. 
*/ -/* { dg-additional-options "-fno-analyzer-call-summaries -Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-fno-analyzer-call-summaries -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ /* { dg-require-effective-target size32plus } */ typedef struct _IO_FILE FILE; diff --git a/gcc/testsuite/gcc.dg/analyzer/explode-4.c b/gcc/testsuite/gcc.dg/analyzer/explode-4.c index 874b1e9c3008..a98dfb56bf50 100644 --- a/gcc/testsuite/gcc.dg/analyzer/explode-4.c +++ b/gcc/testsuite/gcc.dg/analyzer/explode-4.c @@ -3,7 +3,7 @@ conjured_svalues whilst handling a long chain of external function calls. */ -/* { dg-additional-options "-Wno-implicit-function-declaration -Wno-int-conversion -Wno-analyzer-too-complex" } */ +/* { dg-additional-options "-Wno-implicit-function-declaration -Wno-int-conversion -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ #define NULL ((void *)0) typedef unsigned char uint8_t; diff --git a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr102671-1.c b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr102671-1.c index 3fe061bdbd88..65c7bac1f7e9 100644 --- a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr102671-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr102671-1.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target ptr_eq_long } */ -/* { dg-additional-options "-O2 -Wno-shift-count-overflow" } */ +/* { dg-additional-options "-O2 -Wno-shift-count-overflow -Wno-analyzer-symbol-too-complex" } */ struct lisp; union vectorlike_header { long size; }; diff --git a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr105755.c b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr105755.c index 2b0ba292e00c..5375b4dd6f6f 100644 --- a/gcc/testsuite/gcc.dg/analyzer/null-deref-pr105755.c +++ b/gcc/testsuite/gcc.dg/analyzer/null-deref-pr105755.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target int32plus } */ -/* { dg-additional-options "-Wno-analyzer-too-complex -O2" } */ +/* { dg-additional-options "-Wno-analyzer-too-complex 
-Wno-analyzer-symbol-too-complex -O2" } */ typedef long int ptrdiff_t; typedef long int EMACS_INT; diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-curl.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-curl.c index e34b572966e2..d14661cbc6ae 100644 --- a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-curl.c +++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-curl.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -Wno-analyzer-symbol-too-complex" } */ #include /* Reduced from curl lib/smb.c. */ diff --git a/gcc/testsuite/gcc.dg/analyzer/pr101503.c b/gcc/testsuite/gcc.dg/analyzer/pr101503.c index 16faf6eac2f7..cc4d801d8a6c 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr101503.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr101503.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "--param analyzer-max-svalue-depth=0" } */ +/* { dg-additional-options "--param analyzer-max-svalue-depth=0 -Wno-analyzer-symbol-too-complex" } */ int val; diff --git a/gcc/testsuite/gcc.dg/analyzer/pr103892.c b/gcc/testsuite/gcc.dg/analyzer/pr103892.c index d16cd83c472f..a17c3b7e119e 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr103892.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr103892.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ /* C only: C++ FE optimizes argstr_get_word completely away and therefore the number of SN diminishes compared to C, diff --git a/gcc/testsuite/gcc.dg/analyzer/pr94851-4.c b/gcc/testsuite/gcc.dg/analyzer/pr94851-4.c index 2a15a5d7f5bf..a5130c59cb00 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr94851-4.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr94851-4.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -Wno-analyzer-symbol-too-complex" } */ #include diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96860-1.c b/gcc/testsuite/gcc.dg/analyzer/pr96860-1.c index 8f298ec04e71..8be30b3a6dad 100644 --- 
a/gcc/testsuite/gcc.dg/analyzer/pr96860-1.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr96860-1.c @@ -1,5 +1,5 @@ /* { dg-require-effective-target int128 } */ -/* { dg-additional-options "--param analyzer-max-svalue-depth=0" } */ +/* { dg-additional-options "--param analyzer-max-svalue-depth=0 -Wno-analyzer-symbol-too-complex" } */ void x7 (void) { diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96860-2.c b/gcc/testsuite/gcc.dg/analyzer/pr96860-2.c index 90a818cb2836..d12b9a1e1fa6 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr96860-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr96860-2.c @@ -1,4 +1,4 @@ -/* { dg-additional-options "--param analyzer-max-svalue-depth=0" } */ +/* { dg-additional-options "--param analyzer-max-svalue-depth=0 -Wno-analyzer-symbol-too-complex" } */ void x7 (void) { diff --git a/gcc/testsuite/gcc.dg/analyzer/pr98918.c b/gcc/testsuite/gcc.dg/analyzer/pr98918.c index ac626ba1f308..c3bbce3e6ec1 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr98918.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr98918.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + #include struct marker { diff --git a/gcc/testsuite/gcc.dg/analyzer/pr99044-2.c b/gcc/testsuite/gcc.dg/analyzer/pr99044-2.c index fd71d35d7e4e..f7badb92f448 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr99044-2.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr99044-2.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + struct node { struct node *next; diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c b/gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c index 34fe802f4952..092720111286 100644 --- a/gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c +++ b/gcc/testsuite/gcc.dg/analyzer/uninit-pr108806-qemu.c @@ -5,6 +5,8 @@ struct omap_intr_handler_bank_s* bank; */ +/* { dg-additional-options "-Wno-analyzer-symbol-too-complex" } */ + typedef unsigned char __uint8_t; typedef unsigned int __uint32_t; typedef unsigned long int __uint64_t; diff 
--git a/gcc/testsuite/gcc.dg/analyzer/use-after-free.c b/gcc/testsuite/gcc.dg/analyzer/use-after-free.c index d7e4bc2c6cac..76a85f563350 100644 --- a/gcc/testsuite/gcc.dg/analyzer/use-after-free.c +++ b/gcc/testsuite/gcc.dg/analyzer/use-after-free.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-Wno-analyzer-too-complex -Wno-analyzer-symbol-too-complex" } */ + #include #include "analyzer-decls.h" diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index f098a327d319..f0b4bb7a051f 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -162,7 +162,11 @@ set plugin_test_list [list \ taint-CVE-2011-0521-5.c \ taint-CVE-2011-0521-5-fixed.c \ taint-CVE-2011-0521-6.c \ - taint-antipatterns-1.c } \ + taint-antipatterns-1.c \ + taint-pr112850.c \ + taint-pr112850-precise.c \ + taint-pr112850-too-complex.c \ + taint-pr112850-unsanitized.c } \ { analyzer_cpython_plugin.c \ cpython-plugin-test-no-Python-h.c \ cpython-plugin-test-PyList_Append.c \ diff --git a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-4.c b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-4.c index 06b3468fca58..e268a8eab8fc 100644 --- a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-4.c +++ b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-4.c @@ -32,9 +32,9 @@ int test_1(struct file *file, unsigned int cmd, unsigned long arg) if (info->num > 1) return -EINVAL; av7110->ci_slot[info->num].num = info->num; /* { dg-warning "attacker-controlled value" } */ - av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? /* { dg-warning "attacker-controlled value" } */ + av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? 
CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); /* { dg-warning "attacker-controlled value" } */ + memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); } copy_to_user((void __user *)arg, parg, sizeof(sbuf)); diff --git a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c index 076ada3a20a8..b39e693da632 100644 --- a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c +++ b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c @@ -39,8 +39,7 @@ int test_1(struct file *file, unsigned int cmd, unsigned long arg) av7110->ci_slot[info->num].num = info->num; av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); /* { dg-bogus "use of attacker-controlled value in array lookup without bounds checking" "" { xfail *-*-* } } */ - // FIXME: why the above false +ve? + memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); /* { dg-bogus "use of attacker-controlled value in array lookup without bounds checking" } */ } copy_to_user((void __user *)arg, &sbuf, sizeof(sbuf)); diff --git a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5.c b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5.c index e27ee469df8f..fe216c0a3c44 100644 --- a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5.c +++ b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-5.c @@ -37,9 +37,9 @@ int test_1(struct file *file, unsigned int cmd, unsigned long arg) __analyzer_dump_state ("taint", info->num); /* { dg-warning "has_ub" } */ av7110->ci_slot[info->num].num = info->num; /* { dg-warning "use of attacker-controlled value '\\*info\\.num' in array lookup without checking for negative" } */ - av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? 
/* { dg-warning "use of attacker-controlled value '\\*info\\.num' in array lookup without checking for negative" } */ + av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); /* { dg-warning "use of attacker-controlled value in array lookup without bounds checking" } */ + memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); } copy_to_user((void __user *)arg, &sbuf, sizeof(sbuf)); diff --git a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-6.c b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-6.c index fea70ee57617..5b68de324708 100644 --- a/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-6.c +++ b/gcc/testsuite/gcc.dg/plugin/taint-CVE-2011-0521-6.c @@ -34,9 +34,9 @@ int test_1(struct file *file, unsigned int cmd, unsigned long arg) //__analyzer_break (); av7110->ci_slot[info->num].num = info->num; /* { dg-warning "use of attacker-controlled value '\\*info\\.num' in array lookup without bounds checking" } */ - av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? /* { dg-warning "use of attacker-controlled value '\\*info\\.num' in array lookup without bounds checking" } */ + av7110->ci_slot[info->num].type = FW_CI_LL_SUPPORT(av7110->arm_app) ? CA_CI_LINK : CA_CI; - memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); /* { dg-warning "use of attacker-controlled value in array lookup without bounds checking" } */ + memcpy(info, &av7110->ci_slot[info->num], sizeof(ca_slot_info_t)); } copy_to_user((void __user *)arg, &sbuf, sizeof(sbuf)); diff --git a/gcc/testsuite/gcc.dg/plugin/taint-pr112850-precise.c b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-precise.c new file mode 100644 index 000000000000..558f0fb1a8ae --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-precise.c @@ -0,0 +1,50 @@ +/* Reduced from false positive in Linux kernel in sound/core/rawmidi.c. 
+ + Use a value of --param=analyzer-max-svalue-depth= high enough to avoid + UNKNOWN svalues; make sure we don't get false positives with this case. */ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer -O2 -Wanalyzer-symbol-too-complex --param=analyzer-max-svalue-depth=13" } */ +/* { dg-require-effective-target analyzer } */ + +typedef unsigned long __kernel_ulong_t; +typedef __kernel_ulong_t __kernel_size_t; +typedef __kernel_size_t size_t; +typedef unsigned int gfp_t; + +extern unsigned long copy_from_user(void* to, const void* from, unsigned long n); + +extern +__attribute__((__alloc_size__(1))) +__attribute__((__malloc__)) void* +kvzalloc(size_t size, gfp_t flags); + +struct snd_rawmidi_params +{ + int stream; + size_t buffer_size; +}; + +char *newbuf; + +static int +resize_runtime_buffer(struct snd_rawmidi_params* params) +{ + if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) /* { dg-bogus "symbol too complicated" } */ + return -22; + newbuf = kvzalloc(params->buffer_size, /* { dg-bogus "use of attacker-controlled value '\\*params.buffer_size' as allocation size without upper-bounds checking" "PR analyzer/112850" } */ + (((gfp_t)(0x400u | 0x800u)) | ((gfp_t)0x40u) | ((gfp_t)0x80u))); + if (!newbuf) + return -12; + return 0; +} + +long +snd_rawmidi_ioctl(unsigned long arg) +{ + void* argp = (void*)arg; + struct snd_rawmidi_params params; + if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params))) + return -14; + return resize_runtime_buffer(&params); +} diff --git a/gcc/testsuite/gcc.dg/plugin/taint-pr112850-too-complex.c b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-too-complex.c new file mode 100644 index 000000000000..2a4ee8197c38 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-too-complex.c @@ -0,0 +1,51 @@ +/* Reduced from false positive in Linux kernel in sound/core/rawmidi.c.
+ + With --param=analyzer-max-svalue-depth=12, the value being compared + at the sanitization is too complex and becomes UNKNOWN; make sure + this doesn't lead to a false positive. */ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer -O2 -Wanalyzer-symbol-too-complex --param=analyzer-max-svalue-depth=12" } */ +/* { dg-require-effective-target analyzer } */ + +typedef unsigned long __kernel_ulong_t; +typedef __kernel_ulong_t __kernel_size_t; +typedef __kernel_size_t size_t; +typedef unsigned int gfp_t; + +extern unsigned long copy_from_user(void* to, const void* from, unsigned long n); + +extern +__attribute__((__alloc_size__(1))) +__attribute__((__malloc__)) void* +kvzalloc(size_t size, gfp_t flags); + +struct snd_rawmidi_params +{ + int stream; + size_t buffer_size; +}; + +char *newbuf; + +static int +resize_runtime_buffer(struct snd_rawmidi_params* params) +{ + if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) /* { dg-warning "symbol too complicated" } */ + return -22; + newbuf = kvzalloc(params->buffer_size, /* { dg-bogus "use of attacker-controlled value '\\*params.buffer_size' as allocation size without upper-bounds checking" "PR analyzer/112850" } */ + (((gfp_t)(0x400u | 0x800u)) | ((gfp_t)0x40u) | ((gfp_t)0x80u))); + if (!newbuf) + return -12; + return 0; +} + +long +snd_rawmidi_ioctl(unsigned long arg) +{ + void* argp = (void*)arg; + struct snd_rawmidi_params params; + if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params))) + return -14; + return resize_runtime_buffer(&params); +} diff --git a/gcc/testsuite/gcc.dg/plugin/taint-pr112850-unsanitized.c b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-unsanitized.c new file mode 100644 index 000000000000..e46fcb6c8a10 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/taint-pr112850-unsanitized.c @@ -0,0 +1,50 @@ +/* Reduced from false positive in Linux kernel in sound/core/rawmidi.c, + with sanitization removed to make it a true positive.
+ + Verify that we detect this (with default params). */ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer -O2 -Wanalyzer-too-complex" } */ +/* { dg-require-effective-target analyzer } */ + +typedef unsigned long __kernel_ulong_t; +typedef __kernel_ulong_t __kernel_size_t; +typedef __kernel_size_t size_t; +typedef unsigned int gfp_t; + +extern unsigned long copy_from_user(void* to, const void* from, unsigned long n); + +extern +__attribute__((__alloc_size__(1))) +__attribute__((__malloc__)) void* +kvzalloc(size_t size, gfp_t flags); + +struct snd_rawmidi_params +{ + int stream; + size_t buffer_size; +}; + +char *newbuf; + +static int +resize_runtime_buffer(struct snd_rawmidi_params* params) +{ + /* No sanitization, so we should complain. */ + + newbuf = kvzalloc(params->buffer_size, /* { dg-warning "use of attacker-controlled value '\\*params.buffer_size' as allocation size without upper-bounds checking" "PR analyzer/112850" } */ + (((gfp_t)(0x400u | 0x800u)) | ((gfp_t)0x40u) | ((gfp_t)0x80u))); + if (!newbuf) + return -12; + return 0; +} + +long +snd_rawmidi_ioctl(unsigned long arg) +{ + void* argp = (void*)arg; + struct snd_rawmidi_params params; + if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params))) + return -14; + return resize_runtime_buffer(&params); +} diff --git a/gcc/testsuite/gcc.dg/plugin/taint-pr112850.c b/gcc/testsuite/gcc.dg/plugin/taint-pr112850.c new file mode 100644 index 000000000000..6fa1d0f9bd4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/taint-pr112850.c @@ -0,0 +1,47 @@ +/* Reduced from false positive in Linux kernel in sound/core/rawmidi.c.
*/ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer -O2 -Wanalyzer-symbol-too-complex" } */ +/* { dg-require-effective-target analyzer } */ + +typedef unsigned long __kernel_ulong_t; +typedef __kernel_ulong_t __kernel_size_t; +typedef __kernel_size_t size_t; +typedef unsigned int gfp_t; + +extern unsigned long copy_from_user(void* to, const void* from, unsigned long n); + +extern +__attribute__((__alloc_size__(1))) +__attribute__((__malloc__)) void* +kvzalloc(size_t size, gfp_t flags); + +struct snd_rawmidi_params +{ + int stream; + size_t buffer_size; +}; + +char *newbuf; + +static int +resize_runtime_buffer(struct snd_rawmidi_params* params) +{ + if (params->buffer_size < 32 || params->buffer_size > 1024L * 1024L) /* { dg-bogus "symbol too complicated" } */ + return -22; + newbuf = kvzalloc(params->buffer_size, /* { dg-bogus "use of attacker-controlled value '\\*params.buffer_size' as allocation size without upper-bounds checking" "PR analyzer/112850" } */ + (((gfp_t)(0x400u | 0x800u)) | ((gfp_t)0x40u) | ((gfp_t)0x80u))); + if (!newbuf) + return -12; + return 0; +} + +long +snd_rawmidi_ioctl(unsigned long arg) +{ + void* argp = (void*)arg; + struct snd_rawmidi_params params; + if (copy_from_user(&params, argp, sizeof(struct snd_rawmidi_params))) + return -14; + return resize_runtime_buffer(&params); +} From e21b2caf6da7efbc87e7140e5472538c2349721c Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 14 Dec 2022 10:10:19 +0800 Subject: [PATCH 013/311] [APX NDD] Support Intel APX NDD for legacy add insn APX NDD provides an extra destination register operand for several gpr related legacy insns, so a new alternative can be adopted to operand1 with "r" constraint. This first patch supports NDD for add instruction, and keeps using lea when all operands are registers since lea has a shorter encoding. For add operations containing mem NDD will be adopted to save an extra move.
In legacy x86 binary operation expand it will force operands[0] and operands[1] to be the same so add a helper function to allow NDD form pattern that operands[0] and operands[1] can be different. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_fixup_binary_operands): Add new use_ndd flag to check whether ndd can be used for this binop and adjust operand emit. (ix86_binary_operator_ok): Likewise. (ix86_expand_binary_operator): Likewise, and avoid postreload expand generating lea pattern when use_ndd is explicitly passed. * config/i386/i386-options.cc (ix86_option_override_internal): Prohibit apx subfeatures when not in 64bit mode. * config/i386/i386-protos.h (ix86_binary_operator_ok): Add use_ndd flag. (ix86_fixup_binary_operands): Likewise. (ix86_expand_binary_operator): Likewise. * config/i386/i386.md (*add_1): Extend with new alternatives to support NDD, and adjust output template. (*addhi_1): Likewise. (*addqi_1): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: New test. --- gcc/config/i386/i386-expand.cc | 19 ++--- gcc/config/i386/i386-options.cc | 2 + gcc/config/i386/i386-protos.h | 6 +- gcc/config/i386/i386.md | 102 ++++++++++++++---------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 21 +++++ 5 files changed, 96 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 4bd7d4f39c82..3ecda989cf8c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1260,14 +1260,14 @@ ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, return false; } - /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the destination to use for the operation. If different from the true - destination in operands[0], a copy operation will be required. */ + destination in operands[0], a copy operation will be required except + under TARGET_APX_NDD.
*/ rtx ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, - rtx operands[]) + rtx operands[], bool use_ndd) { rtx dst = operands[0]; rtx src1 = operands[1]; @@ -1307,7 +1307,7 @@ ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, src1 = force_reg (mode, src1); /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) + if (!use_ndd && MEM_P (src1) && !rtx_equal_p (dst, src1)) src1 = force_reg (mode, src1); /* Improve address combine. */ @@ -1338,11 +1338,11 @@ ix86_fixup_binary_operands_no_copy (enum rtx_code code, void ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) + rtx operands[], bool use_ndd) { rtx src1, src2, dst, op, clob; - dst = ix86_fixup_binary_operands (code, mode, operands); + dst = ix86_fixup_binary_operands (code, mode, operands, use_ndd); src1 = operands[1]; src2 = operands[2]; @@ -1352,7 +1352,8 @@ ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, if (reload_completed && code == PLUS - && !rtx_equal_p (dst, src1)) + && !rtx_equal_p (dst, src1) + && !use_ndd) { /* This is going to be an LEA; avoid splitting it later. */ emit_insn (op); @@ -1451,7 +1452,7 @@ ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, bool ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, - rtx operands[3]) + rtx operands[3], bool use_ndd) { rtx dst = operands[0]; rtx src1 = operands[1]; @@ -1475,7 +1476,7 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, return false; /* Source 1 cannot be a non-matching memory. */ - if (MEM_P (src1) && !rtx_equal_p (dst, src1)) + if (!use_ndd && MEM_P (src1) && !rtx_equal_p (dst, src1)) /* Support "andhi/andsi/anddi" as a zero-extending move. 
*/ return (code == AND && (mode == HImode diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index f86ad332aadb..7d0a253e07f9 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2129,6 +2129,8 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_APX_F && !TARGET_64BIT) error ("%<-mapxf%> is not supported for 32-bit code"); + else if (opts->x_ix86_apx_features != apx_none && !TARGET_64BIT) + error ("%<-mapx-features=%> option is not supported for 32-bit code"); if (TARGET_UINTR && !TARGET_64BIT) error ("%<-muintr%> not supported for 32-bit code"); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 28d0eab11d59..a9d0c568bba1 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -108,14 +108,14 @@ extern void ix86_expand_move (machine_mode, rtx[]); extern void ix86_expand_vector_move (machine_mode, rtx[]); extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]); extern rtx ix86_fixup_binary_operands (enum rtx_code, - machine_mode, rtx[]); + machine_mode, rtx[], bool = false); extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, machine_mode, rtx[]); extern void ix86_expand_binary_operator (enum rtx_code, - machine_mode, rtx[]); + machine_mode, rtx[], bool = false); extern void ix86_expand_vector_logical_operator (enum rtx_code, machine_mode, rtx[]); -extern bool ix86_binary_operator_ok (enum rtx_code, machine_mode, rtx[3]); +extern bool ix86_binary_operator_ok (enum rtx_code, machine_mode, rtx[3], bool = false); extern bool ix86_avoid_lea_for_add (rtx_insn *, rtx[]); extern bool ix86_use_lea_for_mov (rtx_insn *, rtx[]); extern bool ix86_avoid_lea_for_addr (rtx_insn *, rtx[]); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index df7f91723813..a5b123a51bdc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -562,7 +562,7 @@ ;; Used to control the "enabled" attribute on a per-instruction 
basis. (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, - x64_avx,x64_avx512bw,x64_avx512dq,aes, + x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, @@ -960,6 +960,8 @@ (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL") (eq_attr "isa" "vpclmulqdqvl") (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL") + (eq_attr "isa" "apx_ndd") + (symbol_ref "TARGET_APX_NDD") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") @@ -6288,7 +6290,8 @@ (plus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") (match_operand:SDWIM 2 "")))] "" - "ix86_expand_binary_operator (PLUS, mode, operands); DONE;") + "ix86_expand_binary_operator (PLUS, mode, operands, + TARGET_APX_NDD); DONE;") (define_insn_and_split "*add3_doubleword" [(set (match_operand: 0 "nonimmediate_operand" "=ro,r") @@ -6415,26 +6418,29 @@ "split_double_mode (mode, &operands[0], 1, &operands[0], &operands[5]);") (define_insn "*add_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r") (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r") - (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le"))) + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r") + (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,re,BM"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, mode, operands)" + "ix86_binary_operator_ok (PLUS, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: return "#"; case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) - return "inc{}\t%0"; + return use_ndd ? 
"inc{}\t{%1, %0|%0, %1}" + : "inc{}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{}\t%0"; + return use_ndd ? "dec{}\t{%1, %0|%0, %1}" + : "dec{}\t%0"; } default: @@ -6443,14 +6449,16 @@ if (which_alternative == 2) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], mode)) - return "sub{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" + : "sub{}\t{%2, %0|%0, %2}"; - return "add{}\t{%2, %0|%0, %2}"; + return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" + : "add{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") + (set (attr "type") (cond [(eq_attr "alternative" "3") (const_string "lea") (match_operand:SWI48 2 "incdec_operand") @@ -6519,25 +6527,26 @@ (set_attr "mode" "SI")]) (define_insn "*addhi_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp") - (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp") - (match_operand:HI 2 "general_operand" "rn,m,0,ln"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r,Yp,r,r") + (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r,Yp,rm,r") + (match_operand:HI 2 "general_operand" "rn,m,0,ln,rn,m"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, HImode, operands)" + "ix86_binary_operator_ok (PLUS, HImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: return "#"; case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) - return "inc{w}\t%0"; + return use_ndd ? "inc{w}\t{%1, %0|%0, %1}" : "inc{w}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{w}\t%0"; + return use_ndd ? 
"dec{w}\t{%1, %0|%0, %1}" : "dec{w}\t%0"; } default: @@ -6546,14 +6555,16 @@ if (which_alternative == 2) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], HImode)) - return "sub{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "sub{w}\t{%2, %1, %0|%0, %1, %2}" + : "sub{w}\t{%2, %0|%0, %2}"; - return "add{w}\t{%2, %0|%0, %2}"; + return use_ndd ? "add{w}\t{%2, %1, %0|%0, %1, %2}" + : "add{w}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") + (set (attr "type") (cond [(eq_attr "alternative" "3") (const_string "lea") (match_operand:HI 2 "incdec_operand") @@ -6565,30 +6576,35 @@ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) - (set_attr "mode" "HI,HI,HI,SI")]) + (set_attr "mode" "HI,HI,HI,SI,HI,HI")]) (define_insn "*addqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp") - (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp") - (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,q,r,r,Yp,r,r") + (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,q,0,r,Yp,rm,r") + (match_operand:QI 2 "general_operand" "qn,m,0,rn,0,ln,rn,m"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, QImode, operands)" + "ix86_binary_operator_ok (PLUS, QImode, operands, TARGET_APX_NDD)" { bool widen = (get_attr_mode (insn) != MODE_QI); - + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: return "#"; case TYPE_INCDEC: - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (operands[2] == const1_rtx) - return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; + if (use_ndd) + return "inc{b}\t{%1, %0|%0, %1}"; + else + return widen ? "inc{l}\t%k0" : "inc{b}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return widen ? 
"dec{l}\t%k0" : "dec{b}\t%0"; + if (use_ndd) + return "dec{b}\t{%1, %0|%0, %1}"; + else + return widen ? "dec{l}\t%k0" : "dec{b}\t%0"; } default: @@ -6597,21 +6613,23 @@ if (which_alternative == 2 || which_alternative == 4) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], QImode)) { - if (widen) - return "sub{l}\t{%2, %k0|%k0, %2}"; + if (use_ndd) + return "sub{b}\t{%2, %1, %0|%0, %1, %2}"; else - return "sub{b}\t{%2, %0|%0, %2}"; + return widen ? "sub{l}\t{%2, %k0|%k0, %2}" + : "sub{b}\t{%2, %0|%0, %2}"; } - if (widen) - return "add{l}\t{%k2, %k0|%k0, %k2}"; + if (use_ndd) + return "add{b}\t{%2, %1, %0|%0, %1, %2}"; else - return "add{b}\t{%2, %0|%0, %2}"; + return widen ? "add{l}\t{%k2, %k0|%k0, %k2}" + : "add{b}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,*,*,*,apx_ndd,apx_ndd") + (set (attr "type") (cond [(eq_attr "alternative" "5") (const_string "lea") (match_operand:QI 2 "incdec_operand") @@ -6623,7 +6641,7 @@ (and (eq_attr "type" "alu") (match_operand 2 "const128_operand")) (const_string "1") (const_string "*"))) - (set_attr "mode" "QI,QI,QI,SI,SI,SI") + (set_attr "mode" "QI,QI,QI,SI,SI,SI,QI,QI") ;; Potential partial reg stall on alternatives 3 and 4. (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "3,4") diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c new file mode 100644 index 000000000000..056a323a647b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-mapxf -march=x86-64 -O2" } */ +/* { dg-final { scan-assembler-not "movl"} } */ + +int foo (int *a) +{ + int b = *a - 1; + return b; +} + +int foo2 (int a, int b) +{ + int c = a + b; + return c; +} + +int foo3 (int *a, int b) +{ + int c = *a + b; + return c; +} From 7abcef725e40589553a079df9258ae094b811751 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 18 Jan 2023 17:23:29 +0800 Subject: [PATCH 014/311] [APX NDD] Support APX NDD for optimization patterns of add gcc/ChangeLog: * config/i386/i386.md: (addsi_1_zext): Add new alternatives for NDD and adjust output templates. (*add_2): Likewise. (*addsi_2_zext): Likewise. (*add_3): Likewise. (*addsi_3_zext): Likewise. (*adddi_4): Likewise. (*add_4): Likewise. (*add_5): Likewise. (*addv4): Likewise. (*addv4_1): Likewise. (*add3_cconly_overflow_1): Likewise. (*add3_cc_overflow_1): Likewise. (*addsi3_zext_cc_overflow_1): Likewise. (*add3_cconly_overflow_2): Likewise. (*add3_cc_overflow_2): Likewise. (*addsi3_zext_cc_overflow_2): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add more test. --- gcc/config/i386/i386.md | 310 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 53 ++-- 2 files changed, 232 insertions(+), 131 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a5b123a51bdc..1e8461833478 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6479,13 +6479,15 @@ ;; patterns constructed from addsi_1 to match. 
(define_insn "addsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r,r") (zero_extend:DI - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r") - (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le")))) + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,le,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -6493,11 +6495,13 @@ case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{l}\t%k0"; + return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" + : "inc{l}\t%k0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{l}\t%k0"; + return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" + : "dec{l}\t%k0"; } default: @@ -6507,12 +6511,15 @@ std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], SImode)) - return "sub{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? "sub{l}\t{%2 ,%1, %k0|%k0, %1, %2}" + : "sub{l}\t{%2, %k0|%k0, %2}"; - return "add{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"add{l}\t{%2 ,%1, %k0|%k0, %1, %2}" + : "add{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd") + (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") (match_operand:SI 2 "incdec_operand") @@ -6814,37 +6821,42 @@ [(set (reg FLAGS_REG) (compare (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0,") - (match_operand:SWI 2 "" ",,0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,,rm,r") + (match_operand:SWI 2 "" ",,0,r,")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,,r,r") (plus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, mode, operands)" + && ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{}\t%0"; + return use_ndd ? "inc{}\t{%1, %0|%0, %1}" + : "inc{}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{}\t%0"; + return use_ndd ? "dec{}\t{%1, %0|%0, %1}" + : "dec{}\t%0"; } default: if (which_alternative == 2) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], mode)) - return "sub{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" + : "sub{}\t{%2, %0|%0, %2}"; - return "add{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"add{}\t{%2, %1, %0|%0, %1, %2}" + : "add{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd") + (set (attr "type") (if_then_else (match_operand:SWI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -6859,23 +6871,26 @@ (define_insn "*addsi_2_zext" [(set (reg FLAGS_REG) (compare - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r") - (match_operand:SI 2 "x86_64_general_operand" "rBMe,0")) + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r,r") + (set (match_operand:DI 0 "register_operand" "=r,r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (PLUS, SImode, operands)" + && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{l}\t%k0"; + return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" + : "inc{l}\t%k0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{l}\t%k0"; + return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" + : "dec{l}\t%k0"; } default: @@ -6883,12 +6898,15 @@ std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], SImode)) - return "sub{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sub{l}\t{%2, %k0|%k0, %2}"; - return "add{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"add{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "add{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set (attr "type") (if_then_else (match_operand:SI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -6902,35 +6920,40 @@ (define_insn "*add_3" [(set (reg FLAGS_REG) (compare - (neg:SWI (match_operand:SWI 2 "" ",0")) - (match_operand:SWI 1 "nonimmediate_operand" "%0,"))) - (clobber (match_scratch:SWI 0 "=,"))] + (neg:SWI (match_operand:SWI 2 "" ",0,,re")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,,r,rm"))) + (clobber (match_scratch:SWI 0 "=,,r,r"))] "ix86_match_ccmode (insn, CCZmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{}\t%0"; + return use_ndd ? "inc{}\t{%1, %0|%0, %1}" + : "inc{}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{}\t%0"; + return use_ndd ? "dec{}\t{%1, %0|%0, %1}" + : "dec{}\t%0"; } default: if (which_alternative == 1) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], mode)) - return "sub{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" + : "sub{}\t{%2, %0|%0, %2}"; - return "add{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"add{}\t{%2, %1, %0|%0, %1, %2}" + : "add{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set (attr "type") (if_then_else (match_operand:SWI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -6945,22 +6968,23 @@ (define_insn "*addsi_3_zext" [(set (reg FLAGS_REG) (compare - (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0")) - (match_operand:SI 1 "nonimmediate_operand" "%0,r"))) - (set (match_operand:DI 0 "register_operand" "=r,r") + (neg:SI (match_operand:SI 2 "x86_64_general_operand" "rBMe,0,rBMe,re")) + (match_operand:SI 1 "nonimmediate_operand" "%0,r,r,rm"))) + (set (match_operand:DI 0 "register_operand" "=r,r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCZmode) - && ix86_binary_operator_ok (PLUS, SImode, operands)" + && ix86_binary_operator_ok (PLUS, SImode, operands, TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{l}\t%k0"; + return use_ndd ? "inc{l}\t{%1, %k0|%k0, %1}" : "inc{l}\t%k0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{l}\t%k0"; + return use_ndd ? "dec{l}\t{%1, %k0|%k0, %1}" : "dec{l}\t%k0"; } default: @@ -6968,12 +6992,15 @@ std::swap (operands[1], operands[2]); if (x86_maybe_negate_const_int (&operands[2], SImode)) - return "sub{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? "sub{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sub{l}\t{%2, %k0|%k0, %2}"; - return "add{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"add{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "add{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set (attr "type") (if_then_else (match_operand:SI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -6994,31 +7021,35 @@ (define_insn "*adddi_4" [(set (reg FLAGS_REG) (compare - (match_operand:DI 1 "nonimmediate_operand" "0") - (match_operand:DI 2 "x86_64_immediate_operand" "e"))) - (clobber (match_scratch:DI 0 "=r"))] + (match_operand:DI 1 "nonimmediate_operand" "0,rm") + (match_operand:DI 2 "x86_64_immediate_operand" "e,e"))) + (clobber (match_scratch:DI 0 "=r,r"))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGCmode)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == constm1_rtx) - return "inc{q}\t%0"; + return use_ndd ? "inc{q}\t{%1, %0|%0, %1}" : "inc{q}\t%0"; else { gcc_assert (operands[2] == const1_rtx); - return "dec{q}\t%0"; + return use_ndd ? "dec{q}\t{%1, %0|%0, %1}" : "dec{q}\t%0"; } default: if (x86_maybe_negate_const_int (&operands[2], DImode)) - return "add{q}\t{%2, %0|%0, %2}"; + return use_ndd ? "add{q}\t{%2, %1, %0|%0, %1, %2}" + : "add{q}\t{%2, %0|%0, %2}"; - return "sub{q}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sub{q}\t{%2, %1, %0|%0, %1, %2}" + : "sub{q}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") (if_then_else (match_operand:DI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -7039,30 +7070,36 @@ (define_insn "*add_4" [(set (reg FLAGS_REG) (compare - (match_operand:SWI124 1 "nonimmediate_operand" "0") + (match_operand:SWI124 1 "nonimmediate_operand" "0,rm") (match_operand:SWI124 2 "const_int_operand"))) - (clobber (match_scratch:SWI124 0 "="))] + (clobber (match_scratch:SWI124 0 "=,r"))] "ix86_match_ccmode (insn, CCGCmode)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == constm1_rtx) - return "inc{}\t%0"; + return use_ndd ? "inc{}\t{%1, %0|%0, %1}" + : "inc{}\t%0"; else { gcc_assert (operands[2] == const1_rtx); - return "dec{}\t%0"; + return use_ndd ? "dec{}\t{%1, %0|%0, %1}" + : "dec{}\t%0"; } default: if (x86_maybe_negate_const_int (&operands[2], mode)) - return "add{}\t{%2, %0|%0, %2}"; + return use_ndd ? "add{}\t{%2, %1, %0|%0, %1, %2}" + : "add{}\t{%2, %0|%0, %2}"; - return "sub{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sub{}\t{%2, %1, %0|%0, %1, %2}" + : "sub{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") (if_then_else (match_operand: 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -7077,36 +7114,41 @@ [(set (reg FLAGS_REG) (compare (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,") - (match_operand:SWI 2 "" ",0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,,r,rm") + (match_operand:SWI 2 "" ",0,,re")) (const_int 0))) - (clobber (match_scratch:SWI 0 "=,"))] + (clobber (match_scratch:SWI 0 "=,,r,r"))] "ix86_match_ccmode (insn, CCGOCmode) && !(MEM_P (operands[1]) && MEM_P (operands[2]))" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_INCDEC: if (operands[2] == const1_rtx) - return "inc{}\t%0"; + return use_ndd ? "inc{}\t{%1, %0|%0, %1}" + : "inc{}\t%0"; else { gcc_assert (operands[2] == constm1_rtx); - return "dec{}\t%0"; + return use_ndd ? "dec{}\t{%1, %0|%0, %1}" + : "dec{}\t%0"; } default: if (which_alternative == 1) std::swap (operands[1], operands[2]); - gcc_assert (rtx_equal_p (operands[0], operands[1])); if (x86_maybe_negate_const_int (&operands[2], mode)) - return "sub{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sub{}\t{%2, %1, %0|%0, %1, %2}" + : "sub{}\t{%2, %0|%0, %2}"; - return "add{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"add{}\t{%2, %1, %0|%0, %1, %2}" + : "add{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set (attr "type") (if_then_else (match_operand:SWI 2 "incdec_operand") (const_string "incdec") (const_string "alu"))) @@ -7319,35 +7361,43 @@ [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) (sign_extend: - (match_operand:SWI 2 "" "We,m"))) + (match_operand:SWI 2 "" "We,m,rWe,m"))) (sign_extend: (plus:SWI (match_dup 1) (match_dup 2))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "addv4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "0")) + (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (match_operand: 3 "const_int_operand")) (sign_extend: (plus:SWI (match_dup 1) - (match_operand:SWI 2 "x86_64_immediate_operand" ""))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (match_operand:SWI 2 "x86_64_immediate_operand" ","))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (plus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands) + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[3])" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + 
[(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") @@ -9190,27 +9240,36 @@ [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0") - (match_operand:SWI 2 "" "")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm") + (match_operand:SWI 2 "" ",,re")) (match_dup 1))) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r,r"))] "!(MEM_P (operands[1]) && MEM_P (operands[2]))" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "@add3_cc_overflow_1" [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI 2 "" ",")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r") + (match_operand:SWI 2 "" ",,r,")) (match_dup 1))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_peephole2 @@ -9255,55 +9314,74 @@ [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SI - (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")) + (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")) (match_dup 1))) - (set (match_operand:DI 0 
"register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" - "add{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, + TARGET_APX_NDD)" + "@ + add{l}\t{%2, %k0|%k0, %2} + add{l}\t{%2, %1, %k0|%k0, %1, %2} + add{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*add3_cconly_overflow_2" [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0") - (match_operand:SWI 2 "" "")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,r,rm") + (match_operand:SWI 2 "" ",,re")) (match_dup 2))) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r,r"))] "!(MEM_P (operands[1]) && MEM_P (operands[2]))" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*add3_cc_overflow_2" [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI 2 "" ",")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r") + (match_operand:SWI 2 "" ",,r,")) (match_dup 2))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "add{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %0|%0, %2} + add{}\t{%2, %1, %0|%0, %1, %2} + add{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") 
(set_attr "mode" "")]) (define_insn "*addsi3_zext_cc_overflow_2" [(set (reg:CCC FLAGS_REG) (compare:CCC (plus:SI - (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")) + (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")) (match_dup 2))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))] - "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" - "add{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, + TARGET_APX_NDD)" + "@ + add{l}\t{%2, %k0|%k0, %2} + add{l}\t{%2, %1, %k0|%k0, %1, %2} + add{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn_and_split "*add3_doubleword_cc_overflow_1" diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 056a323a647b..c1049022f2aa 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -2,20 +2,43 @@ /* { dg-options "-mapxf -march=x86-64 -O2" } */ /* { dg-final { scan-assembler-not "movl"} } */ -int foo (int *a) -{ - int b = *a - 1; - return b; -} +#define FOO(TYPE, OP_NAME, OP) \ +TYPE \ +__attribute__ ((noipa)) \ +foo_##OP_NAME##_##TYPE (TYPE *a) \ +{ \ + TYPE b = *a OP 1; \ + return b; \ +} -int foo2 (int a, int b) -{ - int c = a + b; - return c; -} +#define FOO1(TYPE, OP_NAME, OP) \ +TYPE \ +__attribute__ ((noipa)) \ +foo1_##OP_NAME##_##TYPE (TYPE a, TYPE b) \ +{ \ + TYPE c = a OP b; \ + return c; \ +} + +#define FOO2(TYPE, OP_NAME, OP) \ +TYPE \ +__attribute__ ((noipa)) \ +foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \ +{ \ + TYPE c = *a OP b; \ + return c; \ +} + +FOO (char, add, +) +FOO1 (char, add, +) +FOO2 (char, add, +) +FOO (short, add, +) +FOO1 
(short, add, +) +FOO2 (short, add, +) +FOO (int, add, +) +FOO1 (int, add, +) +FOO2 (int, add, +) +FOO (long, add, +) +FOO1 (long, add, +) +FOO2 (long, add, +) -int foo3 (int *a, int b) -{ - int c = *a + b; - return c; -} From d564198f960a2f5994dde3f6b83d7a62021e49c3 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Mon, 13 Nov 2023 18:49:07 +0800 Subject: [PATCH 015/311] [APX NDD] Disable seg_prefixed memory usage for NDD add NDD uses evex prefix, so when segment prefix is also applied, the instruction could exceed its 15-byte limit, especially when adding immediates. This could happen when "e" constraint accepts any UNSPEC_TPOFF/UNSPEC_NTPOFF constant and it will add the offset to segment register, which will be encoded using segment prefix. Disable those *POFF constant usage in NDD add alternatives with new constraint. gcc/ChangeLog: * config/i386/constraints.md (je): New constraint. * config/i386/i386-protos.h (x86_poff_operand_p): New prototype. * config/i386/i386.cc (x86_poff_operand_p): New function to check any *POFF constant in operand. * config/i386/i386.md (*add_1): Split out je alternative for add. --- gcc/config/i386/constraints.md | 5 +++++ gcc/config/i386/i386-protos.h | 1 + gcc/config/i386/i386.cc | 25 +++++++++++++++++++++++++ gcc/config/i386/i386.md | 8 ++++---- 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index cbee31fa40a0..f4c3c3dd9522 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -433,3 +433,8 @@ (define_register_constraint "jc" "TARGET_APX_EGPR && !TARGET_AVX ? 
GENERAL_GPR16 : GENERAL_REGS") + +(define_constraint "je" + "@internal constant that do not allow any unspec global offsets" + (and (match_operand 0 "x86_64_immediate_operand") + (match_test "!x86_poff_operand_p (op)"))) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index a9d0c568bba1..7dfeb6af2252 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -66,6 +66,7 @@ extern bool x86_extended_QIreg_mentioned_p (rtx_insn *); extern bool x86_extended_reg_mentioned_p (rtx); extern bool x86_extended_rex2reg_mentioned_p (rtx); extern bool x86_evex_reg_mentioned_p (rtx [], int); +extern bool x86_poff_operand_p (rtx); extern bool x86_maybe_negate_const_int (rtx *, machine_mode); extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 7c5cab4e2c6b..8aa33aef7e17 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -23331,6 +23331,31 @@ x86_evex_reg_mentioned_p (rtx operands[], int nops) return false; } +/* Return true when rtx operand contains an UNSPEC_*POFF related constant, + which could make APX_NDD instructions exceed the encoding length limit. */ +bool +x86_poff_operand_p (rtx operand) +{ + if (GET_CODE (operand) == CONST) + { + rtx op = XEXP (operand, 0); + if (GET_CODE (op) == PLUS) + op = XEXP (op, 0); + + if (GET_CODE (op) == UNSPEC) + { + int unspec = XINT (op, 1); + return (unspec == UNSPEC_NTPOFF + || unspec == UNSPEC_TPOFF + || unspec == UNSPEC_DTPOFF + || unspec == UNSPEC_GOTTPOFF + || unspec == UNSPEC_GOTNTPOFF + || unspec == UNSPEC_INDNTPOFF); + } + } + return false; +} + /* If profitable, negate (without causing overflow) integer constant of mode MODE at location LOC. Return true in this case.
*/ bool diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1e8461833478..a16261212278 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6418,10 +6418,10 @@ "split_double_mode (mode, &operands[0], 1, &operands[0], &operands[5]);") (define_insn "*add_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r,r,r,r,r") (plus:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r") - (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,re,BM"))) + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r,rm,r,m,r") + (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le,r,e,je,BM"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" @@ -6457,7 +6457,7 @@ : "add{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd") + [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "3") (const_string "lea") From 592dc08e0509bab1dc786db1699f197e5f0fdcea Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 18 Jan 2023 17:52:52 +0800 Subject: [PATCH 016/311] [APX NDD] Support APX NDD for adc insns Legacy adc patterns are commonly used for TImode add. When extending TImode add to the NDD version, operands[0] and operands[1] can be different, so an extra move should be emitted if those patterns have an optimization for adding const0_rtx. For TImode insns, there could be register overlap between operands[0] and operands[1], as x86 allocates TImode registers sequentially, like rax:rdi, rdi:rdx. After the postreload split for TImode, the write to the 1st highpart rdi will be overwritten by the 2nd lowpart rdi if the 2nd lowpart rdi has a different src as input; the write to the 1st highpart rdi is then lost, causing a miscompilation.
In addition, when input operands contain memory, the address register may also overlap with the dest register if it is marked dead after one of the highpart/lowpart operations is done. So the earlyclobber modifier '&' should be added to the NDD dest to avoid overlap between dest and src operands. NDD instructions automatically zero-extend the dest register to 64 bits, so zext patterns can adopt all NDD forms that have a memory src input. gcc/ChangeLog: * config/i386/i386.md (*add3_doubleword): Add ndd alternatives, adopt '&' to ndd dest and move operands[1] to operands[0] when they are not equal. (*add3_doubleword_cc_overflow_1): Likewise. (*addv4_doubleword): Likewise. (*addv4_doubleword_1): Likewise. (*add3_doubleword_zext): Likewise. (*addv4_overflow_1): Add ndd alternatives. (*addv4_overflow_2): Likewise. (@add3_carry): Likewise. (*add3_carry_0): Likewise. (*addsi3_carry_zext): Likewise. (addcarry): Likewise. (addcarry_0): Likewise. (*addcarry_1): Likewise. (*add3_eq): Likewise. (*add3_ne): Likewise. (*addsi3_carry_zext_0): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd-adc.c: New test.
--- gcc/config/i386/i386.md | 193 ++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd-adc.c | 15 ++ 2 files changed, 136 insertions(+), 72 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-adc.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a16261212278..8dd8216041e1 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -6294,12 +6294,12 @@ TARGET_APX_NDD); DONE;") (define_insn_and_split "*add3_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro,r") + [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (plus: - (match_operand: 1 "nonimmediate_operand" "%0,0") - (match_operand: 2 "x86_64_hilo_general_operand" "r,o"))) + (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,r"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, mode, operands)" + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) @@ -6319,24 +6319,34 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { + /* Under NDD op0 and op1 may not equal, do not delete insn then. 
*/ + bool emit_insn_deleted_note_p = true; + if (!rtx_equal_p (operands[0], operands[1])) + { + emit_move_insn (operands[0], operands[1]); + emit_insn_deleted_note_p = false; + } if (operands[5] != const0_rtx) - ix86_expand_binary_operator (PLUS, mode, &operands[3]); + ix86_expand_binary_operator (PLUS, mode, &operands[3], + TARGET_APX_NDD); else if (!rtx_equal_p (operands[3], operands[4])) emit_move_insn (operands[3], operands[4]); - else + else if (emit_insn_deleted_note_p) emit_note (NOTE_INSN_DELETED); DONE; } -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*add3_doubleword_zext" - [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + [(set (match_operand: 0 "nonimmediate_operand" "=r,o,&r,&r") (plus: (zero_extend: - (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")) - (match_operand: 1 "nonimmediate_operand" "0,0"))) + (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r")) + (match_operand: 1 "nonimmediate_operand" "0,0,r,m"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (UNKNOWN, mode, operands)" + "ix86_binary_operator_ok (UNKNOWN, mode, operands, + TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) @@ -6352,7 +6362,8 @@ (match_dup 4)) (const_int 0))) (clobber (reg:CC FLAGS_REG))])] - "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);") + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*add3_doubleword_concat" [(set (match_operand: 0 "register_operand" "=&r") @@ -7414,14 +7425,14 @@ (eq:CCO (plus: (sign_extend: - (match_operand: 1 "nonimmediate_operand" "%0,0")) + (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r")) (sign_extend: - (match_operand: 2 "nonimmediate_operand" "r,o"))) + (match_operand: 2 "nonimmediate_operand" "r,o,r,o"))) (sign_extend: (plus: (match_dup 1) (match_dup 2))))) - (set (match_operand: 0 "nonimmediate_operand" "=ro,r") + (set (match_operand: 0 
"nonimmediate_operand" "=ro,r,&r,&r") (plus: (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) @@ -7451,22 +7462,23 @@ (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*addv4_doubleword_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (plus: (sign_extend: - (match_operand: 1 "nonimmediate_operand" "%0")) - (match_operand: 3 "const_scalar_int_operand" "n")) + (match_operand: 1 "nonimmediate_operand" "%0,rm")) + (match_operand: 3 "const_scalar_int_operand" "n,n")) (sign_extend: (plus: (match_dup 1) - (match_operand: 2 "x86_64_hilo_general_operand" ""))))) - (set (match_operand: 0 "nonimmediate_operand" "=ro") + (match_operand: 2 "x86_64_hilo_general_operand" ","))))) + (set (match_operand: 0 "nonimmediate_operand" "=ro,&r") (plus: (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands) + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_SCALAR_INT_P (operands[2]) && rtx_equal_p (operands[2], operands[3])" "#" @@ -7500,11 +7512,14 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); emit_insn (gen_addv4_1 (operands[3], operands[4], operands[5], operands[5])); DONE; } -}) +} +[(set_attr "isa" "*,apx_ndd")]) (define_insn "*addv4_overflow_1" [(set (reg:CCO FLAGS_REG) @@ -7514,9 +7529,9 @@ (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "%0,0"))) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r"))) (sign_extend: - (match_operand:SWI 2 "" "rWe,m"))) + (match_operand:SWI 2 "" "rWe,m,rWe,m"))) 
(sign_extend: (plus:SWI (plus:SWI @@ -7524,15 +7539,20 @@ [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r") (plus:SWI (plus:SWI (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "adc{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %1, %0|%0, %1, %2} + adc{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*addv4_overflow_2" @@ -7543,26 +7563,29 @@ (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "%0"))) - (match_operand: 6 "const_int_operand" "n")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,rm"))) + (match_operand: 6 "const_int_operand" "n,n")) (sign_extend: (plus:SWI (plus:SWI (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)]) (match_dup 1)) - (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") + (match_operand:SWI 2 "x86_64_immediate_operand" "e,e"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") (plus:SWI (plus:SWI (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands) + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[6])" - "adc{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") 
(if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") @@ -8384,17 +8407,22 @@ ;; Add with carry and subtract with borrow (define_insn "@add3_carry" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (plus:SWI (plus:SWI (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) - (match_operand:SWI 2 "" ","))) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) + (match_operand:SWI 2 "" ",,r,"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "adc{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %1, %0|%0, %1, %2} + adc{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) @@ -8481,31 +8509,39 @@ (set_attr "mode" "")]) (define_insn "*addsi3_carry_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (plus:SI (plus:SI (match_operator:SI 3 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 1 "register_operand" "%0")) - (match_operand:SI 2 "x86_64_general_operand" "rBMe")))) + (match_operand:SI 1 "nonimmediate_operand" "%0,r,rm")) + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" - "adc{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands, + TARGET_APX_NDD)" + "@ + adc{l}\t{%2, %k0|%k0, %2} + adc{l}\t{%2, %1, %k0|%k0, %1, %2} + adc{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + 
(set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) (define_insn "*addsi3_carry_zext_0" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (plus:SI (match_operator:SI 2 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 1 "register_operand" "0")))) + (match_operand:SI 1 "nonimmediate_operand" "0,rm")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "adc{l}\t{$0, %k0|%k0, 0}" - [(set_attr "type" "alu") + "@ + adc{l}\t{$0, %k0|%k0, 0} + adc{l}\t{$0, %1, %k0|%k0, %1, 0}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -8534,20 +8570,25 @@ (plus:SWI48 (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0")) - (match_operand:SWI48 2 "nonimmediate_operand" "r,rm"))) + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,rm,r")) + (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,r,m"))) (plus: (zero_extend: (match_dup 2)) (match_operator: 4 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])))) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r") (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" - "adc{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" + "@ + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %1, %0|%0, %1, %2} + adc{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) @@ -8705,7 +8746,8 @@ (match_dup 1))) (set (match_operand:SWI48 0 
"nonimmediate_operand") (plus:SWI48 (match_dup 1) (match_dup 2)))])] - "ix86_binary_operator_ok (PLUS, mode, operands)") + "ix86_binary_operator_ok (PLUS, mode, operands, + TARGET_APX_NDD)") (define_insn "*addcarry_1" [(set (reg:CCC FLAGS_REG) @@ -8715,18 +8757,18 @@ (plus:SWI48 (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) - (match_operand:SWI48 1 "nonimmediate_operand" "%0")) - (match_operand:SWI48 2 "x86_64_immediate_operand" "e"))) + (match_operand:SWI48 1 "nonimmediate_operand" "%0,rm")) + (match_operand:SWI48 2 "x86_64_immediate_operand" "e,e"))) (plus: (match_operand: 6 "const_scalar_int_operand") (match_operator: 4 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])))) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") + (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") (plus:SWI48 (plus:SWI48 (match_op_dup 5 [(match_dup 3) (const_int 0)]) (match_dup 1)) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands) + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && CONST_INT_P (operands[2]) /* Check that operands[6] is operands[2] zero extended from mode to mode. 
*/ @@ -8739,8 +8781,11 @@ && ((unsigned HOST_WIDE_INT) CONST_WIDE_INT_ELT (operands[6], 0) == UINTVAL (operands[2])) && CONST_WIDE_INT_ELT (operands[6], 1) == 0))" - "adc{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + adc{}\t{%2, %0|%0, %2} + adc{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "") @@ -9388,12 +9433,12 @@ [(set (reg:CCC FLAGS_REG) (compare:CCC (plus: - (match_operand: 1 "nonimmediate_operand" "%0,0") - (match_operand: 2 "x86_64_hilo_general_operand" "r,o")) + (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,o")) (match_dup 1))) - (set (match_operand: 0 "nonimmediate_operand" "=ro,r") + (set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (plus: (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (PLUS, mode, operands)" + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CCC FLAGS_REG) @@ -9422,6 +9467,8 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); emit_insn (gen_addcarry_0 (operands[3], operands[4], operands[5])); DONE; } @@ -9430,7 +9477,8 @@ operands[5], mode); else operands[6] = gen_rtx_ZERO_EXTEND (mode, operands[5]); -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) ;; x == 0 with zero flag test can be done also as x < 1U with carry flag ;; test, where the latter is preferrable if we have some carry consuming @@ -9445,7 +9493,7 @@ (match_operand:SWI 1 "nonimmediate_operand")) (match_operand:SWI 2 ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (PLUS, mode, operands) + "ix86_binary_operator_ok (PLUS, mode, operands, TARGET_APX_NDD) && ix86_pre_reload_split ()" "#" "&& 1" @@ -9469,7 +9517,8 @@ "CONST_INT_P (operands[2]) && (mode 
!= DImode || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000)) - && ix86_binary_operator_ok (PLUS, mode, operands) + && ix86_binary_operator_ok (PLUS, mode, operands, + TARGET_APX_NDD) && ix86_pre_reload_split ()" "#" "&& 1" diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c b/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c new file mode 100644 index 000000000000..9d5991457da7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd-adc.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { int128 && { ! ia32 } } } } */ +/* { dg-options "-mapxf -O2" } */ + +#include "pr91681-1.c" +// *addti3_doubleword +// *addti3_doubleword_zext +// *adddi3_cc_overflow_1 +// *adddi3_carry + +int foo3 (int *a, int b) +{ + int c = *a + b + (a > b); /* { dg-warning "comparison between pointer and integer" } */ + return c; +} +/* { dg-final { scan-assembler-not "xor" } } */ From c601744469390f5c66075de1cead46ed0d5c7a5d Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Thu, 2 Feb 2023 10:37:32 +0800 Subject: [PATCH 017/311] [APX NDD] Support APX NDD for sub insns gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_fixup_binary_operands_no_copy): Add use_ndd parameter and parse it. * config/i386/i386-protos.h (ix86_fixup_binary_operands_no_copy): Change define. * config/i386/i386.md (sub3): Add new alternatives for NDD and adjust output templates. (*sub_1): Likewise. (*sub_2): Likewise. (subv4): Likewise. (*subv4): Likewise. (subv4_1): Likewise. (usubv4): Likewise. (*sub_3): Likewise. (*subsi_1_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternatives. (*subsi_2_zext): Likewise. (*subsi_3_zext): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add test for ndd sub. 
--- gcc/config/i386/i386-expand.cc | 5 +- gcc/config/i386/i386-protos.h | 2 +- gcc/config/i386/i386.md | 155 ++++++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 13 ++ 4 files changed, 120 insertions(+), 55 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 3ecda989cf8c..93ecde4b4a86 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1326,9 +1326,10 @@ ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, void ix86_fixup_binary_operands_no_copy (enum rtx_code code, - machine_mode mode, rtx operands[]) + machine_mode mode, rtx operands[], + bool use_ndd) { - rtx dst = ix86_fixup_binary_operands (code, mode, operands); + rtx dst = ix86_fixup_binary_operands (code, mode, operands, use_ndd); gcc_assert (dst == operands[0]); } diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 7dfeb6af2252..481527872e81 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -111,7 +111,7 @@ extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]); extern rtx ix86_fixup_binary_operands (enum rtx_code, machine_mode, rtx[], bool = false); extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, - machine_mode, rtx[]); + machine_mode, rtx[], bool = false); extern void ix86_expand_binary_operator (enum rtx_code, machine_mode, rtx[], bool = false); extern void ix86_expand_vector_logical_operator (enum rtx_code, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8dd8216041e1..6ec498725aaf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -7777,7 +7777,8 @@ (minus:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand") (match_operand:SDWIM 2 "")))] "" - "ix86_expand_binary_operator (MINUS, mode, operands); DONE;") + "ix86_expand_binary_operator (MINUS, mode, operands, + TARGET_APX_NDD); DONE;") (define_insn_and_split "*sub3_doubleword" [(set (match_operand: 0 "nonimmediate_operand" 
"=ro,r") @@ -7803,7 +7804,10 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { - ix86_expand_binary_operator (MINUS, mode, &operands[3]); + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + ix86_expand_binary_operator (MINUS, mode, &operands[3], + TARGET_APX_NDD); DONE; } }) @@ -7832,25 +7836,36 @@ "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);") (define_insn "*sub_1" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0,0") - (match_operand:SWI 2 "" ","))) + (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") + (match_operand:SWI 2 "" ",,r,"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (MINUS, mode, operands)" - "sub{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %1, %0|%0, %1, %2} + sub{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")))) + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sub{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands, + TARGET_APX_NDD)" + "@ + sub{l}\t{%2, %k0|%k0, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" 
"*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "SI")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. @@ -7941,31 +7956,42 @@ [(set (reg FLAGS_REG) (compare (minus:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0,0") - (match_operand:SWI 2 "" ",")) + (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") + (match_operand:SWI 2 "" ",,r,")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (MINUS, mode, operands)" - "sub{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + && ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %1, %0|%0, %1, %2} + sub{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subsi_2_zext" [(set (reg FLAGS_REG) (compare - (minus:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")) + (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sub{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + && ix86_binary_operator_ok (MINUS, SImode, operands, + TARGET_APX_NDD)" + "@ + sub{l}\t{%2, %k0|%k0, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "SI")]) (define_insn "*subqi_ext_0" @@ -8077,7 +8103,8 @@ (pc)))] "" { - 
ix86_fixup_binary_operands_no_copy (MINUS, mode, operands); + ix86_fixup_binary_operands_no_copy (MINUS, mode, operands, + TARGET_APX_NDD); if (CONST_SCALAR_INT_P (operands[2])) operands[4] = operands[2]; else @@ -8088,35 +8115,45 @@ [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "0,0")) + (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r")) (sign_extend: - (match_operand:SWI 2 "" "We,m"))) + (match_operand:SWI 2 "" "We,m,rWe,m"))) (sign_extend: (minus:SWI (match_dup 1) (match_dup 2))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands)" - "sub{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %1, %0|%0, %1, %2} + sub{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "subv4_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "0")) + (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (match_operand: 3 "const_int_operand")) (sign_extend: (minus:SWI (match_dup 1) - (match_operand:SWI 2 "x86_64_immediate_operand" ""))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (match_operand:SWI 2 "x86_64_immediate_operand" ","))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (minus:SWI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands) + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[3])" - "sub{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd") + 
(set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (cond [(match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") @@ -8212,6 +8249,8 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); emit_insn (gen_subv4_1 (operands[3], operands[4], operands[5], operands[5])); DONE; @@ -8293,18 +8332,25 @@ (label_ref (match_operand 3)) (pc)))] "" - "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands);") + "ix86_fixup_binary_operands_no_copy (MINUS, mode, operands, + TARGET_APX_NDD);") (define_insn "*sub_3" [(set (reg FLAGS_REG) - (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0") - (match_operand:SWI 2 "" ","))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") + (match_operand:SWI 2 "" ",,r,"))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,i,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) - && ix86_binary_operator_ok (MINUS, mode, operands)" - "sub{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + && ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %0|%0, %2} + sub{}\t{%2, %1, %0|%0, %1, %2} + sub{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_peephole2 @@ -8392,16 +8438,21 @@ (define_insn "*subsi_3_zext" [(set (reg FLAGS_REG) - (compare (match_operand:SI 1 "register_operand" "0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe"))) - (set (match_operand:DI 0 "register_operand" "=r") + (compare (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re"))) + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))] 
"TARGET_64BIT && ix86_match_ccmode (insn, CCmode) - && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sub{l}\t{%2, %1|%1, %2}" - [(set_attr "type" "alu") + && ix86_binary_operator_ok (MINUS, SImode, operands, + TARGET_APX_NDD)" + "@ + sub{l}\t{%2, %1|%1, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2} + sub{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "SI")]) ;; Add with carry and subtract with borrow diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index c1049022f2aa..0c7952ef0184 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -42,3 +42,16 @@ FOO (long, add, +) FOO1 (long, add, +) FOO2 (long, add, +) +FOO (char, sub, -) +FOO1 (char, sub, -) +FOO (short, sub, -) +FOO1 (short, sub, -) +FOO (int, sub, -) +FOO1 (int, sub, -) +FOO (long, sub, -) +FOO1 (long, sub, -) +/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), %(?:|r|e)di, %(?:|r|e)a(?:x|l)" 4 } } */ From 57fdb5c2440a8b9d8d0b2dd8be866b6ae012a788 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 18 Jan 2023 15:51:23 +0800 Subject: [PATCH 018/311] [APX NDD] Support APX NDD for sbb insn Similar to *add3_doubleword, operands[1] may not equal to operands[0] so extra move and earlyclobber are required. gcc/ChangeLog: * config/i386/i386.md (*sub3_doubleword): Add new alternative for NDD, adopt '&' modifier to NDD dest and emit move when operands[0] not equal to operands[1]. 
(*sub3_doubleword_zext): Likewise. (*subv4_doubleword): Likewise. (*subv4_doubleword_1): Likewise. (*subv4_overflow_1): Add NDD alternatives and adjust output templates. (*subv4_overflow_2): Likewise. (@sub3_carry): Likewise. (*addsi3_carry_zext_0r): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (*subsi3_carry_zext): Likewise. (subborrow): Parse TARGET_APX_NDD to ix86_binary_operator_ok. (subborrow_0): Likewise. (*sub3_eq): Likewise. (*sub3_ne): Likewise. (*sub3_eq_1): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd-sbb.c: New test. --- gcc/config/i386/i386.md | 160 ++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c | 6 + 2 files changed, 107 insertions(+), 59 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6ec498725aaf..90981e733bd6 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -7781,12 +7781,13 @@ TARGET_APX_NDD); DONE;") (define_insn_and_split "*sub3_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro,r") + [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (minus: - (match_operand: 1 "nonimmediate_operand" "0,0") - (match_operand: 2 "x86_64_hilo_general_operand" "r,o"))) + (match_operand: 1 "nonimmediate_operand" "0,0,ro,r") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,o"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (MINUS, mode, operands)" + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) @@ -7810,16 +7811,18 @@ TARGET_APX_NDD); DONE; } -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*sub3_doubleword_zext" - [(set (match_operand: 0 "nonimmediate_operand" "=r,o") + [(set (match_operand: 0 "nonimmediate_operand" "=r,o,&r,&r") (minus: - (match_operand: 1 "nonimmediate_operand" "0,0") + (match_operand: 1 
"nonimmediate_operand" "0,0,r,o") (zero_extend: - (match_operand:DWIH 2 "nonimmediate_operand" "rm,r")))) + (match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r")))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (UNKNOWN, mode, operands)" + "ix86_binary_operator_ok (UNKNOWN, mode, operands, + TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) @@ -7833,7 +7836,8 @@ (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))) (const_int 0))) (clobber (reg:CC FLAGS_REG))])] - "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);") + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[3]);" +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn "*sub_1" [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") @@ -8167,14 +8171,15 @@ (eq:CCO (minus: (sign_extend: - (match_operand: 1 "nonimmediate_operand" "0,0")) + (match_operand: 1 "nonimmediate_operand" "0,0,ro,r")) (sign_extend: - (match_operand: 2 "nonimmediate_operand" "r,o"))) + (match_operand: 2 "nonimmediate_operand" "r,o,r,o"))) (sign_extend: (minus: (match_dup 1) (match_dup 2))))) - (set (match_operand: 0 "nonimmediate_operand" "=ro,r") + (set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (minus: (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands)" + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel [(set (reg:CC FLAGS_REG) @@ -8202,22 +8207,24 @@ (match_dup 5)))])] { split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn_and_split "*subv4_doubleword_1" [(set (reg:CCO FLAGS_REG) (eq:CCO (minus: (sign_extend: - (match_operand: 1 "nonimmediate_operand" "0")) + (match_operand: 1 "nonimmediate_operand" "0,ro")) (match_operand: 3 "const_scalar_int_operand")) (sign_extend: (minus: (match_dup 1) - (match_operand: 2 "x86_64_hilo_general_operand" ""))))) - (set (match_operand: 0 
"nonimmediate_operand" "=ro") + (match_operand: 2 "x86_64_hilo_general_operand" ","))))) + (set (match_operand: 0 "nonimmediate_operand" "=ro,&r") (minus: (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands) + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && CONST_SCALAR_INT_P (operands[2]) && rtx_equal_p (operands[2], operands[3])" "#" @@ -8255,7 +8262,8 @@ operands[5])); DONE; } -}) +} +[(set_attr "isa" "*,apx_ndd")]) (define_insn "*subv4_overflow_1" [(set (reg:CCO FLAGS_REG) @@ -8263,11 +8271,11 @@ (minus: (minus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "%0,0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) (sign_extend: - (match_operand:SWI 2 "" "rWe,m"))) + (match_operand:SWI 2 "" "rWe,m,rWe,m"))) (sign_extend: (minus:SWI (minus:SWI @@ -8275,15 +8283,21 @@ (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r,r,r") (minus:SWI (minus:SWI (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands)" - "sbb{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "")]) (define_insn "*subv4_overflow_2" @@ -8292,28 +8306,32 @@ (minus: (minus: (sign_extend: - (match_operand:SWI 1 "nonimmediate_operand" "%0")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,rm")) (match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) - (match_operand: 6 
"const_int_operand" "n")) + (match_operand: 6 "const_int_operand" "n,n")) (sign_extend: (minus:SWI (minus:SWI (match_dup 1) (match_operator:SWI 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) - (match_operand:SWI 2 "x86_64_immediate_operand" "e"))))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=rm") + (match_operand:SWI 2 "x86_64_immediate_operand" "e,e"))))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=rm,r") (minus:SWI (minus:SWI (match_dup 1) (match_op_dup 5 [(match_dup 3) (const_int 0)])) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands) + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && CONST_INT_P (operands[2]) && INTVAL (operands[2]) == INTVAL (operands[6])" - "sbb{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "mode" "") (set (attr "length_immediate") (if_then_else (match_test "IN_RANGE (INTVAL (operands[2]), -128, 127)") @@ -8598,15 +8616,18 @@ (set_attr "mode" "SI")]) (define_insn "*addsi3_carry_zext_0r" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (plus:SI (match_operator:SI 2 "ix86_carry_flag_unset_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 1 "register_operand" "0")))) + (match_operand:SI 1 "nonimmediate_operand" "0,rm")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "sbb{l}\t{$-1, %k0|%k0, -1}" - [(set_attr "type" "alu") + "@ + sbb{l}\t{$-1, %k0|%k0, -1} + sbb{l}\t{$-1, %1, %k0|%k0, %1, -1}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -8846,17 +8867,23 @@ (const_string "4")))]) (define_insn "@sub3_carry" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (minus:SWI (minus:SWI - 
(match_operand:SWI 1 "nonimmediate_operand" "0,0") + (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") (match_operator:SWI 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)])) - (match_operand:SWI 2 "" ","))) + (match_operand:SWI 2 "" ",,r,"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (MINUS, mode, operands)" - "sbb{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) @@ -8943,18 +8970,23 @@ (set_attr "mode" "")]) (define_insn "*subsi3_carry_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (minus:SI (minus:SI - (match_operand:SI 1 "register_operand" "0") + (match_operand:SI 1 "nonimmediate_operand" "0,r,rm") (match_operator:SI 3 "ix86_carry_flag_operator" [(reg FLAGS_REG) (const_int 0)])) - (match_operand:SI 2 "x86_64_general_operand" "rBMe")))) + (match_operand:SI 2 "x86_64_general_operand" "rBMe,rBMe,re")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)" - "sbb{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "alu") + "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands, + TARGET_APX_NDD)" + "@ + sbb{l}\t{%2, %k0|%k0, %2} + sbb{l}\t{%2, %1, %k0|%k0, %1, %2} + sbb{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -9039,21 +9071,27 @@ [(set (reg:CCC FLAGS_REG) (compare:CCC (zero_extend: - (match_operand:SWI48 1 "nonimmediate_operand" "0,0")) + (match_operand:SWI48 1 "nonimmediate_operand" "0,0,r,rm")) (plus: 
(match_operator: 4 "ix86_carry_flag_operator" [(match_operand 3 "flags_reg_operand") (const_int 0)]) (zero_extend: - (match_operand:SWI48 2 "nonimmediate_operand" "r,rm"))))) - (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (match_operand:SWI48 2 "nonimmediate_operand" "r,rm,rm,r"))))) + (set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r") (minus:SWI48 (minus:SWI48 (match_dup 1) (match_operator:SWI48 5 "ix86_carry_flag_operator" [(match_dup 3) (const_int 0)])) (match_dup 2)))] - "ix86_binary_operator_ok (MINUS, mode, operands)" - "sbb{}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)" + "@ + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %0|%0, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2} + sbb{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "")]) @@ -9214,7 +9252,8 @@ (match_operand:SWI48 2 ""))) (set (match_operand:SWI48 0 "register_operand") (minus:SWI48 (match_dup 1) (match_dup 2)))])] - "ix86_binary_operator_ok (MINUS, mode, operands)") + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD)") (define_expand "uaddc5" [(match_operand:SWI48 0 "register_operand") @@ -9639,7 +9678,8 @@ (const_int 0))) (match_operand:SWI 2 ""))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (MINUS, mode, operands) + "ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && ix86_pre_reload_split ()" "#" "&& 1" @@ -9664,7 +9704,8 @@ "CONST_INT_P (operands[2]) && (mode != DImode || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000)) - && ix86_binary_operator_ok (MINUS, mode, operands) + && ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && ix86_pre_reload_split ()" "#" "&& 1" @@ -9693,7 +9734,8 @@ "CONST_INT_P (operands[2]) && (mode != DImode || INTVAL (operands[2]) != HOST_WIDE_INT_C (-0x80000000)) - && ix86_binary_operator_ok (MINUS, 
mode, operands) + && ix86_binary_operator_ok (MINUS, mode, operands, + TARGET_APX_NDD) && ix86_pre_reload_split ()" "#" "&& 1" diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c b/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c new file mode 100644 index 000000000000..662e3c607d85 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd-sbb.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { int128 && { ! ia32 } } } } */ +/* { dg-options "-mapxf -O2" } */ + +#include "pr91681-2.c" + +/* { dg-final { scan-assembler-times "sbbq\[^\n\r]*0, %rdi, %rdx" 1 } } */ From 042519b61772c3fd7aa1bb9b492e1df75df23eb5 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Fri, 19 May 2023 17:15:52 +0800 Subject: [PATCH 019/311] [APX NDD] Support APX NDD for neg insn gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_expand_unary_operator): Add use_ndd parameter and adjust for NDD. * config/i386/i386-protos.h: Add use_ndd parameter for ix86_unary_operator_ok and ix86_expand_unary_operator. * config/i386/i386.cc (ix86_unary_operator_ok): Add use_ndd parameter and adjust for NDD. * config/i386/i386.md (neg2): Add new constraint for NDD and adjust output template. (*neg_1): Likewise. (*neg2_doubleword): Likewise and adopt '&' to NDD dest. (*neg_2): Likewise. (*neg_ccc_1): Likewise. (*neg_ccc_2): Likewise. (*negsi_1_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternatives. (*negsi_2_zext): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add neg test. 
--- gcc/config/i386/i386-expand.cc | 4 +- gcc/config/i386/i386-protos.h | 5 +- gcc/config/i386/i386.cc | 5 +- gcc/config/i386/i386.md | 77 ++++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 29 ++++++++++ 5 files changed, 87 insertions(+), 33 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 93ecde4b4a86..d4bbd33ce079 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1494,7 +1494,7 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, void ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, - rtx operands[]) + rtx operands[], bool use_ndd) { bool matching_memory = false; rtx src, dst, op, clob; @@ -1513,7 +1513,7 @@ ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, } /* When source operand is memory, destination must match. */ - if (MEM_P (src) && !matching_memory) + if (!use_ndd && MEM_P (src) && !matching_memory) src = force_reg (mode, src); /* Emit the instruction. 
*/ diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 481527872e81..fa9524097296 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -127,7 +127,7 @@ extern bool ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high); extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn); extern bool ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn); extern void ix86_expand_unary_operator (enum rtx_code, machine_mode, - rtx[]); + rtx[], bool = false); extern rtx ix86_build_const_vector (machine_mode, bool, rtx); extern rtx ix86_build_signbit_mask (machine_mode, bool, bool); extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx, @@ -147,7 +147,8 @@ extern void ix86_split_fp_absneg_operator (enum rtx_code, machine_mode, rtx[]); extern void ix86_expand_copysign (rtx []); extern void ix86_expand_xorsign (rtx []); -extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[2]); +extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[2], + bool = false); extern bool ix86_match_ccmode (rtx, machine_mode); extern bool ix86_match_ptest_ccmode (rtx); extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 8aa33aef7e17..4b6bad37c8fc 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -16209,11 +16209,12 @@ ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) bool ix86_unary_operator_ok (enum rtx_code, machine_mode, - rtx operands[2]) + rtx operands[2], + bool use_ndd) { /* If one of operands is memory, source and destination must match. */ if ((MEM_P (operands[0]) - || MEM_P (operands[1])) + || (!use_ndd && MEM_P (operands[1]))) && ! 
rtx_equal_p (operands[0], operands[1])) return false; return true; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 90981e733bd6..e97c1784e9ad 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -13287,13 +13287,14 @@ [(set (match_operand:SDWIM 0 "nonimmediate_operand") (neg:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))] "" - "ix86_expand_unary_operator (NEG, mode, operands); DONE;") + "ix86_expand_unary_operator (NEG, mode, operands, + TARGET_APX_NDD); DONE;") (define_insn_and_split "*neg2_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro") - (neg: (match_operand: 1 "nonimmediate_operand" "0"))) + [(set (match_operand: 0 "nonimmediate_operand" "=ro,&r") + (neg: (match_operand: 1 "nonimmediate_operand" "0,ro"))) (clobber (reg:CC FLAGS_REG))] - "ix86_unary_operator_ok (NEG, mode, operands)" + "ix86_unary_operator_ok (NEG, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel @@ -13310,7 +13311,8 @@ [(set (match_dup 2) (neg:DWIH (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);") + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);" + [(set_attr "isa" "*,apx_ndd")]) ;; Convert: ;; mov %esi, %edx @@ -13399,22 +13401,29 @@ (clobber (reg:CC FLAGS_REG))])]) (define_insn "*neg_1" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") - (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0"))) + [(set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm"))) (clobber (reg:CC FLAGS_REG))] - "ix86_unary_operator_ok (NEG, mode, operands)" - "neg{}\t%0" + "ix86_unary_operator_ok (NEG, mode, operands, TARGET_APX_NDD)" + "@ + neg{}\t%0 + neg{}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "")]) (define_insn "*negsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 
0 "register_operand" "=r,r") (zero_extend:DI - (neg:SI (match_operand:SI 1 "register_operand" "0")))) + (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands)" - "neg{l}\t%k0" + "TARGET_64BIT && ix86_unary_operator_ok (NEG, SImode, operands, + TARGET_APX_NDD)" + "@ + neg{l}\t%k0 + neg{l}\t{%k1, %k0|%k0, %k1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) ;; Alternative 1 is needed to work around LRA limitation, see PR82524. @@ -13440,51 +13449,65 @@ (define_insn "*neg_2" [(set (reg FLAGS_REG) (compare - (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")) + (neg:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (neg:SWI (match_dup 1)))] "ix86_match_ccmode (insn, CCGOCmode) - && ix86_unary_operator_ok (NEG, mode, operands)" - "neg{}\t%0" + && ix86_unary_operator_ok (NEG, mode, operands, + TARGET_APX_NDD)" + "@ + neg{}\t%0 + neg{}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "")]) (define_insn "*negsi_2_zext" [(set (reg FLAGS_REG) (compare - (neg:SI (match_operand:SI 1 "register_operand" "0")) + (neg:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (neg:SI (match_dup 1))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode) - && ix86_unary_operator_ok (NEG, SImode, operands)" - "neg{l}\t%k0" + && ix86_unary_operator_ok (NEG, SImode, operands, + TARGET_APX_NDD)" + "@ + neg{l}\t%k0 + neg{l}\t{%1, %k0|%k0, %1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*neg_ccc_1" [(set (reg:CCC FLAGS_REG) (unspec:CCC - [(match_operand:SWI 1 
"nonimmediate_operand" "0") + [(match_operand:SWI 1 "nonimmediate_operand" "0,rm") (const_int 0)] UNSPEC_CC_NE)) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (neg:SWI (match_dup 1)))] "" - "neg{}\t%0" + "@ + neg{}\t%0 + neg{}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "")]) (define_insn "*neg_ccc_2" [(set (reg:CCC FLAGS_REG) (unspec:CCC - [(match_operand:SWI 1 "nonimmediate_operand" "0") + [(match_operand:SWI 1 "nonimmediate_operand" "0,rm") (const_int 0)] UNSPEC_CC_NE)) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r"))] "" - "neg{}\t%0" + "@ + neg{}\t%0 + neg{}\t{%1, %0|%0, %1}" [(set_attr "type" "negnot") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "")]) (define_expand "x86_neg_ccc" diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 0c7952ef0184..c351f71265e4 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -27,8 +27,25 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \ { \ TYPE c = *a OP b; \ return c; \ +} + +#define F(TYPE, OP_NAME, OP) \ +TYPE \ +__attribute__ ((noipa)) \ +f_##OP_NAME##_##TYPE (TYPE *a) \ +{ \ + TYPE b = OP*a; \ + return b; \ } +#define F1(TYPE, OP_NAME, OP) \ +TYPE \ +__attribute__ ((noipa)) \ +f1_##OP_NAME##_##TYPE (TYPE a) \ +{ \ + TYPE b = OP a; \ + return b; \ +} FOO (char, add, +) FOO1 (char, add, +) FOO2 (char, add, +) @@ -50,8 +67,20 @@ FOO (int, sub, -) FOO1 (int, sub, -) FOO (long, sub, -) FOO1 (long, sub, -) + +F (char, neg, -) +F1 (char, neg, -) +F (short, neg, -) +F1 (short, neg, -) +F (int, neg, -) +F1 (int, neg, -) +F (long, neg, -) +F1 (long, neg, -) /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { 
scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "sub(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), %(?:|r|e)di, %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 } } */ +/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 3 } } */ +/* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ From c778241dbdb509baee6ddb193a465dc3794c1a05 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Mon, 22 May 2023 10:08:39 +0800 Subject: [PATCH 020/311] [APX NDD] Support APX NDD for not insn For *one_cmplsi2_2_zext, it will be splitted to xor, so its NDD form will be added together with xor NDD support. gcc/ChangeLog: * config/i386/i386.md (one_cmpl2): Add new constraints for NDD and adjust output template. (*one_cmpl2_1): Likewise. (*one_cmplqi2_1): Likewise. (*one_cmpl2_doubleword): Likewise, and adopt '&' to NDD dest. (*one_cmpl2_2): Likewise. (*one_cmplsi2_1_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add not test. 
--- gcc/config/i386/i386.md | 58 ++++++++++++++----------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 11 +++++ 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e97c1784e9ad..61b7b79543bc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14006,57 +14006,63 @@ [(set (match_operand:SDWIM 0 "nonimmediate_operand") (not:SDWIM (match_operand:SDWIM 1 "nonimmediate_operand")))] "" - "ix86_expand_unary_operator (NOT, mode, operands); DONE;") + "ix86_expand_unary_operator (NOT, mode, operands, + TARGET_APX_NDD); DONE;") (define_insn_and_split "*one_cmpl2_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro") - (not: (match_operand: 1 "nonimmediate_operand" "0")))] - "ix86_unary_operator_ok (NOT, mode, operands)" + [(set (match_operand: 0 "nonimmediate_operand" "=ro,&r") + (not: (match_operand: 1 "nonimmediate_operand" "0,ro")))] + "ix86_unary_operator_ok (NOT, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(set (match_dup 0) (not:DWIH (match_dup 1))) (set (match_dup 2) (not:DWIH (match_dup 3)))] - "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);") + "split_double_mode (mode, &operands[0], 2, &operands[0], &operands[2]);" + [(set_attr "isa" "*,apx_ndd")]) (define_insn "*one_cmpl2_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,?k") - (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,k")))] - "ix86_unary_operator_ok (NOT, mode, operands)" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + (not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0,rm,k")))] + "ix86_unary_operator_ok (NOT, mode, operands, TARGET_APX_NDD)" "@ not{}\t%0 + not{}\t{%1, %0|%0, %1} #" - [(set_attr "isa" "*,") - (set_attr "type" "negnot,msklog") + [(set_attr "isa" "*,apx_ndd,") + (set_attr "type" "negnot,negnot,msklog") (set_attr "mode" "")]) (define_insn "*one_cmplsi2_1_zext" - [(set (match_operand:DI 0 
"register_operand" "=r,?k") + [(set (match_operand:DI 0 "register_operand" "=r,r,?k") (zero_extend:DI - (not:SI (match_operand:SI 1 "register_operand" "0,k"))))] - "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands)" + (not:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,k"))))] + "TARGET_64BIT && ix86_unary_operator_ok (NOT, SImode, operands, + TARGET_APX_NDD)" "@ not{l}\t%k0 + not{l}\t{%1, %k0|%k0, %1} #" - [(set_attr "isa" "x64,avx512bw_512") - (set_attr "type" "negnot,msklog") - (set_attr "mode" "SI,SI")]) + [(set_attr "isa" "x64,apx_ndd,avx512bw_512") + (set_attr "type" "negnot,negnot,msklog") + (set_attr "mode" "SI,SI,SI")]) (define_insn "*one_cmplqi2_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,?k") - (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] - "ix86_unary_operator_ok (NOT, QImode, operands)" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,r,?k") + (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,rm,k")))] + "ix86_unary_operator_ok (NOT, QImode, operands, TARGET_APX_NDD)" "@ not{b}\t%0 not{l}\t%k0 + not{b}\t{%1, %0|%0, %1} #" - [(set_attr "isa" "*,*,avx512f") - (set_attr "type" "negnot,negnot,msklog") + [(set_attr "isa" "*,*,apx_ndd,avx512f") + (set_attr "type" "negnot,negnot,negnot,msklog") (set (attr "mode") (cond [(eq_attr "alternative" "1") (const_string "SI") - (and (eq_attr "alternative" "2") + (and (eq_attr "alternative" "3") (match_test "!TARGET_AVX512DQ")) (const_string "HI") ] @@ -14086,14 +14092,16 @@ (define_insn "*one_cmpl2_2" [(set (reg FLAGS_REG) - (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")) + (compare (not:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (not:SWI (match_dup 1)))] "ix86_match_ccmode (insn, CCNOmode) - && ix86_unary_operator_ok (NOT, mode, operands)" + && ix86_unary_operator_ok (NOT, mode, 
operands, + TARGET_APX_NDD)" "#" [(set_attr "type" "alu1") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "")]) (define_split diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index c351f71265e4..2bd551614c40 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -76,6 +76,15 @@ F (int, neg, -) F1 (int, neg, -) F (long, neg, -) F1 (long, neg, -) + +F (char, not, ~) +F1 (char, not, ~) +F (short, not, ~) +F1 (short, not, ~) +F (int, not, ~) +F1 (int, not, ~) +F (long, not, ~) +F1 (long, not, ~) /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -84,3 +93,5 @@ F1 (long, neg, -) /* { dg-final { scan-assembler-times "negb\[^\n\r]\\(%rdi\\), %(?:|r|e)al" 1 } } */ /* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)ax" 3 } } */ /* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "not(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ From 7463df5c2a454aeaec786848fd8a4551e79b71a9 Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Wed, 17 May 2023 17:20:37 +0800 Subject: [PATCH 021/311] [APX NDD] Support APX NDD for and insn For NDD form AND insn, there are three splitter fixes after extending legacy patterns. 1. APX NDD does not support high QImode registers like ah, bh, ch, dh, so for some optimization splitters that generates highpart zero_extract for QImode need to be prohibited under NDD pattern. 2. 
The legacy AND insn uses the r/qm/L constraint, and a post-reload splitter transforms it into a zero_extend move. But for the NDD form AND the splitter is not strict enough: it assumes such an AND has a const_int operand matching the constraint "L", whereas the NDD form AND allows a const_int with any QI value. Restrict the splitter condition to match the "L" constraint, which strictly matches zero-extend semantics. 3. The legacy AND insn adopts the r/0/Z constraint, and a splitter tries to optimize such a form into a strict_lowpart QImode AND when the 7th bit is not set. But the splitter will wrongly convert the non-zext form of the NDD AND with a memory src; the strict_lowpart transform then matches alternative 1 of *_slp_1 and generates *movstrict_1, so the zext semantics are lost. This could leave the highpart of dest uncleared and generate wrong code. Disable the splitter when NDD is adopted and operands[0] and operands[1] are not equal. gcc/ChangeLog: * config/i386/i386.md (and3): Add NDD alternatives and adjust output template. (*anddi_1): Likewise. (*and_1): Likewise. (*andqi_1): Likewise. (*andsi_1_zext): Likewise. (*anddi_2): Likewise. (*andsi_2_zext): Likewise. (*andqi_2_maybe_si): Likewise. (*and_2): Likewise. (*and3_doubleword): Add NDD alternative, adopt '&' to NDD dest and emit move for optimized case if operands[0] not equal to operands[1]. (define_split for QI highpart AND): Prohibit splitter to split NDD form AND insn to qi_ext_3. (define_split for QI strict_lowpart optimization): Prohibit splitter to split NDD form AND insn to *3_1_slp. (define_split for zero_extend and optimization): Prohibit splitter to split NDD form AND insn to zero_extend insn. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add and test. 
--- gcc/config/i386/i386.md | 175 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 13 ++ 2 files changed, 127 insertions(+), 61 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 61b7b79543bc..d2528e0dcf6c 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -11710,18 +11710,19 @@ (operands[0], gen_lowpart (mode, operands[1]), mode, mode, 1)); else - ix86_expand_binary_operator (AND, mode, operands); + ix86_expand_binary_operator (AND, mode, operands, + TARGET_APX_NDD); DONE; }) (define_insn_and_split "*and3_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro,r") + [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (and: - (match_operand: 1 "nonimmediate_operand" "%0,0") - (match_operand: 2 "x86_64_hilo_general_operand" "r,o"))) + (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,o"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (AND, mode, operands)" + "ix86_binary_operator_ok (AND, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(const_int:DWIH 0)] @@ -11733,39 +11734,53 @@ if (operands[2] == const0_rtx) emit_move_insn (operands[0], const0_rtx); else if (operands[2] == constm1_rtx) - emit_insn_deleted_note_p = true; + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_insn_deleted_note_p = true; + } else - ix86_expand_binary_operator (AND, mode, &operands[0]); + ix86_expand_binary_operator (AND, mode, &operands[0], + TARGET_APX_NDD); if (operands[5] == const0_rtx) emit_move_insn (operands[3], const0_rtx); else if (operands[5] == constm1_rtx) { - if (emit_insn_deleted_note_p) + if (!rtx_equal_p (operands[3], operands[4])) + emit_move_insn (operands[3], operands[4]); + else if (emit_insn_deleted_note_p) emit_note (NOTE_INSN_DELETED); } else - ix86_expand_binary_operator (AND, mode, &operands[3]); + ix86_expand_binary_operator (AND, 
mode, &operands[3], + TARGET_APX_NDD); DONE; -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,?k") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,rm,r,r,r,r,?k") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,L,k"))) + (match_operand:DI 1 "nonimmediate_operand" "%0,r,0,0,rm,r,qm,k") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,Z,re,m,re,m,L,k"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands, + TARGET_APX_NDD)" "@ and{l}\t{%k2, %k0|%k0, %k2} + and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2} and{q}\t{%2, %0|%0, %2} and{q}\t{%2, %0|%0, %2} + and{q}\t{%2, %1, %0|%0, %1, %2} + and{q}\t{%2, %1, %0|%0, %1, %2} # #" - [(set_attr "isa" "x64,x64,x64,x64,avx512bw_512") - (set_attr "type" "alu,alu,alu,imovx,msklog") - (set_attr "length_immediate" "*,*,*,0,*") + [(set_attr "isa" "x64,apx_ndd,x64,x64,apx_ndd,apx_ndd,x64,avx512bw_512") + (set_attr "type" "alu,alu,alu,alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,*,*,*,0,*") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -11773,7 +11788,7 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,DI,DI,SI,DI")]) + (set_attr "mode" "SI,SI,DI,DI,DI,DI,SI,DI")]) (define_insn_and_split "*anddi_1_btr" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") @@ -11828,36 +11843,45 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*andsi_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")))) + (and:SI (match_operand:SI 1 
"nonimmediate_operand" "%0,rm,r") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)" - "and{l}\t{%2, %k0|%k0, %2}" + "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands, + TARGET_APX_NDD)" + "@ + and{l}\t{%2, %k0|%k0, %2} + and{l}\t{%2, %1, %k0|%k0, %1, %2} + and{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*and_1" - [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,Ya,?k") - (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,qm,k") - (match_operand:SWI24 2 "" "r,,L,k"))) + [(set (match_operand:SWI24 0 "nonimmediate_operand" "=rm,r,r,r,Ya,?k") + (and:SWI24 (match_operand:SWI24 1 "nonimmediate_operand" "%0,0,rm,r,qm,k") + (match_operand:SWI24 2 "" "r,,r,,L,k"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (AND, mode, operands)" + "ix86_binary_operator_ok (AND, mode, operands, TARGET_APX_NDD)" "@ and{}\t{%2, %0|%0, %2} and{}\t{%2, %0|%0, %2} + and{}\t{%2, %1, %0|%0, %1, %2} + and{}\t{%2, %1, %0|%0, %1, %2} # #" [(set (attr "isa") - (cond [(eq_attr "alternative" "3") + (cond [(eq_attr "alternative" "2,3") + (const_string "apx_ndd") + (eq_attr "alternative" "5") (if_then_else (eq_attr "mode" "SI") (const_string "avx512bw") (const_string "avx512f")) ] (const_string "*"))) - (set_attr "type" "alu,alu,imovx,msklog") - (set_attr "length_immediate" "*,*,0,*") + (set_attr "type" "alu,alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,*,0,*") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -11865,24 +11889,27 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" ",,SI,")]) + (set_attr "mode" ",,,,SI,")]) (define_insn "*andqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") - (and:QI (match_operand:QI 1 "nonimmediate_operand" 
"%0,0,0,k") - (match_operand:QI 2 "general_operand" "qn,m,rn,k"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k") + (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (AND, QImode, operands)" + "ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD)" "@ and{b}\t{%2, %0|%0, %2} and{b}\t{%2, %0|%0, %2} and{l}\t{%k2, %k0|%k0, %k2} + and{b}\t{%2, %1, %0|%0, %1, %2} + and{b}\t{%2, %1, %0|%0, %1, %2} #" - [(set_attr "type" "alu,alu,alu,msklog") + [(set_attr "type" "alu,alu,alu,alu,alu,msklog") + (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,*") (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") - (and (eq_attr "alternative" "3") + (and (eq_attr "alternative" "5") (match_test "!TARGET_AVX512DQ")) (const_string "HI") ] @@ -11985,7 +12012,10 @@ (clobber (reg:CC FLAGS_REG))] "reload_completed && (!REG_P (operands[1]) - || REGNO (operands[0]) != REGNO (operands[1]))" + || REGNO (operands[0]) != REGNO (operands[1])) + && (UINTVAL (operands[2]) == GET_MODE_MASK (SImode) + || UINTVAL (operands[2]) == GET_MODE_MASK (HImode) + || UINTVAL (operands[2]) == GET_MODE_MASK (QImode))" [(const_int 0)] { unsigned HOST_WIDE_INT ival = UINTVAL (operands[2]); @@ -12058,10 +12088,10 @@ [(set (reg FLAGS_REG) (compare (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m")) + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,r,rm,r") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,m,Z,re,m")) (const_int 0))) - (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") + (set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,r,r") (and:DI (match_dup 1) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode @@ -12075,38 +12105,46 @@ && (!CONST_INT_P (operands[2]) || val_signbit_known_set_p (SImode, INTVAL (operands[2])))) ? 
CCZmode : CCNOmode) - && ix86_binary_operator_ok (AND, DImode, operands)" + && ix86_binary_operator_ok (AND, DImode, operands, TARGET_APX_NDD)" "@ and{l}\t{%k2, %k0|%k0, %k2} and{q}\t{%2, %0|%0, %2} - and{q}\t{%2, %0|%0, %2}" + and{q}\t{%2, %0|%0, %2} + and{l}\t{%k2, %k1, %k0|%k0, %k1, %k2} + and{q}\t{%2, %1, %0|%0, %1, %2} + and{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "alu") - (set_attr "mode" "SI,DI,DI")]) + (set_attr "isa" "*,*,*,apx_ndd,apx_ndd,apx_ndd") + (set_attr "mode" "SI,DI,DI,SI,DI,DI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*andsi_2_zext" [(set (reg FLAGS_REG) (compare (and:SI - (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")) + (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (AND, SImode, operands)" - "and{l}\t{%2, %k0|%k0, %2}" + && ix86_binary_operator_ok (AND, SImode, operands, TARGET_APX_NDD)" + "@ + and{l}\t{%2, %k0|%k0, %2} + and{l}\t{%2, %1, %k0|%k0, %1, %2} + and{l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*andqi_2_maybe_si" [(set (reg FLAGS_REG) (compare (and:QI - (match_operand:QI 1 "nonimmediate_operand" "%0,0,0") - (match_operand:QI 2 "general_operand" "qn,m,n")) + (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r") + (match_operand:QI 2 "general_operand" "qn,m,n,rn,m")) (const_int 0))) - (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r") + (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r") (and:QI (match_dup 1) (match_dup 2)))] - "ix86_binary_operator_ok (AND, QImode, operands) + 
"ix86_binary_operator_ok (AND, QImode, operands, TARGET_APX_NDD) && ix86_match_ccmode (insn, CONST_INT_P (operands[2]) && INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)" @@ -12117,11 +12155,16 @@ operands[2] = GEN_INT (INTVAL (operands[2]) & 0xff); return "and{l}\t{%2, %k0|%k0, %2}"; } + if (which_alternative > 2) + return "and{b}\t{%2, %1, %0|%0, %1, %2}"; return "and{b}\t{%2, %0|%0, %2}"; } [(set_attr "type" "alu") + (set_attr "isa" "*,*,*,apx_ndd,apx_ndd") (set (attr "mode") - (cond [(eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "3,4") + (const_string "QI") + (eq_attr "alternative" "2") (const_string "SI") (and (match_test "optimize_insn_for_size_p ()") (and (match_operand 0 "ext_QIreg_operand") @@ -12138,15 +12181,21 @@ (define_insn "*and_2" [(set (reg FLAGS_REG) (compare (and:SWI124 - (match_operand:SWI124 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI124 2 "" ",")) + (match_operand:SWI124 1 "nonimmediate_operand" "%0,0,rm,r") + (match_operand:SWI124 2 "" ",,r,")) (const_int 0))) - (set (match_operand:SWI124 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI124 0 "nonimmediate_operand" "=m,,r,r") (and:SWI124 (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (AND, mode, operands)" - "and{}\t{%2, %0|%0, %2}" + && ix86_binary_operator_ok (AND, mode, operands, + TARGET_APX_NDD)" + "@ + and{}\t{%2, %0|%0, %2} + and{}\t{%2, %0|%0, %2} + and{}\t{%2, %1, %0|%0, %1, %2} + and{}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "mode" "")]) (define_insn "*qi_ext_0" @@ -12392,6 +12441,7 @@ ;; Don't do the splitting with memory operands, since it introduces risk ;; of memory mismatch stalls. We may want to do the splitting for optimizing ;; for size, but that can (should?) be handled by generic code instead. +;; Don't do the splitting for APX NDD as NDD does not support *h registers. 
(define_split [(set (match_operand:SWI248 0 "QIreg_operand") (and:SWI248 (match_operand:SWI248 1 "register_operand") @@ -12399,7 +12449,8 @@ (clobber (reg:CC FLAGS_REG))] "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - && !(~INTVAL (operands[2]) & ~(255 << 8))" + && !(~INTVAL (operands[2]) & ~(255 << 8)) + && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))" [(parallel [(set (zero_extract:HI (match_dup 0) (const_int 8) @@ -12428,7 +12479,9 @@ "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(~INTVAL (operands[2]) & ~255) - && !(INTVAL (operands[2]) & 128)" + && !(INTVAL (operands[2]) & 128) + && !(TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1]))" [(parallel [(set (strict_low_part (match_dup 0)) (and:QI (match_dup 1) (match_dup 2))) diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 2bd551614c40..be436d57bdfb 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -85,6 +85,15 @@ F (int, not, ~) F1 (int, not, ~) F (long, not, ~) F1 (long, not, ~) + +FOO (char, and, &) +FOO1 (char, and, &) +FOO (short, and, &) +FOO1 (short, and, &) +FOO (int, and, &) +FOO1 (int, and, &) +FOO (long, and, &) +FOO1 (long, and, &) /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -95,3 +104,7 @@ F1 (long, not, ~) /* { dg-final { scan-assembler-times "neg(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "not(?:b|l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "not(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { 
dg-final { scan-assembler-times "andb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */ +/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ +/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ From c95f67b8966dff4f7b22e794e410c5aa7490877a Mon Sep 17 00:00:00 2001 From: Kong Lingling Date: Fri, 19 May 2023 10:50:29 +0800 Subject: [PATCH 022/311] [APX NDD] Support APX NDD for or/xor insn Similar to AND insn, two splitters need to be adjusted to prevent misoptimization for NDD OR/XOR. Also adjust *one_cmplsi2_2_zext and its corresponding splitter that will generate xor insn. gcc/ChangeLog: * config/i386/i386.md (3): Add new alternative for NDD and adjust output templates. (*_1): Likewise. (*qi_1): Likewise. (*notxor_1): Likewise. (*si_1_zext): Likewise. (*notxorqi_1): Likewise. (*_2): Likewise. (*si_2_zext): Likewise. (*si_2_zext_imm): Likewise. (*si_1_zext_imm): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (*one_cmplsi2_2_zext): Likewise. (define_split for *one_cmplsi2_2_zext): Use nonimmediate_operand for operands[3]. (*3_doubleword): Add NDD constraints, adopt '&' to NDD dest and emit move for optimized case if operands[0] != operands[1] or operands[3] != operands[4]. (define_split for QI highpart OR/XOR): Prohibit splitter to split NDD form OR/XOR insn to qi_ext_3. (define_split for QI strict_lowpart optimization): Prohibit splitter to split NDD form OR/XOR insn to *3_1_slp. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add or and xor test.
--- gcc/config/i386/i386.md | 186 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 26 ++++ 2 files changed, 143 insertions(+), 69 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d2528e0dcf6c..ad4c958a1e84 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -12703,17 +12703,19 @@ && !x86_64_hilo_general_operand (operands[2], mode)) operands[2] = force_reg (mode, operands[2]); - ix86_expand_binary_operator (, mode, operands); + ix86_expand_binary_operator (, mode, operands, + TARGET_APX_NDD); DONE; }) (define_insn_and_split "*3_doubleword" - [(set (match_operand: 0 "nonimmediate_operand" "=ro,r") + [(set (match_operand: 0 "nonimmediate_operand" "=ro,r,&r,&r") (any_or: - (match_operand: 1 "nonimmediate_operand" "%0,0") - (match_operand: 2 "x86_64_hilo_general_operand" "r,o"))) + (match_operand: 1 "nonimmediate_operand" "%0,0,ro,r") + (match_operand: 2 "x86_64_hilo_general_operand" "r,o,r,o"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" "#" "&& reload_completed" [(const_int:DWIH 0)] @@ -12725,20 +12727,29 @@ split_double_mode (mode, &operands[0], 3, &operands[0], &operands[3]); if (operands[2] == const0_rtx) - emit_insn_deleted_note_p = true; + { + if (!rtx_equal_p (operands[0], operands[1])) + emit_move_insn (operands[0], operands[1]); + else + emit_insn_deleted_note_p = true; + } else if (operands[2] == constm1_rtx) { if ( == IOR) emit_move_insn (operands[0], constm1_rtx); else - ix86_expand_unary_operator (NOT, mode, &operands[0]); + ix86_expand_unary_operator (NOT, mode, &operands[0], + TARGET_APX_NDD); } else - ix86_expand_binary_operator (, mode, &operands[0]); + ix86_expand_binary_operator (, mode, &operands[0], + TARGET_APX_NDD); if (operands[5] == const0_rtx) { - if (emit_insn_deleted_note_p) + if (!rtx_equal_p (operands[3], operands[4])) + emit_move_insn (operands[3], operands[4]); + else if 
(emit_insn_deleted_note_p) emit_note (NOTE_INSN_DELETED); } else if (operands[5] == constm1_rtx) @@ -12746,37 +12757,43 @@ if ( == IOR) emit_move_insn (operands[3], constm1_rtx); else - ix86_expand_unary_operator (NOT, mode, &operands[3]); + ix86_expand_unary_operator (NOT, mode, &operands[3], + TARGET_APX_NDD); } else - ix86_expand_binary_operator (, mode, &operands[3]); + ix86_expand_binary_operator (, mode, &operands[3], + TARGET_APX_NDD); DONE; -}) +} +[(set_attr "isa" "*,*,apx_ndd,apx_ndd")]) (define_insn "*_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k") (any_or:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") - (match_operand:SWI248 2 "" "r,,k"))) + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k") + (match_operand:SWI248 2 "" "r,,r,,k"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" "@ {}\t{%2, %0|%0, %2} {}\t{%2, %0|%0, %2} + {}\t{%2, %1, %0|%0, %1, %2} + {}\t{%2, %1, %0|%0, %1, %2} #" - [(set_attr "isa" "*,*,") - (set_attr "type" "alu, alu, msklog") + [(set_attr "isa" "*,*,apx_ndd,apx_ndd,") + (set_attr "type" "alu, alu, alu, alu, msklog") (set_attr "mode" "")]) (define_insn_and_split "*notxor_1" - [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,r,r,?k") (not:SWI248 (xor:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") - (match_operand:SWI248 2 "" "r,,k")))) + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,rm,r,k") + (match_operand:SWI248 2 "" "r,,r,,k")))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (XOR, mode, operands)" + "ix86_binary_operator_ok (XOR, mode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel @@ -12792,8 +12809,8 @@ DONE; } } - [(set_attr "isa" "*,*,") - (set_attr "type" "alu, alu, msklog") + 
[(set_attr "isa" "*,*,apx_ndd,apx_ndd,") + (set_attr "type" "alu, alu, alu, alu, msklog") (set_attr "mode" "")]) (define_insn_and_split "*iordi_1_bts" @@ -12881,44 +12898,55 @@ ;; See comment for addsi_1_zext why we do use nonimmediate_operand (define_insn "*si_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")))) + (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" - "{l}\t{%2, %k0|%k0, %2}" + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" + "@ + {l}\t{%2, %k0|%k0, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*si_1_zext_imm" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r") (any_or:DI - (zero_extend:DI (match_operand:SI 1 "register_operand" "%0")) - (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z"))) + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "%0,rm")) + (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z,Z"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" - "{l}\t{%2, %k0|%k0, %2}" + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" + "@ + {l}\t{%2, %k0|%k0, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*qi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") - (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") - (match_operand:QI 2 "general_operand" 
"qn,m,rn,k"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k") + (any_or:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, QImode, operands)" + "ix86_binary_operator_ok (, QImode, operands, TARGET_APX_NDD)" "@ {b}\t{%2, %0|%0, %2} {b}\t{%2, %0|%0, %2} {l}\t{%k2, %k0|%k0, %k2} + {b}\t{%2, %1, %0|%0, %1, %2} + {b}\t{%2, %1, %0|%0, %1, %2} #" - [(set_attr "isa" "*,*,*,avx512f") - (set_attr "type" "alu,alu,alu,msklog") + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f") + (set_attr "type" "alu,alu,alu,alu,alu,msklog") (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") - (and (eq_attr "alternative" "3") + (and (eq_attr "alternative" "5") (match_test "!TARGET_AVX512DQ")) (const_string "HI") ] @@ -12930,12 +12958,12 @@ (symbol_ref "true")))]) (define_insn_and_split "*notxorqi_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,r,r,?k") (not:QI - (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") - (match_operand:QI 2 "general_operand" "qn,m,rn,k")))) + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,rm,r,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,rn,m,k")))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (XOR, QImode, operands)" + "ix86_binary_operator_ok (XOR, QImode, operands, TARGET_APX_NDD)" "#" "&& reload_completed" [(parallel @@ -12951,12 +12979,12 @@ DONE; } } - [(set_attr "isa" "*,*,*,avx512f") - (set_attr "type" "alu,alu,alu,msklog") + [(set_attr "isa" "*,*,*,apx_ndd,apx_ndd,avx512f") + (set_attr "type" "alu,alu,alu,alu,alu,msklog") (set (attr "mode") (cond [(eq_attr "alternative" "2") (const_string "SI") - (and (eq_attr "alternative" "3") + (and (eq_attr "alternative" "5") (match_test "!TARGET_AVX512DQ")) (const_string "HI") ] @@ -13004,44 +13032,59 @@ (define_insn "*_2" 
[(set (reg FLAGS_REG) (compare (any_or:SWI - (match_operand:SWI 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI 2 "" ",")) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r") + (match_operand:SWI 2 "" ",,r,")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m,") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,r,r") (any_or:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (, mode, operands)" - "{}\t{%2, %0|%0, %2}" + && ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" + "@ + {}\t{%2, %0|%0, %2} + {}\t{%2, %0|%0, %2} + {}\t{%2, %1, %0|%0, %1, %2} + {}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,*,apx_ndd,apx_ndd") (set_attr "mode" "")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand ;; ??? Special case for immediate operand is missing - it is tricky. (define_insn "*si_2_zext" [(set (reg FLAGS_REG) - (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_general_operand" "rBMe")) + (compare (any_or:SI (match_operand:SI 1 "nonimmediate_operand" "%0,rm,r") + (match_operand:SI 2 "x86_64_general_operand" "rBMe,re,BM")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (, SImode, operands)" - "{l}\t{%2, %k0|%k0, %2}" + && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" + "@ + {l}\t{%2, %k0|%k0, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*si_2_zext_imm" [(set (reg FLAGS_REG) (compare (any_or:SI - (match_operand:SI 1 "nonimmediate_operand" "%0") - (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z")) + (match_operand:SI 1 
"nonimmediate_operand" "%0,rm") + (match_operand:SI 2 "x86_64_zext_immediate_operand" "Z,Z")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - && ix86_binary_operator_ok (, SImode, operands)" - "{l}\t{%2, %k0|%k0, %2}" + && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" + "@ + {l}\t{%2, %k0|%k0, %2} + {l}\t{%2, %1, %k0|%k0, %1, %2}" [(set_attr "type" "alu") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) (define_insn "*_3" @@ -13062,6 +13105,7 @@ ;; Don't do the splitting with memory operands, since it introduces risk ;; of memory mismatch stalls. We may want to do the splitting for optimizing ;; for size, but that can (should?) be handled by generic code instead. +;; Don't do the splitting for APX NDD as NDD does not support *h registers. (define_split [(set (match_operand:SWI248 0 "QIreg_operand") (any_or:SWI248 (match_operand:SWI248 1 "register_operand") @@ -13069,7 +13113,8 @@ (clobber (reg:CC FLAGS_REG))] "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) - && !(INTVAL (operands[2]) & ~(255 << 8))" + && !(INTVAL (operands[2]) & ~(255 << 8)) + && !(TARGET_APX_NDD && REGNO (operands[0]) != REGNO (operands[1]))" [(parallel [(set (zero_extract:HI (match_dup 0) (const_int 8) @@ -13107,7 +13152,9 @@ "reload_completed && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) && !(INTVAL (operands[2]) & ~255) - && (INTVAL (operands[2]) & 128)" + && (INTVAL (operands[2]) & 128) + && !(TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1]))" [(parallel [(set (strict_low_part (match_dup 0)) (any_or:QI (match_dup 1) (match_dup 2))) @@ -14173,20 +14220,21 @@ (define_insn "*one_cmplsi2_2_zext" [(set (reg FLAGS_REG) - (compare (not:SI (match_operand:SI 1 "register_operand" "0")) + (compare (not:SI (match_operand:SI 
1 "nonimmediate_operand" "0,rm")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (not:SI (match_dup 1))))] "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode) - && ix86_unary_operator_ok (NOT, SImode, operands)" + && ix86_unary_operator_ok (NOT, SImode, operands, TARGET_APX_NDD)" "#" [(set_attr "type" "alu1") + (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) (define_split [(set (match_operand 0 "flags_reg_operand") (match_operator 2 "compare_operator" - [(not:SI (match_operand:SI 3 "register_operand")) + [(not:SI (match_operand:SI 3 "nonimmediate_operand")) (const_int 0)])) (set (match_operand:DI 1 "register_operand") (zero_extend:DI (not:SI (match_dup 3))))] diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index be436d57bdfb..d97648c876d4 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -94,6 +94,24 @@ FOO (int, and, &) FOO1 (int, and, &) FOO (long, and, &) FOO1 (long, and, &) + +FOO (char, or, |) +FOO1 (char, or, |) +FOO (short, or, |) +FOO1 (short, or, |) +FOO (int, or, |) +FOO1 (int, or, |) +FOO (long, or, |) +FOO1 (long, or, |) + +FOO (char, xor, ^) +FOO1 (char, xor, ^) +FOO (short, xor, ^) +FOO1 (short, xor, ^) +FOO (int, xor, ^) +FOO1 (int, xor, ^) +FOO (long, xor, ^) +FOO1 (long, xor, ^) /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -108,3 +126,11 @@ FOO1 (long, and, &) /* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ /* { dg-final { scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ /* { dg-final { 
scan-assembler-times "and(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "orb\[^\n\r]*1, \\(%rdi\\), %al" 2} } */ +/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 6 } } */ +/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "or(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "xorb\[^\n\r]*1, \\(%rdi\\), %al" 1 } } */ +/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ +/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ From 03655cd427b9d8e3b06c950255332eb988b0ade1 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Wed, 25 Oct 2023 15:07:29 +0800 Subject: [PATCH 023/311] [APX NDD] Support APX NDD for left shift insns For left shift, there is an optimization TARGET_DOUBLE_WITH_ADD that shl 1 can be optimized to add. As NDD form of add requires src operand to be register since NDD cannot take 2 memory src, we currently just keep using NDD form shift instead of add. The optimization TARGET_SHIFT1 will try to remove constant 1 to use shorter opcode, but under NDD the assembler will automatically use it whether $1 exists or not, so do not involve NDD with it. The doubleword insns for left shift call ix86_expand_ashl, which assumes all shift related patterns have the same operand[0] and operand[1]. These patterns will be supported in a standalone patch. gcc/ChangeLog: * config/i386/i386.md (*ashl3_1): Extend with new alternatives to support NDD, limit the new alternative to generate sal only, and adjust output template for NDD. (*ashlsi3_1_zext): Likewise. (*ashlhi3_1): Likewise. (*ashlqi3_1): Likewise. (*ashl3_cmp): Likewise.
(*ashlsi3_cmp_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (*ashl3_cconly): Likewise. (*ashl3_doubleword_highpart): Adjust codegen for NDD. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add tests for sal. --- gcc/config/i386/i386.md | 172 ++++++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 22 +++ 2 files changed, 136 insertions(+), 58 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ad4c958a1e84..c67896cf97c8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14472,10 +14472,19 @@ { split_double_mode (mode, &operands[0], 1, &operands[0], &operands[3]); int bits = INTVAL (operands[2]) - ( * BITS_PER_UNIT); - if (!rtx_equal_p (operands[3], operands[1])) - emit_move_insn (operands[3], operands[1]); - if (bits > 0) - emit_insn (gen_ashl3 (operands[3], operands[3], GEN_INT (bits))); + bool op_equal_p = rtx_equal_p (operands[3], operands[1]); + if (bits == 0) + { + if (!op_equal_p) + emit_move_insn (operands[3], operands[1]); + } + else + { + if (!op_equal_p && !TARGET_APX_NDD) + emit_move_insn (operands[3], operands[1]); + rtx op_tmp = TARGET_APX_NDD ? 
operands[1] : operands[3]; + emit_insn (gen_ashl3 (operands[3], op_tmp, GEN_INT (bits))); + } ix86_expand_clear (operands[0]); DONE; }) @@ -14782,12 +14791,14 @@ (set_attr "mode" "")]) (define_insn "*ashl3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k") - (match_operand:QI 2 "nonmemory_operand" "c,M,r,"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm") + (match_operand:QI 2 "nonmemory_operand" "c,M,r,,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, mode, operands)" + "ix86_binary_operator_ok (ASHIFT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14802,18 +14813,25 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + /* For NDD form instructions related to TARGET_SHIFT1, the $1 + immediate do not need to be omitted as assembler will map it + to use shorter encoding. */ + && !use_ndd) return "sal{}\t%0"; else - return "sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,bmi2,") + [(set_attr "isa" "*,*,bmi2,,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14855,13 +14873,15 @@ (set_attr "mode" "SI")]) (define_insn "*ashlsi3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") (zero_extend:DI - (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,M,r")))) + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14874,18 +14894,22 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,*,bmi2") + [(set_attr "isa" "*,*,bmi2,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "ishiftx") + (eq_attr "alternative" "3") + (const_string "ishift") (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") @@ -14915,12 +14939,14 @@ "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn "*ashlhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, HImode, operands)" + "ix86_binary_operator_ok (ASHIFT, HImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14933,18 +14959,22 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{w}\t%0"; else - return "sal{w}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{w}\t{%2, %1, %0|%0, %1, %2}" + : "sal{w}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,avx512f") + [(set_attr "isa" "*,*,avx512f,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") (eq_attr "alternative" "2") (const_string "msklog") + (eq_attr "alternative" "3") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -14960,15 +14990,17 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "HI,SI,HI")]) + (set_attr "mode" "HI,SI,HI,HI")]) (define_insn "*ashlqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFT, QImode, operands)" + "ix86_binary_operator_ok (ASHIFT, QImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_LEA: @@ -14984,7 +15016,8 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) { if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t%k0"; @@ -14996,16 +15029,19 @@ if (get_attr_mode (insn) == MODE_SI) return "sal{l}\t{%2, %k0|%k0, %2}"; else - return "sal{b}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{b}\t{%2, %1, %0|%0, %1, %2}" + : "sal{b}\t{%2, %0|%0, %2}"; } } } - [(set_attr "isa" "*,*,*,avx512dq") + [(set_attr "isa" "*,*,*,avx512dq,apx_ndd") (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") (eq_attr "alternative" "3") (const_string "msklog") + (eq_attr "alternative" "4") + (const_string "ishift") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -15021,10 +15057,10 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI,QI") + (set_attr "mode" "QI,SI,SI,QI,QI") ;; Potential partial reg stall on alternative 1. (set (attr "preferred_for_speed") - (cond [(eq_attr "alternative" "1") + (cond [(eq_attr "alternative" "1,4") (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) @@ -15119,10 +15155,10 @@ (define_insn "*ashl3_cmp" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "" "")) + (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (ashift:SWI (match_dup 1) (match_dup 2)))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL @@ -15130,8 +15166,10 @@ && (TARGET_SHIFT1 || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0]))))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, mode, operands)" + && ix86_binary_operator_ok (ASHIFT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: @@ -15140,14 +15178,19 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{}\t%0"; else - return 
"sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") @@ -15167,10 +15210,10 @@ (define_insn "*ashlsi3_cmp_zext" [(set (reg FLAGS_REG) (compare - (ashift:SI (match_operand:SI 1 "register_operand" "0") + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") (match_operand:QI 2 "const_1_to_31_operand")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && (optimize_function_for_size_p (cfun) @@ -15179,8 +15222,10 @@ && (TARGET_SHIFT1 || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (ASHIFT, SImode, operands)" + && ix86_binary_operator_ok (ASHIFT, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: @@ -15189,14 +15234,19 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{l}\t%k0"; else - return "sal{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"sal{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") - (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 2 "const1_operand")) (const_string "alu") ] @@ -15215,10 +15265,10 @@ (define_insn "*ashl3_cconly" [(set (reg FLAGS_REG) (compare - (ashift:SWI (match_operand:SWI 1 "register_operand" "0") - (match_operand:QI 2 "" "")) + (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r"))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx @@ -15226,22 +15276,28 @@ || TARGET_DOUBLE_WITH_ADD))) && ix86_match_ccmode (insn, CCGOCmode)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ALU: gcc_assert (operands[2] == const1_rtx); return "add{}\t%0, %0"; - default: + default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sal{}\t%0"; else - return "sal{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sal{}\t{%2, %1, %0|%0, %1, %2}" + : "sal{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") - (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD") + [(set_attr "isa" "*,apx_ndd") + (set (attr "type") + (cond [(eq_attr "alternative" "1") + (const_string "ishift") + (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index d97648c876d4..9951fb00a4c5 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -29,6 +29,16 @@ foo2_##OP_NAME##_##TYPE (TYPE *a, TYPE b) \ return c; \ } +#define FOO3(TYPE, OP_NAME, OP, IMM) \ +TYPE \ +__attribute__ ((noipa)) \ +foo3_##OP_NAME##_##TYPE (TYPE a) \ +{ \ + TYPE b = a OP IMM; \ + return b; \ +} + + #define F(TYPE, OP_NAME, OP) \ TYPE \ __attribute__ ((noipa)) \ @@ -112,6 +122,16 @@ FOO (int, xor, ^) FOO1 (int, xor, ^) FOO (long, xor, ^) FOO1 (long, xor, ^) + +FOO (char, shl, <<) +FOO3 (char, shl, <<, 7) +FOO (short, shl, <<) +FOO3 (short, shl, <<, 7) +FOO (int, shl, <<) +FOO3 (int, shl, <<, 7) +FOO (long, shl, <<) +FOO3 (long, shl, <<, 7) + /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -134,3 +154,5 @@ FOO1 (long, xor, ^) /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */ /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ +/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } 
*/ +/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ From 16172db2dfc6307860c545aa95897b84d8e157e1 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Wed, 25 Oct 2023 16:26:49 +0800 Subject: [PATCH 024/311] [APX NDD] Support APX NDD for right shift insns Similar to the left shift patterns, right shifts do not need to omit $1 for the NDD form. gcc/ChangeLog: * config/i386/i386.md (ashr3_cvt): Extend with new alternatives to support NDD, and adjust output templates. (*ashr3_1): Likewise for SI/DI mode. (*lshr3_1): Likewise. (*si3_1_zext): Likewise. (*ashr3_1): Likewise for QI/HI mode. (*lshrqi3_1): Likewise. (*lshrhi3_1): Likewise. (3_cmp): Likewise. (*3_cconly): Likewise. (*ashrsi3_cvt_zext): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (*highpartdisi2): Likewise. (*si3_cmp_zext): Likewise. (3_carry): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add l/ashiftrt tests. --- gcc/config/i386/i386.md | 232 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 24 +++ 2 files changed, 166 insertions(+), 90 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c67896cf97c8..d1eae7248d99 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15808,39 +15808,45 @@ [(SI "{cltd|cdq}") (DI "{cqto|cqo}")]) (define_insn "ashr3_cvt" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=*d,rm,r") (ashiftrt:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "*a,0") + (match_operand:SWI48 1 "nonimmediate_operand" "*a,0,rm") (match_operand:QI 2 "const_int_operand"))) (clobber (reg:CC FLAGS_REG))] "INTVAL (operands[2]) == GET_MODE_BITSIZE (mode)-1 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) - && ix86_binary_operator_ok (ASHIFTRT, mode, operands)" + && ix86_binary_operator_ok (ASHIFTRT, mode, operands, + TARGET_APX_NDD)" "@ - sar{}\t{%2, %0|%0, %2}" - 
[(set_attr "type" "imovx,ishift") - (set_attr "prefix_0f" "0,*") - (set_attr "length_immediate" "0,*") - (set_attr "modrm" "0,1") + sar{}\t{%2, %0|%0, %2} + sar{}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd") + (set_attr "type" "imovx,ishift,ishift") + (set_attr "prefix_0f" "0,*,*") + (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,1,1") (set_attr "mode" "")]) (define_insn "*ashrsi3_cvt_zext" - [(set (match_operand:DI 0 "register_operand" "=*d,r") + [(set (match_operand:DI 0 "register_operand" "=*d,r,r") (zero_extend:DI - (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0") + (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0,rm") (match_operand:QI 2 "const_int_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun)) - && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)" + && ix86_binary_operator_ok (ASHIFTRT, SImode, operands, + TARGET_APX_NDD)" "@ {cltd|cdq} - sar{l}\t{%2, %k0|%k0, %2}" - [(set_attr "type" "imovx,ishift") - (set_attr "prefix_0f" "0,*") - (set_attr "length_immediate" "0,*") - (set_attr "modrm" "0,1") + sar{l}\t{%2, %k0|%k0, %2} + sar{l}\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd") + (set_attr "type" "imovx,ishift,ishift") + (set_attr "prefix_0f" "0,*,*") + (set_attr "length_immediate" "0,*,*") + (set_attr "modrm" "0,1,1") (set_attr "mode" "SI")]) (define_expand "@x86_shift_adj_3" @@ -15882,13 +15888,15 @@ (set_attr "mode" "")]) (define_insn "*ashr3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (ashiftrt:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "c,r"))) + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "c,r,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" + 
"ix86_binary_operator_ok (ASHIFTRT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ISHIFTX: @@ -15896,14 +15904,16 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sar{}\t%0"; else - return "sar{}\t{%2, %0|%0, %2}"; + return use_ndd ? "sar{}\t{%2, %1, %0|%0, %1, %2}" + : "sar{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,bmi2") - (set_attr "type" "ishift,ishiftx") + [(set_attr "isa" "*,bmi2,apx_ndd") + (set_attr "type" "ishift,ishiftx,ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -15916,8 +15926,8 @@ ;; Specialization of *lshr3_1 below, extracting the SImode ;; highpart of a DI to be extracted, but allowing it to be clobbered. (define_insn_and_split "*highpartdisi2" - [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k") 0) - (lshiftrt:DI (match_operand:DI 1 "register_operand" "0,0,k") + [(set (subreg:DI (match_operand:SI 0 "register_operand" "=r,x,?k,r") 0) + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,0,k,rm") (const_int 32))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" @@ -15936,16 +15946,20 @@ DONE; } operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); -}) +} +[(set_attr "isa" "*,*,*,apx_ndd")]) + (define_insn "*lshr3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k,r") (lshiftrt:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k") - (match_operand:QI 2 "nonmemory_operand" "c,r,"))) + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k,rm") + (match_operand:QI 2 "nonmemory_operand" "c,r,,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, mode, operands)" + "ix86_binary_operator_ok (LSHIFTRT, mode, operands, + TARGET_APX_NDD)" { + bool 
use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ISHIFTX: @@ -15954,14 +15968,16 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "shr{}\t%0"; else - return "shr{}\t{%2, %0|%0, %2}"; + return use_ndd ? "shr{}\t{%2, %1, %0|%0, %1, %2}" + : "shr{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,bmi2,") - (set_attr "type" "ishift,ishiftx,msklog") + [(set_attr "isa" "*,bmi2,,apx_ndd") + (set_attr "type" "ishift,ishiftx,msklog,ishift") (set (attr "length_immediate") (if_then_else (and (and (match_operand 2 "const1_operand") @@ -15994,13 +16010,15 @@ (set_attr "mode" "SI")]) (define_insn "*si3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,r")))) + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,r,cI")))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" + "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ISHIFTX: @@ -16008,14 +16026,16 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{l}\t%k0"; else - return "{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,bmi2") - (set_attr "type" "ishift,ishiftx") + [(set_attr "isa" "*,bmi2,apx_ndd") + (set_attr "type" "ishift,ishiftx,ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16038,20 +16058,25 @@ "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn "*ashr3_1" - [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") + [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m, r") (ashiftrt:SWI12 - (match_operand:SWI12 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "c"))) + (match_operand:SWI12 1 "nonimmediate_operand" "0, rm") + (match_operand:QI 2 "nonmemory_operand" "c, c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" + "ix86_binary_operator_ok (ASHIFTRT, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "sar{}\t%0"; else - return "sar{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"sar{}\t{%2, %1, %0|%0, %1, %2}" + : "sar{}\t{%2, %0|%0, %2}"; } - [(set_attr "type" "ishift") + [(set_attr "isa" "*, apx_ndd") + (set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16062,29 +16087,33 @@ (set_attr "mode" "")]) (define_insn "*lshrqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k") + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k,r") (lshiftrt:QI - (match_operand:QI 1 "nonimmediate_operand" "0, k") - (match_operand:QI 2 "nonmemory_operand" "cI,Wb"))) + (match_operand:QI 1 "nonimmediate_operand" "0, k, rm") + (match_operand:QI 2 "nonmemory_operand" "cI,Wb,cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ISHIFT: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "shr{b}\t%0"; else - return "shr{b}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"shr{b}\t{%2, %1, %0|%0, %1, %2}" + : "shr{b}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: gcc_unreachable (); } } - [(set_attr "isa" "*,avx512dq") - (set_attr "type" "ishift,msklog") + [(set_attr "isa" "*,avx512dq,apx_ndd") + (set_attr "type" "ishift,msklog,ishift") (set (attr "length_immediate") (if_then_else (and (and (match_operand 2 "const1_operand") @@ -16096,29 +16125,33 @@ (set_attr "mode" "QI")]) (define_insn "*lshrhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k") + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k, r") (lshiftrt:HI - (match_operand:HI 1 "nonimmediate_operand" "0, k") - (match_operand:QI 2 "nonmemory_operand" "cI, Ww"))) + (match_operand:HI 1 "nonimmediate_operand" "0, k, rm") + (match_operand:QI 2 "nonmemory_operand" "cI, Ww, cI"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ISHIFT: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "shr{w}\t%0"; else - return "shr{w}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"shr{w}\t{%2, %1, %0|%0, %1, %2}" + : "shr{w}\t{%2, %0|%0, %2}"; case TYPE_MSKLOG: return "#"; default: gcc_unreachable (); } } - [(set_attr "isa" "*, avx512f") - (set_attr "type" "ishift,msklog") + [(set_attr "isa" "*, avx512f, apx_ndd") + (set_attr "type" "ishift,msklog,ishift") (set (attr "length_immediate") (if_then_else (and (and (match_operand 2 "const1_operand") @@ -16171,25 +16204,30 @@ [(set (reg FLAGS_REG) (compare (any_shiftrt:SWI - (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "" "")) + (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=m") + (set (match_operand:SWI 0 "nonimmediate_operand" "=m,r") (any_shiftrt:SWI (match_dup 1) (match_dup 2)))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (, mode, operands)" + && ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"{}\t{%2, %1, %0|%0, %1, %2}" + : "{}\t{%2, %0|%0, %2}"; } - [(set_attr "type" "ishift") + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16202,10 +16240,10 @@ (define_insn "*si3_cmp_zext" [(set (reg FLAGS_REG) (compare - (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0") + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") (match_operand:QI 2 "const_1_to_31_operand")) (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") + (set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] "TARGET_64BIT && (optimize_function_for_size_p (cfun) @@ -16213,15 +16251,20 @@ || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode) - && ix86_binary_operator_ok (, SImode, operands)" + && ix86_binary_operator_ok (, SImode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{l}\t%k0"; else - return "{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "{l}\t{%2, %k0|%k0, %2}"; } - [(set_attr "type" "ishift") + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16235,23 +16278,28 @@ [(set (reg FLAGS_REG) (compare (any_shiftrt:SWI - (match_operand:SWI 1 "register_operand" "0") - (match_operand:QI 2 "" "")) + (match_operand:SWI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "" ",")) (const_int 0))) - (clobber (match_scratch:SWI 0 "="))] + (clobber (match_scratch:SWI 0 "=,r"))] "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL || (operands[2] == const1_rtx && TARGET_SHIFT1)) && ix86_match_ccmode (insn, CCGOCmode)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return use_ndd + ? "{}\t{%2, %1, %0|%0, %1, %2}" + : "{}\t{%2, %0|%0, %2}"; } - [(set_attr "type" "ishift") + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "ishift") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16855,18 +16903,22 @@ ;; Versions of sar and shr that set the carry flag. 
(define_insn "3_carry" [(set (reg:CCC FLAGS_REG) - (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "register_operand" "0") + (unspec:CCC [(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") (const_int 1)) (const_int 0)] UNSPEC_CC_NE)) - (set (match_operand:SWI48 0 "register_operand" "=r") + (set (match_operand:SWI48 0 "register_operand" "=r,r") (any_shiftrt:SWI48 (match_dup 1) (const_int 1)))] "" { - if (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; + if ((TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{}\t%0"; - return "{}\t{1, %0|%0, 1}"; + return use_ndd ? "{}\t{$1, %1, %0|%0, %1, 1}" + : "{}\t{$1, %0|%0, 1}"; } - [(set_attr "type" "ishift1") + [(set_attr "isa" "*, apx_ndd") + (set_attr "type" "ishift1") (set (attr "length_immediate") (if_then_else (ior (match_test "TARGET_SHIFT1") diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 9951fb00a4c5..239c427514a3 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -2,6 +2,8 @@ /* { dg-options "-mapxf -march=x86-64 -O2" } */ /* { dg-final { scan-assembler-not "movl"} } */ +#include <stdint.h> + #define FOO(TYPE, OP_NAME, OP) \ TYPE \ __attribute__ ((noipa)) \ @@ -132,6 +134,24 @@ FOO3 (int, shl, <<, 7) FOO (long, shl, <<) FOO3 (long, shl, <<, 7) +FOO (char, sar, >>) +FOO3 (char, sar, >>, 7) +FOO (short, sar, >>) +FOO3 (short, sar, >>, 7) +FOO (int, sar, >>) +FOO3 (int, sar, >>, 7) +FOO (long, sar, >>) +FOO3 (long, sar, >>, 7) + +FOO (uint8_t, shr, >>) +FOO3 (uint8_t, shr, >>, 7) +FOO (uint16_t, shr, >>) +FOO3 (uint16_t, shr, >>, 7) +FOO (uint32_t, shr, >>) +FOO3 (uint32_t, shr, >>, 7) +FOO (uint64_t, shr, >>) +FOO3 (uint64_t, shr, >>, 7) + /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), 
%(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -156,3 +176,7 @@ FOO3 (long, shl, <<, 7) /* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */ /* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */ +/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ From d1dea413ef3761335bd741263a61e0f31e67b7d6 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Tue, 31 Oct 2023 14:21:16 +0800 Subject: [PATCH 025/311] [APX NDD] Support APX NDD for rotate insns gcc/ChangeLog: * config/i386/i386.md (*3_1): Extend with a new alternative to support NDD for SI/DI rotate, and adjust output template. (*si3_1_zext): Likewise. (*3_1): Likewise for QI/HI modes. (rcrsi2): Likewise, and use nonimmediate_operand for operands[1] to accept memory input for NDD alternative. (rcrdi2): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd.c: Add test for left/right rotate. 
--- gcc/config/i386/i386.md | 79 +++++++++++++++---------- gcc/testsuite/gcc.target/i386/apx-ndd.c | 20 +++++++ 2 files changed, 69 insertions(+), 30 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d1eae7248d99..6e4ac776f8a0 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16667,13 +16667,15 @@ (set_attr "mode" "")]) (define_insn "*3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") (any_rotate:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "c,"))) + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "c,,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ROTATEX: @@ -16681,14 +16683,16 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return use_ndd ? 
"{}\t{%2, %1, %0|%0, %1, %2}" + : "{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,bmi2") - (set_attr "type" "rotate,rotatex") + [(set_attr "isa" "*,bmi2,apx_ndd") + (set_attr "type" "rotate,rotatex,rotate") (set (attr "preferred_for_size") (cond [(eq_attr "alternative" "0") (symbol_ref "true")] @@ -16738,13 +16742,14 @@ (set_attr "mode" "SI")]) (define_insn "*si3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,I")))) + (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,I,cI")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; switch (get_attr_type (insn)) { case TYPE_ROTATEX: @@ -16752,14 +16757,16 @@ default: if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{l}\t%k0"; else - return "{l}\t{%2, %k0|%k0, %2}"; + return use_ndd ? 
"{l}\t{%2, %1, %k0|%k0, %1, %2}" + : "{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,bmi2") - (set_attr "type" "rotate,rotatex") + [(set_attr "isa" "*,bmi2,apx_ndd") + (set_attr "type" "rotate,rotatex,rotate") (set (attr "preferred_for_size") (cond [(eq_attr "alternative" "0") (symbol_ref "true")] @@ -16803,19 +16810,25 @@ (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]) (define_insn "*3_1" - [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") - (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "c"))) + [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m,r") + (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "nonmemory_operand" "c,c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (, mode, operands, + TARGET_APX_NDD)" { + bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD; if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) + && !use_ndd) return "{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return use_ndd + ? 
"{}\t{%2, %1, %0|%0, %1, %2}" + : "{}\t{%2, %0|%0, %2}"; } - [(set_attr "type" "rotate") + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "rotate") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16872,31 +16885,37 @@ ;; Rotations through carry flag (define_insn "rcrsi2" - [(set (match_operand:SI 0 "register_operand" "=r") + [(set (match_operand:SI 0 "register_operand" "=r,r") (plus:SI - (lshiftrt:SI (match_operand:SI 1 "register_operand" "0") + (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") (const_int 1)) (ashift:SI (ltu:SI (reg:CCC FLAGS_REG) (const_int 0)) (const_int 31)))) (clobber (reg:CC FLAGS_REG))] "" - "rcr{l}\t%0" - [(set_attr "type" "ishift1") + "@ + rcr{l}\t%0 + rcr{l}\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "ishift1") (set_attr "memory" "none") (set_attr "length_immediate" "0") (set_attr "mode" "SI")]) (define_insn "rcrdi2" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=r,r") (plus:DI - (lshiftrt:DI (match_operand:DI 1 "register_operand" "0") + (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0,rm") (const_int 1)) (ashift:DI (ltu:DI (reg:CCC FLAGS_REG) (const_int 0)) (const_int 63)))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT" - "rcr{q}\t%0" - [(set_attr "type" "ishift1") + "@ + rcr{q}\t%0 + rcr{q}\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,apx_ndd") + (set_attr "type" "ishift1") (set_attr "length_immediate" "0") (set_attr "mode" "DI")]) diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c index 239c427514a3..b215f66d3e24 100644 --- a/gcc/testsuite/gcc.target/i386/apx-ndd.c +++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c @@ -40,6 +40,14 @@ foo3_##OP_NAME##_##TYPE (TYPE a) \ return b; \ } +#define FOO4(TYPE, OP_NAME, OP1, OP2, IMM1) \ +TYPE \ +__attribute__ ((noipa)) \ +foo4_##OP_NAME##_##TYPE (TYPE a) \ +{ \ + TYPE b = (a OP1 IMM1 | a OP2 (8 * sizeof(TYPE) - 
IMM1)); \ + return b; \ +} #define F(TYPE, OP_NAME, OP) \ TYPE \ @@ -152,6 +160,16 @@ FOO3 (uint32_t, shr, >>, 7) FOO (uint64_t, shr, >>) FOO3 (uint64_t, shr, >>, 7) +FOO4 (uint8_t, ror, >>, <<, 1) +FOO4 (uint16_t, ror, >>, <<, 1) +FOO4 (uint32_t, ror, >>, <<, 1) +FOO4 (uint64_t, ror, >>, <<, 1) + +FOO4 (uint8_t, rol, <<, >>, 1) +FOO4 (uint16_t, rol, <<, >>, 1) +FOO4 (uint32_t, rol, <<, >>, 1) +FOO4 (uint64_t, rol, <<, >>, 1) + /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */ /* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ @@ -180,3 +198,5 @@ FOO3 (uint64_t, shr, >>, 7) /* { dg-final { scan-assembler-times "sar(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */ /* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ +/* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */ From 5fb807e1e8e68c4ac291f051e60942404ff0c800 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Tue, 7 Nov 2023 16:28:28 +0800 Subject: [PATCH 026/311] [APX NDD] Support APX NDD for shld/shrd insns For shld/shrd insns, the old pattern use match_dup 0 as its shift src and use +r*m as its constraint. To support NDD we added new define_insns to handle NDD form pattern with extra input and dest operand to be fixed in register. gcc/ChangeLog: * config/i386/i386.md (x86_64_shld_ndd): New define_insn. (x86_64_shld_ndd_1): Likewise. (*x86_64_shld_ndd_2): Likewise. (x86_shld_ndd): Likewise. (x86_shld_ndd_1): Likewise. 
(*x86_shld_ndd_2): Likewise. (x86_64_shrd_ndd): Likewise. (x86_64_shrd_ndd_1): Likewise. (*x86_64_shrd_ndd_2): Likewise. (x86_shrd_ndd): Likewise. (x86_shrd_ndd_1): Likewise. (*x86_shrd_ndd_2): Likewise. (*x86_64_shld_shrd_1_nozext): Adjust codegen under TARGET_APX_NDD. (*x86_shld_shrd_1_nozext): Likewise. (*x86_64_shrd_shld_1_nozext): Likewise. (*x86_shrd_shld_1_nozext): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd-shld-shrd.c: New test. --- gcc/config/i386/i386.md | 322 +++++++++++++++++- .../gcc.target/i386/apx-ndd-shld-shrd.c | 24 ++ 2 files changed, 344 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6e4ac776f8a0..5c6275430d62 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14510,6 +14510,23 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_64_shld_ndd" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") + (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") + (const_int 63))) + (subreg:DI + (lshiftrt:TI + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r")) + (minus:QI (const_int 64) + (and:QI (match_dup 3) (const_int 63)))) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD" + "shld{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + (define_insn "x86_64_shld_1" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (ashift:DI (match_dup 0) @@ -14531,6 +14548,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_64_shld_ndd_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "rm") + (match_operand:QI 3 "const_0_to_63_operand")) + (subreg:DI + (lshiftrt:TI + (zero_extend:TI + (match_operand:DI 2 
"register_operand" "r")) + (match_operand:QI 4 "const_0_to_255_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" + "shld{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI") + (set_attr "length_immediate" "1")]) + + (define_insn_and_split "*x86_64_shld_shrd_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") (ior:DI (ashift:DI (match_operand:DI 4 "nonimmediate_operand") @@ -14556,6 +14591,23 @@ operands[4] = force_reg (DImode, operands[4]); emit_insn (gen_x86_64_shrd_1 (operands[0], operands[4], operands[3], operands[2])); } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } else { operands[1] = force_reg (DImode, operands[1]); @@ -14588,6 +14640,33 @@ (const_int 63)))) 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn_and_split "*x86_64_shld_ndd_2" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (ashift:DI (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "nonmemory_operand")) + (lshiftrt:DI (match_operand:DI 2 "register_operand") + (minus:QI (const_int 64) (match_dup 3))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 4) + (ior:DI (ashift:DI (match_dup 1) + (and:QI (match_dup 3) (const_int 63))) + (subreg:DI + (lshiftrt:TI + (zero_extend:TI (match_dup 2)) + (minus:QI (const_int 64) + (and:QI (match_dup 3) + (const_int 63)))) 0))) + (clobber (reg:CC FLAGS_REG)) + (set 
(match_dup 0) (match_dup 4))])] +{ + operands[4] = gen_reg_rtx (DImode); + emit_move_insn (operands[4], operands[0]); +}) + (define_insn "x86_shld" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) @@ -14610,6 +14689,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_shld_ndd" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") + (const_int 31))) + (subreg:SI + (lshiftrt:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (minus:QI (const_int 32) + (and:QI (match_dup 3) (const_int 31)))) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD" + "shld{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + + (define_insn "x86_shld_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (ashift:SI (match_dup 0) @@ -14631,6 +14728,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_shld_ndd_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:QI 3 "const_0_to_31_operand")) + (subreg:SI + (lshiftrt:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (match_operand:QI 4 "const_0_to_63_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && INTVAL (operands[4]) == 32 - INTVAL (operands[3])" + "shld{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "length_immediate" "1") + (set_attr "mode" "SI")]) + + (define_insn_and_split "*x86_shld_shrd_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") (ior:SI (ashift:SI (match_operand:SI 4 "nonimmediate_operand") @@ -14655,7 +14770,24 @@ operands[4] = force_reg (SImode, operands[4]); emit_insn (gen_x86_shrd_1 (operands[0], operands[4], 
operands[3], operands[2])); } - else + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shld_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else { operands[1] = force_reg (SImode, operands[1]); rtx tmp = gen_reg_rtx (SImode); @@ -14687,6 +14819,33 @@ (const_int 31)))) 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn_and_split "*x86_shld_ndd_2" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (ashift:SI (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "nonmemory_operand")) + (lshiftrt:SI (match_operand:SI 2 "register_operand") + (minus:QI (const_int 32) (match_dup 3))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 4) + (ior:SI (ashift:SI (match_dup 1) + (and:QI (match_dup 3) (const_int 31))) + (subreg:SI + (lshiftrt:DI + (zero_extend:DI (match_dup 2)) + (minus:QI (const_int 32) + (and:QI (match_dup 3) + (const_int 31)))) 0))) + (clobber (reg:CC FLAGS_REG)) + (set (match_dup 0) (match_dup 4))])] +{ + operands[4] = gen_reg_rtx (SImode); + emit_move_insn (operands[4], operands[0]); +}) + (define_expand "@x86_shift_adj_1" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (and:QI (match_operand:QI 2 "register_operand") @@ -15626,6 +15785,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_64_shrd_ndd" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") + (and:QI (match_operand:QI 3 "nonmemory_operand" "Jc") + (const_int 63))) + (subreg:DI + 
(ashift:TI + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r")) + (minus:QI (const_int 64) + (and:QI (match_dup 3) (const_int 63)))) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD" + "shrd{q}\t{%s3%2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "mode" "DI")]) + + (define_insn "x86_64_shrd_1" [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m") (ior:DI (lshiftrt:DI (match_dup 0) @@ -15647,6 +15824,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_64_shrd_ndd_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "rm") + (match_operand:QI 3 "const_0_to_63_operand")) + (subreg:DI + (ashift:TI + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r")) + (match_operand:QI 4 "const_0_to_255_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && INTVAL (operands[4]) == 64 - INTVAL (operands[3])" + "shrd{q}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "length_immediate" "1") + (set_attr "mode" "DI")]) + + (define_insn_and_split "*x86_64_shrd_shld_1_nozext" [(set (match_operand:DI 0 "nonimmediate_operand") (ior:DI (lshiftrt:DI (match_operand:DI 4 "nonimmediate_operand") @@ -15672,6 +15867,23 @@ operands[4] = force_reg (DImode, operands[4]); emit_insn (gen_x86_64_shld_1 (operands[0], operands[4], operands[3], operands[2])); } + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (DImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_64_shld_ndd_1 (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_64_shrd_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } else { operands[1] = 
force_reg (DImode, operands[1]); @@ -15704,6 +15916,33 @@ (const_int 63)))) 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn_and_split "*x86_64_shrd_ndd_2" + [(set (match_operand:DI 0 "nonimmediate_operand") + (ior:DI (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand") + (match_operand:QI 3 "nonmemory_operand")) + (ashift:DI (match_operand:DI 2 "register_operand") + (minus:QI (const_int 64) (match_dup 3))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 4) + (ior:DI (lshiftrt:DI (match_dup 1) + (and:QI (match_dup 3) (const_int 63))) + (subreg:DI + (ashift:TI + (zero_extend:TI (match_dup 2)) + (minus:QI (const_int 64) + (and:QI (match_dup 3) + (const_int 63)))) 0))) + (clobber (reg:CC FLAGS_REG)) + (set (match_dup 0) (match_dup 4))])] +{ + operands[4] = gen_reg_rtx (DImode); + emit_move_insn (operands[4], operands[0]); +}) + (define_insn "x86_shrd" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) @@ -15726,6 +15965,23 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_shrd_ndd" + [(set (match_operand:SI 0 "register_operand" "=r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (and:QI (match_operand:QI 3 "nonmemory_operand" "Ic") + (const_int 31))) + (subreg:SI + (ashift:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (minus:QI (const_int 32) + (and:QI (match_dup 3) (const_int 31)))) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD" + "shrd{l}\t{%s3%2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "mode" "SI")]) + (define_insn "x86_shrd_1" [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m") (ior:SI (lshiftrt:SI (match_dup 0) @@ -15747,6 +16003,24 @@ (set_attr "amdfam10_decode" "vector") (set_attr "bdver1_decode" "vector")]) +(define_insn "x86_shrd_ndd_1" + [(set (match_operand:SI 0 "register_operand" "=r") + 
(ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:QI 3 "const_0_to_31_operand")) + (subreg:SI + (ashift:DI + (zero_extend:DI + (match_operand:SI 2 "register_operand" "r")) + (match_operand:QI 4 "const_0_to_63_operand")) 0))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && (INTVAL (operands[4]) == 32 - INTVAL (operands[3]))" + "shrd{l}\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ishift") + (set_attr "length_immediate" "1") + (set_attr "mode" "SI")]) + + (define_insn_and_split "*x86_shrd_shld_1_nozext" [(set (match_operand:SI 0 "nonimmediate_operand") (ior:SI (lshiftrt:SI (match_operand:SI 4 "nonimmediate_operand") @@ -15771,7 +16045,24 @@ operands[4] = force_reg (SImode, operands[4]); emit_insn (gen_x86_shld_1 (operands[0], operands[4], operands[3], operands[2])); } - else + else if (TARGET_APX_NDD) + { + rtx tmp = gen_reg_rtx (SImode); + if (MEM_P (operands[4])) + { + operands[1] = force_reg (SImode, operands[1]); + emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + } + else if (MEM_P (operands[1])) + emit_insn (gen_x86_shld_ndd_1 (tmp, operands[1], operands[4], + operands[3], operands[2])); + else + emit_insn (gen_x86_shrd_ndd_1 (tmp, operands[4], operands[1], + operands[2], operands[3])); + emit_move_insn (operands[0], tmp); + } + else { operands[1] = force_reg (SImode, operands[1]); rtx tmp = gen_reg_rtx (SImode); @@ -15803,6 +16094,33 @@ (const_int 31)))) 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn_and_split "*x86_shrd_ndd_2" + [(set (match_operand:SI 0 "nonimmediate_operand") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand") + (match_operand:QI 3 "nonmemory_operand")) + (ashift:SI (match_operand:SI 2 "register_operand") + (minus:QI (const_int 32) (match_dup 3))))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_APX_NDD + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(parallel [(set (match_dup 4) + (ior:SI (lshiftrt:SI (match_dup 1) + (and:QI 
(match_dup 3) (const_int 31))) + (subreg:SI + (ashift:DI + (zero_extend:DI (match_dup 2)) + (minus:QI (const_int 32) + (and:QI (match_dup 3) + (const_int 31)))) 0))) + (clobber (reg:CC FLAGS_REG)) + (set (match_dup 0) (match_dup 4))])] +{ + operands[4] = gen_reg_rtx (SImode); + emit_move_insn (operands[4], operands[0]); +}) + ;; Base name for insn mnemonic. (define_mode_attr cvt_mnemonic [(SI "{cltd|cdq}") (DI "{cqto|cqo}")]) diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c new file mode 100644 index 000000000000..87068ea31aae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd-shld-shrd.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -Wno-shift-count-overflow -m64 -mapxf" } */ +/* { dg-final { scan-assembler-times {(?n)shld[ql]?[\t ]*\$2} 4 } } */ +/* { dg-final { scan-assembler-times {(?n)shrd[ql]?[\t ]*\$2} 4 } } */ + +typedef unsigned long u64; +typedef unsigned int u32; + +long a; +int c; +const char n = 2; + +long test64r (long e) { long t = ((u64)a >> n) | (e << (64 - n)); return t;} +long test64l (u64 e) { long t = (a << n) | (e >> (64 - n)); return t;} +int test32r (int f) { int t = ((u32)c >> n) | (f << (32 - n)); return t; } +int test32l (u32 f) { int t = (c << n) | (f >> (32 - n)); return t; } + +u64 ua; +u32 uc; + +u64 testu64l (u64 ue) { u64 ut = (ua << n) | (ue >> (64 - n)); return ut; } +u64 testu64r (u64 ue) { u64 ut = (ua >> n) | (ue << (64 - n)); return ut; } +u32 testu32l (u32 uf) { u32 ut = (uc << n) | (uf >> (32 - n)); return ut; } +u32 testu32r (u32 uf) { u32 ut = (uc >> n) | (uf << (32 - n)); return ut; } From 42cb34f94b1b6e78624320b0d5b564c8aa7bb030 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Wed, 8 Nov 2023 16:04:26 +0800 Subject: [PATCH 027/311] [APX NDD] Support APX NDD for cmove insns gcc/ChangeLog: * config/i386/i386.md (*movcc_noc): Extend with new constraints to support NDD. (*movsicc_noc_zext): Likewise. 
(*movsicc_noc_zext_1): Likewise. (*movqicc_noc): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd-cmov.c: New test. --- gcc/config/i386/i386.md | 48 ++++++++++++-------- gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c | 16 +++++++ 2 files changed, 45 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 5c6275430d62..017ab7202930 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -24417,47 +24417,56 @@ (neg:SWI (ltu:SWI (reg:CCC FLAGS_REG) (const_int 0))))]) (define_insn "*movcc_noc" - [(set (match_operand:SWI248 0 "register_operand" "=r,r") + [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r") (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SWI248 2 "nonimmediate_operand" "rm,0") - (match_operand:SWI248 3 "nonimmediate_operand" "0,rm")))] + (match_operand:SWI248 2 "nonimmediate_operand" "rm,0,rm,r") + (match_operand:SWI248 3 "nonimmediate_operand" "0,rm,r,rm")))] "TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %0|%0, %2} - cmov%O2%c1\t{%3, %0|%0, %3}" - [(set_attr "type" "icmov") + cmov%O2%c1\t{%3, %0|%0, %3} + cmov%O2%C1\t{%2, %3, %0|%0, %3, %2} + cmov%O2%c1\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "icmov") (set_attr "mode" "")]) (define_insn "*movsicc_noc_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") (if_then_else:DI (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) (zero_extend:DI - (match_operand:SI 2 "nonimmediate_operand" "rm,0")) + (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r")) (zero_extend:DI - (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))] + (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))] "TARGET_64BIT && TARGET_CMOVE && !(MEM_P 
(operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %k0|%k0, %2} - cmov%O2%c1\t{%3, %k0|%k0, %3}" - [(set_attr "type" "icmov") + cmov%O2%c1\t{%3, %k0|%k0, %3} + cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2} + cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "icmov") (set_attr "mode" "SI")]) (define_insn "*movsicc_noc_zext_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r") (zero_extend:DI (if_then_else:SI (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:SI 2 "nonimmediate_operand" "rm,0") - (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))] + (match_operand:SI 2 "nonimmediate_operand" "rm,0,rm,r") + (match_operand:SI 3 "nonimmediate_operand" "0,rm,r,rm"))))] "TARGET_64BIT && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "@ cmov%O2%C1\t{%2, %k0|%k0, %2} - cmov%O2%c1\t{%3, %k0|%k0, %3}" - [(set_attr "type" "icmov") + cmov%O2%c1\t{%3, %k0|%k0, %3} + cmov%O2%C1\t{%2, %3, %k0|%k0, %3, %2} + cmov%O2%c1\t{%3, %2, %k0|%k0, %2, %3}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "icmov") (set_attr "mode" "SI")]) @@ -24482,14 +24491,15 @@ }) (define_insn "*movqicc_noc" - [(set (match_operand:QI 0 "register_operand" "=r,r") + [(set (match_operand:QI 0 "register_operand" "=r,r,r") (if_then_else:QI (match_operator 1 "ix86_comparison_operator" [(reg FLAGS_REG) (const_int 0)]) - (match_operand:QI 2 "register_operand" "r,0") - (match_operand:QI 3 "register_operand" "0,r")))] + (match_operand:QI 2 "register_operand" "r,0,r") + (match_operand:QI 3 "register_operand" "0,r,r")))] "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL" "#" - [(set_attr "type" "icmov") + [(set_attr "isa" "*,*,apx_ndd") + (set_attr "type" "icmov") (set_attr "mode" "QI")]) (define_split diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c b/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c new file mode 100644 index 
000000000000..459dc965342b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd-cmov.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -m64 -mapxf" } */ +/* { dg-final { scan-assembler-times "cmove\[^\n\r]*, %eax" 1 } } */ +/* { dg-final { scan-assembler-times "cmovge\[^\n\r]*, %eax" 1 } } */ + +unsigned int c[4]; + +unsigned long long foo1 (int a, unsigned int b) +{ + return a ? b : c[1]; +} + +unsigned int foo3 (int a, int b, unsigned int c, unsigned int d) +{ + return a < b ? c : d; +} From 3ba505c7b17a208b1c546b7a974a02e8003b60ef Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Sat, 2 Dec 2023 12:55:59 +0800 Subject: [PATCH 028/311] [APX NDD] Support TImode shift for NDD TImode shifts are split by splitter functions, which assume operands[0] and operands[1] to be the same. For the NDD alternative the assumption may not be true so add split functions for NDD to emit the NDD form instructions, and omit the handling of !64bit target split. Although the NDD form allows memory src, for the post-reload splitter there is no extra register to accept NDD form shift, especially shld/shrd. So only accept register alternative for shift src under NDD. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_split_ashl_ndd): New function to split NDD form lshift. (ix86_split_rshift_ndd): Likewise for l/ashiftrt. * config/i386/i386-protos.h (ix86_split_ashl_ndd): New prototype. (ix86_split_rshift_ndd): Likewise. * config/i386/i386.md (ashl3_doubleword): Add NDD alternative, call ndd split function when operands[0] not equal to operands[1]. (define_split for doubleword lshift): Likewise. (define_peephole for doubleword lshift): Likewise. (3_doubleword): Likewise for l/ashiftrt. (define_split for doubleword l/ashiftrt): Likewise. (define_peephole for doubleword l/ashiftrt): Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ndd-ti-shift.c: New test. 
--- gcc/config/i386/i386-expand.cc | 136 ++++++++++++++++++ gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.md | 56 ++++++-- .../gcc.target/i386/apx-ndd-ti-shift.c | 91 ++++++++++++ 4 files changed, 273 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index d4bbd33ce079..a53d69d54000 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -6678,6 +6678,142 @@ ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) } } +/* Helper function to split TImode ashl under NDD. */ +void +ix86_split_ashl_ndd (rtx *operands, rtx scratch) +{ + gcc_assert (TARGET_APX_NDD); + int half_width = GET_MODE_BITSIZE (TImode) >> 1; + + rtx low[2], high[2]; + int count; + + split_double_mode (TImode, operands, 2, low, high); + if (CONST_INT_P (operands[2])) + { + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1); + + if (count >= half_width) + { + count = count - half_width; + if (count == 0) + { + if (!rtx_equal_p (high[0], low[1])) + emit_move_insn (high[0], low[1]); + } + else if (count == 1) + emit_insn (gen_adddi3 (high[0], low[1], low[1])); + else + emit_insn (gen_ashldi3 (high[0], low[1], GEN_INT (count))); + + ix86_expand_clear (low[0]); + } + else if (count == 1) + { + rtx x3 = gen_rtx_REG (CCCmode, FLAGS_REG); + rtx x4 = gen_rtx_LTU (TImode, x3, const0_rtx); + emit_insn (gen_add3_cc_overflow_1 (DImode, low[0], + low[1], low[1])); + emit_insn (gen_add3_carry (DImode, high[0], high[1], high[1], + x3, x4)); + } + else + { + emit_insn (gen_x86_64_shld_ndd (high[0], high[1], low[1], + GEN_INT (count))); + emit_insn (gen_ashldi3 (low[0], low[1], GEN_INT (count))); + } + } + else + { + emit_insn (gen_x86_64_shld_ndd (high[0], high[1], low[1], + operands[2])); + emit_insn (gen_ashldi3 (low[0], low[1], operands[2])); + if (TARGET_CMOVE && scratch) + { + ix86_expand_clear (scratch); + emit_insn 
(gen_x86_shift_adj_1 + (DImode, high[0], low[0], operands[2], scratch)); + } + else + emit_insn (gen_x86_shift_adj_2 (DImode, high[0], low[0], operands[2])); + } +} + +/* Helper function to split TImode l/ashr under NDD. */ +void +ix86_split_rshift_ndd (enum rtx_code code, rtx *operands, rtx scratch) +{ + gcc_assert (TARGET_APX_NDD); + int half_width = GET_MODE_BITSIZE (TImode) >> 1; + bool ashr_p = code == ASHIFTRT; + rtx (*gen_shr)(rtx, rtx, rtx) = ashr_p ? gen_ashrdi3 + : gen_lshrdi3; + + rtx low[2], high[2]; + int count; + + split_double_mode (TImode, operands, 2, low, high); + if (CONST_INT_P (operands[2])) + { + count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (TImode) - 1); + + if (ashr_p && (count == GET_MODE_BITSIZE (TImode) - 1)) + { + emit_insn (gen_shr (high[0], high[1], + GEN_INT (half_width - 1))); + emit_move_insn (low[0], high[0]); + } + else if (count >= half_width) + { + if (ashr_p) + emit_insn (gen_shr (high[0], high[1], + GEN_INT (half_width - 1))); + else + ix86_expand_clear (high[0]); + + if (count > half_width) + emit_insn (gen_shr (low[0], high[1], + GEN_INT (count - half_width))); + else + emit_move_insn (low[0], high[1]); + } + else + { + emit_insn (gen_x86_64_shrd_ndd (low[0], low[1], high[1], + GEN_INT (count))); + emit_insn (gen_shr (high[0], high[1], GEN_INT (count))); + } + } + else + { + emit_insn (gen_x86_64_shrd_ndd (low[0], low[1], high[1], + operands[2])); + emit_insn (gen_shr (high[0], high[1], operands[2])); + + if (TARGET_CMOVE && scratch) + { + if (ashr_p) + { + emit_move_insn (scratch, high[0]); + emit_insn (gen_shr (scratch, scratch, + GEN_INT (half_width - 1))); + } + else + ix86_expand_clear (scratch); + + emit_insn (gen_x86_shift_adj_1 + (DImode, low[0], high[0], operands[2], scratch)); + } + else if (ashr_p) + emit_insn (gen_x86_shift_adj_3 + (DImode, low[0], high[0], operands[2])); + else + emit_insn (gen_x86_shift_adj_2 + (DImode, low[0], high[0], operands[2])); + } +} + /* Expand move of V1TI mode register X to a new 
TI mode register. */ static rtx ix86_expand_v1ti_to_ti (rtx x) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index fa9524097296..56349064a6ca 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -174,8 +174,10 @@ extern void x86_initialize_trampoline (rtx, rtx, rtx); extern rtx ix86_zero_extend_to_Pmode (rtx); extern void ix86_split_long_move (rtx[]); extern void ix86_split_ashl (rtx *, rtx, machine_mode); +extern void ix86_split_ashl_ndd (rtx *, rtx); extern void ix86_split_ashr (rtx *, rtx, machine_mode); extern void ix86_split_lshr (rtx *, rtx, machine_mode); +extern void ix86_split_rshift_ndd (enum rtx_code, rtx *, rtx); extern void ix86_expand_v1ti_shift (enum rtx_code, rtx[]); extern void ix86_expand_v1ti_rotate (enum rtx_code, rtx[]); extern void ix86_expand_v1ti_ashiftrt (rtx[]); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 017ab7202930..b4db50f61cdd 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14425,13 +14425,14 @@ }) (define_insn "ashl3_doubleword" - [(set (match_operand:DWI 0 "register_operand" "=&r") - (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n") - (match_operand:QI 2 "nonmemory_operand" "c"))) + [(set (match_operand:DWI 0 "register_operand" "=&r,&r") + (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n,r") + (match_operand:QI 2 "nonmemory_operand" "c,c"))) (clobber (reg:CC FLAGS_REG))] "" "#" - [(set_attr "type" "multi")]) + [(set_attr "type" "multi") + (set_attr "isa" "*,apx_ndd")]) (define_split [(set (match_operand:DWI 0 "register_operand") @@ -14440,7 +14441,15 @@ (clobber (reg:CC FLAGS_REG))] "epilogue_completed" [(const_int 0)] - "ix86_split_ashl (operands, NULL_RTX, mode); DONE;") +{ + if (TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1]) + && REG_P (operands[1])) + ix86_split_ashl_ndd (operands, NULL_RTX); + else + ix86_split_ashl (operands, NULL_RTX, mode); + DONE; +}) ;; By default we don't ask for a 
scratch register, because when DWImode ;; values are manipulated, registers are already at a premium. But if @@ -14456,7 +14465,15 @@ (match_dup 3)] "TARGET_CMOVE" [(const_int 0)] - "ix86_split_ashl (operands, operands[3], mode); DONE;") +{ + if (TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1]) + && (REG_P (operands[1]))) + ix86_split_ashl_ndd (operands, operands[3]); + else + ix86_split_ashl (operands, operands[3], mode); + DONE; +}) (define_insn_and_split "*ashl3_doubleword_highpart" [(set (match_operand: 0 "register_operand" "=r") @@ -15713,16 +15730,24 @@ }) (define_insn_and_split "3_doubleword" - [(set (match_operand:DWI 0 "register_operand" "=&r") - (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "c"))) + [(set (match_operand:DWI 0 "register_operand" "=&r,&r") + (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0,r") + (match_operand:QI 2 "nonmemory_operand" "c,c"))) (clobber (reg:CC FLAGS_REG))] "" "#" "epilogue_completed" [(const_int 0)] - "ix86_split_ (operands, NULL_RTX, mode); DONE;" - [(set_attr "type" "multi")]) +{ + if (TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1])) + ix86_split_rshift_ndd (, operands, NULL_RTX); + else + ix86_split_ (operands, NULL_RTX, mode); + DONE; +} + [(set_attr "type" "multi") + (set_attr "isa" "*,apx_ndd")]) ;; By default we don't ask for a scratch register, because when DWImode ;; values are manipulated, registers are already at a premium. But if @@ -15738,7 +15763,14 @@ (match_dup 3)] "TARGET_CMOVE" [(const_int 0)] - "ix86_split_ (operands, operands[3], mode); DONE;") +{ + if (TARGET_APX_NDD + && !rtx_equal_p (operands[0], operands[1])) + ix86_split_rshift_ndd (, operands, operands[3]); + else + ix86_split_ (operands, operands[3], mode); + DONE; +}) ;; Split truncations of double word right shifts into x86_shrd_1. 
(define_insn_and_split "3_doubleword_lowpart" diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c b/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c new file mode 100644 index 000000000000..0489712b7f6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/apx-ndd-ti-shift.c @@ -0,0 +1,91 @@ +/* { dg-do run { target { int128 && { ! ia32 } } } } */ +/* { dg-require-effective-target apxf } */ +/* { dg-options "-O2" } */ + +#include + +#define APX_TARGET __attribute__((noinline, target("apxf"))) +#define NO_APX __attribute__((noinline, target("no-apxf"))) +typedef __uint128_t u128; +typedef __int128 i128; + +#define TI_SHIFT_FUNC(TYPE, op, name) \ +APX_TARGET \ +TYPE apx_##name##TYPE (TYPE a, char b) \ +{ \ + return a op b; \ +} \ +TYPE noapx_##name##TYPE (TYPE a, char b) \ +{ \ + return a op b; \ +} \ + +#define TI_SHIFT_FUNC_CONST(TYPE, i, op, name) \ +APX_TARGET \ +TYPE apx_##name##TYPE##_const (TYPE a) \ +{ \ + return a op i; \ +} \ +NO_APX \ +TYPE noapx_##name##TYPE##_const (TYPE a) \ +{ \ + return a op i; \ +} + +#define TI_SHIFT_TEST(TYPE, name, val) \ +{\ + if (apx_##name##TYPE (val, b) != noapx_##name##TYPE (val, b)) \ + abort (); \ +} + +#define TI_SHIFT_CONST_TEST(TYPE, name, val) \ +{\ + if (apx_##name##1##TYPE##_const (val) \ + != noapx_##name##1##TYPE##_const (val)) \ + abort (); \ + if (apx_##name##2##TYPE##_const (val) \ + != noapx_##name##2##TYPE##_const (val)) \ + abort (); \ + if (apx_##name##3##TYPE##_const (val) \ + != noapx_##name##3##TYPE##_const (val)) \ + abort (); \ + if (apx_##name##4##TYPE##_const (val) \ + != noapx_##name##4##TYPE##_const (val)) \ + abort (); \ +} + +TI_SHIFT_FUNC(i128, <<, ashl) +TI_SHIFT_FUNC(i128, >>, ashr) +TI_SHIFT_FUNC(u128, >>, lshr) + +TI_SHIFT_FUNC_CONST(i128, 1, <<, ashl1) +TI_SHIFT_FUNC_CONST(i128, 65, <<, ashl2) +TI_SHIFT_FUNC_CONST(i128, 64, <<, ashl3) +TI_SHIFT_FUNC_CONST(i128, 87, <<, ashl4) +TI_SHIFT_FUNC_CONST(i128, 127, >>, ashr1) +TI_SHIFT_FUNC_CONST(i128, 87, >>, ashr2) +TI_SHIFT_FUNC_CONST(i128, 
27, >>, ashr3) +TI_SHIFT_FUNC_CONST(i128, 64, >>, ashr4) +TI_SHIFT_FUNC_CONST(u128, 127, >>, lshr1) +TI_SHIFT_FUNC_CONST(u128, 87, >>, lshr2) +TI_SHIFT_FUNC_CONST(u128, 27, >>, lshr3) +TI_SHIFT_FUNC_CONST(u128, 64, >>, lshr4) + +int main (void) +{ + if (!__builtin_cpu_supports ("apxf")) + return 0; + + u128 ival = 0x123456788765432FLL; + u128 uval = 0xF234567887654321ULL; + char b = 28; + + TI_SHIFT_TEST(i128, ashl, ival) + TI_SHIFT_TEST(i128, ashr, ival) + TI_SHIFT_TEST(u128, lshr, uval) + TI_SHIFT_CONST_TEST(i128, ashl, ival) + TI_SHIFT_CONST_TEST(i128, ashr, ival) + TI_SHIFT_CONST_TEST(u128, lshr, uval) + + return 0; +} From df193bda748c9c3f1e23cc2c4a636db578239001 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Wed, 3 May 2023 12:02:54 +0100 Subject: [PATCH 029/311] aarch64: rcpc3: Add +rcpc3 extension Given the optional LRCPC3 target support for Armv8.2-a cores onwards, the +rcpc3 arch feature modifier is added to GCC's command-line options. gcc/ChangeLog: * config/aarch64/aarch64-option-extensions.def (rcpc3): New. * config/aarch64/aarch64.h (AARCH64_ISA_RCPC3): Likewise. (TARGET_RCPC3): Likewise. * doc/invoke.texi (rcpc3): Document feature in AArch64 Options. 
--- gcc/config/aarch64/aarch64-option-extensions.def | 1 + gcc/config/aarch64/aarch64.h | 4 ++++ gcc/doc/invoke.texi | 2 ++ 3 files changed, 7 insertions(+) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 02fc895338e7..5aa37ac4e0ed 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -165,4 +165,5 @@ AARCH64_OPT_EXTENSION("the", THE, (), (), (), "the") AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") +AARCH64_OPT_EXTENSION("rcpc3", RCPC3, (), (), (), "rcpc3") #undef AARCH64_OPT_EXTENSION diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 5a776dfdff0f..2cd0bc552eba 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -250,6 +250,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) +#define AARCH64_ISA_RCPC3 (aarch64_isa_flags & AARCH64_FL_RCPC3) #define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R) #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) #define AARCH64_ISA_V8_7A (aarch64_isa_flags & AARCH64_FL_V8_7A) @@ -433,6 +434,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; and sign-extending versions.*/ #define TARGET_RCPC2 (AARCH64_ISA_RCPC8_4) +/* RCPC3 (Release Consistency) extensions, optional from Armv8.2-a. */ +#define TARGET_RCPC3 (AARCH64_ISA_RCPC3) + /* Apply the workaround for Cortex-A53 erratum 835769. */ #define TARGET_FIX_ERR_A53_835769 \ ((aarch64_fix_a53_err835769 == 2) \ diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f8d6f799e11a..43341fe6e5e0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21381,6 +21381,8 @@ Enable support for 128-bit system register read/write instructions. 
Enable support for Armv9.4-a Guarded Control Stack extension. @item the Enable support for Armv8.9-a/9.4-a translation hardening extension. +@item rcpc3 +Enable the RCpc3 (Release Consistency) extension. @end table From 1750c038f9d0e0e6d1ad977e7b9f69ae7cb67455 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Mon, 23 Oct 2023 15:39:41 +0100 Subject: [PATCH 030/311] aarch64: rcpc3: Add relevant iterators to handle Neon intrinsics The LDAP1 and STL1 Neon ACLE intrinsics, operating on 64-bit data values, operate on single-lane (Vt.1D) or twin-lane (Vt.2D) SIMD register configurations, either in the DI or DF modes. This leads to the need for a mode iterator accounting for the V1DI, V1DF, V2DI and V2DF modes. This patch therefore introduces the new V12DIF mode iterator with which to generate functions operating on signed 64-bit integer and float values and V12DI for generating the unsigned and polynomial-type counterparts. Along with this, we modify the associated mode attributes accordingly in order to allow for the implementation of the relevant backend patterns for the intrinsics. gcc/ChangeLog: * config/aarch64/iterators.md (V12DIF): New. (V12DI): Likewise. (VEL): Add support for all V12DIF-associated modes. (Vetype): Add support for V1DI and V1DF. (Vel): Likewise. --- gcc/config/aarch64/iterators.md | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f204850850c4..9bbcacd9d37d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -314,6 +314,12 @@ ;; All byte modes. (define_mode_iterator VB [V8QI V16QI]) +;; 1 and 2 lane DI and DF modes. +(define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF]) + +;; 1 and 2 lane DI mode. +(define_mode_iterator V12DI [V1DI V2DI]) + ;; 2 and 4 lane SI modes. 
(define_mode_iterator VS [V2SI V4SI]) @@ -1324,10 +1330,10 @@ (define_mode_attr Vetype [(V8QI "b") (V16QI "b") (V4HI "h") (V8HI "h") (V2SI "s") (V4SI "s") - (V2DI "d") + (V2DI "d") (V1DI "d") (V4HF "h") (V8HF "h") (V2SF "s") (V4SF "s") - (V2DF "d") + (V2DF "d") (V1DF "d") (V2x8QI "b") (V2x4HI "h") (V2x2SI "s") (V2x1DI "d") (V2x4HF "h") (V2x2SF "s") @@ -1498,10 +1504,12 @@ (define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (V4HI "HI") (V8HI "HI") (V2SI "SI") (V4SI "SI") - (DI "DI") (V2DI "DI") + (DI "DI") (V1DI "DI") + (V2DI "DI") (V4HF "HF") (V8HF "HF") (V2SF "SF") (V4SF "SF") - (DF "DF") (V2DF "DF") + (DF "DF") (V1DF "DF") + (V2DF "DF") (SI "SI") (HI "HI") (QI "QI") (V4BF "BF") (V8BF "BF") @@ -1518,12 +1526,13 @@ (define_mode_attr Vel [(V8QI "qi") (V16QI "qi") (V4HI "hi") (V8HI "hi") (V2SI "si") (V4SI "si") - (DI "di") (V2DI "di") + (DI "di") (V1DI "si") + (V2DI "di") (V4HF "hf") (V8HF "hf") (V2SF "sf") (V4SF "sf") - (V2DF "df") (DF "df") - (SI "si") (HI "hi") - (QI "qi") + (V1DF "df") (V2DF "df") + (DF "df") (SI "si") + (HI "hi") (QI "qi") (V4BF "bf") (V8BF "bf") (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi") (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi") From 142abf03bcbf066c521fd73edcb465db69a29040 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Tue, 24 Oct 2023 10:21:58 +0100 Subject: [PATCH 031/311] aarch64: rcpc3: Add Neon ACLE intrinsics Register the target specific builtins in `aarch64-simd-builtins.def' and implement their associated backend patterns in `aarch64-simd.md'. gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (vec_ldap1_lane): New. (vec_stl1_lane): Likewise. * config/aarch64/aarch64-simd.md (aarch64_vec_stl1_lanes_lane): New. (aarch64_vec_stl1_lane): Likewise. (aarch64_vec_ldap1_lanes_lane): Likewise. (aarch64_vec_ldap1_lane): Likewise. * config/aarch64/aarch64.md (UNSPEC_LDAP1_LANE): New. (UNSPEC_STL1_LANE): Likewise. 
--- gcc/config/aarch64/aarch64-simd-builtins.def | 7 +++ gcc/config/aarch64/aarch64-simd.md | 65 ++++++++++++++++++++ gcc/config/aarch64/aarch64.md | 2 + 3 files changed, 74 insertions(+) diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index e2b94ad8247f..395970cab5bc 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -43,6 +43,13 @@ help describe the attributes (for example, pure) for the intrinsic function. */ + BUILTIN_V12DIF (LOADSTRUCT_LANE, vec_ldap1_lane, 0, ALL) + BUILTIN_V12DI (LOADSTRUCT_LANE_U, vec_ldap1_lane, 0, ALL) + BUILTIN_V12DI (LOADSTRUCT_LANE_P, vec_ldap1_lane, 0, ALL) + BUILTIN_V12DIF (STORESTRUCT_LANE, vec_stl1_lane, 0, ALL) + BUILTIN_V12DI (STORESTRUCT_LANE_U, vec_stl1_lane, 0, ALL) + BUILTIN_V12DI (STORESTRUCT_LANE_P, vec_stl1_lane, 0, ALL) + BUILTIN_VDC (BINOP, combine, 0, AUTO_FP) BUILTIN_VD_I (BINOPU, combine, 0, NONE) BUILTIN_VDC_P (BINOPP, combine, 0, NONE) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 50b68552fe43..5757f3759740 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7828,6 +7828,71 @@ DONE; }) +;; Patterns for rcpc3 vector lane loads and stores. 
+ +(define_insn "aarch64_vec_stl1_lanes_lane" + [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q") + (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_STL1_LANE))] + "TARGET_RCPC3" + { + operands[2] = aarch64_endian_lane_rtx (mode, + INTVAL (operands[2])); + return "stl1\\t{%S1.}[%2], %0"; + } + [(set_attr "type" "neon_store2_one_lane")] +) + +(define_expand "aarch64_vec_stl1_lane" + [(match_operand:DI 0 "register_operand") + (match_operand:V12DIF 1 "register_operand") + (match_operand:SI 2 "immediate_operand")] + "TARGET_RCPC3" +{ + rtx mem = gen_rtx_MEM (BLKmode, operands[0]); + set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (mode))); + + aarch64_simd_lane_bounds (operands[2], 0, + GET_MODE_NUNITS (mode).to_constant (), NULL); + emit_insn (gen_aarch64_vec_stl1_lanes_lane (mem, + operands[1], operands[2])); + DONE; +}) + +(define_insn "aarch64_vec_ldap1_lanes_lane" + [(set (match_operand:V12DIF 0 "register_operand" "=w") + (unspec:V12DIF [ + (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q") + (match_operand:V12DIF 2 "register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_LDAP1_LANE))] + "TARGET_RCPC3" + { + operands[3] = aarch64_endian_lane_rtx (mode, + INTVAL (operands[3])); + return "ldap1\\t{%S0.}[%3], %1"; + } + [(set_attr "type" "neon_load2_one_lane")] +) + +(define_expand "aarch64_vec_ldap1_lane" + [(match_operand:V12DIF 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:V12DIF 2 "register_operand") + (match_operand:SI 3 "immediate_operand")] + "TARGET_RCPC3" +{ + rtx mem = gen_rtx_MEM (BLKmode, operands[1]); + set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (mode))); + + aarch64_simd_lane_bounds (operands[3], 0, + GET_MODE_NUNITS (mode).to_constant (), NULL); + emit_insn (gen_aarch64_vec_ldap1_lanes_lane (operands[0], + mem, operands[2], operands[3])); + DONE; +}) + (define_insn_and_split "aarch64_rev_reglist" [(set 
(match_operand:VSTRUCT_QD 0 "register_operand" "=&w") (unspec:VSTRUCT_QD diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 38da76a1ee20..d43f8be6e952 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -358,6 +358,8 @@ UNSPEC_SAVE_NZCV UNSPEC_RESTORE_NZCV UNSPECV_PATCHABLE_AREA + UNSPEC_LDAP1_LANE + UNSPEC_STL1_LANE ;; Wraps a constant integer that should be multiplied by the number ;; of quadwords in an SME vector. UNSPEC_SME_VQ From 20214aaab6fe8d77523ef86b47248f35992f49d4 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Thu, 2 Nov 2023 15:52:52 +0000 Subject: [PATCH 032/311] aarch64: rcpc3: add Neon ACLE wrapper functions to `arm_neon.h' Create the necessary mappings from the ACLE-defined Neon intrinsics names[1] to the internal builtin function names. [1] https://arm-software.github.io/acle/neon_intrinsics/advsimd.html gcc/ChangeLog: * config/aarch64/arm_neon.h (vldap1_lane_u64): New. (vldap1q_lane_u64): Likewise. (vldap1_lane_s64): Likewise. (vldap1q_lane_s64): Likewise. (vldap1_lane_f64): Likewise. (vldap1q_lane_f64): Likewise. (vldap1_lane_p64): Likewise. (vldap1q_lane_p64): Likewise. (vstl1_lane_u64): Likewise. (vstl1q_lane_u64): Likewise. (vstl1_lane_s64): Likewise. (vstl1q_lane_s64): Likewise. (vstl1_lane_f64): Likewise. (vstl1q_lane_f64): Likewise. (vstl1_lane_p64): Likewise. (vstl1q_lane_p64): Likewise. --- gcc/config/aarch64/arm_neon.h | 129 ++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 349f31676994..ef0d75e07ce1 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -13446,6 +13446,135 @@ vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) return __aarch64_vset_lane_any (*__src, __vec, __lane); } +#pragma GCC push_options +#pragma GCC target ("+nothing+rcpc3+simd") + +/* vldap1_lane. 
*/ + +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di_usus ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di_usus ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ extern __inline int64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); +} + +__extension__ extern __inline int64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane); +} + +__extension__ extern __inline float64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1df (__src, __vec, __lane); +} + +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane); +} + +__extension__ extern __inline poly64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev1di_psps ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +__extension__ 
extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) +{ + return __builtin_aarch64_vec_ldap1_lanev2di_psps ( + (__builtin_aarch64_simd_di *) __src, __vec, __lane); +} + +/* vstl1_lane. */ + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_u64 (uint64_t *__src, uint64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2di_sus ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_f64 (float64_t *__src, float64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1df (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_f64 (float64_t *__src, float64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2df (__src, __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1_lane_p64 (poly64_t 
*__src, poly64x1_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev1di_sps ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstl1q_lane_p64 (poly64_t *__src, poly64x2_t __vec, const int __lane) +{ + __builtin_aarch64_vec_stl1_lanev2di_sps ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); +} + +#pragma GCC pop_options + /* vldn */ __extension__ extern __inline int64x1x2_t From 3b096bc439cc4499b2caca4e2772adecaabc20c8 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Thu, 2 Nov 2023 11:19:16 +0000 Subject: [PATCH 033/311] aarch64: rcpc3: Add intrinsics tests Add unit test to ensure that added intrinsics compile to the correct `LDAP1 {Vt.D}[lane],[Xn]' and `STL1 {Vt.d}[lane],[Xn]' instructions. gcc/testsuite/ChangeLog: * gcc.target/aarch64/acle/rcpc3.c: New. --- gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c new file mode 100644 index 000000000000..689d047ab915 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/rcpc3.c @@ -0,0 +1,47 @@ +/* Test the rcpc3 ACLE intrinsics. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv8.2-a+rcpc3" } */ +#include +#include + +#define TEST_LDAP(TYPE, T) \ + TYPE##x##1_t T##1_test (TYPE##_t const * ptr, TYPE##x##1_t src) { \ + return vldap1_lane_##T##64 (ptr, src, 0); \ + } + +#define TEST_LDAPQ(TYPE, T) \ + TYPE##x##2_t T##2_test (TYPE##_t const * ptr, TYPE##x##2_t src) { \ + return vldap1q_lane_##T##64 (ptr, src, 1); \ + } + +#define TEST_STL(TYPE, T) \ + void T##1s_test (TYPE##_t * ptr, TYPE##x##1_t src) { \ + vstl1_lane_##T##64 (ptr, src, 0); \ + } + +#define TEST_STLQ(TYPE, T) \ + void T##2s_test (TYPE##_t * ptr, TYPE##x##2_t src) { \ + vstl1q_lane_##T##64 (ptr, src, 1); \ + } + +TEST_LDAP (uint64, u); +TEST_LDAP (int64, s); +TEST_LDAP (float64, f); +TEST_LDAP (poly64, p); +/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */ +TEST_LDAPQ (uint64, u); +TEST_LDAPQ (int64, s); +TEST_LDAPQ (float64, f); +TEST_LDAPQ (poly64, p); +/* { dg-final { scan-assembler-times {ldap1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */ + +TEST_STL (uint64, u); +TEST_STL (int64, s); +TEST_STL (float64, f); +TEST_STL (poly64, p); +/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[0\], \[x\d\]} 4 } } */ +TEST_STLQ (uint64, u); +TEST_STLQ (int64, s); +TEST_STLQ (float64, f); +TEST_STLQ (poly64, p); +/* { dg-final { scan-assembler-times {stl1\t\{v\d.d\}\[1\], \[x\d\]} 4 } } */ From 3d0f3382fa7b5677f35a9becf75ac436cd7eda7b Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Thu, 7 Dec 2023 00:38:14 -0300 Subject: [PATCH 034/311] libsupc++: try cxa_thread_atexit_impl at runtime g++.dg/tls/thread_local-order2.C fails when the toolchain is built for a platform that lacks __cxa_thread_atexit_impl, even if the program is built and run using that toolchain on a (later) platform that offers __cxa_thread_atexit_impl. This patch adds runtime testing for __cxa_thread_atexit_impl on select platforms (GNU variants, for starters) that support weak symbols. 
for libstdc++-v3/ChangeLog PR libstdc++/112858 * config/os/gnu-linux/os_defines.h (_GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL): Define. * libsupc++/atexit_thread.cc [__GXX_WEAK__ && _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL] (__cxa_thread_atexit): Add dynamic detection of __cxa_thread_atexit_impl. --- libstdc++-v3/config/os/gnu-linux/os_defines.h | 5 ++++ libstdc++-v3/libsupc++/atexit_thread.cc | 23 ++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/libstdc++-v3/config/os/gnu-linux/os_defines.h b/libstdc++-v3/config/os/gnu-linux/os_defines.h index 87317031fcd7..a2e4baec069d 100644 --- a/libstdc++-v3/config/os/gnu-linux/os_defines.h +++ b/libstdc++-v3/config/os/gnu-linux/os_defines.h @@ -60,6 +60,11 @@ # define _GLIBCXX_HAVE_FLOAT128_MATH 1 #endif +// Enable __cxa_thread_atexit to rely on a (presumably libc-provided) +// __cxa_thread_atexit_impl, if it happens to be defined, even if +// configure couldn't find it during the build. +#define _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL 1 + #ifdef __linux__ // The following libpthread properties only apply to Linux, not GNU/Hurd. diff --git a/libstdc++-v3/libsupc++/atexit_thread.cc b/libstdc++-v3/libsupc++/atexit_thread.cc index 9346d50f5daf..28423344a0f3 100644 --- a/libstdc++-v3/libsupc++/atexit_thread.cc +++ b/libstdc++-v3/libsupc++/atexit_thread.cc @@ -138,11 +138,32 @@ namespace { } } +#if __GXX_WEAK__ && _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL +extern "C" +int __attribute__ ((__weak__)) +__cxa_thread_atexit_impl (void (_GLIBCXX_CDTOR_CALLABI *func) (void *), + void *arg, void *d); +#endif + +// ??? We can't make it an ifunc, can we? 
extern "C" int __cxxabiv1::__cxa_thread_atexit (void (_GLIBCXX_CDTOR_CALLABI *dtor)(void *), - void *obj, void */*dso_handle*/) + void *obj, [[maybe_unused]] void *dso_handle) _GLIBCXX_NOTHROW { +#if __GXX_WEAK__ && _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL + if (__cxa_thread_atexit_impl) + // Rely on a (presumably libc-provided) __cxa_thread_atexit_impl, + // if it happens to be defined, even if configure couldn't find it + // during the build. _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL + // may be defined e.g. in os_defines.h on platforms where some + // versions of libc have a __cxa_thread_atexit_impl definition, + // but whose earlier versions didn't. This enables programs built + // by toolchains compatible with earlier libc versions to still + // benefit from a libc-provided __cxa_thread_atexit_impl. + return __cxa_thread_atexit_impl (dtor, obj, dso_handle); +#endif + // Do this initialization once. if (__gthread_active_p ()) { From 3cbab07b08d2f3a3ed34b6ec12e67727c59d285c Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Thu, 7 Dec 2023 00:38:18 -0300 Subject: [PATCH 035/311] analyzer: deal with -fshort-enums On platforms that enable -fshort-enums by default, various switch-enum analyzer tests fail, because apply_constraints_for_gswitch doesn't expect the integral promotion type cast. I've arranged for the code to cope with those casts. for gcc/analyzer/ChangeLog * region-model.cc (has_nondefault_cases_for_all_enum_values_p): Take the enum type as a parameter. (region_model::apply_constraints_for_gswitch): Cope with integral promotion type casts. for gcc/testsuite/ChangeLog * gcc.dg/analyzer/switch-short-enum-1.c: New. * gcc.dg/analyzer/switch-no-short-enum-1.c: New. 
--- gcc/analyzer/region-model.cc | 27 +++- .../gcc.dg/analyzer/switch-no-short-enum-1.c | 141 ++++++++++++++++++ .../gcc.dg/analyzer/switch-short-enum-1.c | 140 +++++++++++++++++ 3 files changed, 304 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/switch-no-short-enum-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/switch-short-enum-1.c diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 2157ad2578b8..6a7a8bc9f488 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -5387,10 +5387,10 @@ has_nondefault_case_for_value_p (const gswitch *switch_stmt, tree int_cst) has nondefault cases handling all values in the enum. */ static bool -has_nondefault_cases_for_all_enum_values_p (const gswitch *switch_stmt) +has_nondefault_cases_for_all_enum_values_p (const gswitch *switch_stmt, + tree type) { gcc_assert (switch_stmt); - tree type = TREE_TYPE (gimple_switch_index (switch_stmt)); gcc_assert (TREE_CODE (type) == ENUMERAL_TYPE); for (tree enum_val_iter = TYPE_VALUES (type); @@ -5426,6 +5426,23 @@ apply_constraints_for_gswitch (const switch_cfg_superedge &edge, { tree index = gimple_switch_index (switch_stmt); const svalue *index_sval = get_rvalue (index, ctxt); + bool check_index_type = true; + + /* With -fshort-enums, there may be a type cast. */ + if (ctxt && index_sval->get_kind () == SK_UNARYOP + && TREE_CODE (index_sval->get_type ()) == INTEGER_TYPE) + { + const unaryop_svalue *unaryop = as_a (index_sval); + if (unaryop->get_op () == NOP_EXPR + && is_a (unaryop->get_arg ())) + if (const initial_svalue *initvalop = (as_a + (unaryop->get_arg ()))) + if (TREE_CODE (initvalop->get_type ()) == ENUMERAL_TYPE) + { + index_sval = initvalop; + check_index_type = false; + } + } /* If we're switching based on an enum type, assume that the user is only working with values from the enum. 
Hence if this is an @@ -5437,12 +5454,14 @@ apply_constraints_for_gswitch (const switch_cfg_superedge &edge, ctxt /* Must be an enum value. */ && index_sval->get_type () - && TREE_CODE (TREE_TYPE (index)) == ENUMERAL_TYPE + && (!check_index_type + || TREE_CODE (TREE_TYPE (index)) == ENUMERAL_TYPE) && TREE_CODE (index_sval->get_type ()) == ENUMERAL_TYPE /* If we have a constant, then we can check it directly. */ && index_sval->get_kind () != SK_CONSTANT && edge.implicitly_created_default_p () - && has_nondefault_cases_for_all_enum_values_p (switch_stmt) + && has_nondefault_cases_for_all_enum_values_p (switch_stmt, + index_sval->get_type ()) /* Don't do this if there's a chance that the index is attacker-controlled. */ && !ctxt->possibly_tainted_p (index_sval)) diff --git a/gcc/testsuite/gcc.dg/analyzer/switch-no-short-enum-1.c b/gcc/testsuite/gcc.dg/analyzer/switch-no-short-enum-1.c new file mode 100644 index 000000000000..98f6d91f9748 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/switch-no-short-enum-1.c @@ -0,0 +1,141 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-short-enums" } */ +/* { dg-skip-if "default" { ! short_enums } } */ + +#include "analyzer-decls.h" + +/* Verify the handling of "switch (enum_value)". 
*/ + +enum e +{ + E_VAL0, + E_VAL1, + E_VAL2 +}; + +/* Verify that we assume that "switch (enum)" doesn't follow implicit + "default" if all enum values have cases */ + +int test_all_values_covered_implicit_default_1 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + case E_VAL2: + return 1945; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ +} + +int test_all_values_covered_implicit_default_2 (enum e x) +{ + int result; + switch (x) + { + case E_VAL0: + result = 1066; + break; + case E_VAL1: + result = 1776; + break; + case E_VAL2: + result = 1945; + break; + } + return result; /* { dg-bogus "uninitialized" } */ +} + +/* Verify that we consider paths that use the implicit default when not + all enum values are covered by cases. */ + +int test_missing_values_implicit_default_1 (enum e x) +{ + switch (x) /* { dg-message "following 'default:' branch" } */ + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + } + __analyzer_dump_path (); /* { dg-message "path" } */ + return 0; +} + +int test_missing_values_implicit_default_2 (enum e x) +{ + int result; + switch (x) /* { dg-message "following 'default:' branch" } */ + { + case E_VAL0: + result = 1066; + break; + case E_VAL1: + result = 1776; + break; + } + return result; /* { dg-warning "uninitialized" } */ +} + +/* Verify that explicit "default" isn't rejected. 
*/ + +int test_all_values_covered_explicit_default_1 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + case E_VAL2: + return 1945; + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 0; + } +} + +int test_missing_values_explicit_default_1 (enum e x) +{ + switch (x) + { + default: + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} + +int test_missing_values_explicit_default_2 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 1945; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} + +int test_just_default (enum e x) +{ + switch (x) + { + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 42; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/analyzer/switch-short-enum-1.c b/gcc/testsuite/gcc.dg/analyzer/switch-short-enum-1.c new file mode 100644 index 000000000000..384113fde5cb --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/switch-short-enum-1.c @@ -0,0 +1,140 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fshort-enums" } */ +/* { dg-skip-if "default" { short_enums } } */ + +#include "analyzer-decls.h" + +/* Verify the handling of "switch (enum_value)". 
*/ + +enum e +{ + E_VAL0, + E_VAL1, + E_VAL2 +}; + +/* Verify that we assume that "switch (enum)" doesn't follow implicit + "default" if all enum values have cases */ + +int test_all_values_covered_implicit_default_1 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + case E_VAL2: + return 1945; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ +} + +int test_all_values_covered_implicit_default_2 (enum e x) +{ + int result; + switch (x) + { + case E_VAL0: + result = 1066; + break; + case E_VAL1: + result = 1776; + break; + case E_VAL2: + result = 1945; + break; + } + return result; /* { dg-bogus "uninitialized" } */ +} + +/* Verify that we consider paths that use the implicit default when not + all enum values are covered by cases. */ + +int test_missing_values_implicit_default_1 (enum e x) +{ + switch (x) /* { dg-message "following 'default:' branch" } */ + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + } + __analyzer_dump_path (); /* { dg-message "path" } */ + return 0; +} + +int test_missing_values_implicit_default_2 (enum e x) +{ + int result; + switch (x) /* { dg-message "following 'default:' branch" } */ + { + case E_VAL0: + result = 1066; + break; + case E_VAL1: + result = 1776; + break; + } + return result; /* { dg-warning "uninitialized" } */ +} + +/* Verify that explicit "default" isn't rejected. 
*/ + +int test_all_values_covered_explicit_default_1 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + case E_VAL2: + return 1945; + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 0; + } +} + +int test_missing_values_explicit_default_1 (enum e x) +{ + switch (x) + { + default: + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} + +int test_missing_values_explicit_default_2 (enum e x) +{ + switch (x) + { + case E_VAL0: + return 1066; + case E_VAL1: + return 1776; + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 1945; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} + +int test_just_default (enum e x) +{ + switch (x) + { + default: + __analyzer_dump_path (); /* { dg-message "path" } */ + return 42; + } + __analyzer_dump_path (); /* { dg-bogus "path" } */ + return 0; +} From 0d79636b8d2c7f0da16fbf5f786993783a4389ff Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Dec 2023 09:45:13 +0100 Subject: [PATCH 036/311] testsuite: Fix up gcc.target/s390/pr96127.c test for modern C [PR96127] I've noticed this test regressed on s390x-linux with the addition of the switch to modern C patchset. Haven't tried to reproduce the ICE, but as it was a backend ICE and FE after warning used to add such casts before (now errors), I think this ought to keep the testcase testing what was intended before. 2023-12-07 Jakub Jelinek PR target/96127 * gcc.target/s390/pr96127.c (c1): Add casts to long int *. 
--- gcc/testsuite/gcc.target/s390/pr96127.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/s390/pr96127.c b/gcc/testsuite/gcc.target/s390/pr96127.c index 213ed147175e..dd78643e452c 100644 --- a/gcc/testsuite/gcc.target/s390/pr96127.c +++ b/gcc/testsuite/gcc.target/s390/pr96127.c @@ -7,7 +7,7 @@ void c1 (int oz, int dk, int ub) { int *hd = 0; - long int *th = &dk; + long int *th = (long int *) &dk; while (ub < 1) { @@ -17,7 +17,7 @@ c1 (int oz, int dk, int ub) while (oz < 2) { - long int *lq = &oz; + long int *lq = (long int *) &oz; (*hd < (*lq = *th)) < oz; From f74939bd539c87f669e35042f7bc7aa47f0c29eb Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Dec 2023 09:46:38 +0100 Subject: [PATCH 037/311] tree-ssa-dce: Fix up maybe_optimize_arith_overflow for BITINT_TYPE [PR112880] The following testcase ICEs because maybe_optimize_arith_overflow uses build_nonstandard_integer_type, which is inappropriate if type is large BITINT_TYPE. 2023-12-07 Jakub Jelinek PR tree-optimization/112880 * tree-ssa-dce.cc (maybe_optimize_arith_overflow): Use unsigned_type_for instead of conditionally calling build_nonstandard_integer_type. * gcc.dg/bitint-49.c: New test. 
--- gcc/testsuite/gcc.dg/bitint-49.c | 37 ++++++++++++++++++++++++++++++++ gcc/tree-ssa-dce.cc | 4 +--- 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/bitint-49.c diff --git a/gcc/testsuite/gcc.dg/bitint-49.c b/gcc/testsuite/gcc.dg/bitint-49.c new file mode 100644 index 000000000000..74d9bde7e155 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-49.c @@ -0,0 +1,37 @@ +/* PR tree-optimization/112880 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +#if __BITINT_MAXWIDTH__ >= 1024 +_BitInt(1024) a, b, c, d, e, f; + +void +foo (void) +{ + __builtin_add_overflow (a, b, &a); + __builtin_sub_overflow (c, d, &c); + __builtin_mul_overflow (e, f, &e); +} +#endif + +#if __BITINT_MAXWIDTH__ >= 512 +_BitInt(512) g, h, i, j, k, l; + +void +bar (void) +{ + __builtin_add_overflow (g, h, &g); + __builtin_sub_overflow (i, j, &i); + __builtin_mul_overflow (k, l, &k); +} +#endif + +_BitInt(32) m, n, o, p, q, r; + +void +baz (void) +{ + __builtin_add_overflow (m, n, &m); + __builtin_sub_overflow (o, p, &o); + __builtin_mul_overflow (q, r, &q); +} diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index 4e371b21a903..fbf4773eb9db 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -1241,9 +1241,7 @@ maybe_optimize_arith_overflow (gimple_stmt_iterator *gsi, tree arg1 = gimple_call_arg (stmt, 1); location_t loc = gimple_location (stmt); tree type = TREE_TYPE (TREE_TYPE (lhs)); - tree utype = type; - if (!TYPE_UNSIGNED (type)) - utype = build_nonstandard_integer_type (TYPE_PRECISION (type), 1); + tree utype = unsigned_type_for (type); tree result = fold_build2_loc (loc, subcode, utype, fold_convert_loc (loc, utype, arg0), fold_convert_loc (loc, utype, arg1)); From e5489faf8efa30d8548bb669c0a700c409068bce Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Dec 2023 09:47:16 +0100 Subject: [PATCH 038/311] expr: Handle BITINT_TYPE in count_type_elements [PR112881] The following testcaser ICEs 
during gimplification, because count_type_elements doesn't handle BITINT_TYPE. It should handle it like other integral types. 2023-12-07 Jakub Jelinek PR middle-end/112881 * expr.cc (count_type_elements): Handle BITINT_TYPE like INTEGER_TYPE. * gcc.dg/bitint-50.c: New test. --- gcc/expr.cc | 1 + gcc/testsuite/gcc.dg/bitint-50.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/bitint-50.c diff --git a/gcc/expr.cc b/gcc/expr.cc index fea719028a16..6da51f2aca29 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7021,6 +7021,7 @@ count_type_elements (const_tree type, bool for_ctor_p) case REFERENCE_TYPE: case NULLPTR_TYPE: case OPAQUE_TYPE: + case BITINT_TYPE: return 1; case ERROR_MARK: diff --git a/gcc/testsuite/gcc.dg/bitint-50.c b/gcc/testsuite/gcc.dg/bitint-50.c new file mode 100644 index 000000000000..ea7820eafd7b --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-50.c @@ -0,0 +1,21 @@ +/* PR middle-end/112881 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-O2 -std=c23" } */ + +struct S { _BitInt(64) b; }; + +struct S +foo (_BitInt(64) p) +{ + return (struct S) { p }; +} + +#if __BITINT_MAXWIDTH__ >= 3924 +struct T { _BitInt(3924) b; }; + +struct T +bar (_BitInt(3924) p) +{ + return (struct T) { p }; +} +#endif From 8c088c4307dbb855596acb6366b0882e95d20b91 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Dec 2023 09:47:54 +0100 Subject: [PATCH 039/311] c-family: Fix up -fno-debug-cpp [PR111965] As can be seen in the second testcase, -fno-debug-cpp is actually implemented the same as -fdebug-cpp and so doesn't turn the debugging off. The following patch fixes that. 2023-12-07 Andrew Pinski Jakub Jelinek PR preprocessor/111965 gcc/c-family/ * c-opts.cc (c_common_handle_option) : Set cpp_opts->debug to value rather than 1. gcc/testsuite/ * gcc.dg/cpp/pr111965-1.c: New test. * gcc.dg/cpp/pr111965-2.c: New test. 
--- gcc/c-family/c-opts.cc | 2 +- gcc/testsuite/gcc.dg/cpp/pr111965-1.c | 5 +++++ gcc/testsuite/gcc.dg/cpp/pr111965-2.c | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/cpp/pr111965-1.c create mode 100644 gcc/testsuite/gcc.dg/cpp/pr111965-2.c diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index 60aa7c5fd430..0399899387f7 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -532,7 +532,7 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value, break; case OPT_fdebug_cpp: - cpp_opts->debug = 1; + cpp_opts->debug = value; break; case OPT_ftrack_macro_expansion: diff --git a/gcc/testsuite/gcc.dg/cpp/pr111965-1.c b/gcc/testsuite/gcc.dg/cpp/pr111965-1.c new file mode 100644 index 000000000000..97900552a4c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr111965-1.c @@ -0,0 +1,5 @@ +/* PR preprocessor/111965 + { dg-do preprocess } + { dg-options "-fdebug-cpp" } + { dg-final { scan-file pr111965-1.i "P:;F:;" } } */ +int x; diff --git a/gcc/testsuite/gcc.dg/cpp/pr111965-2.c b/gcc/testsuite/gcc.dg/cpp/pr111965-2.c new file mode 100644 index 000000000000..44f301210925 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr111965-2.c @@ -0,0 +1,5 @@ +/* PR preprocessor/111965 + { dg-do preprocess } + { dg-options "-fdebug-cpp -fno-debug-cpp" } + { dg-final { scan-file-not pr111965-2.i "P:;F:;" } } */ +int x; From bf38c6374b5fb6a2afa05af26432f1fd1a120bc4 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 7 Dec 2023 09:48:57 +0100 Subject: [PATCH 040/311] testsuite: Add testcase for already fixed PR [PR111068] This one unfortunately can't be bisected, it ICEd until r14-3430 inclusive, but r14-3431 removed -mavx10.1-512 support and when it was readded in r14-5607 it doesn't ICE anymore. I'm just committing the testcase so that it doesn't reappear. 2023-12-07 Jakub Jelinek PR target/111068 * gcc.target/i386/pr111068.c: New test. 
--- gcc/testsuite/gcc.target/i386/pr111068.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr111068.c diff --git a/gcc/testsuite/gcc.target/i386/pr111068.c b/gcc/testsuite/gcc.target/i386/pr111068.c new file mode 100644 index 000000000000..4ff2ea0efc05 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111068.c @@ -0,0 +1,12 @@ +/* PR target/111068 */ +/* { dg-do compile } */ +/* { dg-options "-ffloat-store -mavx10.1-512" } */ + +typedef _Float16 __attribute__((__vector_size__ (8))) V; +V u, v, w; + +void +foo (void) +{ + v /= __builtin_shufflevector (w, u, 3, 3, 6, 1); +} From 8c09c73adf4c537876e8ef418378d6581b768a64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Tue, 5 Dec 2023 01:00:11 +0100 Subject: [PATCH 041/311] RISC-V: xtheadfmemidx: Disable if xtheadmemidx is not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XTheadMemIdx provides register-register offsets for GP register loads/stores. XTheadFMemIdx does the same for FP registers. We've observed an issue with XTheadFMemIdx-only builds, where FP registers have been promoted to GP registers: (insn 26 22 51 (set (reg:DF 15 a5 [orig:136 ] [136]) (mem/u:DF (plus:DI (reg/f:DI 15 a5 [141]) (reg:DI 10 a0 [144])) [1 CSWTCH.2[_10]+0 S8 A64])) 217 {*movdf_hardfloat_rv64} (expr_list:REG_DEAD (reg:DI 10 a0 [144]) (nil))) This results in the following assembler error: Assembler messages: Error: unrecognized opcode `th.lrd a5,a5,a0,0', extension `xtheadmemidx' required There seems to be a (reasonable) assumption, that addressing modes for FP registers are compatible with those of GP registers. We already ran into a similar issue during development of the XTheadFMemIdx support patch, where we could trace the issue down to the optimization splitters. Back then we simply disabled them in case XTheadMemIdx is not available. But as it turned out, that was not enough. 
To ensure, we won't see such issues anymore, let's make the support for XTheadFMemIdx depend on XTheadMemIdx. I.e., if only XTheadFMemIdx is available, then no instructions of this extension will be emitted. While this looks a bit drastic at first view, it is the best practical solution since XTheadFMemIdx without XTheadMemIdx does not exist in real hardware and would be an odd thing to do. gcc/ChangeLog: * config/riscv/thead.cc (th_memidx_classify_address_index): Require TARGET_XTHEADMEMIDX for FP modes. * config/riscv/thead.md: Require TARGET_XTHEADMEMIDX for all XTheadFMemIdx pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c: New test. Reported-by: Jin Ma Signed-off-by: Christoph Müllner --- gcc/config/riscv/thead.cc | 3 +- gcc/config/riscv/thead.md | 19 ++++----- .../xtheadfmemidx-without-xtheadmemidx.c | 39 +++++++++++++++++++ 3 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc index bd9af7ecd60a..203539959319 100644 --- a/gcc/config/riscv/thead.cc +++ b/gcc/config/riscv/thead.cc @@ -603,7 +603,8 @@ th_memidx_classify_address_index (struct riscv_address_info *info, rtx x, { /* Ensure that the mode is supported. */ if (!(TARGET_XTHEADMEMIDX && is_memidx_mode (mode)) - && !(TARGET_XTHEADFMEMIDX && is_fmemidx_mode (mode))) + && !(TARGET_XTHEADMEMIDX + && TARGET_XTHEADFMEMIDX && is_fmemidx_mode (mode))) return false; if (GET_CODE (x) != PLUS) diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md index 2babfafb23ca..186ca4688754 100644 --- a/gcc/config/riscv/thead.md +++ b/gcc/config/riscv/thead.md @@ -822,11 +822,19 @@ ) ;; XTheadFMemIdx +;; Note, that we might get GP registers in FP-mode (reg:DF a2) +;; which cannot be handled by the XTheadFMemIdx instructions. +;; This might even happend after register allocation. 
+;; We could implement splitters that undo the combiner results +;; if "after_reload && !HARDFP_REG_P (operands[0])", but this +;; raises even more questions (e.g. split into what?). +;; So let's solve this by simply requiring XTheadMemIdx +;; which provides the necessary instructions to cover this case. (define_insn "*th_fmemidx_movsf_hardfloat" [(set (match_operand:SF 0 "nonimmediate_operand" "=f,th_m_mir,f,th_m_miu") (match_operand:SF 1 "move_operand" " th_m_mir,f,th_m_miu,f"))] - "TARGET_HARD_FLOAT && TARGET_XTHEADFMEMIDX + "TARGET_HARD_FLOAT && TARGET_XTHEADFMEMIDX && TARGET_XTHEADMEMIDX && (register_operand (operands[0], SFmode) || reg_or_0_operand (operands[1], SFmode))" { return riscv_output_move (operands[0], operands[1]); } @@ -837,6 +845,7 @@ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,th_m_mir,f,th_m_miu") (match_operand:DF 1 "move_operand" " th_m_mir,f,th_m_miu,f"))] "TARGET_64BIT && TARGET_DOUBLE_FLOAT && TARGET_XTHEADFMEMIDX + && TARGET_XTHEADMEMIDX && (register_operand (operands[0], DFmode) || reg_or_0_operand (operands[1], DFmode))" { return riscv_output_move (operands[0], operands[1]); } @@ -845,14 +854,6 @@ ;; XTheadFMemIdx optimizations ;; Similar like XTheadMemIdx optimizations, but less cases. -;; Note, that we might get GP registers in FP-mode (reg:DF a2) -;; which cannot be handled by the XTheadFMemIdx instructions. -;; This might even happend after register allocation. -;; We could implement splitters that undo the combiner results -;; if "after_reload && !HARDFP_REG_P (operands[0])", but this -;; raises even more questions (e.g. split into what?). -;; So let's solve this by simply requiring XTheadMemIdx -;; which provides the necessary instructions to cover this case. 
(define_insn_and_split "*th_fmemidx_I_a" [(set (match_operand:TH_M_NOEXTF 0 "register_operand" "=f") diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c new file mode 100644 index 000000000000..c5502390ccab --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ +/* { dg-options "-march=rv64gc_xtheadfmemidx" { target { rv64 } } } */ +/* { dg-options "-march=rv32gc_xtheadfmemidx" { target { rv32 } } } */ + +typedef union { + double v; + unsigned w; +} my_t; + +double z; + +double foo (int i, int j) +{ + + if (j) + { + switch (i) + { + case 0: + return 1; + case 1: + return 0; + case 2: + return 3.0; + } + } + + if (i == 1) + { + my_t u; + u.v = z; + u.w = 1; + z = u.v; + } + return z; +} + +/* { dg-final { scan-assembler-not "th.lrd\t" } } */ From 570d74119d2daaa9ea909b6326a3756f548097d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20M=C3=BCllner?= Date: Tue, 5 Dec 2023 02:54:52 +0100 Subject: [PATCH 042/311] RISC-V: xtheadmemidx: Document inline asm issue with memory constraint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XTheadMemIdx support relies on the fact that memory operands that can be expressed by XTheadMemIdx instructions, will only appear as operands of such instructions. For internal instruction generation this is guaranteed by the implementation. However, in case of inline assembly, this guarantee is not given and we cannot differentiate these two cases when printing the operand: asm volatile ("sd %1,%0" : "=m"(*tmp) : "r"(val)); asm volatile ("th.srd %1,%0" : "=m"(*tmp) : "r"(val)); If XTheadMemIdx is enabled, then the address will be printed as if an XTheadMemIdx instruction is emitted, which is obviously wrong in the first case.
There might be solutions to handle this (e.g. using TARGET_MEM_CONSTRAINT or extending the mnemonics to accept the standard operands for XTheadMemIdx instructions), but let's document this behavior for now as a known issue by adding xfail tests until we have an acceptable fix. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadmemidx-inline-asm-1.c: New test. Reported-by: Jin Ma Signed-off-by: Christoph Müllner --- .../riscv/xtheadmemidx-inline-asm-1.c | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadmemidx-inline-asm-1.c diff --git a/gcc/testsuite/gcc.target/riscv/xtheadmemidx-inline-asm-1.c b/gcc/testsuite/gcc.target/riscv/xtheadmemidx-inline-asm-1.c new file mode 100644 index 000000000000..d4cfdeeaaec4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xtheadmemidx-inline-asm-1.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ +/* { dg-options "-march=rv64gc_xtheadmemidx -mabi=lp64" } */ + +/* XTheadMemIdx support is implemented such that reg+reg addressing mode + loads/stores are preferred over standard loads/stores. + If this order changed using inline assembly, the result will be invalid + instructions. This test serves the purpose of documenting this + limitation until a solution is available. 
*/ + +void foo (void *p, unsigned long off, unsigned long val) +{ + unsigned long *tmp = (unsigned long*)(p + off); + asm volatile ("sd %1,%0" : "=m"(*tmp) : "r"(val)); +} + +void bar (void *p, unsigned long off, unsigned long val) +{ + unsigned long *tmp = (unsigned long*)(p + off); + asm volatile ("th.srd %1,%0" : "=m"(*tmp) : "r"(val)); +} + +/* { dg-final { scan-assembler "sd\t\[a-z\]\[0-9\]+,0\\(\[a-z\]\[0-9\]+\\)" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-not "sd\t\[a-z\]\[0-9\]+,\[a-z\]\[0-9\]+,\[a-z\]\[0-9\]+,0" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler "th\.srd\t\[a-z\]\[0-9\]+,\[a-z\]\[0-9\]+,\[a-z\]\[0-9\]+,0" } } */ +/* { dg-final { scan-assembler-not "th\.srd\t\[a-z\]\[0-9\]+,0\\(\[a-z\]\[0-9\]+\\)" } } */ From abded9bf3e1ebc1789d47ac2c445f5b613981a0e Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Thu, 7 Dec 2023 20:08:02 +0800 Subject: [PATCH 043/311] RISC-V: Fix AVL propagation ICE for vleff/vlsegff This patch fixes 400 ICEs in full coverage testing: internal compiler error: in validate_change_or_fail, at config/riscv/riscv-v.cc:4597 The root cause is each operand is used in vleff/vlsegff twice: (define_insn "@pred_fault_load" [(set (match_operand:V 0 "register_operand" "=vd, vd, vr, vr") (if_then_else:V (unspec: [(match_operand: 1 "vector_mask_operand" " vm, vm, Wc1, Wc1") (match_operand 4 "vector_length_operand" " rK, rK, rK, rK") (match_operand 5 "const_int_operand" " i, i, i, i") (match_operand 6 "const_int_operand" " i, i, i, i") (match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:V [(match_operand:V 3 "memory_operand" " m, m, m, m")] UNSPEC_VLEFF) (match_operand:V 2 "vector_merge_operand" " vu, 0, vu, 0"))) (set (reg:SI VL_REGNUM) (unspec:SI [(if_then_else:V (unspec: [(match_dup 1) (match_dup 4) (match_dup 5) (match_dup 6) (match_dup 7) (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:V [(match_dup 3)] UNSPEC_VLEFF) (match_dup 
2))] UNSPEC_MODIFY_VL))] Then later instruction change in AVL propagation change ICE: validate_change_or_fail (rinsn, recog_data.operand_loc[index], get_avl_type_rtx (avl_type::NONVLMAX), false); which is the operand change according to location. Such operand change in 2 locations instead of 1. So regenerate pattern for such instructions AVL propagation to fix the ICEs. gcc/ChangeLog: * config/riscv/riscv-avlprop.cc (simplify_replace_avl): New function. (simplify_replace_vlmax_avl): Fix bug. * config/riscv/t-riscv: Add a new include file. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_prop-2.c: New test. --- gcc/config/riscv/riscv-avlprop.cc | 36 +++++++++++++--- gcc/config/riscv/t-riscv | 2 +- .../gcc.target/riscv/rvv/vsetvl/avl_prop-2.c | 41 +++++++++++++++++++ 3 files changed, 72 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-2.c diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index d298f0ea456c..02f006742f18 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgcleanup.h" #include "insn-attr.h" #include "tm-constrs.h" +#include "insn-opinit.h" using namespace rtl_ssa; using namespace riscv_vector; @@ -142,6 +143,34 @@ get_insn_vtype_mode (rtx_insn *rinsn) return GET_MODE (recog_data.operand[mode_idx]); } +/* Return new pattern for AVL propagation. + Normally, we just replace AVL operand only for most + of the instructions. However, for instructions like + fault load which use AVL TYPE twice in the pattern which + will cause ICE in the later AVL TYPE change so we regenerate + the whole pattern for such instructions. */ +static rtx +simplify_replace_avl (rtx_insn *rinsn, rtx new_avl) +{ + /* Replace AVL operand. 
*/ + extract_insn_cached (rinsn); + rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; + int count = count_regno_occurrences (rinsn, REGNO (avl)); + gcc_assert (count == 1); + rtx new_pat = simplify_replace_rtx (PATTERN (rinsn), avl, new_avl); + if (get_attr_type (rinsn) == TYPE_VLDFF + || get_attr_type (rinsn) == TYPE_VLSEGDFF) + new_pat + = gen_pred_fault_load (recog_data.operand_mode[0], recog_data.operand[0], + recog_data.operand[1], recog_data.operand[2], + recog_data.operand[3], new_avl, + recog_data.operand[5], recog_data.operand[6], + get_avl_type_rtx (avl_type::NONVLMAX)); + else + new_pat = simplify_replace_rtx (PATTERN (rinsn), avl, new_avl); + return new_pat; +} + static void simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl) { @@ -152,12 +181,7 @@ simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl) fprintf (dump_file, "into: "); print_rtl_single (dump_file, rinsn); } - /* Replace AVL operand. */ - extract_insn_cached (rinsn); - rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; - int count = count_regno_occurrences (rinsn, REGNO (avl)); - gcc_assert (count == 1); - rtx new_pat = simplify_replace_rtx (PATTERN (rinsn), avl, new_avl); + rtx new_pat = simplify_replace_avl (rinsn, new_avl); validate_change_or_fail (rinsn, &PATTERN (rinsn), new_pat, false); /* Change AVL TYPE into NONVLMAX if it is VLMAX. 
*/ diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 3b9686daa585..372bb77c7264 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -81,7 +81,7 @@ riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \ riscv-avlprop.o: $(srcdir)/config/riscv/riscv-avlprop.cc \ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \ $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-attr.h \ - tm-constrs.h + tm-constrs.h insn-opinit.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-avlprop.cc diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-2.c new file mode 100644 index 000000000000..fdef8e37ce5f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_prop-2.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zve32f -mabi=lp64d -O3 --param=riscv-autovec-preference=fixed-vlmax" } */ + +int d0, sj, v0, rp, zi; + +void +zn(void) +{ + if (v0 != 0) + { + int *js, *r3; + int pm, gc; + + for (gc = 0; gc < 1; ++gc) + { + sj = 1; + while (sj != 0) + ; + } + r3 = &pm; + *js = (long)&gc; +ka: + for (d0 = 0; d0 < 2; ++d0) + { + d0 = zi; + if (zi) + for (pm = 2; pm != 0; --pm) + ; + } + while (*r3 != 0) + { + while (pm) + ; + ++r3; + } + } + rp = 0; + goto ka; +} + +/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*1} 2 } } */ From f8c8aebc597611b8d37edce165a891d00e23a9c1 Mon Sep 17 00:00:00 2001 From: Gaius Mulley Date: Thu, 7 Dec 2023 13:10:49 +0000 Subject: [PATCH 044/311] PR modula2/112893 detect procedure address incompatible with cardinal in iso In ISO m2 the type cardinal is assignment incompatible with address (but it is allowed in PIM). The patch also extends the type checker to include procedures (which appear as having GetType () = address). At some point this should be improved to use a pointer to proc type. Perhaps in the next stage1.
For now this will catch procedures being passed as actual parameters into a formal cardinal parameter in ISO m2 (for example). gcc/m2/ChangeLog: PR modula2/112893 * gm2-compiler/M2Base.mod (Ass): Extend array to include proc row and column. Allow PIM to assign cardinal variables to address variables. (Expr): Ditto. (Comp): Ditto. * gm2-compiler/M2Check.mod (getSType): New procedure function. Replace all occurrences of GetSType with getSType. * gm2-compiler/M2GenGCC.mod (CodeParam): Rewrite format specifier error message. * gm2-compiler/M2Quads.mod (CheckProcTypeAndProcedure): Add tokno parameter. * gm2-compiler/M2Range.def (InitTypesParameterCheck): Add tokno parameter. (InitParameterRangeCheck): Add tokno parameter. Remove EXPORT QUALIFIED list. (InitParameterRangeCheck): Add tokno parameter. * gm2-compiler/M2Range.mod (InitTypesParameterCheck): Add tokno parameter and pass tokno to PutRangeParam. (InitParameterRangeCheck): Add tokno parameter and pass tokno to PutRangeParam. (PutRangeParam): Add tokno parameter and assign to tokenNo. (FoldTypeParam): Rewrite format string. gcc/testsuite/ChangeLog: PR modula2/112893 * gm2/iso/fail/proccard.mod: New test. * gm2/pim/pass/proccard.mod: New test.
Signed-off-by: Gaius Mulley --- gcc/m2/gm2-compiler/M2Base.mod | 336 ++++++++++++------------ gcc/m2/gm2-compiler/M2Check.mod | 39 ++- gcc/m2/gm2-compiler/M2GenGCC.mod | 4 +- gcc/m2/gm2-compiler/M2Quads.mod | 22 +- gcc/m2/gm2-compiler/M2Range.def | 42 +-- gcc/m2/gm2-compiler/M2Range.mod | 18 +- gcc/testsuite/gm2/iso/fail/proccard.mod | 14 + gcc/testsuite/gm2/pim/pass/proccard.mod | 14 + 8 files changed, 254 insertions(+), 235 deletions(-) create mode 100644 gcc/testsuite/gm2/iso/fail/proccard.mod create mode 100644 gcc/testsuite/gm2/pim/pass/proccard.mod diff --git a/gcc/m2/gm2-compiler/M2Base.mod b/gcc/m2/gm2-compiler/M2Base.mod index cc3aa4cc9616..bae90cb1cd66 100644 --- a/gcc/m2/gm2-compiler/M2Base.mod +++ b/gcc/m2/gm2-compiler/M2Base.mod @@ -2480,7 +2480,6 @@ BEGIN END ; INC(x) | - ELSE InternalError ('unexpected specifier') END ; @@ -2544,84 +2543,85 @@ BEGIN (* 1 p w - N W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A - u o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r - l r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r - S d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a - y e g t i i t c t r u e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y - m s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e + C W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A P + o o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r r + n r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r o + s d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a c + t e g t i i t c t r u e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y + s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e s r n t a a r e a 8 x o m x x x x t l r d a l m p 3 
6 9 1 d l p l 2 4 6 2 l e 8 e x x - ------------------------------------------------------------------------------------------------------------ + -------------------------------------------------------------------------------------------------------------- 2 P W *) - A(const , 'T T T T T T T T T T T T T T T T T T T F T T T T T T T T T T T T F F F F F F F F F F F F F F F F F') ; - A(word , '. T S S S 2 S S 2 S S S 2 S S S T T S S T S S S S S S S S S S S S S S S S S S S S S S S S S T T T') ; - A(byte , '. . T S 2 S S S S S S S S S S S T T S S T S S S S S S S S S S S S S S S S S S S S S S S S S T T T') ; - A(address , '. . . T F F F F F F F 2 F F F F F 2 2 F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - A(chr , '. . . . T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - A(normint , '. . . . . T T T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(shortint , '. . . . . . T T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(longint , '. . . . . . . T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(normcard , '. . . . . . . . T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(shortcard , '. . . . . . . . . T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(longcard , '. . . . . . . . . . T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - A(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F T T F F F F F F F F F F F F F F F') ; - A(real , '. . . . . . . . . . . . . T T T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F') ; - A(shortreal , '. . . . . . . . . . . . . . 
T T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F') ; - A(longreal , '. . . . . . . . . . . . . . . T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F') ; - A(set , '. . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - A(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - A(loc , '. . . . . . . . . . . . . . . . . . T F F T F F F T F F F F F F F F F F S F F F F F F F F F F T T') ; - A(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F') ; - A(ztype , '. . . . . . . . . . . . . . . . . . . . T T T T T T T T T T T T F F F F F F F F F F F F F F F F F') ; - A(int8 , '. . . . . . . . . . . . . . . . . . . . . T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(int16 , '. . . . . . . . . . . . . . . . . . . . . . T T T T T T T T F F F F F F F F F F F F F F F F F F F') ; - A(int32 , '. . . . . . . . . . . . . . . . . . . . . . . T T T T T T F T T F F F F F F F F F F F F F F F F F') ; - A(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T T T T T F F F F F F F F F F F F F F F F F F F F') ; - A(card8 , '. . . . . . . . . . . . . . . . . . . . . . . . . T T T T T F F F F F F F F F F F F F F F F F F F') ; - A(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T T T F F F F F F F F F F F F F F F F F F F F') ; - A(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T T F T F F F F F F F F F F F F F F F F F F') ; - A(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F F F F F F F F F F F F F F F F') ; - A(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F') ; - A(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F F F F F F F F F F F F F F') ; - A(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
T F T F F F F F F F F F F F F F F F') ; - A(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F') ; - A(real64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; - A(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; - A(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; - A(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; - A(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; - A(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F') ; - A(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F') ; - A(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F T F F') ; - A(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F T F F') ; - A(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F') ; - A(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F') ; - A(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F') ; - A(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F') ; - A(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F') ; - A(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F') ; - A(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . T') ; + A(const , 'T T T T T T T T T T T T T T T T T T T F T T T T T T T T T T T T F F F F F F F F F F F F F F F F F F') ; + A(word , '. T S S S 2 S S 2 S S S 2 S S S T T S S T S S S S S S S S S S S S S S S S S S S S S S S S S T T T F') ; + A(byte , '. . T S 2 S S S S S S S S S S S T T S S T S S S S S S S S S S S S S S S S S S S S S S S S S T T T F') ; + A(address , '. . . T F F F F P F F 2 F F F F F 2 2 F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F T') ; + A(chr , '. . . . T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + A(normint , '. . . . . T T T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(shortint , '. . . . . . T T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(longint , '. . . . . . . T T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(normcard , '. . . . . . . . T T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(shortcard , '. . . . . . . . . T T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(longcard , '. . . . . . . . . . T F F F F F F F F F T T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + A(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F T T F F F F F F F F F F F F F F F F') ; + A(real , '. . . . . . . . . . . . . T T T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F F') ; + A(shortreal , '. . . . . . . . . . . . . . T T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F F') ; + A(longreal , '. . . . . . . . . . . . . . . T F F F 2 F F F F F F F F F F F F T T T T F F F F F F F F F F F F F F') ; + A(set , '. . . . . . . . . . . . . . . . 
T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + A(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + A(loc , '. . . . . . . . . . . . . . . . . . T F F T F F F T F F F F F F F F F F S F F F F F F F F F F T T F') ; + A(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F F') ; + A(ztype , '. . . . . . . . . . . . . . . . . . . . T T T T T T T T T T T T F F F F F F F F F F F F F F F F F F') ; + A(int8 , '. . . . . . . . . . . . . . . . . . . . . T T T T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(int16 , '. . . . . . . . . . . . . . . . . . . . . . T T T T T T T T F F F F F F F F F F F F F F F F F F F F') ; + A(int32 , '. . . . . . . . . . . . . . . . . . . . . . . T T T T T T F T T F F F F F F F F F F F F F F F F F F') ; + A(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T T T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(card8 , '. . . . . . . . . . . . . . . . . . . . . . . . . T T T T T F F F F F F F F F F F F F F F F F F F F') ; + A(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T T T F F F F F F F F F F F F F F F F F F F F F') ; + A(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T T F T F F F F F F F F F F F F F F F F F F F') ; + A(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F F F F F F F F F F F F F F F F F') ; + A(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F') ; + A(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F F F F F F F F F F F F F F F') ; + A(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F F F F F F F F F F F F F F') ; + A(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F') ; + A(real64 , '. . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F') ; + A(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; + A(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; + A(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; + A(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; + A(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; + A(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F F') ; + A(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F T F F F') ; + A(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F T F F F') ; + A(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F F') ; + A(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F F') ; + A(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F') ; + A(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F F') ; + A(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F') ; + A(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F') ; + A(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F') ; + A(procedure , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
T') ; (* Expression compatibility *) (* 1 p w - N W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A - u o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r - l r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r - S d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a - y e g t i i t c t r u e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y - m s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e + C W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A P + o o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r r + n r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r o + s d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a c + t e g t i i t c t r u e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y + s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e s r n t a a r e a 8 x o m x x x x t l r d a l m p 3 6 9 1 d l p l 2 4 6 2 @@ -2634,55 +2634,56 @@ BEGIN W *) - E(const , 'T T T T T T T T T T T T T T T T T T F F T T T T T T T T T T T T T T T T F F F F F F F F F F F F F') ; - E(word , '. T F F F F F F F F F F F F F F F F F W F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(byte , '. . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(address , '. . . T F P F F P F F T F F F F F F F F P F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(chr , '. . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(normint , '. . . . . T F F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(shortint , '. . . . . . 
T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(longint , '. . . . . . . T F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(normcard , '. . . . . . . . T F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(shortcard , '. . . . . . . . . T F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(longcard , '. . . . . . . . . . T F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(real , '. . . . . . . . . . . . . T F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(shortreal , '. . . . . . . . . . . . . . T F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(longreal , '. . . . . . . . . . . . . . . T F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(set , '. . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(loc , '. . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F') ; - E(ztype , '. . . . . . . . . . . . . . . . . . . . T 1 1 1 1 1 1 1 1 1 1 1 F F F F F F F F F F F F F F F F F') ; - E(int8 , '. . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(int16 , '. . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(int32 , '. . . . . . . . . . . . . . . . . . . . . 
. . T F F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F') ; - E(card8 , '. . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F') ; - E(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F') ; - E(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F') ; - E(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F') ; - E(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F') ; - E(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F') ; - E(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F') ; - E(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F') ; - E(real64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; - E(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; - E(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; - E(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; - E(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; - E(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F') ; - E(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F') ; - E(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. . . . . . T F F F F F T F F') ; - E(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F T F F') ; - E(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F') ; - E(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F') ; - E(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F') ; - E(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F') ; - E(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F') ; - E(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F') ; - E(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F') ; + E(const , 'T T T T T T T T T T T T T T T T T T F F T T T T T T T T T T T T T T T T F F F F F F F F F F F F F F') ; + E(word , '. T F F F F F F F F F F F F F F F F F W F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(byte , '. . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(address , '. . . T F P F F P F F T F F F F F F F F P F F F F F F F F F F F F F F F F F F F F F F F F F F F F T') ; + E(chr , '. . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(normint , '. . . . . T F F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(shortint , '. . . . . . T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(longint , '. . . . . . . T F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(normcard , '. . . . . . . . 
T F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(shortcard , '. . . . . . . . . T F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(longcard , '. . . . . . . . . . T F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(real , '. . . . . . . . . . . . . T F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(shortreal , '. . . . . . . . . . . . . . T F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(longreal , '. . . . . . . . . . . . . . . T F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(set , '. . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(loc , '. . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F F') ; + E(ztype , '. . . . . . . . . . . . . . . . . . . . T 1 1 1 1 1 1 1 1 1 1 1 F F F F F F F F F F F F F F F F F F') ; + E(int8 , '. . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(int16 , '. . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(int32 , '. . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(card8 , '. . . . . . . . 
. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F') ; + E(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F') ; + E(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F') ; + E(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F') ; + E(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F') ; + E(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F') ; + E(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F') ; + E(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F') ; + E(real64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F') ; + E(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; + E(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; + E(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; + E(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; + E(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; + E(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F F') ; + E(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F T F F F') ; + E(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
T F F F F T F F F') ; + E(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F F') ; + E(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F F') ; + E(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F') ; + E(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F F') ; + E(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F') ; + E(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F') ; + E(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F') ; + E(procedure , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T') ; (* Comparison compatibility *) @@ -2690,12 +2691,12 @@ BEGIN (* 1 p w - N W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A - u o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r - l r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r - S d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a - y e g t i i t c t r u e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y - m s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e + C W B A C I S L C S L P E R S L S O L R Z I I I I C C C C W W W R R R R S S S C S L C C C C C R A P + o o y d h n h o a h o t n e h o e p o t t n n n n a a a a o o o e e e e e e e o h o o o o o t e r r + n r t d a t o n r o n r u a o n t a c y y t t t t r r r r r r r a a a a t t t m o n m m m m y c r o + s d e r r e r g d r g m l r g q p p 8 1 3 6 d d d d d d d l l l l 8 1 3 p r g p p p p p a c + t e g t i i t c t r u 
e e 6 2 4 8 1 3 6 1 3 6 3 6 9 1 6 2 l t C l l l l e y + s e i n n c a r e e 6 2 4 6 2 4 2 4 6 2 e C o e e e e s r n t a a r e a 8 x o m x x x x t l r d a l m p 3 6 9 1 d l p l 2 4 6 2 @@ -2708,55 +2709,56 @@ BEGIN W *) - C(const , 'T T T T T T T T T T T T T T T T T T F F T T T T T T T T T T T T T T T T F F F F F F F F F F F F F') ; - C(word , '. T F F F F F F F F F F F F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(byte , '. . T F F F F F F F F F F F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(address , '. . . T F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(chr , '. . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(normint , '. . . . . T F F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(shortint , '. . . . . . T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(longint , '. . . . . . . T F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(normcard , '. . . . . . . . T F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(shortcard , '. . . . . . . . . T F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(longcard , '. . . . . . . . . . T F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(real , '. . . . . . . . . . . . . T F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(shortreal , '. . . . . . . . . . . . . . T F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(longreal , '. . . . . . . . . . 
. . . . . T F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(set , '. . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(loc , '. . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F') ; - C(ztype , '. . . . . . . . . . . . . . . . . . . . T 1 1 1 1 1 1 1 1 1 1 1 F F F F F F F F F F F F F F F F F') ; - C(int8 , '. . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(int16 , '. . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(int32 , '. . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F') ; - C(card8 , '. . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F') ; - C(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F') ; - C(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F') ; - C(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F') ; - C(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F') ; - C(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F') ; - C(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F') ; - C(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
. T F F F F F F F F F F F F F F F F') ; - C(real64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; - C(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; - C(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; - C(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; - C(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; - C(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F') ; - C(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F') ; - C(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F T F F') ; - C(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F T F F') ; - C(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F') ; - C(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F') ; - C(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F') ; - C(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F') ; - C(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F') ; - C(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F') ; - C(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
F') ; + C(const , 'T T T T T T T T T T T T T T T T T T F F T T T T T T T T T T T T T T T T F F F F F F F F F F F F F F') ; + C(word , '. T F F F F F F F F F F F F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(byte , '. . T F F F F F F F F F F F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(address , '. . . T F F F F F F F T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F T') ; + C(chr , '. . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(normint , '. . . . . T F F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(shortint , '. . . . . . T F F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(longint , '. . . . . . . T F F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(normcard , '. . . . . . . . T F F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(shortcard , '. . . . . . . . . T F F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(longcard , '. . . . . . . . . . T F F F F F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(pointer , '. . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(enum , '. . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(real , '. . . . . . . . . . . . . T F F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(shortreal , '. . . . . . . . . . . . . . T F F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(longreal , '. . . . . . . . . . . . . . . T F F F 2 F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(set , '. . . . . . . . . . . . . . . . 
T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(opaque , '. . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(loc , '. . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(rtype , '. . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F 1 1 1 1 F F F F F F F F F F F F F F') ; + C(ztype , '. . . . . . . . . . . . . . . . . . . . T 1 1 1 1 1 1 1 1 1 1 1 F F F F F F F F F F F F F F F F F F') ; + C(int8 , '. . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(int16 , '. . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(int32 , '. . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(int64 , '. . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(card8 , '. . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F F') ; + C(card16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F F') ; + C(card32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F F') ; + C(card64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F F F F F') ; + C(word16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F F') ; + C(word32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F F') ; + C(word64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F F F F F F F F F F F F F F F F F') ; + C(real32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F F') ; + C(real64 , '. . . . . . . . . . . . . . . . . . 
. . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F F') ; + C(real96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F F') ; + C(real128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F F') ; + C(set8 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F F') ; + C(set16 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F F') ; + C(set32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F F F F F F') ; + C(complex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F F T F F F') ; + C(shortcomplex, '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F F T F F F') ; + C(longcomplex , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F F T F F F') ; + C(complex32 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F T F F F') ; + C(complex64 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F T F F F') ; + C(complex96 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F T F F F') ; + C(complex128 , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T T F F F') ; + C(ctype , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . T F F F') ; + C(rec , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F F') ; + C(array , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . F F') ; + C(procedure , '. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
T') ; END InitCompatibilityMatrices ; diff --git a/gcc/m2/gm2-compiler/M2Check.mod b/gcc/m2/gm2-compiler/M2Check.mod index af2c7c7ccadb..9ef100e65409 100644 --- a/gcc/m2/gm2-compiler/M2Check.mod +++ b/gcc/m2/gm2-compiler/M2Check.mod @@ -221,11 +221,11 @@ BEGIN result := checkPair (result, tinfo, GetType (left), GetType (right)) ; IF (lSub # NulSym) AND (rSub # NulSym) THEN - result := checkSubrange (result, tinfo, GetSType (lSub), GetSType (rSub)) + result := checkSubrange (result, tinfo, getSType (lSub), getSType (rSub)) END ELSIF IsUnbounded (left) AND (IsArray (right) OR IsUnbounded (right)) THEN - IF IsGenericSystemType (GetSType (left)) OR IsGenericSystemType (GetSType (right)) + IF IsGenericSystemType (getSType (left)) OR IsGenericSystemType (getSType (right)) THEN RETURN true ELSE @@ -296,9 +296,12 @@ BEGIN THEN (* need to create top level error message first. *) tinfo^.error := NewError (tinfo^.token) ; + (* The parameters to MetaString4 in buildError4 must match the order + of parameters passed to ParameterTypeCompatible. *) s := MetaString4 (tinfo^.format, + tinfo^.procedure, tinfo^.left, tinfo^.right, - tinfo^.procedure, tinfo^.nth) ; + tinfo^.nth) ; ErrorString (tinfo^.error, s) END ; (* and also generate a sub error containing detail. *) @@ -396,7 +399,7 @@ BEGIN THEN IF IsVar (right) OR IsConst (right) THEN - right := GetSType (right) + right := getSType (right) END END ; IF tinfo^.strict @@ -1179,7 +1182,10 @@ END checkRecordEquivalence ; PROCEDURE getType (sym: CARDINAL) : CARDINAL ; BEGIN - IF IsTyped (sym) + IF (sym # NulSym) AND IsProcedure (sym) + THEN + RETURN Address + ELSIF IsTyped (sym) THEN RETURN GetDType (sym) ELSE @@ -1188,6 +1194,21 @@ BEGIN END getType ; +(* + getSType - +*) + +PROCEDURE getSType (sym: CARDINAL) : CARDINAL ; +BEGIN + IF IsProcedure (sym) + THEN + RETURN Address + ELSE + RETURN GetSType (sym) + END +END getSType ; + + (* determineCompatible - check for compatibility by checking equivalence, array, generic and type kind.
@@ -1459,8 +1480,8 @@ VAR tinfo : tInfo ; BEGIN tinfo := newtInfo () ; - formalT := GetSType (formal) ; - actualT := GetSType (actual) ; + formalT := getSType (formal) ; + actualT := getSType (actual) ; tinfo^.format := collapseString (format) ; tinfo^.token := token ; tinfo^.kind := parameter ; @@ -1545,11 +1566,11 @@ BEGIN THEN IF IsConst (right) OR IsVar (right) THEN - right := GetSType (right) + right := getSType (right) END ; IF IsSet (right) THEN - right := GetSType (right) + right := getSType (right) END END END ; diff --git a/gcc/m2/gm2-compiler/M2GenGCC.mod b/gcc/m2/gm2-compiler/M2GenGCC.mod index e0b024d244a5..acbfe0c179d8 100644 --- a/gcc/m2/gm2-compiler/M2GenGCC.mod +++ b/gcc/m2/gm2-compiler/M2GenGCC.mod @@ -2528,13 +2528,13 @@ BEGIN THEN IF IsVarParam (procedure, nth) AND (NOT ParameterTypeCompatible (CurrentQuadToken, - 'parameter incompatibility when attempting to pass actual parameter {%3Ead} to a {%kVAR} formal parameter {%2ad} during call to procedure {%1ad}', + 'parameter incompatibility when attempting to pass actual parameter {%2ad} to a {%kVAR} formal parameter {%3Ead} during call to procedure {%1ad}', procedure, GetNthParam (procedure, nth), parameter, nth, TRUE)) THEN ELSIF (NOT IsVarParam (procedure, nth)) AND (NOT ParameterTypeCompatible (CurrentQuadToken, - 'parameter incompatibility when attempting to pass actual parameter {%3Ead} to a formal parameter {%2ad} during call to procedure {%1ad}', + 'parameter incompatibility when attempting to pass actual parameter {%3Ead} to the {%4EN} formal parameter {%2ad} during call to procedure {%1ad}', procedure, GetNthParam (procedure, nth), parameter, nth, FALSE)) THEN (* use the AssignmentTypeCompatible as the rules are for assignment for non var parameters. 
*) diff --git a/gcc/m2/gm2-compiler/M2Quads.mod b/gcc/m2/gm2-compiler/M2Quads.mod index 83c9b99759a2..1cbd30aa49d8 100644 --- a/gcc/m2/gm2-compiler/M2Quads.mod +++ b/gcc/m2/gm2-compiler/M2Quads.mod @@ -5418,7 +5418,7 @@ BEGIN WarnStringAt (s, paramtok) END ; - BuildRange(InitTypesParameterCheck(Proc, i, FormalI, Actual)) ; + BuildRange (InitTypesParameterCheck (paramtok, Proc, i, FormalI, Actual)) ; IF IsConst(Actual) THEN IF IsVarParam(Proc, i) @@ -5482,7 +5482,7 @@ END CheckProcedureParameters ; CheckProcTypeAndProcedure - checks the ProcType with the call. *) -PROCEDURE CheckProcTypeAndProcedure (ProcType: CARDINAL; call: CARDINAL) ; +PROCEDURE CheckProcTypeAndProcedure (tokno: CARDINAL; ProcType: CARDINAL; call: CARDINAL) ; VAR n1, n2 : Name ; i, n, t : CARDINAL ; @@ -5516,14 +5516,14 @@ BEGIN ELSE i := 1 ; WHILE i<=n DO - IF IsVarParam(ProcType, i) # IsVarParam(CheckedProcedure, i) + IF IsVarParam (ProcType, i) # IsVarParam (CheckedProcedure, i) THEN - MetaError3('parameter {%3n} in {%1dD} causes a mismatch it was declared as a {%2d}', ProcType, GetNth(ProcType, i), i) ; - MetaError3('parameter {%3n} in {%1dD} causes a mismatch it was declared as a {%2d}', call, GetNth(call, i), i) + MetaError3 ('parameter {%3n} in {%1dD} causes a mismatch it was declared as a {%2d}', ProcType, GetNth (ProcType, i), i) ; + MetaError3 ('parameter {%3n} in {%1dD} causes a mismatch it was declared as a {%2d}', call, GetNth (call, i), i) END ; - BuildRange(InitTypesParameterCheck(CheckedProcedure, i, - GetParam(CheckedProcedure, i), - GetParam(ProcType, i))) ; + BuildRange (InitTypesParameterCheck (tokno, CheckedProcedure, i, + GetParam (CheckedProcedure, i), + GetParam (ProcType, i))) ; (* CheckParameter(tokpos, GetParam(CheckedProcedure, i), 0, GetParam(ProcType, i), call, i, TypeList) ; *) INC(i) END @@ -5716,7 +5716,7 @@ BEGIN END END ; (* now to check each parameter of the proc type *) - CheckProcTypeAndProcedure (FormalType, Actual) + CheckProcTypeAndProcedure (tokpos, 
FormalType, Actual) ELSIF (ActualType#FormalType) AND (ActualType#NulSym) THEN IF IsUnknown(FormalType) @@ -6414,6 +6414,7 @@ END ManipulateParameters ; PROCEDURE CheckParameterOrdinals ; VAR + tokno : CARDINAL ; Proc, ProcSym : CARDINAL ; Actual, @@ -6438,13 +6439,14 @@ BEGIN THEN FormalI := GetParam (Proc, i) ; Actual := OperandT (pi) ; + tokno := OperandTok (pi) ; IF IsOrdinalType (GetLType (FormalI)) THEN IF NOT IsSet (GetDType (FormalI)) THEN (* tell code generator to test runtime values of assignment so ensure we catch overflow and underflow *) - BuildRange (InitParameterRangeCheck (Proc, i, FormalI, Actual)) + BuildRange (InitParameterRangeCheck (tokno, Proc, i, FormalI, Actual)) END END END ; diff --git a/gcc/m2/gm2-compiler/M2Range.def b/gcc/m2/gm2-compiler/M2Range.def index 2512791b138e..a278578dd082 100644 --- a/gcc/m2/gm2-compiler/M2Range.def +++ b/gcc/m2/gm2-compiler/M2Range.def @@ -43,42 +43,6 @@ FROM m2tree IMPORT Tree ; FROM m2linemap IMPORT location_t ; FROM DynamicStrings IMPORT String ; -EXPORT QUALIFIED InitAssignmentRangeCheck, - InitReturnRangeCheck, - InitSubrangeRangeCheck, - InitStaticArraySubscriptRangeCheck, - InitDynamicArraySubscriptRangeCheck, - InitIncRangeCheck, - InitDecRangeCheck, - InitInclCheck, - InitExclCheck, - InitRotateCheck, - InitShiftCheck, - InitTypesExpressionCheck, - InitTypesAssignmentCheck, - InitTypesParameterCheck, - InitParameterRangeCheck, - InitForLoopBeginRangeCheck, - InitForLoopToRangeCheck, - InitForLoopEndRangeCheck, - InitPointerRangeCheck, - InitNoReturnRangeCheck, - InitNoElseRangeCheck, - InitCaseBounds, - InitWholeNonPosDivCheck, - InitWholeNonPosModCheck, - InitWholeZeroDivisionCheck, - InitWholeZeroRemainderCheck, - CodeRangeCheck, FoldRangeCheck, CodeErrorCheck, - (* CheckRangeAddVariableRead, *) - (* CheckRangeRemoveVariableRead, *) - WriteRangeCheck, - OverlapsRange, - IsEqual, IsGreaterOrEqual, IsGreater, - BuildIfCallRealHandlerLoc, - BuildIfCallWholeHandlerLoc, - GetMinMax ; - (* 
InitAssignmentRangeCheck - returns a range check node which @@ -286,7 +250,8 @@ PROCEDURE InitTypesAssignmentCheck (tokno: CARDINAL; d, e: CARDINAL) : CARDINAL are parameter compatible. *) -PROCEDURE InitTypesParameterCheck (proc: CARDINAL; i: CARDINAL; +PROCEDURE InitTypesParameterCheck (tokno: CARDINAL; + proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : CARDINAL ; @@ -295,7 +260,8 @@ PROCEDURE InitTypesParameterCheck (proc: CARDINAL; i: CARDINAL; are parameter compatible. *) -PROCEDURE InitParameterRangeCheck (proc: CARDINAL; i: CARDINAL; +PROCEDURE InitParameterRangeCheck (tokno: CARDINAL; + proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : CARDINAL ; diff --git a/gcc/m2/gm2-compiler/M2Range.mod b/gcc/m2/gm2-compiler/M2Range.mod index 0f7c740a1ea1..543c2784d3d3 100644 --- a/gcc/m2/gm2-compiler/M2Range.mod +++ b/gcc/m2/gm2-compiler/M2Range.mod @@ -492,7 +492,7 @@ END PutRangeUnary ; and returns, p. *) -PROCEDURE PutRangeParam (p: Range; t: TypeOfRange; proc: CARDINAL; +PROCEDURE PutRangeParam (tokno: CARDINAL; p: Range; t: TypeOfRange; proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : Range ; BEGIN WITH p^ DO @@ -504,7 +504,7 @@ BEGIN procedure := proc ; paramNo := i ; isLeftValue := FALSE ; - tokenNo := GetTokenNo () ; + tokenNo := tokno ; strict := FALSE ; isin := FALSE END ; @@ -737,13 +737,13 @@ END InitTypesAssignmentCheck ; and, e, are parameter compatible. *) -PROCEDURE InitTypesParameterCheck (proc: CARDINAL; i: CARDINAL; +PROCEDURE InitTypesParameterCheck (tokno: CARDINAL; proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : CARDINAL ; VAR r: CARDINAL ; BEGIN r := InitRange () ; - Assert (PutRangeParam (GetIndice (RangeIndex, r), typeparam, proc, i, formal, actual) # NIL) ; + Assert (PutRangeParam (tokno, GetIndice (RangeIndex, r), typeparam, proc, i, formal, actual) # NIL) ; RETURN r END InitTypesParameterCheck ; @@ -755,7 +755,7 @@ END InitTypesParameterCheck ; and returns, p. 
*) -PROCEDURE PutRangeParamAssign (p: Range; t: TypeOfRange; proc: CARDINAL; +PROCEDURE PutRangeParamAssign (tokno: CARDINAL; p: Range; t: TypeOfRange; proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : Range ; BEGIN WITH p^ DO @@ -768,7 +768,7 @@ BEGIN paramNo := i ; dimension := i ; isLeftValue := FALSE ; - tokenNo := GetTokenNo () + tokenNo := tokno END ; RETURN( p ) END PutRangeParamAssign ; @@ -779,13 +779,13 @@ END PutRangeParamAssign ; are parameter compatible. *) -PROCEDURE InitParameterRangeCheck (proc: CARDINAL; i: CARDINAL; +PROCEDURE InitParameterRangeCheck (tokno: CARDINAL; proc: CARDINAL; i: CARDINAL; formal, actual: CARDINAL) : CARDINAL ; VAR r: CARDINAL ; BEGIN r := InitRange () ; - Assert (PutRangeParamAssign (GetIndice (RangeIndex, r), paramassign, proc, i, formal, actual) # NIL) ; + Assert (PutRangeParamAssign (tokno, GetIndice (RangeIndex, r), paramassign, proc, i, formal, actual) # NIL) ; RETURN r END InitParameterRangeCheck ; @@ -1619,7 +1619,7 @@ END FoldTypeAssign ; PROCEDURE FoldTypeParam (q: CARDINAL; tokenNo: CARDINAL; formal, actual, procedure: CARDINAL; paramNo: CARDINAL) ; BEGIN IF ParameterTypeCompatible (tokenNo, - '{%4EN} type failure between actual {%3ad} and the {%2ad}', + '{%4EN} parameter type failure between actual parameter type {%3ad} and the formal type {%2ad}', procedure, formal, actual, paramNo, IsVarParam (procedure, paramNo)) THEN SubQuad(q) diff --git a/gcc/testsuite/gm2/iso/fail/proccard.mod b/gcc/testsuite/gm2/iso/fail/proccard.mod new file mode 100644 index 000000000000..4518022dab7a --- /dev/null +++ b/gcc/testsuite/gm2/iso/fail/proccard.mod @@ -0,0 +1,14 @@ +MODULE proccard ; + +FROM NumberIO IMPORT WriteCard ; +FROM StrIO IMPORT WriteString, WriteLn ; + +PROCEDURE func () : CARDINAL ; +BEGIN + RETURN 42 +END func ; + + +BEGIN + WriteString ('the value is: ') ; WriteCard (func, 5) ; WriteLn +END proccard. 
diff --git a/gcc/testsuite/gm2/pim/pass/proccard.mod b/gcc/testsuite/gm2/pim/pass/proccard.mod new file mode 100644 index 000000000000..4518022dab7a --- /dev/null +++ b/gcc/testsuite/gm2/pim/pass/proccard.mod @@ -0,0 +1,14 @@ +MODULE proccard ; + +FROM NumberIO IMPORT WriteCard ; +FROM StrIO IMPORT WriteString, WriteLn ; + +PROCEDURE func () : CARDINAL ; +BEGIN + RETURN 42 +END func ; + + +BEGIN + WriteString ('the value is: ') ; WriteCard (func, 5) ; WriteLn +END proccard. From 249404649d26f544d1ad6808625807532c2b6a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Poulhi=C3=A8s?= Date: Mon, 6 Nov 2023 11:59:18 +0100 Subject: [PATCH 045/311] testsuite: require avx_runtime for some tests These 3 tests fail parsing the 'vect' dump when not using -mavx. Make the dependency explicit. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-ifcvt-18.c: Add dep on avx_runtime. * gcc.dg/vect/vect-simd-clone-16f.c: Likewise. * gcc.dg/vect/vect-simd-clone-18f.c: Likewise. --- gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c | 3 ++- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c | 4 ++-- gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c index c1d3c27d8193..607194496e90 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c +++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c @@ -1,6 +1,7 @@ /* { dg-require-effective-target vect_condition } */ /* { dg-require-effective-target vect_float } */ -/* { dg-additional-options "-Ofast -mavx" { target avx_runtime } } */ +/* { dg-require-effective-target avx_runtime } */ +/* { dg-additional-options "-Ofast -mavx" } */ int A0[4] = {36,39,42,45}; diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c index 7cd29e894d05..c6615dc626dd 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c @@
-1,6 +1,6 @@ /* { dg-require-effective-target vect_simd_clones } */ -/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */ -/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } && { ! lp64 } } } } */ #define TYPE __INT64_TYPE__ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c index 4dd51381d73c..787b918d0c46 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target vect_simd_clones } */ -/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */ -/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 -mavx" } */ +/* { dg-require-effective-target avx_runtime } */ /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } && { ! lp64 } } } } */ #define TYPE __INT64_TYPE__ From 45eb2c703c02fa2a5467efcdfb27692df37e4d75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Poulhi=C3=A8s?= Date: Mon, 6 Nov 2023 12:01:17 +0100 Subject: [PATCH 046/311] testsuite: refine gcc.dg/analyzer/fd-4.c test for newlib Contrary to glibc, including stdio.h from newlib defines mode_t which conflicts with the test's type definition. .../gcc/testsuite/gcc.dg/analyzer/fd-4.c:19:3: error: redefinition of typedef 'mode_t' with different type ... .../include/sys/types.h:189:25: note: previous declaration of 'mode_t' with type 'mode_t' {aka 'unsigned int'} Defining _MODE_T_DECLARED skips the type definition. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/fd-4.c: Fix for newlib. 
--- gcc/testsuite/gcc.dg/analyzer/fd-4.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-4.c b/gcc/testsuite/gcc.dg/analyzer/fd-4.c index 3a961e4f8137..880de3d78960 100644 --- a/gcc/testsuite/gcc.dg/analyzer/fd-4.c +++ b/gcc/testsuite/gcc.dg/analyzer/fd-4.c @@ -1,3 +1,4 @@ +/* { dg-additional-options "-D_MODE_T_DECLARED=1" { target newlib } } */ #if defined(_AIX) || defined(__hpux) #define _MODE_T #endif From d36cac18724a63b1dbcbe722015232c72214ce93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Poulhi=C3=A8s?= Date: Mon, 6 Nov 2023 11:57:16 +0100 Subject: [PATCH 047/311] testsuite: skip gcc.target/i386/pr106910-1.c test when using newlib Using newlib produces a different codegen because the support for c99 differs (see libc_has_function hook). gcc/testsuite/ChangeLog: * gcc.target/i386/pr106910-1.c: Disable for newlib. --- gcc/testsuite/gcc.target/i386/pr106910-1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/i386/pr106910-1.c b/gcc/testsuite/gcc.target/i386/pr106910-1.c index c7685a321833..13901a1fe89f 100644 --- a/gcc/testsuite/gcc.target/i386/pr106910-1.c +++ b/gcc/testsuite/gcc.target/i386/pr106910-1.c @@ -1,4 +1,5 @@ /* { dg-do compile { target { ! ia32 } } } */ +/* { dg-skip-if "newlib libc math causes different codegen" { newlib } } */ /* { dg-options "-msse4.1 -O2 -Ofast" } */ /* { dg-final { scan-assembler-times "roundps" 9 } } */ /* { dg-final { scan-assembler-times "cvtps2dq" 1 } } */ From f908368d2cb50b26c4557df6b37bc13a3723ef49 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Thu, 7 Dec 2023 12:58:20 -0300 Subject: [PATCH 048/311] strub: enable conditional support Targets that don't expose callee stacks to callers, such as nvptx, as well as -fsplit-stack compilations, violate fundamental assumptions of the current strub implementation. This patch enables targets to disable strub, and disables it when -fsplit-stack is enabled. 
When strub support is disabled, the testsuite will now skip strub tests, and libgcc will not build the strub runtime components. for gcc/ChangeLog * target.def (have_strub_support_for): New hook. * doc/tm.texi.in: Document it. * doc/tm.texi: Rebuild. * ipa-strub.cc: Include target.h. (strub_target_support_p): New. (can_strub_p): Call it. Test for no flag_split_stack. (pass_ipa_strub::adjust_at_calls_call): Check for target support. * config/nvptx/nvptx.cc (TARGET_HAVE_STRUB_SUPPORT_FOR): Disable. * doc/sourcebuild.texi (strub): Document new effective target. for gcc/testsuite/ChangeLog * c-c++-common/strub-split-stack.c: New. * c-c++-common/strub-unsupported.c: New. * c-c++-common/strub-unsupported-2.c: New. * c-c++-common/strub-unsupported-3.c: New. * lib/target-supports.exp (check_effective_target_strub): New. * c-c++-common/strub-O0.c: Require effective target strub. * c-c++-common/strub-O1.c: Likewise. * c-c++-common/strub-O2.c: Likewise. * c-c++-common/strub-O2fni.c: Likewise. * c-c++-common/strub-O3.c: Likewise. * c-c++-common/strub-O3fni.c: Likewise. * c-c++-common/strub-Og.c: Likewise. * c-c++-common/strub-Os.c: Likewise. * c-c++-common/strub-all1.c: Likewise. * c-c++-common/strub-all2.c: Likewise. * c-c++-common/strub-apply1.c: Likewise. * c-c++-common/strub-apply2.c: Likewise. * c-c++-common/strub-apply3.c: Likewise. * c-c++-common/strub-apply4.c: Likewise. * c-c++-common/strub-at-calls1.c: Likewise. * c-c++-common/strub-at-calls2.c: Likewise. * c-c++-common/strub-defer-O1.c: Likewise. * c-c++-common/strub-defer-O2.c: Likewise. * c-c++-common/strub-defer-O3.c: Likewise. * c-c++-common/strub-defer-Os.c: Likewise. * c-c++-common/strub-internal1.c: Likewise. * c-c++-common/strub-internal2.c: Likewise. * c-c++-common/strub-parms1.c: Likewise. * c-c++-common/strub-parms2.c: Likewise. * c-c++-common/strub-parms3.c: Likewise. * c-c++-common/strub-relaxed1.c: Likewise. * c-c++-common/strub-relaxed2.c: Likewise. * c-c++-common/strub-short-O0-exc.c: Likewise. 
* c-c++-common/strub-short-O0.c: Likewise. * c-c++-common/strub-short-O1.c: Likewise. * c-c++-common/strub-short-O2.c: Likewise. * c-c++-common/strub-short-O3.c: Likewise. * c-c++-common/strub-short-Os.c: Likewise. * c-c++-common/strub-strict1.c: Likewise. * c-c++-common/strub-strict2.c: Likewise. * c-c++-common/strub-tail-O1.c: Likewise. * c-c++-common/strub-tail-O2.c: Likewise. * c-c++-common/strub-var1.c: Likewise. * c-c++-common/torture/strub-callable1.c: Likewise. * c-c++-common/torture/strub-callable2.c: Likewise. * c-c++-common/torture/strub-const1.c: Likewise. * c-c++-common/torture/strub-const2.c: Likewise. * c-c++-common/torture/strub-const3.c: Likewise. * c-c++-common/torture/strub-const4.c: Likewise. * c-c++-common/torture/strub-data1.c: Likewise. * c-c++-common/torture/strub-data2.c: Likewise. * c-c++-common/torture/strub-data3.c: Likewise. * c-c++-common/torture/strub-data4.c: Likewise. * c-c++-common/torture/strub-data5.c: Likewise. * c-c++-common/torture/strub-indcall1.c: Likewise. * c-c++-common/torture/strub-indcall2.c: Likewise. * c-c++-common/torture/strub-indcall3.c: Likewise. * c-c++-common/torture/strub-inlinable1.c: Likewise. * c-c++-common/torture/strub-inlinable2.c: Likewise. * c-c++-common/torture/strub-ptrfn1.c: Likewise. * c-c++-common/torture/strub-ptrfn2.c: Likewise. * c-c++-common/torture/strub-ptrfn3.c: Likewise. * c-c++-common/torture/strub-ptrfn4.c: Likewise. * c-c++-common/torture/strub-pure1.c: Likewise. * c-c++-common/torture/strub-pure2.c: Likewise. * c-c++-common/torture/strub-pure3.c: Likewise. * c-c++-common/torture/strub-pure4.c: Likewise. * c-c++-common/torture/strub-run1.c: Likewise. * c-c++-common/torture/strub-run2.c: Likewise. * c-c++-common/torture/strub-run3.c: Likewise. * c-c++-common/torture/strub-run4.c: Likewise. * c-c++-common/torture/strub-run4c.c: Likewise. * c-c++-common/torture/strub-run4d.c: Likewise. * c-c++-common/torture/strub-run4i.c: Likewise. * g++.dg/strub-run1.C: Likewise. 
* g++.dg/torture/strub-init1.C: Likewise. * g++.dg/torture/strub-init2.C: Likewise. * g++.dg/torture/strub-init3.C: Likewise. * gnat.dg/strub_attr.adb: Likewise. * gnat.dg/strub_ind.adb: Likewise. * gnat.dg/strub_access.adb: Likewise. * gnat.dg/strub_access1.adb: Likewise. * gnat.dg/strub_disp.adb: Likewise. * gnat.dg/strub_disp1.adb: Likewise. * gnat.dg/strub_ind1.adb: Likewise. * gnat.dg/strub_ind2.adb: Likewise. * gnat.dg/strub_intf.adb: Likewise. * gnat.dg/strub_intf1.adb: Likewise. * gnat.dg/strub_intf2.adb: Likewise. * gnat.dg/strub_renm.adb: Likewise. * gnat.dg/strub_renm1.adb: Likewise. * gnat.dg/strub_renm2.adb: Likewise. * gnat.dg/strub_var.adb: Likewise. * gnat.dg/strub_var1.adb: Likewise. for libgcc/ChangeLog * configure.ac: Check for strub support. * configure: Rebuilt. * Makefile.in: Compile strub.c conditionally. --- gcc/config/nvptx/nvptx.cc | 3 ++ gcc/doc/sourcebuild.texi | 3 ++ gcc/doc/tm.texi | 6 +++ gcc/doc/tm.texi.in | 2 + gcc/ipa-strub.cc | 54 ++++++++++++++++++- gcc/target.def | 8 +++ gcc/testsuite/c-c++-common/strub-O0.c | 1 + gcc/testsuite/c-c++-common/strub-O1.c | 1 + gcc/testsuite/c-c++-common/strub-O2.c | 1 + gcc/testsuite/c-c++-common/strub-O2fni.c | 1 + gcc/testsuite/c-c++-common/strub-O3.c | 1 + gcc/testsuite/c-c++-common/strub-O3fni.c | 1 + gcc/testsuite/c-c++-common/strub-Og.c | 1 + gcc/testsuite/c-c++-common/strub-Os.c | 1 + gcc/testsuite/c-c++-common/strub-all1.c | 1 + gcc/testsuite/c-c++-common/strub-all2.c | 1 + gcc/testsuite/c-c++-common/strub-apply1.c | 1 + gcc/testsuite/c-c++-common/strub-apply2.c | 1 + gcc/testsuite/c-c++-common/strub-apply3.c | 1 + gcc/testsuite/c-c++-common/strub-apply4.c | 1 + gcc/testsuite/c-c++-common/strub-at-calls1.c | 1 + gcc/testsuite/c-c++-common/strub-at-calls2.c | 1 + gcc/testsuite/c-c++-common/strub-defer-O1.c | 1 + gcc/testsuite/c-c++-common/strub-defer-O2.c | 1 + gcc/testsuite/c-c++-common/strub-defer-O3.c | 1 + gcc/testsuite/c-c++-common/strub-defer-Os.c | 1 + 
gcc/testsuite/c-c++-common/strub-internal1.c | 1 + gcc/testsuite/c-c++-common/strub-internal2.c | 1 + gcc/testsuite/c-c++-common/strub-parms1.c | 1 + gcc/testsuite/c-c++-common/strub-parms2.c | 1 + gcc/testsuite/c-c++-common/strub-parms3.c | 1 + gcc/testsuite/c-c++-common/strub-relaxed1.c | 1 + gcc/testsuite/c-c++-common/strub-relaxed2.c | 1 + .../c-c++-common/strub-short-O0-exc.c | 1 + gcc/testsuite/c-c++-common/strub-short-O0.c | 1 + gcc/testsuite/c-c++-common/strub-short-O1.c | 1 + gcc/testsuite/c-c++-common/strub-short-O2.c | 1 + gcc/testsuite/c-c++-common/strub-short-O3.c | 1 + gcc/testsuite/c-c++-common/strub-short-Os.c | 1 + .../c-c++-common/strub-split-stack.c | 10 ++++ gcc/testsuite/c-c++-common/strub-strict1.c | 1 + gcc/testsuite/c-c++-common/strub-strict2.c | 1 + gcc/testsuite/c-c++-common/strub-tail-O1.c | 1 + gcc/testsuite/c-c++-common/strub-tail-O2.c | 1 + .../c-c++-common/strub-unsupported-2.c | 13 +++++ .../c-c++-common/strub-unsupported-3.c | 18 +++++++ .../c-c++-common/strub-unsupported.c | 21 ++++++++ gcc/testsuite/c-c++-common/strub-var1.c | 1 + .../c-c++-common/torture/strub-callable1.c | 1 + .../c-c++-common/torture/strub-callable2.c | 1 + .../c-c++-common/torture/strub-const1.c | 1 + .../c-c++-common/torture/strub-const2.c | 1 + .../c-c++-common/torture/strub-const3.c | 1 + .../c-c++-common/torture/strub-const4.c | 1 + .../c-c++-common/torture/strub-data1.c | 1 + .../c-c++-common/torture/strub-data2.c | 1 + .../c-c++-common/torture/strub-data3.c | 1 + .../c-c++-common/torture/strub-data4.c | 1 + .../c-c++-common/torture/strub-data5.c | 1 + .../c-c++-common/torture/strub-indcall1.c | 1 + .../c-c++-common/torture/strub-indcall2.c | 1 + .../c-c++-common/torture/strub-indcall3.c | 1 + .../c-c++-common/torture/strub-inlinable1.c | 1 + .../c-c++-common/torture/strub-inlinable2.c | 1 + .../c-c++-common/torture/strub-ptrfn1.c | 1 + .../c-c++-common/torture/strub-ptrfn2.c | 1 + .../c-c++-common/torture/strub-ptrfn3.c | 1 + 
.../c-c++-common/torture/strub-ptrfn4.c | 1 + .../c-c++-common/torture/strub-pure1.c | 1 + .../c-c++-common/torture/strub-pure2.c | 1 + .../c-c++-common/torture/strub-pure3.c | 1 + .../c-c++-common/torture/strub-pure4.c | 1 + .../c-c++-common/torture/strub-run1.c | 1 + .../c-c++-common/torture/strub-run2.c | 1 + .../c-c++-common/torture/strub-run3.c | 1 + .../c-c++-common/torture/strub-run4.c | 1 + .../c-c++-common/torture/strub-run4c.c | 1 + .../c-c++-common/torture/strub-run4d.c | 1 + .../c-c++-common/torture/strub-run4i.c | 1 + gcc/testsuite/g++.dg/strub-run1.C | 1 + gcc/testsuite/g++.dg/torture/strub-init1.C | 1 + gcc/testsuite/g++.dg/torture/strub-init2.C | 1 + gcc/testsuite/g++.dg/torture/strub-init3.C | 1 + gcc/testsuite/gnat.dg/strub_access.adb | 1 + gcc/testsuite/gnat.dg/strub_access1.adb | 1 + gcc/testsuite/gnat.dg/strub_attr.adb | 1 + gcc/testsuite/gnat.dg/strub_disp.adb | 1 + gcc/testsuite/gnat.dg/strub_disp1.adb | 1 + gcc/testsuite/gnat.dg/strub_ind.adb | 1 + gcc/testsuite/gnat.dg/strub_ind1.adb | 1 + gcc/testsuite/gnat.dg/strub_ind2.adb | 1 + gcc/testsuite/gnat.dg/strub_intf.adb | 1 + gcc/testsuite/gnat.dg/strub_intf1.adb | 1 + gcc/testsuite/gnat.dg/strub_intf2.adb | 1 + gcc/testsuite/gnat.dg/strub_renm.adb | 1 + gcc/testsuite/gnat.dg/strub_renm1.adb | 1 + gcc/testsuite/gnat.dg/strub_renm2.adb | 1 + gcc/testsuite/gnat.dg/strub_var.adb | 1 + gcc/testsuite/gnat.dg/strub_var1.adb | 1 + gcc/testsuite/lib/target-supports.exp | 7 +++ libgcc/Makefile.in | 2 +- libgcc/configure | 26 +++++++++ libgcc/configure.ac | 13 +++++ 103 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/strub-split-stack.c create mode 100644 gcc/testsuite/c-c++-common/strub-unsupported-2.c create mode 100644 gcc/testsuite/c-c++-common/strub-unsupported-3.c create mode 100644 gcc/testsuite/c-c++-common/strub-unsupported.c diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index ae20802c8799..3fb1deb70fda 100644 --- 
a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -7789,6 +7789,9 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) #undef TARGET_LIBC_HAS_FUNCTION #define TARGET_LIBC_HAS_FUNCTION nvptx_libc_has_function +#undef TARGET_HAVE_STRUB_SUPPORT_FOR +#define TARGET_HAVE_STRUB_SUPPORT_FOR hook_bool_tree_false + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h" diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index c99090268541..26a7e9c35070 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2983,6 +2983,9 @@ Target supports statically linking @samp{libgfortran}. @item string_merging Target supports merging string constants at link time. +@item strub +Target supports attribute @code{strub} for stack scrubbing. + @item ucn Target supports compiling and assembling UCN. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 89a1735dd799..768ada0af522 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -3450,6 +3450,12 @@ in DWARF 2 debug information. The default is zero. A different value may reduce the size of debug information on some ports. @end defmac +@deftypefn {Target Hook} bool TARGET_HAVE_STRUB_SUPPORT_FOR (tree) +Returns true if the target supports stack scrubbing for the given function +or type, otherwise return false. The default implementation always returns +true. +@end deftypefn + @defmac TARGET_STRUB_USE_DYNAMIC_ARRAY If defined to nonzero, @code{__strub_leave} will allocate a dynamic array covering the stack range that needs scrubbing before clearing it. diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index ebc1d3de5caa..4fe0805394ea 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -2686,6 +2686,8 @@ in DWARF 2 debug information. The default is zero. A different value may reduce the size of debug information on some ports. 
@end defmac +@hook TARGET_HAVE_STRUB_SUPPORT_FOR + @defmac TARGET_STRUB_USE_DYNAMIC_ARRAY If defined to nonzero, @code{__strub_leave} will allocate a dynamic array covering the stack range that needs scrubbing before clearing it. diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc index 293bec132b88..2afb7a455751 100644 --- a/gcc/ipa-strub.cc +++ b/gcc/ipa-strub.cc @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-strub.h" #include "symtab-thunks.h" #include "attr-fnspec.h" +#include "target.h" /* This file introduces two passes that, together, implement machine-independent stack scrubbing, strub for short. It arranges @@ -631,17 +632,60 @@ strub_always_inline_p (cgraph_node *node) return lookup_attribute ("always_inline", DECL_ATTRIBUTES (node->decl)); } +/* Return TRUE iff the target has strub support for T, a function + decl, or a type used in an indirect call, and optionally REPORT the + reasons for ineligibility. If T is a type and error REPORTing is + enabled, the LOCation (of the indirect call) should be provided. */ +static inline bool +strub_target_support_p (tree t, bool report = false, + location_t loc = UNKNOWN_LOCATION) +{ + bool result = true; + + if (!targetm.have_strub_support_for (t)) + { + result = false; + + if (!report) + return result; + + if (DECL_P (t)) + sorry_at (DECL_SOURCE_LOCATION (t), + "%qD is not eligible for %<strub%>" + " on the target system", t); + else + sorry_at (loc, + "unsupported %<strub%> call" + " on the target system"); + } + + return result; +} + /* Return TRUE iff NODE is potentially eligible for any strub-enabled mode, and optionally REPORT the reasons for ineligibility.
*/ static inline bool can_strub_p (cgraph_node *node, bool report = false) { - bool result = true; + bool result = strub_target_support_p (node->decl, report); - if (!report && strub_always_inline_p (node)) + if (!report && (!result || strub_always_inline_p (node))) return result; + if (flag_split_stack) + { + result = false; + + if (!report) + return result; + + sorry_at (DECL_SOURCE_LOCATION (node->decl), + "%qD is not eligible for %<strub%>" + " because %<-fsplit-stack%> is enabled", + node->decl); + } + if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl))) { result = false; @@ -2417,6 +2461,12 @@ pass_ipa_strub::adjust_at_calls_call (cgraph_edge *e, int named_args, && (TREE_TYPE (gimple_call_arg (ocall, named_args)) == get_pwmt ()))); + tree tsup; + if (!(tsup = gimple_call_fndecl (ocall))) + tsup = TREE_TYPE (TREE_TYPE (gimple_call_fn (ocall))); + if (!strub_target_support_p (tsup, true, gimple_location (ocall))) + return; + /* If we're already within a strub context, pass on the incoming watermark pointer, and omit the enter and leave calls around the modified call, as an optimization, or as a means to satisfy a tail-call requirement. */ diff --git a/gcc/target.def b/gcc/target.def index 52b83e091b94..08218f3a42ad 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -4457,6 +4457,14 @@ otherwise return false. The default implementation always returns true.", bool, (void), hook_bool_void_true) +DEFHOOK +(have_strub_support_for, + "Returns true if the target supports stack scrubbing for the given function\n\ +or type, otherwise return false.
The default implementation always returns\n\ +true.", + bool, (tree), + hook_bool_tree_true) + DEFHOOK (have_speculation_safe_value, "This hook is used to determine the level of target support for\n\ diff --git a/gcc/testsuite/c-c++-common/strub-O0.c b/gcc/testsuite/c-c++-common/strub-O0.c index c7a79a6ea0d8..f0a3f7b4c6f9 100644 --- a/gcc/testsuite/c-c++-common/strub-O0.c +++ b/gcc/testsuite/c-c++-common/strub-O0.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O0 -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ /* At -O0, none of the strub builtins are expanded inline. */ diff --git a/gcc/testsuite/c-c++-common/strub-O1.c b/gcc/testsuite/c-c++-common/strub-O1.c index 96285c975d98..50403426b18f 100644 --- a/gcc/testsuite/c-c++-common/strub-O1.c +++ b/gcc/testsuite/c-c++-common/strub-O1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ /* At -O1, without -fno-inline, we fully expand enter, but neither update nor leave. */ diff --git a/gcc/testsuite/c-c++-common/strub-O2.c b/gcc/testsuite/c-c++-common/strub-O2.c index 8edc0d8aa132..37e02998e318 100644 --- a/gcc/testsuite/c-c++-common/strub-O2.c +++ b/gcc/testsuite/c-c++-common/strub-O2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ /* At -O2, without -fno-inline, we fully expand enter and update, and add a test around the leave call. */ diff --git a/gcc/testsuite/c-c++-common/strub-O2fni.c b/gcc/testsuite/c-c++-common/strub-O2fni.c index c6d900cf3c45..905e2c6b2ffc 100644 --- a/gcc/testsuite/c-c++-common/strub-O2fni.c +++ b/gcc/testsuite/c-c++-common/strub-O2fni.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fstrub=strict -fdump-rtl-expand -fno-inline" } */ +/* { dg-require-effective-target strub } */ /* With -fno-inline, none of the strub builtins are inlined. 
*/ diff --git a/gcc/testsuite/c-c++-common/strub-O3.c b/gcc/testsuite/c-c++-common/strub-O3.c index 33ee465e51cb..3bbf132bdf1e 100644 --- a/gcc/testsuite/c-c++-common/strub-O3.c +++ b/gcc/testsuite/c-c++-common/strub-O3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O3 -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ int __attribute__ ((__strub__)) var; diff --git a/gcc/testsuite/c-c++-common/strub-O3fni.c b/gcc/testsuite/c-c++-common/strub-O3fni.c index 2936f82079e1..c46fce38e5c9 100644 --- a/gcc/testsuite/c-c++-common/strub-O3fni.c +++ b/gcc/testsuite/c-c++-common/strub-O3fni.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O3 -fstrub=strict -fdump-rtl-expand -fno-inline" } */ +/* { dg-require-effective-target strub } */ /* With -fno-inline, none of the strub builtins are inlined. */ diff --git a/gcc/testsuite/c-c++-common/strub-Og.c b/gcc/testsuite/c-c++-common/strub-Og.c index 479746e57d87..3b8eb19765cd 100644 --- a/gcc/testsuite/c-c++-common/strub-Og.c +++ b/gcc/testsuite/c-c++-common/strub-Og.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-Og -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ /* At -Og, without -fno-inline, we fully expand enter, but neither update nor leave. */ diff --git a/gcc/testsuite/c-c++-common/strub-Os.c b/gcc/testsuite/c-c++-common/strub-Os.c index 2241d4ea07f2..8cfb253d6764 100644 --- a/gcc/testsuite/c-c++-common/strub-Os.c +++ b/gcc/testsuite/c-c++-common/strub-Os.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-Os -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-require-effective-target strub } */ /* At -Os, without -fno-inline, we fully expand enter, and also update. 
The expanded update might be larger than a call proper, but argument saving and diff --git a/gcc/testsuite/c-c++-common/strub-all1.c b/gcc/testsuite/c-c++-common/strub-all1.c index a322bcc5da60..2037f681f297 100644 --- a/gcc/testsuite/c-c++-common/strub-all1.c +++ b/gcc/testsuite/c-c++-common/strub-all1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=all -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* h becomes STRUB_CALLABLE, rather than STRUB_INLINABLE, because of the strub-enabling -fstrub flag, and gets inlined before pass_ipa_strub. */ diff --git a/gcc/testsuite/c-c++-common/strub-all2.c b/gcc/testsuite/c-c++-common/strub-all2.c index db60026d0e08..c026e7d9d289 100644 --- a/gcc/testsuite/c-c++-common/strub-all2.c +++ b/gcc/testsuite/c-c++-common/strub-all2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=all -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* g becomes STRUB_INTERNAL, because of the flag. 
Without inline, force_output is set for static non-inline functions when not optimizing, and that keeps diff --git a/gcc/testsuite/c-c++-common/strub-apply1.c b/gcc/testsuite/c-c++-common/strub-apply1.c index 2f462adc1efe..3edc89c54eea 100644 --- a/gcc/testsuite/c-c++-common/strub-apply1.c +++ b/gcc/testsuite/c-c++-common/strub-apply1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ void __attribute__ ((__strub__ ("callable"))) apply_function (void *args) diff --git a/gcc/testsuite/c-c++-common/strub-apply2.c b/gcc/testsuite/c-c++-common/strub-apply2.c index a5d7551f5da5..838fc7527345 100644 --- a/gcc/testsuite/c-c++-common/strub-apply2.c +++ b/gcc/testsuite/c-c++-common/strub-apply2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ extern void __attribute__ ((__strub__)) apply_function (void *args); diff --git a/gcc/testsuite/c-c++-common/strub-apply3.c b/gcc/testsuite/c-c++-common/strub-apply3.c index 64422a0d1e88..0206e4d930e7 100644 --- a/gcc/testsuite/c-c++-common/strub-apply3.c +++ b/gcc/testsuite/c-c++-common/strub-apply3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ void __attribute__ ((__strub__)) apply_function (void *args) diff --git a/gcc/testsuite/c-c++-common/strub-apply4.c b/gcc/testsuite/c-c++-common/strub-apply4.c index 15ffaa031b89..e82504728b2c 100644 --- a/gcc/testsuite/c-c++-common/strub-apply4.c +++ b/gcc/testsuite/c-c++-common/strub-apply4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fstrub=strict -fdump-ipa-strubm" } */ +/* { dg-require-effective-target strub } */ /* Check that implicit enabling of strub mode selects internal strub when the function uses __builtin_apply_args, that prevents the optimization to diff --git a/gcc/testsuite/c-c++-common/strub-at-calls1.c 
b/gcc/testsuite/c-c++-common/strub-at-calls1.c index b70843b4215a..a20acc0a48a5 100644 --- a/gcc/testsuite/c-c++-common/strub-at-calls1.c +++ b/gcc/testsuite/c-c++-common/strub-at-calls1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=at-calls -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* h becomes STRUB_CALLABLE, rather than STRUB_INLINABLE, because of the strub-enabling -fstrub flag, and gets inlined before pass_ipa_strub. */ diff --git a/gcc/testsuite/c-c++-common/strub-at-calls2.c b/gcc/testsuite/c-c++-common/strub-at-calls2.c index 97a3988a6b92..7915b33a39a0 100644 --- a/gcc/testsuite/c-c++-common/strub-at-calls2.c +++ b/gcc/testsuite/c-c++-common/strub-at-calls2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=at-calls -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* g does NOT become STRUB_AT_CALLS because it's not viable. Without inline, force_output is set for static non-inline functions when not optimizing, and diff --git a/gcc/testsuite/c-c++-common/strub-defer-O1.c b/gcc/testsuite/c-c++-common/strub-defer-O1.c index 3d73431b3dcd..3689998b5a32 100644 --- a/gcc/testsuite/c-c++-common/strub-defer-O1.c +++ b/gcc/testsuite/c-c++-common/strub-defer-O1.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict -O1" } */ +/* { dg-require-effective-target strub } */ /* Check that a strub function called by another strub function does NOT defer the strubbing to its caller at -O1. 
*/ diff --git a/gcc/testsuite/c-c++-common/strub-defer-O2.c b/gcc/testsuite/c-c++-common/strub-defer-O2.c index fddf3c745e7e..9e01949db6be 100644 --- a/gcc/testsuite/c-c++-common/strub-defer-O2.c +++ b/gcc/testsuite/c-c++-common/strub-defer-O2.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict -O2" } */ +/* { dg-require-effective-target strub } */ /* Check that a strub function called by another strub function does NOT defer the strubbing to its caller at -O2. */ diff --git a/gcc/testsuite/c-c++-common/strub-defer-O3.c b/gcc/testsuite/c-c++-common/strub-defer-O3.c index 7ebc65b58dd7..40ee8edd1e0e 100644 --- a/gcc/testsuite/c-c++-common/strub-defer-O3.c +++ b/gcc/testsuite/c-c++-common/strub-defer-O3.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict -O3" } */ +/* { dg-require-effective-target strub } */ /* Check that a strub function called by another strub function defers the strubbing to its caller at -O3. */ diff --git a/gcc/testsuite/c-c++-common/strub-defer-Os.c b/gcc/testsuite/c-c++-common/strub-defer-Os.c index fbaf85fe0faf..67ea9f046397 100644 --- a/gcc/testsuite/c-c++-common/strub-defer-Os.c +++ b/gcc/testsuite/c-c++-common/strub-defer-Os.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict -Os" } */ +/* { dg-require-effective-target strub } */ /* Check that a strub function called by another strub function defers the strubbing to its caller at -Os. */ diff --git a/gcc/testsuite/c-c++-common/strub-internal1.c b/gcc/testsuite/c-c++-common/strub-internal1.c index e9d7b7b9ee0a..d17254904e50 100644 --- a/gcc/testsuite/c-c++-common/strub-internal1.c +++ b/gcc/testsuite/c-c++-common/strub-internal1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=internal -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* h becomes STRUB_CALLABLE, rather than STRUB_INLINABLE, because of the strub-enabling -fstrub flag, and gets inlined before pass_ipa_strub. 
*/ diff --git a/gcc/testsuite/c-c++-common/strub-internal2.c b/gcc/testsuite/c-c++-common/strub-internal2.c index 8b8e15a51c71..afc9189701f8 100644 --- a/gcc/testsuite/c-c++-common/strub-internal2.c +++ b/gcc/testsuite/c-c++-common/strub-internal2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=internal -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* g becomes STRUB_INTERNAL, because of the flag. */ static void diff --git a/gcc/testsuite/c-c++-common/strub-parms1.c b/gcc/testsuite/c-c++-common/strub-parms1.c index 0a4a7539d348..f410b268971a 100644 --- a/gcc/testsuite/c-c++-common/strub-parms1.c +++ b/gcc/testsuite/c-c++-common/strub-parms1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ #include <stdarg.h> diff --git a/gcc/testsuite/c-c++-common/strub-parms2.c b/gcc/testsuite/c-c++-common/strub-parms2.c index 147171d96d5a..6f572115a88c 100644 --- a/gcc/testsuite/c-c++-common/strub-parms2.c +++ b/gcc/testsuite/c-c++-common/strub-parms2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ #include <stdarg.h> diff --git a/gcc/testsuite/c-c++-common/strub-parms3.c b/gcc/testsuite/c-c++-common/strub-parms3.c index 4e92682895a4..7383fea9ce88 100644 --- a/gcc/testsuite/c-c++-common/strub-parms3.c +++ b/gcc/testsuite/c-c++-common/strub-parms3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that uses of a strub variable implicitly enables internal strub for publicly-visible functions, and causes the same transformations to their diff --git a/gcc/testsuite/c-c++-common/strub-relaxed1.c b/gcc/testsuite/c-c++-common/strub-relaxed1.c index e2f9d8aebca5..d2b4b52c51e6 100644 --- a/gcc/testsuite/c-c++-common/strub-relaxed1.c +++
b/gcc/testsuite/c-c++-common/strub-relaxed1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The difference between relaxed and strict in this case is that we accept the call from one internal-strub function to another. Without the error, diff --git a/gcc/testsuite/c-c++-common/strub-relaxed2.c b/gcc/testsuite/c-c++-common/strub-relaxed2.c index 98474435d2e5..9e5a8e76b6c3 100644 --- a/gcc/testsuite/c-c++-common/strub-relaxed2.c +++ b/gcc/testsuite/c-c++-common/strub-relaxed2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed -fdump-ipa-strubm -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The difference between relaxed and strict in this case is that we accept the call from one internal-strub function to another. */ diff --git a/gcc/testsuite/c-c++-common/strub-short-O0-exc.c b/gcc/testsuite/c-c++-common/strub-short-O0-exc.c index 1de15342595e..aaeba2a2159a 100644 --- a/gcc/testsuite/c-c++-common/strub-short-O0-exc.c +++ b/gcc/testsuite/c-c++-common/strub-short-O0-exc.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O0 -fstrub=strict -fexceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. */ diff --git a/gcc/testsuite/c-c++-common/strub-short-O0.c b/gcc/testsuite/c-c++-common/strub-short-O0.c index f9209c819004..30cbdd819f17 100644 --- a/gcc/testsuite/c-c++-common/strub-short-O0.c +++ b/gcc/testsuite/c-c++-common/strub-short-O0.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O0 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. 
*/ diff --git a/gcc/testsuite/c-c++-common/strub-short-O1.c b/gcc/testsuite/c-c++-common/strub-short-O1.c index bed1dcfb54a4..911fdfb6db9a 100644 --- a/gcc/testsuite/c-c++-common/strub-short-O1.c +++ b/gcc/testsuite/c-c++-common/strub-short-O1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. */ diff --git a/gcc/testsuite/c-c++-common/strub-short-O2.c b/gcc/testsuite/c-c++-common/strub-short-O2.c index 6bf0071f52b9..9b23ee3ac331 100644 --- a/gcc/testsuite/c-c++-common/strub-short-O2.c +++ b/gcc/testsuite/c-c++-common/strub-short-O2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. */ diff --git a/gcc/testsuite/c-c++-common/strub-short-O3.c b/gcc/testsuite/c-c++-common/strub-short-O3.c index 4732f515bf70..4b3a8f843ea1 100644 --- a/gcc/testsuite/c-c++-common/strub-short-O3.c +++ b/gcc/testsuite/c-c++-common/strub-short-O3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O3 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. At -O3 and -Os, we omit enter and leave calls within strub contexts, passing on the enclosing diff --git a/gcc/testsuite/c-c++-common/strub-short-Os.c b/gcc/testsuite/c-c++-common/strub-short-Os.c index 8d6424c479a3..3627a2406000 100644 --- a/gcc/testsuite/c-c++-common/strub-short-Os.c +++ b/gcc/testsuite/c-c++-common/strub-short-Os.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-Os -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. 
At -O3 and -Os, we omit enter and leave calls within strub contexts, passing on the enclosing diff --git a/gcc/testsuite/c-c++-common/strub-split-stack.c b/gcc/testsuite/c-c++-common/strub-split-stack.c new file mode 100644 index 000000000000..7a030cdb9e9e --- /dev/null +++ b/gcc/testsuite/c-c++-common/strub-split-stack.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-fsplit-stack" } */ +/* { dg-require-effective-target strub } */ +/* { dg-require-effective-target split_stack } */ + +void __attribute__ ((__strub__)) +f () {} /* { dg-message "not eligible|requested" } */ + +void __attribute__ ((__strub__ ("internal"))) +g () {} /* { dg-message "not eligible|requested" } */ diff --git a/gcc/testsuite/c-c++-common/strub-strict1.c b/gcc/testsuite/c-c++-common/strub-strict1.c index 368522442066..503eb1734e36 100644 --- a/gcc/testsuite/c-c++-common/strub-strict1.c +++ b/gcc/testsuite/c-c++-common/strub-strict1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strubm" } */ +/* { dg-require-effective-target strub } */ static int __attribute__ ((__strub__)) var; diff --git a/gcc/testsuite/c-c++-common/strub-strict2.c b/gcc/testsuite/c-c++-common/strub-strict2.c index b4f288832182..3bf1aa30b4af 100644 --- a/gcc/testsuite/c-c++-common/strub-strict2.c +++ b/gcc/testsuite/c-c++-common/strub-strict2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strubm" } */ +/* { dg-require-effective-target strub } */ static int __attribute__ ((__strub__)) var; diff --git a/gcc/testsuite/c-c++-common/strub-tail-O1.c b/gcc/testsuite/c-c++-common/strub-tail-O1.c index e48e0610e079..ba4b1623e281 100644 --- a/gcc/testsuite/c-c++-common/strub-tail-O1.c +++ b/gcc/testsuite/c-c++-common/strub-tail-O1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ #include "strub-tail-O2.c" diff --git 
a/gcc/testsuite/c-c++-common/strub-tail-O2.c b/gcc/testsuite/c-c++-common/strub-tail-O2.c index 87cda7ab21b1..043813b1de46 100644 --- a/gcc/testsuite/c-c++-common/strub-tail-O2.c +++ b/gcc/testsuite/c-c++-common/strub-tail-O2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fstrub=strict -fno-exceptions -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that the expected strub calls are issued. Tail calls are short-circuited at -O2+. */ diff --git a/gcc/testsuite/c-c++-common/strub-unsupported-2.c b/gcc/testsuite/c-c++-common/strub-unsupported-2.c new file mode 100644 index 000000000000..3586f4f679df --- /dev/null +++ b/gcc/testsuite/c-c++-common/strub-unsupported-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ + +/* Check that, when strub is not supported (so no dg-required-effective-target + strub above), we report when pointers to strub functions are called. This + cannot be part of strub-unsupported.c because errors in the strub-mode pass + prevent the main strub pass, where errors at calls are detected, from + running. */ + +void __attribute__ ((__strub__ ("at-calls"))) (*p) (void); + +void m () { + p (); /* { dg-message "unsupported" "" { target { ! strub } } } */ +} diff --git a/gcc/testsuite/c-c++-common/strub-unsupported-3.c b/gcc/testsuite/c-c++-common/strub-unsupported-3.c new file mode 100644 index 000000000000..d6fb4c525c4a --- /dev/null +++ b/gcc/testsuite/c-c++-common/strub-unsupported-3.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ + +/* Check that, when strub is not supported (so no dg-required-effective-target + strub above), we report when strub functions that are not defined are + called. This cannot be part of strub-unsupported-2.c because errors in the + strub-mode pass prevent the main strub pass, where errors at calls are + detected, from running. */ + +extern void __attribute__ ((__strub__)) +s (void); /* { dg-message "not eligible|requested" "" { target { ! 
strub } } } */ + +extern void __attribute__ ((__strub__ ("internal"))) +t (void); /* { dg-message "not eligible|requested" "" { target { ! strub } } } */ + +void m () { + s (); + t (); +} diff --git a/gcc/testsuite/c-c++-common/strub-unsupported.c b/gcc/testsuite/c-c++-common/strub-unsupported.c new file mode 100644 index 000000000000..cb5c4049495c --- /dev/null +++ b/gcc/testsuite/c-c++-common/strub-unsupported.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ + +/* Check that, when strub is not supported (so no dg-required-effective-target + strub above), we report when strub functions are defined, and when they're + called in ways that would require changes. */ + +void __attribute__ ((__strub__)) +f (void) {} /* { dg-message "not eligible|requested" "" { target { ! strub } } } */ + +void __attribute__ ((__strub__ ("internal"))) +g (void) {} /* { dg-message "not eligible|requested" "" { target { ! strub } } } */ + +/* This only gets an error when called, see strub-unsupported-2.c. */ +void __attribute__ ((__strub__ ("at-calls"))) (*p) (void); + +/* These too, see strub-unsupported-3.c. 
*/ +extern void __attribute__ ((__strub__)) +s (void); + +extern void __attribute__ ((__strub__ ("internal"))) +t (void); diff --git a/gcc/testsuite/c-c++-common/strub-var1.c b/gcc/testsuite/c-c++-common/strub-var1.c index eb6250fd39c9..67014aa5de84 100644 --- a/gcc/testsuite/c-c++-common/strub-var1.c +++ b/gcc/testsuite/c-c++-common/strub-var1.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target strub } */ int __attribute__ ((strub)) x; float __attribute__ ((strub)) f; diff --git a/gcc/testsuite/c-c++-common/torture/strub-callable1.c b/gcc/testsuite/c-c++-common/torture/strub-callable1.c index b5e45ab0525a..86dbee6746d1 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-callable1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-callable1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ /* Check that strub and non-strub functions can be called from non-strub contexts, and that strub and callable functions can be called from strub diff --git a/gcc/testsuite/c-c++-common/torture/strub-callable2.c b/gcc/testsuite/c-c++-common/torture/strub-callable2.c index 96aa7fe4b07f..9da120f61564 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-callable2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-callable2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ /* Check that impermissible (cross-strub-context) calls are reported. 
*/ diff --git a/gcc/testsuite/c-c++-common/torture/strub-const1.c b/gcc/testsuite/c-c++-common/torture/strub-const1.c index 5e956cb1a9b6..22056713cce4 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-const1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-const1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub const function call, we issue an asm statement to make sure the watermark passed to it is held in memory before diff --git a/gcc/testsuite/c-c++-common/torture/strub-const2.c b/gcc/testsuite/c-c++-common/torture/strub-const2.c index 73d650292dfb..a105c66d7a9c 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-const2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-const2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub implicitly-const function call, we issue an asm statement to make sure the watermark passed to it is held in memory diff --git a/gcc/testsuite/c-c++-common/torture/strub-const3.c b/gcc/testsuite/c-c++-common/torture/strub-const3.c index 2584f1f974a5..386200c2784a 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-const3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-const3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub const wrapping call, we issue an asm statement to make sure the watermark passed to it is held in memory before the call, diff --git a/gcc/testsuite/c-c++-common/torture/strub-const4.c b/gcc/testsuite/c-c++-common/torture/strub-const4.c index d819f54ec023..817e9fa2118b 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-const4.c +++ b/gcc/testsuite/c-c++-common/torture/strub-const4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options 
"-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub implicitly-const wrapping call, we issue an asm statement to make sure the watermark passed to it is held in memory diff --git a/gcc/testsuite/c-c++-common/torture/strub-data1.c b/gcc/testsuite/c-c++-common/torture/strub-data1.c index 7c27a2a1a6dc..132ab63ef733 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-data1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-data1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The pointed-to data enables strubbing if accessed. */ int __attribute__ ((__strub__)) var; diff --git a/gcc/testsuite/c-c++-common/torture/strub-data2.c b/gcc/testsuite/c-c++-common/torture/strub-data2.c index e66d903780af..b660702d26e7 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-data2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-data2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The pointer itself is a strub variable, enabling internal strubbing when its value is used. */ diff --git a/gcc/testsuite/c-c++-common/torture/strub-data3.c b/gcc/testsuite/c-c++-common/torture/strub-data3.c index 5e08e0e58c65..fc44eef6f8fb 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-data3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-data3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The pointer itself is a strub variable, that would enable internal strubbing if its value was used. Here, it's only overwritten, so no strub. 
*/ diff --git a/gcc/testsuite/c-c++-common/torture/strub-data4.c b/gcc/testsuite/c-c++-common/torture/strub-data4.c index a818e7a38bb5..85e2f59055b5 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-data4.c +++ b/gcc/testsuite/c-c++-common/torture/strub-data4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* The pointer itself is a strub variable, that would enable internal strubbing if its value was used. Here, it's only overwritten, so no strub. */ diff --git a/gcc/testsuite/c-c++-common/torture/strub-data5.c b/gcc/testsuite/c-c++-common/torture/strub-data5.c index ddb0b5c0543b..0a5edac414df 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-data5.c +++ b/gcc/testsuite/c-c++-common/torture/strub-data5.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ /* It would be desirable to issue at least warnings for these. 
*/ diff --git a/gcc/testsuite/c-c++-common/torture/strub-indcall1.c b/gcc/testsuite/c-c++-common/torture/strub-indcall1.c index c165f312f16d..988954e7ed6b 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-indcall1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-indcall1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ typedef void __attribute__ ((__strub__)) fntype (); fntype (*ptr); diff --git a/gcc/testsuite/c-c++-common/torture/strub-indcall2.c b/gcc/testsuite/c-c++-common/torture/strub-indcall2.c index 69fcff8d3763..d3ca91389a70 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-indcall2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-indcall2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ typedef void __attribute__ ((__strub__)) fntype (int, int); fntype (*ptr); diff --git a/gcc/testsuite/c-c++-common/torture/strub-indcall3.c b/gcc/testsuite/c-c++-common/torture/strub-indcall3.c index ff006224909b..89b5979cf7b7 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-indcall3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-indcall3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ typedef void __attribute__ ((__strub__)) fntype (int, int, ...); fntype (*ptr); diff --git a/gcc/testsuite/c-c++-common/torture/strub-inlinable1.c b/gcc/testsuite/c-c++-common/torture/strub-inlinable1.c index 614b02228ba2..4917dda8826d 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-inlinable1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-inlinable1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed" } */ +/* { dg-require-effective-target strub } */ inline void __attribute__ ((strub ("internal"), always_inline)) inl_int_ali (void) diff --git 
a/gcc/testsuite/c-c++-common/torture/strub-inlinable2.c b/gcc/testsuite/c-c++-common/torture/strub-inlinable2.c index f9a6b4a16faf..c45903856d4f 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-inlinable2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-inlinable2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=all" } */ +/* { dg-require-effective-target strub } */ #include "strub-inlinable1.c" diff --git a/gcc/testsuite/c-c++-common/torture/strub-ptrfn1.c b/gcc/testsuite/c-c++-common/torture/strub-ptrfn1.c index b4a7f3992bba..b0d6139f0a87 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-ptrfn1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-ptrfn1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ typedef void ft (void); typedef void ft2 (int, int); diff --git a/gcc/testsuite/c-c++-common/torture/strub-ptrfn2.c b/gcc/testsuite/c-c++-common/torture/strub-ptrfn2.c index ef634d351265..1148c246f205 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-ptrfn2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-ptrfn2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed -Wpedantic" } */ +/* { dg-require-effective-target strub } */ /* C++ does not warn about the partial incompatibilities. diff --git a/gcc/testsuite/c-c++-common/torture/strub-ptrfn3.c b/gcc/testsuite/c-c++-common/torture/strub-ptrfn3.c index e1f179e160e5..06a72d86d2c5 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-ptrfn3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-ptrfn3.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed -Wpedantic -fpermissive" } */ /* { dg-prune-output "command-line option .-fpermissive." } */ +/* { dg-require-effective-target strub } */ /* See strub-ptrfn2.c. 
*/ diff --git a/gcc/testsuite/c-c++-common/torture/strub-ptrfn4.c b/gcc/testsuite/c-c++-common/torture/strub-ptrfn4.c index 70b558afad04..83ea1af7056e 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-ptrfn4.c +++ b/gcc/testsuite/c-c++-common/torture/strub-ptrfn4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=relaxed" } */ +/* { dg-require-effective-target strub } */ /* This is strub-ptrfn2.c without -Wpedantic. diff --git a/gcc/testsuite/c-c++-common/torture/strub-pure1.c b/gcc/testsuite/c-c++-common/torture/strub-pure1.c index a262a086837b..2643136f178c 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-pure1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-pure1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub pure function call, we issue an asm statement to make sure the watermark passed to it is not assumed to be unchanged. */ diff --git a/gcc/testsuite/c-c++-common/torture/strub-pure2.c b/gcc/testsuite/c-c++-common/torture/strub-pure2.c index 4c4bd50c209a..8bda129b77dc 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-pure2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-pure2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub implicitly-pure function call, we issue an asm statement to make sure the watermark passed to it is not assumed to be diff --git a/gcc/testsuite/c-c++-common/torture/strub-pure3.c b/gcc/testsuite/c-c++-common/torture/strub-pure3.c index ce195c6b1f1b..00bcbdd097af 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-pure3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-pure3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub pure 
wrapping call, we issue an asm statement to make sure the watermark passed to it is not assumed to be unchanged. */ diff --git a/gcc/testsuite/c-c++-common/torture/strub-pure4.c b/gcc/testsuite/c-c++-common/torture/strub-pure4.c index 75cd54ccb5b5..ea7c40e7912b 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-pure4.c +++ b/gcc/testsuite/c-c++-common/torture/strub-pure4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +/* { dg-require-effective-target strub } */ /* Check that, along with a strub implicitly-pure wrapping call, we issue an asm statement to make sure the watermark passed to it is not assumed to be diff --git a/gcc/testsuite/c-c++-common/torture/strub-run1.c b/gcc/testsuite/c-c++-common/torture/strub-run1.c index 7458b3fb54da..fdf100428631 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run1.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run1.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ /* Check that a non-strub function leaves a string behind in the stack, and that equivalent strub functions don't. Avoid the use of red zones by avoiding diff --git a/gcc/testsuite/c-c++-common/torture/strub-run2.c b/gcc/testsuite/c-c++-common/torture/strub-run2.c index 5d60a7775f4b..1228a6659972 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run2.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run2.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict" } */ +/* { dg-require-effective-target strub } */ /* Check that a non-strub function leaves a string behind in the stack, and that equivalent strub functions don't. Allow red zones to be used. 
*/ diff --git a/gcc/testsuite/c-c++-common/torture/strub-run3.c b/gcc/testsuite/c-c++-common/torture/strub-run3.c index c2ad710858e8..e5047a988f5b 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run3.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run3.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target strub } */ /* Check that a non-strub function leaves a string behind in the stack, and that equivalent strub functions don't. */ diff --git a/gcc/testsuite/c-c++-common/torture/strub-run4.c b/gcc/testsuite/c-c++-common/torture/strub-run4.c index 3b36b8e5d68e..0e84a4bab80f 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run4.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run4.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-fstrub=all" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target strub } */ /* Check that multi-level, multi-inlined functions still get cleaned up as expected, without overwriting temporary stack allocations while they should diff --git a/gcc/testsuite/c-c++-common/torture/strub-run4c.c b/gcc/testsuite/c-c++-common/torture/strub-run4c.c index 57f9baf758de..edc98486dc93 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run4c.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run4c.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=at-calls" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target strub } */ #include "strub-run4.c" diff --git a/gcc/testsuite/c-c++-common/torture/strub-run4d.c b/gcc/testsuite/c-c++-common/torture/strub-run4d.c index 08de3f1c3b17..487ed08bb660 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run4d.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run4d.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-fstrub=strict" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target strub } */ 
#define ATTR_STRUB_AT_CALLS __attribute__ ((__strub__ ("at-calls"))) diff --git a/gcc/testsuite/c-c++-common/torture/strub-run4i.c b/gcc/testsuite/c-c++-common/torture/strub-run4i.c index 459f6886c549..a85447ffabfa 100644 --- a/gcc/testsuite/c-c++-common/torture/strub-run4i.c +++ b/gcc/testsuite/c-c++-common/torture/strub-run4i.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-options "-fstrub=internal" } */ /* { dg-require-effective-target alloca } */ +/* { dg-require-effective-target strub } */ #include "strub-run4.c" diff --git a/gcc/testsuite/g++.dg/strub-run1.C b/gcc/testsuite/g++.dg/strub-run1.C index 0d367fb83d09..beb8b811f8fc 100644 --- a/gcc/testsuite/g++.dg/strub-run1.C +++ b/gcc/testsuite/g++.dg/strub-run1.C @@ -1,5 +1,6 @@ // { dg-do run } // { dg-options "-fstrub=internal" } +// { dg-require-effective-target strub } // Check that we don't get extra copies. diff --git a/gcc/testsuite/g++.dg/torture/strub-init1.C b/gcc/testsuite/g++.dg/torture/strub-init1.C index c226ab10ff65..6ae45fadd70b 100644 --- a/gcc/testsuite/g++.dg/torture/strub-init1.C +++ b/gcc/testsuite/g++.dg/torture/strub-init1.C @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +// { dg-require-effective-target strub } extern int __attribute__((__strub__)) initializer (); diff --git a/gcc/testsuite/g++.dg/torture/strub-init2.C b/gcc/testsuite/g++.dg/torture/strub-init2.C index a7911f1fa721..8f4849c7fde7 100644 --- a/gcc/testsuite/g++.dg/torture/strub-init2.C +++ b/gcc/testsuite/g++.dg/torture/strub-init2.C @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +// { dg-require-effective-target strub } extern int __attribute__((__strub__)) initializer (); diff --git a/gcc/testsuite/g++.dg/torture/strub-init3.C b/gcc/testsuite/g++.dg/torture/strub-init3.C index 6ebebcd01e8e..14f28e3c276b 100644 --- a/gcc/testsuite/g++.dg/torture/strub-init3.C +++ b/gcc/testsuite/g++.dg/torture/strub-init3.C @@ -1,5 +1,6 @@ /* { 
dg-do compile } */ /* { dg-options "-fstrub=strict -fdump-ipa-strub" } */ +// { dg-require-effective-target strub } extern int __attribute__((__strub__)) initializer (); diff --git a/gcc/testsuite/gnat.dg/strub_access.adb b/gcc/testsuite/gnat.dg/strub_access.adb index 29e6996ecf61..488a2d64afe3 100644 --- a/gcc/testsuite/gnat.dg/strub_access.adb +++ b/gcc/testsuite/gnat.dg/strub_access.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=relaxed -fdump-ipa-strubm" } +-- { dg-require-effective-target strub } -- The main subprogram doesn't read from the automatic variable, but -- being an automatic variable, its presence should be enough for the diff --git a/gcc/testsuite/gnat.dg/strub_access1.adb b/gcc/testsuite/gnat.dg/strub_access1.adb index dae470601643..4a8653c4d843 100644 --- a/gcc/testsuite/gnat.dg/strub_access1.adb +++ b/gcc/testsuite/gnat.dg/strub_access1.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=relaxed" } +-- { dg-require-effective-target strub } -- Check that we reject 'Access of a strub variable whose type does -- not carry a strub modifier. 
diff --git a/gcc/testsuite/gnat.dg/strub_attr.adb b/gcc/testsuite/gnat.dg/strub_attr.adb index 10445d7cf845..eb7826dc990f 100644 --- a/gcc/testsuite/gnat.dg/strub_attr.adb +++ b/gcc/testsuite/gnat.dg/strub_attr.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict -fdump-ipa-strubm -fdump-ipa-strub" } +-- { dg-require-effective-target strub } package body Strub_Attr is E : exception; diff --git a/gcc/testsuite/gnat.dg/strub_disp.adb b/gcc/testsuite/gnat.dg/strub_disp.adb index 3dbcc4a357cb..f23d4675def3 100644 --- a/gcc/testsuite/gnat.dg/strub_disp.adb +++ b/gcc/testsuite/gnat.dg/strub_disp.adb @@ -1,4 +1,5 @@ -- { dg-do compile } +-- { dg-require-effective-target strub } procedure Strub_Disp is package Foo is diff --git a/gcc/testsuite/gnat.dg/strub_disp1.adb b/gcc/testsuite/gnat.dg/strub_disp1.adb index 09756a74b7d8..9c4c7f696371 100644 --- a/gcc/testsuite/gnat.dg/strub_disp1.adb +++ b/gcc/testsuite/gnat.dg/strub_disp1.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fdump-ipa-strub" } +-- { dg-require-effective-target strub } -- Check that at-calls dispatching calls are transformed. diff --git a/gcc/testsuite/gnat.dg/strub_ind.adb b/gcc/testsuite/gnat.dg/strub_ind.adb index da56acaa957d..613db69305e0 100644 --- a/gcc/testsuite/gnat.dg/strub_ind.adb +++ b/gcc/testsuite/gnat.dg/strub_ind.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict" } +-- { dg-require-effective-target strub } -- This is essentially the same test as strub_attr.adb, -- but applying attributes to access types as well. 
diff --git a/gcc/testsuite/gnat.dg/strub_ind1.adb b/gcc/testsuite/gnat.dg/strub_ind1.adb index 825e395e6819..245b0a830f69 100644 --- a/gcc/testsuite/gnat.dg/strub_ind1.adb +++ b/gcc/testsuite/gnat.dg/strub_ind1.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict -fdump-ipa-strubm" } +-- { dg-require-effective-target strub } -- This is essentially the same test as strub_attr.adb, -- but with an explicit conversion. diff --git a/gcc/testsuite/gnat.dg/strub_ind2.adb b/gcc/testsuite/gnat.dg/strub_ind2.adb index e918b3926311..b9bfe50e9296 100644 --- a/gcc/testsuite/gnat.dg/strub_ind2.adb +++ b/gcc/testsuite/gnat.dg/strub_ind2.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict" } +-- { dg-require-effective-target strub } -- This is essentially the same test as strub_attr.adb, -- but with an explicit conversion. diff --git a/gcc/testsuite/gnat.dg/strub_intf.adb b/gcc/testsuite/gnat.dg/strub_intf.adb index 8f0212a75866..f43854705d07 100644 --- a/gcc/testsuite/gnat.dg/strub_intf.adb +++ b/gcc/testsuite/gnat.dg/strub_intf.adb @@ -1,4 +1,5 @@ -- { dg-do compile } +-- { dg-require-effective-target strub } -- Check that strub mode mismatches between overrider and overridden -- subprograms are reported. diff --git a/gcc/testsuite/gnat.dg/strub_intf1.adb b/gcc/testsuite/gnat.dg/strub_intf1.adb index bf77321cef79..7a38a4c49ba8 100644 --- a/gcc/testsuite/gnat.dg/strub_intf1.adb +++ b/gcc/testsuite/gnat.dg/strub_intf1.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fdump-ipa-strub" } +-- { dg-require-effective-target strub } -- Check that at-calls dispatching calls to interfaces are transformed. 
diff --git a/gcc/testsuite/gnat.dg/strub_intf2.adb b/gcc/testsuite/gnat.dg/strub_intf2.adb index e8880dbc4373..7992b7344fb8 100644 --- a/gcc/testsuite/gnat.dg/strub_intf2.adb +++ b/gcc/testsuite/gnat.dg/strub_intf2.adb @@ -1,4 +1,5 @@ -- { dg-do compile } +-- { dg-require-effective-target strub } -- Check that strub mode mismatches between overrider and overridden -- subprograms are reported even when the overriders for an diff --git a/gcc/testsuite/gnat.dg/strub_renm.adb b/gcc/testsuite/gnat.dg/strub_renm.adb index 217367e712d8..abfb120b5146 100644 --- a/gcc/testsuite/gnat.dg/strub_renm.adb +++ b/gcc/testsuite/gnat.dg/strub_renm.adb @@ -1,4 +1,5 @@ -- { dg-do compile } +-- { dg-require-effective-target strub } procedure Strub_Renm is procedure P (X : Integer); diff --git a/gcc/testsuite/gnat.dg/strub_renm1.adb b/gcc/testsuite/gnat.dg/strub_renm1.adb index a11adbfb5a9d..68d3230b5356 100644 --- a/gcc/testsuite/gnat.dg/strub_renm1.adb +++ b/gcc/testsuite/gnat.dg/strub_renm1.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=relaxed -fdump-ipa-strub" } +-- { dg-require-effective-target strub } procedure Strub_Renm1 is V : Integer := 0; diff --git a/gcc/testsuite/gnat.dg/strub_renm2.adb b/gcc/testsuite/gnat.dg/strub_renm2.adb index c488c20826fd..3cb81ea03f76 100644 --- a/gcc/testsuite/gnat.dg/strub_renm2.adb +++ b/gcc/testsuite/gnat.dg/strub_renm2.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict -fdump-ipa-strub" } +-- { dg-require-effective-target strub } procedure Strub_Renm2 is V : Integer := 0; diff --git a/gcc/testsuite/gnat.dg/strub_var.adb b/gcc/testsuite/gnat.dg/strub_var.adb index 3d158de28031..7c6affa06d4a 100644 --- a/gcc/testsuite/gnat.dg/strub_var.adb +++ b/gcc/testsuite/gnat.dg/strub_var.adb @@ -1,5 +1,6 @@ -- { dg-do compile } -- { dg-options "-fstrub=strict -fdump-ipa-strubm" } +-- { dg-require-effective-target strub } -- We don't read from the automatic variable, but being an automatic -- variable, its presence 
should be enough for the procedure to get diff --git a/gcc/testsuite/gnat.dg/strub_var1.adb b/gcc/testsuite/gnat.dg/strub_var1.adb index 6a504e09198b..64b7e65fe9b0 100644 --- a/gcc/testsuite/gnat.dg/strub_var1.adb +++ b/gcc/testsuite/gnat.dg/strub_var1.adb @@ -1,4 +1,5 @@ -- { dg-do compile } +-- { dg-require-effective-target strub } with Strub_Attr; procedure Strub_Var1 is diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 3fcce6be49d6..40a60c198cfe 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1302,6 +1302,13 @@ proc check_stack_check_available { stack_kind } { } "$stack_opt"] } +# Return 1 if the target supports stack scrubbing. +proc check_effective_target_strub {} { + return [check_no_compiler_messages strub assembly { + void __attribute__ ((__strub__)) fn (void) {} + } ""] +} + # Return 1 if compilation with -freorder-blocks-and-partition is error-free # for trivial code, 0 otherwise. As some targets (ARM for example) only # warn when -fprofile-use is also supplied we test that combination too. diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index d8163c5af990..3f77283490ef 100644 --- a/libgcc/Makefile.in +++ b/libgcc/Makefile.in @@ -434,7 +434,7 @@ LIB2ADD += enable-execute-stack.c LIB2ADD += $(srcdir)/hardcfr.c # Stack scrubbing infrastructure. -LIB2ADD += $(srcdir)/strub.c +@HAVE_STRUB_SUPPORT@LIB2ADD += $(srcdir)/strub.c # While emutls.c has nothing to do with EH, it is in LIB2ADDEH* # instead of LIB2ADD because that's the way to be sure on some targets diff --git a/libgcc/configure b/libgcc/configure index cf149209652e..567158955a32 100755 --- a/libgcc/configure +++ b/libgcc/configure @@ -593,6 +593,7 @@ asm_hidden_op extra_parts cpu_type get_gcc_base_ver +HAVE_STRUB_SUPPORT thread_header tm_defines tm_file @@ -5702,6 +5703,31 @@ esac +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for strub support" >&5 +$as_echo_n "checking for strub support... 
" >&6; } +if ${libgcc_cv_strub_support+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +void __attribute__ ((__strub__)) fn (void) {} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + libgcc_cv_strub_support=yes +else + libgcc_cv_strub_support=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_strub_support" >&5 +$as_echo "$libgcc_cv_strub_support" >&6; } +if test "x$libgcc_cv_strub_support" != xno; then + HAVE_STRUB_SUPPORT= +else + HAVE_STRUB_SUPPORT='# ' +fi + + # Determine what GCC version number to use in filesystem paths. get_gcc_base_ver="cat" diff --git a/libgcc/configure.ac b/libgcc/configure.ac index 2fc9d5d7c93e..9c0e415501a8 100644 --- a/libgcc/configure.ac +++ b/libgcc/configure.ac @@ -694,6 +694,19 @@ AC_SUBST(tm_defines) # Map from thread model to thread header. GCC_AC_THREAD_HEADER([$target_thread_file]) +AC_CACHE_CHECK([for strub support], + [libgcc_cv_strub_support], + [AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([void __attribute__ ((__strub__)) fn (void) {}])], + [libgcc_cv_strub_support=yes], + [libgcc_cv_strub_support=no])]) +if test "x$libgcc_cv_strub_support" != xno; then + HAVE_STRUB_SUPPORT= +else + HAVE_STRUB_SUPPORT='# ' +fi +AC_SUBST(HAVE_STRUB_SUPPORT) + # Determine what GCC version number to use in filesystem paths. GCC_BASE_VER From 2125710245d64ff6edff7ebd339caf4698fdd89d Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Wed, 6 Dec 2023 15:34:24 -0500 Subject: [PATCH 049/311] aarch64: add -fno-stack-protector to tests These tests fail when the testsuite is executed with -fstack-protector-strong. To avoid this, this patch adds -fno-stack-protector to dg-options. The list of FAILs is appended. As you can see, it's mostly about scan-assembler-* which are sort of expected to fail with the stack protector on. 
FAIL: gcc.target/aarch64/ldp_stp_unaligned_2.c scan-assembler-not mov\\tx[0-9]+, sp FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler-times stp\\\\tx29, x30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/shadow_call_stack_5.c scan-assembler ldr\\\\tx29, \\\\[sp\\\\] FAIL: gcc.target/aarch64/shadow_call_stack_6.c scan-assembler-times str\\\\tx30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler-times stp\\\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 1 FAIL: gcc.target/aarch64/shadow_call_stack_7.c scan-assembler ldr\\\\tx19, \\\\[sp\\\\], [0-9]+ FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler-times stp\\\\tx19, x20, \\\\[sp, -[0-9]+\\\\]! 1 FAIL: gcc.target/aarch64/shadow_call_stack_8.c scan-assembler ldp\\\\tx19, x20, \\\\[sp\\\\], [0-9]+ FAIL: gcc.target/aarch64/stack-check-12.c scan-assembler-times str\\\\txzr, 2 FAIL: gcc.target/aarch64/stack-check-prologue-11.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-12.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-13.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-14.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-15.c scan-assembler-times str\\\\s+x30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test1 FAIL: gcc.target/aarch64/stack-check-prologue-17.c check-function-bodies test2 FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test1 FAIL: gcc.target/aarch64/stack-check-prologue-18.c 
check-function-bodies test2 FAIL: gcc.target/aarch64/stack-check-prologue-18.c check-function-bodies test3 FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test1 FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test2 FAIL: gcc.target/aarch64/stack-check-prologue-19.c check-function-bodies test3 FAIL: gcc.target/aarch64/stack-check-prologue-2.c scan-assembler-times str\\\\s+xzr, 0 FAIL: gcc.target/aarch64/stack-check-prologue-5.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-6.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/stack-check-prologue-8.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 2 FAIL: gcc.target/aarch64/stack-check-prologue-9.c scan-assembler-times str\\\\s+xzr, \\\\[sp, 1024\\\\] 1 FAIL: gcc.target/aarch64/test_frame_1.c scan-assembler-times str\\tx30, \\\\[sp, -[0-9]+\\\\]! 2 FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler-times stp\\tx19, x30, \\\\[sp, [0-9]+\\\\] 1 FAIL: gcc.target/aarch64/test_frame_10.c scan-assembler ldp\\tx19, x30, \\\\[sp, [0-9]+\\\\] FAIL: gcc.target/aarch64/test_frame_11.c scan-assembler-times stp\\tx29, x30, \\\\[sp, -[0-9]+\\\\]! 2 FAIL: gcc.target/aarch64/test_frame_13.c scan-assembler-times stp\\tx29, x30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/test_frame_15.c scan-assembler-times stp\\tx29, x30, \\\\[sp, [0-9]+\\\\] 1 FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler-times stp\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 1 FAIL: gcc.target/aarch64/test_frame_2.c scan-assembler ldp\\tx19, x30, \\\\[sp\\\\], [0-9]+ FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler-times stp\\tx19, x30, \\\\[sp, -[0-9]+\\\\]! 
1 FAIL: gcc.target/aarch64/test_frame_4.c scan-assembler ldp\\tx19, x30, \\\\[sp\\\\], [0-9]+ FAIL: gcc.target/aarch64/test_frame_6.c scan-assembler-times str\\tx30, \\\\[sp\\\\] 1 FAIL: gcc.target/aarch64/test_frame_7.c scan-assembler-times stp\\tx19, x30, \\\\[sp] 1 FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler-times str\\tx30, \\\\[sp, [0-9]+\\\\] 1 FAIL: gcc.target/aarch64/test_frame_8.c scan-assembler ldr\\tx30, \\\\[sp, [0-9]+\\\\] FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times cmp\\\\s+x[0-9]+, 61440 4 FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times sub\\\\s+x[0-9]+, x[0-9]+, 61440 4 FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times cmp\\s+x[0-9]+, 61440 4 FAIL: gcc.target/aarch64/sve/struct_vect_24.c scan-assembler-times sub\\s+x[0-9]+, x[0-9]+, 61440 4 gcc/testsuite/ChangeLog: * gcc.target/aarch64/ldp_stp_unaligned_2.c: Use -fno-stack-protector. * gcc.target/aarch64/shadow_call_stack_5.c: Likewise. * gcc.target/aarch64/shadow_call_stack_6.c: Likewise. * gcc.target/aarch64/shadow_call_stack_7.c: Likewise. * gcc.target/aarch64/shadow_call_stack_8.c: Likewise. * gcc.target/aarch64/stack-check-12.c: Likewise. * gcc.target/aarch64/stack-check-prologue-11.c: Likewise. * gcc.target/aarch64/stack-check-prologue-12.c: Likewise. * gcc.target/aarch64/stack-check-prologue-13.c: Likewise. * gcc.target/aarch64/stack-check-prologue-14.c: Likewise. * gcc.target/aarch64/stack-check-prologue-15.c: Likewise. * gcc.target/aarch64/stack-check-prologue-17.c: Likewise. * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. * gcc.target/aarch64/stack-check-prologue-2.c: Likewise. * gcc.target/aarch64/stack-check-prologue-5.c: Likewise. * gcc.target/aarch64/stack-check-prologue-6.c: Likewise. * gcc.target/aarch64/stack-check-prologue-8.c: Likewise. * gcc.target/aarch64/stack-check-prologue-9.c: Likewise. * gcc.target/aarch64/sve/struct_vect_24.c: Likewise. 
* gcc.target/aarch64/test_frame_1.c: Likewise. * gcc.target/aarch64/test_frame_10.c: Likewise. * gcc.target/aarch64/test_frame_11.c: Likewise. * gcc.target/aarch64/test_frame_13.c: Likewise. * gcc.target/aarch64/test_frame_15.c: Likewise. * gcc.target/aarch64/test_frame_2.c: Likewise. * gcc.target/aarch64/test_frame_4.c: Likewise. * gcc.target/aarch64/test_frame_6.c: Likewise. * gcc.target/aarch64/test_frame_7.c: Likewise. * gcc.target/aarch64/test_frame_8.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c | 2 +- gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c | 2 +- gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c | 2 +- gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-12.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c | 4 ++-- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c | 4 ++-- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c | 2 +- gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c | 2 +- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_11.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_13.c | 2 +- 
gcc/testsuite/gcc.target/aarch64/test_frame_15.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_6.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 2 +- gcc/testsuite/gcc.target/aarch64/test_frame_8.c | 2 +- 30 files changed, 32 insertions(+), 32 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c index 1e46755a39a0..50d7d7a2d5d5 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_2.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fomit-frame-pointer" } */ +/* { dg-options "-O2 -fomit-frame-pointer -fno-stack-protector" } */ /* Check that we split unaligned LDP/STP into base and aligned offset. */ diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c index d88357ca04da..d7f82984ff53 100644 --- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c +++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_5.c @@ -7,7 +7,7 @@ * optimized code should use "ldr x29, [sp]" to restore x29 only. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-omit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ +/* { dg-options "-O2 -fno-omit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c index 83b74834c6a7..8d088aecc202 100644 --- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c +++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_6.c @@ -7,7 +7,7 @@ * optimized code should not restore x30 in epilogue. 
*/ /* { dg-do compile } */ -/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c index 5537fb3293aa..a2f376e0091c 100644 --- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c +++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_7.c @@ -7,7 +7,7 @@ * optimized code should use "ldr x19, [sp], x" to restore x19 only. */ /* { dg-do compile } */ -/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c index b03f26f7bcfa..5162cbb39175 100644 --- a/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c +++ b/gcc/testsuite/gcc.target/aarch64/shadow_call_stack_8.c @@ -9,7 +9,7 @@ * optimized code should not restore x30 in epilogue. 
*/ /* { dg-do compile } */ -/* { dg-options "-O0 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps" } */ +/* { dg-options "-O0 -fomit-frame-pointer -fsanitize=shadow-call-stack -fno-exceptions -ffixed-x18 --save-temps -fno-stack-protector" } */ int func1 (void) { diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c index be5a57a9ec6b..e1a4c67b041a 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-12.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-asynchronous-unwind-tables -fno-unwind-tables" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-asynchronous-unwind-tables -fno-unwind-tables -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ typedef unsigned __attribute__((mode(DI))) uint64_t; diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c index 741f2f5fadc6..d57aece05bb6 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ #define SIZE (6 * 64 * 1024) + (1 * 32 * 1024) diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c index ece68003ade4..895d130e4fa7 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c +++ 
b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-12.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ void diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c index 0fc900c6943e..1f1a6c497bec 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ void h (void) __attribute__ ((noreturn)); @@ -17,4 +17,4 @@ f (void) /* SIZE is more than 1 guard-size, but only one 64KB page is used, expect only 1 probe. Leaf function and omitting leaf pointers, tail call to noreturn which - may only omit an epilogue and not a prologue. Checking for LR saving. */ \ No newline at end of file + may only omit an epilogue and not a prologue. Checking for LR saving. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c index ea733f861e77..facb3cb72a78 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ void h (void) __attribute__ ((noreturn)); @@ -21,4 +21,4 @@ f (void) probe at 1024 and one implicit probe due to LR being saved. Leaf function and omitting leaf pointers, tail call to noreturn which may only omit an epilogue and not a prologue and control flow in between. Checking for - LR saving. */ \ No newline at end of file + LR saving. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c index 63df4a5609a2..f2ac60a62149 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-15.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fomit-frame-pointer -momit-leaf-frame-pointer -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ void g (volatile int *x) ; diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c index f0ec1389771d..1cf6fbbb0857 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fno-stack-protector" } */ /* { dg-final { check-function-bodies "**" "" } } */ void f(int, ...); diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c index 6383bec5ebcd..2e06346c1584 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */ +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fno-stack-protector" } */ /* { dg-final { 
check-function-bodies "**" "" } } */ void f(int, ...); diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c index 562039b5e9b8..38eab4d36ab2 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c @@ -1,4 +1,4 @@ -/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */ +/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18 -fno-stack-protector" } */ /* { dg-final { check-function-bodies "**" "" } } */ void f(int, ...); diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c index 61c52a251a7b..b37f62cad273 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ #define SIZE 2 * 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c index 2ee16350127c..34a438671d03 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { 
dg-require-effective-target supports_stack_clash_protection } */ #define SIZE 64 * 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c index 3c9b606cbe0e..a4e34e2fe6af 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ #define SIZE 65 * 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c index 333f5fcc3607..277dce4c71e0 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { dg-require-effective-target supports_stack_clash_protection } */ #define SIZE 128 * 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c index a3ff89b55813..a21305541c15 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ /* { dg-require-effective-target 
supports_stack_clash_protection } */ #define SIZE 6 * 64 * 1024 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c index 68a9d5e3d2e7..19be6de0c2e4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_24.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target supports_stack_clash_protection } */ -/* { dg-options "-O3 -fopenmp-simd -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */ +/* { dg-options "-O3 -fopenmp-simd -fstack-clash-protection --param stack-clash-protection-guard-size=16 -fno-stack-protector" } */ #include diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c index f906b0735454..c9b8822b4b18 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_1.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_1.c @@ -6,7 +6,7 @@ * optimized code should use "str !" for stack adjustment. */ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c index c54ab2d0ccb4..fe5cbd9ed05f 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c @@ -7,7 +7,7 @@ * Use a single stack adjustment, no writeback. 
*/ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_11.c b/gcc/testsuite/gcc.target/aarch64/test_frame_11.c index f162cc091e00..11cf471168d3 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_11.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_11.c @@ -5,7 +5,7 @@ * optimized code should use "stp !" for stack adjustment. */ /* { dg-do run } */ -/* { dg-options "-O2 --save-temps" } */ +/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c index 74b3370fa463..ec56963c038f 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_13.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_13.c @@ -5,7 +5,7 @@ * Use a single stack adjustment, no writeback. */ /* { dg-do run } */ -/* { dg-options "-O2 --save-temps" } */ +/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c index bed6714b4fe5..4247008de8e2 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_15.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_15.c @@ -6,7 +6,7 @@ * Use a single stack adjustment, no writeback. */ /* { dg-do run } */ -/* { dg-options "-O2 --save-temps" } */ +/* { dg-options "-O2 --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c index 0d715314cb83..9c4243b6480e 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c @@ -6,7 +6,7 @@ * optimized code should use "stp !" for stack adjustment. 
*/ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c index b41229c42f45..8d0bed93e444 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c @@ -6,7 +6,7 @@ * we can use "stp !" to optimize stack adjustment. */ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c index 56259c945d26..2944a8bbe168 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_6.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_6.c @@ -6,7 +6,7 @@ * use a single stack adjustment, no writeback. */ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c index 5702656a5da7..ca371632d817 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c @@ -6,7 +6,7 @@ * use a single stack adjustment, no writeback. 
*/ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c index 75a68b41e08c..084e8fac3733 100644 --- a/gcc/testsuite/gcc.target/aarch64/test_frame_8.c +++ b/gcc/testsuite/gcc.target/aarch64/test_frame_8.c @@ -5,7 +5,7 @@ * number of callee-saved reg == 1. */ /* { dg-do run } */ -/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */ +/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-stack-protector" } */ #include "test_frame_common.h" From cd3c1cfde73ac946be218eaa5d1fdf4e0cb2d61b Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Thu, 7 Dec 2023 17:23:11 +0100 Subject: [PATCH 050/311] s390: Fix expansion of vec_step Add missing "s390" while expanding vec_step to __builtin_s390_vec_step. gcc/ChangeLog: * config/s390/vecintrin.h (vec_step): Expand vec_step to __builtin_s390_vec_step. --- gcc/config/s390/vecintrin.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/s390/vecintrin.h b/gcc/config/s390/vecintrin.h index 133492c5b19a..7cd1db57aec7 100644 --- a/gcc/config/s390/vecintrin.h +++ b/gcc/config/s390/vecintrin.h @@ -59,8 +59,8 @@ along with GCC; see the file COPYING3. If not see | __VEC_CLASS_FP_INFINITY) /* This also accepts a type for its parameter, so it is not enough - to #define vec_step to __builtin_vec_step. */ -#define vec_step(x) __builtin_vec_step (* (__typeof__ (x) *) 0) + to #define vec_step to __builtin_s390_vec_step. 
*/ +#define vec_step(x) __builtin_s390_vec_step (* (__typeof__ (x) *) 0) static inline int __lcbb(const void *ptr, int bndry) From a1a0cdf21bb6a076e98658d815645d8ad1193840 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:21:54 +0000 Subject: [PATCH 051/311] arm: vld1q_types_x2 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1q intrinsic for the arm port. This patch adds the _x2 variants of the vld1q intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1q_u8_x2, vld1q_u16_x2, vld1q_u32_x2, vld1q_u64_x2): New. (vld1q_s8_x2, vld1q_s16_x2, vld1q_s32_x2, vld1q_s64_x2): New. (vld1q_f16_x2, vld1q_f32_x2): New. (vld1q_p8_x2, vld1q_p16_x2, vld1q_p64_x2): New. (vld1q_bf16_x2): New. * config/arm/arm_neon_builtins.def (vld1_x2): New entries. * config/arm/neon.md (vld1_x2): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new test. * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new test. * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new test. * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new test. 
--- gcc/config/arm/arm_neon.h | 128 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vld1q_base_xN_1.c | 67 +++++++++ .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 14 ++ .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 14 ++ 7 files changed, 247 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index cdfdb44259a1..3eb41c6bdc83 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10403,6 +10403,15 @@ vld1q_p64 (const poly64_t * __a) return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } +__extension__ extern __inline poly64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x2 (const poly64_t * __a) +{ + union { poly64x2x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10432,6 +10441,42 @@ vld1q_s64 (const int64_t * __a) return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } +__extension__ extern __inline int8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x2 (const int8_t * __a) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x2 (const int16_t * __a) +{ 
+ union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x2 (const int32_t * __a) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x2 (const int64_t * __a) +{ + union { int64x2x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10448,6 +10493,26 @@ vld1q_f32 (const float32_t * __a) return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x2 (const float16_t * __a) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x2 (const float32_t * __a) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10476,6 +10541,42 @@ vld1q_u64 (const uint64_t * __a) return 
(uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } +__extension__ extern __inline uint8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x2 (const uint8_t * __a) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x2 (const uint16_t * __a) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x2 (const uint32_t * __a) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x2 (const uint64_t * __a) +{ + union { uint64x2x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10490,6 +10591,24 @@ vld1q_p16 (const poly16_t * __a) return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); } +__extension__ extern __inline poly8x16x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x2 (const poly8_t * __a) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vld1q_p16_x2 (const poly16_t * __a) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -19782,6 +19901,15 @@ vld1q_bf16 (const bfloat16_t * __ptr) return __builtin_neon_vld1v8bf (__ptr); } +__extension__ extern __inline bfloat16x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_bf16_x2 (const bfloat16_t * __ptr) +{ + union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 94b152381236..6a8f0cb2ce1f 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -301,6 +301,7 @@ VAR1 (TERNOP, vtbx4, v8qi) VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) +VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index d213369ffc38..55049ea549f3 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4957,6 +4957,16 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_1reg")] ) +(define_insn "neon_vld1_x2<mode>" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD1))] + "TARGET_NEON" + 
"vld1.<V_sz_elem>\t%h0, %A1" + [(set_attr "type" "neon_load1_2reg")] +) + ;; The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c new file mode 100644 index 000000000000..1d31777afdf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c @@ -0,0 +1,67 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +uint8x16x2_t test_vld1q_u8_x2 (uint8_t * a) +{ + return vld1q_u8_x2 (a); +} + +uint16x8x2_t test_vld1q_u16_x2 (uint16_t * a) +{ + return vld1q_u16_x2 (a); +} + +uint32x4x2_t test_vld1q_u32_x2 (uint32_t * a) +{ + return vld1q_u32_x2 (a); +} + +uint64x2x2_t test_vld1q_u64_x2 (uint64_t * a) +{ + return vld1q_u64_x2 (a); +} + +int8x16x2_t test_vld1q_s8_x2 (int8_t * a) +{ + return vld1q_s8_x2 (a); +} + +int16x8x2_t test_vld1q_s16_x2 (int16_t * a) +{ + return vld1q_s16_x2 (a); +} + +int32x4x2_t test_vld1q_s32_x2 (int32_t * a) +{ + return vld1q_s32_x2 (a); +} + +int64x2x2_t test_vld1q_s64_x2 (int64_t * a) +{ + return vld1q_s64_x2 (a); +} + +float32x4x2_t test_vld1q_f32_x2 (float32_t * a) +{ + return vld1q_f32_x2 (a); +} + +poly8x16x2_t test_vld1q_p8_x2 (poly8_t * a) +{ + return vld1q_p8_x2 (a); +} + +poly16x8x2_t test_vld1q_p16_x2 (poly16_t * a) +{ + return vld1q_p16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ + diff --git 
a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c new file mode 100644 index 000000000000..5f6fc98640e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include "arm_neon.h" + +bfloat16x8x2_t test_vld1q_bf16_x2 (bfloat16_t * a) +{ + return vld1q_bf16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c new file mode 100644 index 000000000000..aecf491a4de8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon_fp16 } */ + +#include "arm_neon.h" + +float16x8x2_t test_vld1q_f16_x2 (float16_t * a) +{ + return vld1q_f16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ + diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c new file mode 100644 index 000000000000..04ceb5e4a247 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c @@ -0,0 +1,14 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +poly64x2x2_t test_vld1q_p64_x2 (poly64_t * a) +{ + return vld1q_p64_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ + From 2514a331835e055a963fd059dc5770e5ae500af0 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 
15:21:55 +0000 Subject: [PATCH 052/311] arm: vld1q_types_x3 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1q intrinsic for the arm port. This patch adds the _x3 variants of the vld1q intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1q_u8_x3, vld1q_u16_x3, vld1q_u32_x3, vld1q_u64_x3): New. (vld1q_s8_x3, vld1q_s16_x3, vld1q_s32_x3, vld1q_s64_x3): New. (vld1q_f16_x3, vld1q_f32_x3): New. (vld1q_p8_x3, vld1q_p16_x3, vld1q_p64_x3): New. (vld1q_bf16_x3): New. * config/arm/arm_neon_builtins.def (vld1_x3): New entries. * config/arm/neon.md (vld1_x3): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. --- gcc/config/arm/arm_neon.h | 128 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 27 ++++ .../gcc.target/arm/simd/vld1q_base_xN_1.c | 69 +++++++++- .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 6 + .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 7 +- 7 files changed, 239 insertions(+), 6 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 3eb41c6bdc83..557873ac0285 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10412,6 +10412,15 @@ vld1q_p64_x2 (const poly64_t * __a) return __rv.__i; } +__extension__ extern __inline poly64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x3 (const poly64_t * __a) +{ + union { poly64x2x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern 
__inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10477,6 +10486,42 @@ vld1q_s64_x2 (const int64_t * __a) return __rv.__i; } +__extension__ extern __inline int8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x3 (const int8_t * __a) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x3 (const int16_t * __a) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x3 (const int32_t * __a) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x3 (const int64_t * __a) +{ + union { int64x2x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10513,6 +10558,26 @@ vld1q_f32_x2 (const float32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x3 (const float16_t * __a) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = 
__builtin_neon_vld1_x3v8hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x3 (const float32_t * __a) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10577,6 +10642,42 @@ vld1q_u64_x2 (const uint64_t * __a) return __rv.__i; } +__extension__ extern __inline uint8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x3 (const uint8_t * __a) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u16_x3 (const uint16_t * __a) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x3 (const uint32_t * __a) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x3 (const uint64_t * __a) +{ + union { uint64x2x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10609,6 
+10710,24 @@ vld1q_p16_x2 (const poly16_t * __a) return __rv.__i; } +__extension__ extern __inline poly8x16x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x3 (const poly8_t * __a) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x3 (const poly16_t * __a) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -19910,6 +20029,15 @@ vld1q_bf16_x2 (const bfloat16_t * __ptr) return __rv.__i; } +__extension__ extern __inline bfloat16x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_bf16_x3 (const bfloat16_t * __ptr) +{ + union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 6a8f0cb2ce1f..a363bf18ccb7 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -302,6 +302,7 @@ VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, 
vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 55049ea549f3..b37d95f1fa07 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4967,6 +4967,33 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_2reg")] ) +(define_insn "neon_vld1_x3<mode>" + [(set (match_operand:CI 0 "s_register_operand" "=w") + (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD3A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[4]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = operands[1]; + + output_asm_insn ("vld1.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + + ops[0] = gen_rtx_REG (DImode, regno + 6); + ops[1] = gen_rtx_REG (DImode, regno + 8); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = operands[1]; + + output_asm_insn ("vld1.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops); + return ""; +} + [(set_attr "type" "neon_load1_3reg")] +) + ;; The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. 
diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c index 1d31777afdf3..bfad282751b4 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c @@ -60,8 +60,69 @@ poly16x8x2_t test_vld1q_p16_x2 (poly16_t * a) return vld1q_p16_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +uint8x16x3_t test_vld1q_u8_x3 (uint8_t * a) +{ + return vld1q_u8_x3 (a); +} +uint16x8x3_t test_vld1q_u16_x3 (uint16_t * a) +{ + return vld1q_u16_x3 (a); +} + +uint32x4x3_t test_vld1q_u32_x3 (uint32_t * a) +{ + return vld1q_u32_x3 (a); +} + +uint64x2x3_t test_vld1q_u64_x3 (uint64_t * a) +{ + return vld1q_u64_x3 (a); +} + +int8x16x3_t test_vld1q_s8_x3 (int8_t * a) +{ + return vld1q_s8_x3 (a); +} + +int16x8x3_t test_vld1q_s16_x3 (int16_t * a) +{ + return vld1q_s16_x3 (a); +} + +int32x4x3_t test_vld1q_s32_x3 (int32_t * a) +{ + return vld1q_s32_x3 (a); +} + +int64x2x3_t test_vld1q_s64_x3 (int64_t * a) +{ + return vld1q_s64_x3 (a); +} + +float32x4x3_t test_vld1q_f32_x3 (float32_t * a) +{ + return vld1q_f32_x3 (a); +} + +poly8x16x3_t test_vld1q_p8_x3 (poly8_t * a) +{ + return vld1q_p8_x3 (a); +} + +poly16x8x3_t test_vld1q_p16_x3 (poly16_t * a) +{ + return vld1q_p16_x3 (a); +} + +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, 
\[r[0-9]+\]\n} 6 } } */ + +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ + +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c index 5f6fc98640e7..4138fe951ee7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c @@ -10,4 +10,10 @@ bfloat16x8x2_t test_vld1q_bf16_x2 (bfloat16_t * a) return vld1q_bf16_x2 (a); } +bfloat16x8x3_t test_vld1q_bf16_x3 (bfloat16_t * a) +{ + return vld1q_bf16_x3 (a); +} + /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c index aecf491a4de8..01640d7cc1f6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c @@ -10,5 +10,10 @@ float16x8x2_t test_vld1q_f16_x2 (float16_t * a) return vld1q_f16_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +float16x8x3_t test_vld1q_f16_x3 (float16_t * a) +{ + return vld1q_f16_x3 (a); +} +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c index 04ceb5e4a247..ae2ab36df57c 100644 --- 
a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c @@ -10,5 +10,10 @@ poly64x2x2_t test_vld1q_p64_x2 (poly64_t * a) return vld1q_p64_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ +poly64x2x3_t test_vld1q_p64_x3 (poly64_t * a) +{ + return vld1q_p64_x3 (a); +} +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From ac827ec3e600bcb636f564876b186ee19d384a1e Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:21:56 +0000 Subject: [PATCH 053/311] arm: vld1q_types_x4 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1q intrinsic for the arm port. This patch adds the _x4 variants of the vld1q intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1q_u8_x4, vld1q_u16_x4, vld1q_u32_x4, vld1q_u64_x4): New. (vld1q_s8_x4, vld1q_s16_x4, vld1q_s32_x4, vld1q_s64_x4): New. (vld1q_f16_x4, vld1q_f32_x4): New. (vld1q_p8_x4, vld1q_p16_x4, vld1q_p64_x4): New. (vld1q_bf16_x4): New. * config/arm/arm_neon_builtins.def (vld1_x4): New entries. * config/arm/neon.md (vld1_x4): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. 
--- gcc/config/arm/arm_neon.h | 128 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 30 ++++ .../gcc.target/arm/simd/vld1q_base_xN_1.c | 59 ++++++++ .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 6 + .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 6 + .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 6 + 7 files changed, 236 insertions(+) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 557873ac0285..c03be9912f87 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10421,6 +10421,15 @@ vld1q_p64_x3 (const poly64_t * __a) return __rv.__i; } +__extension__ extern __inline poly64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p64_x4 (const poly64_t * __a) +{ + union { poly64x2x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10522,6 +10531,42 @@ vld1q_s64_x3 (const int64_t * __a) return __rv.__i; } +__extension__ extern __inline int8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s8_x4 (const int8_t * __a) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s16_x4 (const int16_t * __a) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s32_x4 (const int32_t * __a) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = 
__builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_s64_x4 (const int64_t * __a) +{ + union { int64x2x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10578,6 +10623,26 @@ vld1q_f32_x3 (const float32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f16_x4 (const float16_t * __a) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_f32_x4 (const float32_t * __a) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10678,6 +10743,42 @@ vld1q_u64_x3 (const uint64_t * __a) return __rv.__i; } +__extension__ extern __inline uint8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u8_x4 (const uint8_t * __a) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+vld1q_u16_x4 (const uint16_t * __a) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u32_x4 (const uint32_t * __a) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_u64_x4 (const uint64_t * __a) +{ + union { uint64x2x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10728,6 +10829,24 @@ vld1q_p16_x3 (const poly16_t * __a) return __rv.__i; } +__extension__ extern __inline poly8x16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p8_x4 (const poly8_t * __a) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1q_p16_x4 (const poly16_t * __a) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -20038,6 +20157,15 @@ vld1q_bf16_x3 (const bfloat16_t * __ptr) return __rv.__i; } +__extension__ extern __inline bfloat16x8x4_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vld1q_bf16_x4 (const bfloat16_t * __ptr) +{ + union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a363bf18ccb7..90dad8cf6d18 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -303,6 +303,7 @@ VAR13 (LOAD1, vld1, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index b37d95f1fa07..b8f8fd6b9280 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4994,6 +4994,36 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_3reg")] ) +(define_insn "neon_vld1_x4<mode>" + [(set (match_operand:XI 0 "s_register_operand" "=w") + (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD4A))] + "TARGET_NEON" +{ + int regno = REGNO (operands[0]); + rtx ops[5]; + ops[0] = gen_rtx_REG (DImode, regno); + ops[1] = gen_rtx_REG (DImode, regno + 2); + ops[2] = gen_rtx_REG (DImode, regno + 4); + ops[3] = gen_rtx_REG (DImode, regno + 6); + ops[4] = operands[1]; + + output_asm_insn ("vld1.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + + ops[0] = gen_rtx_REG (DImode, regno + 8); + ops[1] = gen_rtx_REG (DImode, regno + 10); + ops[2] = gen_rtx_REG (DImode, regno + 12); + ops[3] = gen_rtx_REG (DImode, regno + 14); + ops[4] = operands[1]; + + output_asm_insn 
("vld1.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops); + + return ""; +} + [(set_attr "type" "neon_load1_4reg")] +) + ;; The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c index bfad282751b4..9aae34cc18e7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c @@ -115,14 +115,73 @@ poly16x8x3_t test_vld1q_p16_x3 (poly16_t * a) return vld1q_p16_x3 (a); } +uint8x16x4_t test_vld1q_u8_x4 (uint8_t * a) +{ + return vld1q_u8_x4 (a); +} + +uint16x8x4_t test_vld1q_u16_x4 (uint16_t * a) +{ + return vld1q_u16_x4 (a); +} + +uint32x4x4_t test_vld1q_u32_x4 (uint32_t * a) +{ + return vld1q_u32_x4 (a); +} + +uint64x2x4_t test_vld1q_u64_x4 (uint64_t * a) +{ + return vld1q_u64_x4 (a); +} + +int8x16x4_t test_vld1q_s8_x4 (int8_t * a) +{ + return vld1q_s8_x4 (a); +} + +int16x8x4_t test_vld1q_s16_x4 (int16_t * a) +{ + return vld1q_s16_x4 (a); +} + +int32x4x4_t test_vld1q_s32_x4 (int32_t * a) +{ + return vld1q_s32_x4 (a); +} + +int64x2x4_t test_vld1q_s64_x4 (int64_t * a) +{ + return vld1q_s64_x4 (a); +} + +float32x4x4_t test_vld1q_f32_x4 (float32_t * a) +{ + return vld1q_f32_x4 (a); +} + +poly8x16x4_t test_vld1q_p8_x4 (poly8_t * a) +{ + return vld1q_p8_x4 (a); +} + +poly16x8x4_t test_vld1q_p16_x4 (poly16_t * a) +{ + return vld1q_p16_x4 (a); +} + /* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, 
d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c index 4138fe951ee7..fd86723f1464 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c @@ -15,5 +15,11 @@ bfloat16x8x3_t test_vld1q_bf16_x3 (bfloat16_t * a) return vld1q_bf16_x3 (a); } +bfloat16x8x4_t test_vld1q_bf16_x4 (bfloat16_t * a) +{ + return vld1q_bf16_x4 (a); +} + /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c index 01640d7cc1f6..2de3495f1dad 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c @@ -15,5 +15,11 @@ float16x8x3_t test_vld1q_f16_x3 (float16_t * a) return vld1q_f16_x3 (a); } +float16x8x4_t test_vld1q_f16_x4 (float16_t * a) +{ + return vld1q_f16_x4 (a); +} + /* { 
dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c index ae2ab36df57c..521b784e8e84 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c @@ -15,5 +15,11 @@ poly64x2x3_t test_vld1q_p64_x3 (poly64_t * a) return vld1q_p64_x3 (a); } +poly64x2x4_t test_vld1q_p64_x4 (poly64_t * a) +{ + return vld1q_p64_x4 (a); +} + /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From a69a7c7b6782c5b6f213f1f34af8dbb6541f27bb Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:28:42 +0000 Subject: [PATCH 054/311] arm: vst1_types_x2 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1 intrinsic for the arm port. This patch adds the _x2 variants of the vst1 intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1_u8_x2, vst1_u16_x2, vst1_u32_x2, vst1_u64_x2): New. (vst1_s8_x2, vst1_s16_x2, vst1_s32_x2, vst1_s64_x2): New. (vst1_f16_x2, vst1_f32_x2): New. (vst1_p8_x2, vst1_p16_x2, vst1_p64_x2): New. (vst1_bf16_x2): New. * config/arm/arm_neon_builtins.def (vst1_x2): New entries. * config/arm/neon.md (vst1_x2): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1_base_xN_1.c: Add new tests. 
* gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new tests. --- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vst1_base_xN_1.c | 67 ++++++++++ .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vst1_p64_xN_1.c | 13 ++ 7 files changed, 231 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c03be9912f87..60f1077752c6 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11242,6 +11242,14 @@ vst1_p64 (poly64_t * __a, poly64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __b) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11271,6 +11279,38 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s8_x2 (int8_t * __a, int8x8x2_t __b) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s16_x2 (int16_t * __a, int16x4x2_t __b) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s32_x2 (int32_t * __a, int32x2x2_t __b) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11287,6 +11327,24 @@ vst1_f32 (float32_t * __a, float32x2_t __b) __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f16_x2 (float16_t * __a, float16x4x2_t __b) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v4hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f32_x2 (float32_t * __a, float32x2x2_t __b) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11315,6 +11373,38 @@ vst1_u64 (uint64_t 
* __a, uint64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __b) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __b) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __b) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __b) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11329,6 +11419,22 @@ vst1_p16 (poly16_t * __a, poly16x4_t __b) __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __b) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p16_x2 (poly16_t * __a, 
poly16x4x2_t __b) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20070,6 +20176,14 @@ vst1_bf16 (bfloat16_t * __a, bfloat16x4_t __b) __builtin_neon_vst1v4bf (__a, __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __b) +{ + union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; + __builtin_neon_vst1_x2v4bf ((__builtin_neon_bf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 90dad8cf6d18..6f16bf0863c8 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -311,6 +311,7 @@ VAR10 (LOAD1, vld1_dup, VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) +VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index b8f8fd6b9280..2a7286dee52d 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5125,6 +5125,16 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1))] "TARGET_NEON") +(define_insn "neon_vst1_x2<mode>" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.<V_sz_elem>\t%h1, %A0" + [(set_attr "type" "neon_store1_2reg")] +) + (define_insn "neon_vst1<mode>" [(set (match_operand:VDQX 0 
"neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c new file mode 100644 index 000000000000..575897fa422e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -0,0 +1,67 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +void test_vst1_u8_x2 (uint8_t * ptr, uint8x8x2_t val) +{ + vst1_u8_x2 (ptr, val); +} + +void test_vst1_u16_x2 (uint16_t * ptr, uint16x4x2_t val) +{ + vst1_u16_x2 (ptr, val); +} + +void test_vst1_u32_x2 (uint32_t * ptr, uint32x2x2_t val) +{ + vst1_u32_x2 (ptr, val); +} + +void test_vst1_u64_x2 (uint64_t * ptr, uint64x1x2_t val) +{ + vst1_u64_x2 (ptr, val); +} + +void test_vst1_s8_x2 (int8_t * ptr, int8x8x2_t val) +{ + vst1_s8_x2 (ptr, val); +} + +void test_vst1_s16_x2 (int16_t * ptr, int16x4x2_t val) +{ + vst1_s16_x2 (ptr, val); +} + +void test_vst1_s32_x2 (int32_t * ptr, int32x2x2_t val) +{ + vst1_s32_x2 (ptr, val); +} + +void test_vst1_s64_x2 (int64_t * ptr, int64x1x2_t val) +{ + vst1_s64_x2 (ptr, val); +} + +void test_vst1_f32_x2 (float32_t * ptr, float32x2x2_t val) +{ + vst1_f32_x2 (ptr, val); +} + +void test_vst1_p8_x2 (poly8_t * ptr, poly8x8x2_t val) +{ + vst1_p8_x2 (ptr, val); +} + +void test_vst1_p16_x2 (poly16_t * ptr, poly16x4x2_t val) +{ + vst1_p16_x2 (ptr, val); +} + + +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c new file mode 100644 index 000000000000..213fd20ee65f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include "arm_neon.h" + +void test_vst1_bf16_x2 (bfloat16_t * ptr, bfloat16x4x2_t val) +{ + vst1_bf16_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c new file mode 100644 index 000000000000..523aec92db24 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon_fp16 } */ + +#include "arm_neon.h" + +void test_vst1_f16_x2 (float16_t * ptr, float16x4x2_t val) +{ + vst1_f16_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c new file mode 100644 index 000000000000..f590ebd7b943 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1_p64_x2 (poly64_t * ptr, poly64x1x2_t val) +{ + vst1_p64_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ \ No newline at end of file From ef07ae652c25ec04c2e3ef8cec14b0771a809861 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:28:43 
+0000 Subject: [PATCH 055/311] arm: vst1_types_x3 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1 intrinsic for the arm port. This patch adds the _x3 variants of the vst1 intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1_u8_x3, vst1_u16_x3, vst1_u32_x3, vst1_u64_x3): New. (vst1_s8_x3, vst1_s16_x3, vst1_s32_x3, vst1_s64_x3): New. (vst1_f16_x3, vst1_f32_x3): New. (vst1_p8_x3, vst1_p16_x3, vst1_p64_x3): New. (vst1_bf16_x3): New. * config/arm/arm_neon_builtins.def (vst1_x3): New entries. * config/arm/neon.md (vst1_x3): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. --- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vst1_base_xN_1.c | 63 +++++++++- .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_p64_xN_1.c | 7 +- 7 files changed, 202 insertions(+), 7 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 60f1077752c6..e76be3516d95 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11250,6 +11250,14 @@ vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __b) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern 
__inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11311,6 +11319,38 @@ vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s8_x3 (int8_t * __a, int8x8x3_t __b) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s16_x3 (int16_t * __a, int16x4x3_t __b) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s32_x3 (int32_t * __a, int32x2x3_t __b) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s64_x3 (int64_t * __a, int64x1x3_t __b) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11345,6 +11385,24 @@ vst1_f32_x2 (float32_t * __a, float32x2x2_t __b) __builtin_neon_vst1_x2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f16_x3 (float16_t * __a, float16x4x3_t __b) +{ + union { float16x4x3_t __i; 
__builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v4hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f32_x3 (float32_t * __a, float32x2x3_t __b) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11405,6 +11463,38 @@ vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __b) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __b) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __b) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __b) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, 
poly8x8_t __b) @@ -11435,6 +11525,22 @@ vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __b) __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __b) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __b) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20184,6 +20290,14 @@ vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __b) __builtin_neon_vst1_x2v4bf ((__builtin_neon_bf *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __b) +{ + union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; + __builtin_neon_vst1_x3v4bf ((__builtin_neon_bf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 6f16bf0863c8..8b104b1a700b 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -312,6 +312,7 @@ VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, 
v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 2a7286dee52d..5185434d6d93 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5135,6 +5135,16 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_2reg")] ) +(define_insn "neon_vst1_x3<mode>" + [(set (match_operand:EI 0 "neon_struct_operand" "=Um") + (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.<V_sz_elem>\t%h1, %A0" + [(set_attr "type" "neon_store1_3reg")] +) + (define_insn "neon_vst1<mode>" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c index 575897fa422e..5f820a6a496e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -60,8 +60,63 @@ void test_vst1_p16_x2 (poly16_t * ptr, poly16x4x2_t val) vst1_p16_x2 (ptr, val); } +void test_vst1_u8_x3 (uint8_t * ptr, uint8x8x3_t val) +{ + vst1_u8_x3 (ptr, val); +} -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +void test_vst1_u16_x3 (uint16_t * ptr, uint16x4x3_t val) +{ + vst1_u16_x3 (ptr, val); +} + +void test_vst1_u32_x3 (uint32_t * ptr, uint32x2x3_t val) +{ + vst1_u32_x3 (ptr, val); +} + +void test_vst1_u64_x3 (uint64_t * ptr, uint64x1x3_t val) +{ + vst1_u64_x3 (ptr, val); +} + +void test_vst1_s8_x3 (int8_t * ptr, int8x8x3_t val) +{ + vst1_s8_x3 (ptr, val); +} + +void test_vst1_s16_x3 (int16_t * ptr, int16x4x3_t val) +{ + 
vst1_s16_x3 (ptr, val); +} + +void test_vst1_s32_x3 (int32_t * ptr, int32x2x3_t val) +{ + vst1_s32_x3 (ptr, val); +} + +void test_vst1_s64_x3 (int64_t * ptr, int64x1x3_t val) +{ + vst1_s64_x3 (ptr, val); +} + +void test_vst1_f32_x3 (float32_t * ptr, float32x2x3_t val) +{ + vst1_f32_x3 (ptr, val); +} + +void test_vst1_p8_x3 (poly8_t * ptr, poly8x8x3_t val) +{ + vst1_p8_x3 (ptr, val); +} + +void test_vst1_p16_x3 (poly16_t * ptr, poly16x4x3_t val) +{ + vst1_p16_x3 (ptr, val); +} + + +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c index 213fd20ee65f..a3a00ead4682 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -10,4 +10,9 @@ void test_vst1_bf16_x2 (bfloat16_t * ptr, bfloat16x4x2_t val) vst1_bf16_x2 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +void test_vst1_bf16_x3 (bfloat16_t * ptr, bfloat16x4x3_t val) +{ + vst1_bf16_x3 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c index 523aec92db24..0a6863e24c6c 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -10,4 +10,9 @@ void test_vst1_f16_x2 (float16_t * ptr, float16x4x2_t val) vst1_f16_x2 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +void 
test_vst1_f16_x3 (float16_t * ptr, float16x4x3_t val) +{ + vst1_f16_x3 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c index f590ebd7b943..5dbd6049bc9f 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -10,4 +10,9 @@ void test_vst1_p64_x2 (poly64_t * ptr, poly64x1x2_t val) vst1_p64_x2 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ \ No newline at end of file +void test_vst1_p64_x3 (poly64_t * ptr, poly64x1x3_t val) +{ + vst1_p64_x3 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file From 2f48d846c794ba091b266133f73717361096d454 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:28:44 +0000 Subject: [PATCH 056/311] arm: vst1_types_x4 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1 intrinsic for the arm port. This patch adds the _x4 variants of the vst1 intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1_u8_x4, vst1_u16_x4, vst1_u32_x4, vst1_u64_x4): New. (vst1_s8_x4, vst1_s16_x4, vst1_s32_x4, vst1_s64_x4): New. (vst1_f16_x4, vst1_f32_x4): New. (vst1_p8_x4, vst1_p16_x4, vst1_p64_x4): New. (vst1_bf16_x4): New. * config/arm/arm_neon_builtins.def (vst1_x4): New entries. * config/arm/neon.md (vst1_x4): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. 
--- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vst1_base_xN_1.c | 62 +++++++++- .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 6 +- .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_p64_xN_1.c | 7 +- 7 files changed, 200 insertions(+), 7 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index e76be3516d95..c9bdda39663a 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11258,6 +11258,14 @@ vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __b) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11351,6 +11359,38 @@ vst1_s64_x3 (int64_t * __a, int64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s8_x4 (int8_t * __a, int8x8x4_t __b) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s16_x4 (int16_t * __a, int16x4x4_t __b) +{ + union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s32_x4 (int32_t * __a, int32x2x4_t __b) +{ + union { 
int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_s64_x4 (int64_t * __a, int64x1x4_t __b) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11403,6 +11443,24 @@ vst1_f32_x3 (float32_t * __a, float32x2x3_t __b) __builtin_neon_vst1_x3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f16_x4 (float16_t * __a, float16x4x4_t __b) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v4hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_f32_x4 (float32_t * __a, float32x2x4_t __b) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v2sf ((__builtin_neon_sf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11495,6 +11553,38 @@ vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __b) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + 
+__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __b) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __b) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __b) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11541,6 +11631,22 @@ vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __b) __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __b) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __b) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); +} + #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20298,6 +20404,14 @@ vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __b) __builtin_neon_vst1_x3v4bf ((__builtin_neon_bf 
*) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __b) +{ + union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1_x4v4bf ((__builtin_neon_bf *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 8b104b1a700b..a4056ec24d96 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -313,6 +313,7 @@ VAR14 (STORE1, vst1, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 5185434d6d93..dfbaf5a6dc68 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5145,6 +5145,16 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) +(define_insn "neon_vst1_x4<mode>" + [(set (match_operand:OI 0 "neon_struct_operand" "=Um") + (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST1))] + "TARGET_NEON" + "vst1.<V_sz_elem>\t%h1, %A0" + [(set_attr "type" "neon_store1_4reg")] +) + (define_insn "neon_vst1<mode>" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c index 5f820a6a496e..04ca6583552f 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ 
b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -115,8 +115,62 @@ void test_vst1_p16_x3 (poly16_t * ptr, poly16x4x3_t val) vst1_p16_x3 (ptr, val); } +void test_vst1_u8_x4 (uint8_t * ptr, uint8x8x4_t val) +{ + vst1_u8_x4 (ptr, val); +} -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ +void test_vst1_u16_x4 (uint16_t * ptr, uint16x4x4_t val) +{ + vst1_u16_x4 (ptr, val); +} + +void test_vst1_u32_x4 (uint32_t * ptr, uint32x2x4_t val) +{ + vst1_u32_x4 (ptr, val); +} + +void test_vst1_u64_x4 (uint64_t * ptr, uint64x1x4_t val) +{ + vst1_u64_x4 (ptr, val); +} + +void test_vst1_s8_x4 (int8_t * ptr, int8x8x4_t val) +{ + vst1_s8_x4 (ptr, val); +} + +void test_vst1_s16_x4 (int16_t * ptr, int16x4x4_t val) +{ + vst1_s16_x4 (ptr, val); +} + +void test_vst1_s32_x4 (int32_t * ptr, int32x2x4_t val) +{ + vst1_s32_x4 (ptr, val); +} + +void test_vst1_s64_x4 (int64_t * ptr, int64x1x4_t val) +{ + vst1_s64_x4 (ptr, val); +} + +void test_vst1_f32_x4 (float32_t * ptr, float32x2x4_t val) +{ + vst1_f32_x4 (ptr, val); +} + +void test_vst1_p8_x4 (poly8_t * ptr, poly8x8x4_t val) +{ + vst1_p8_x4 (ptr, val); +} + +void test_vst1_p16_x4 (poly16_t * ptr, poly16x4x4_t val) +{ + vst1_p16_x4 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 6 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c index a3a00ead4682..d919c7d060dc 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -15,4 +15,8 @@ void test_vst1_bf16_x3 (bfloat16_t * ptr, bfloat16x4x3_t val) vst1_bf16_x3 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +void test_vst1_bf16_x4 (bfloat16_t * ptr, bfloat16x4x4_t val) +{ + vst1_bf16_x4 (ptr, val); +} +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c index 0a6863e24c6c..3d1d1eb7ad14 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -15,4 +15,9 @@ void test_vst1_f16_x3 (float16_t * ptr, float16x4x3_t val) vst1_f16_x3 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +void test_vst1_f16_x4 (float16_t * ptr, float16x4x4_t val) +{ + vst1_f16_x4 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c index 5dbd6049bc9f..62912143481a 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -15,4 +15,9 @@ void test_vst1_p64_x3 (poly64_t * ptr, poly64x1x3_t val) vst1_p64_x3 (ptr, val); } -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file +void test_vst1_p64_x4 (poly64_t * ptr, poly64x1x4_t val) +{ + vst1_p64_x4 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 3 } } */ \ No newline at end of file From 
2cd0d0261ef9d0e13e20407f131f32dcb67fcdd3 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:36:50 +0000 Subject: [PATCH 057/311] arm: vst1q_types_x2 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1q intrinsic for the arm port. This patch adds the _x2 variants of the vst1q intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1q_u8_x2, vst1q_u16_x2, vst1q_u32_x2, vst1q_u64_x2): New. (vst1q_s8_x2, vst1q_s16_x2, vst1q_s32_x2, vst1q_s64_x2): New. (vst1q_f16_x2, vst1q_f32_x2): New. (vst1q_p8_x2, vst1q_p16_x2, vst1q_p64_x2): New. (vst1q_bf16_x2): New. * config/arm/arm_neon_builtins.def (vst1q_x2): New entries. * config/arm/neon.md (neon_vst1<VMEMX2_q>_x2<mode>): Updated from neon_vst1_x2<mode>. * config/arm/iterators.md (VMEMX2): New mode iterator. (VMEMX2_q): New mode attribute. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. 
--- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/iterators.md | 6 + gcc/config/arm/neon.md | 6 +- .../gcc.target/arm/simd/vst1q_base_xN_1.c | 69 +++++++++++ .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 13 ++ 8 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c9bdda39663a..1c447b6d42fa 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11327,6 +11327,38 @@ vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s8_x2 (int8_t * __a, int8x16x2_t __b) +{ + union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s16_x2 (int16_t * __a, int16x8x2_t __b) +{ + union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s32_x2 (int32_t * __a, int32x4x2_t __b) +{ + union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +vst1q_s64_x2 (int64_t * __a, int64x2x2_t __b) +{ + union { int64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, int8x8x3_t __b) @@ -11656,6 +11688,14 @@ vst1q_p64 (poly64_t * __a, poly64x2_t __b) __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __b) +{ + union { poly64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11701,6 +11741,24 @@ vst1q_f32 (float32_t * __a, float32x4_t __b) __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f16_x2 (float16_t * __a, float16x8x2_t __b) +{ + union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v8hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f32_x2 (float32_t * __a, float32x4x2_t __b) +{ + union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v4sf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11729,6 +11787,38 @@ vst1q_u64 (uint64_t * __a, uint64x2_t __b) __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); } 
+__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __b) +{ + union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __b) +{ + union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __b) +{ + union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __b) +{ + union { uint64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -11743,6 +11833,22 @@ vst1q_p16 (poly16_t * __a, poly16x8_t __b) __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __b) +{ + union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __b) +{ + union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = 
{ __b }; + __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20419,6 +20525,14 @@ vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) __builtin_neon_vst1v8bf (__a, __b); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __b) +{ + union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; + __builtin_neon_vst1q_x2v8bf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a4056ec24d96..696ed72678aa 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -312,6 +312,7 @@ VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index a98035381016..6c5a80d93483 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -141,6 +141,9 @@ ;; Opaque structure types used in table lookups (except vtbl1/vtbx1). (define_mode_iterator VTAB [TI EI OI]) +;; Opaque structure types for x2 variants of VSTR1/VSTR1Q or VLD1/VLD1Q. +(define_mode_iterator VMEMX2 [TI OI]) + ;; Widenable modes. (define_mode_iterator VW [V8QI V4HI V2SI]) @@ -1533,6 +1536,9 @@ ;; vtbl suffix for NEON vector modes. 
(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")]) +;; Suffix for x2 variants of vld1 and vst1. +(define_mode_attr VMEMX2_q [(TI "") (OI "q")]) + ;; fp16 or bf16 marker for 16-bit float modes. (define_mode_attr fporbf [(HF "fp16") (BF "bf16")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index dfbaf5a6dc68..ce525ccbc392 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5125,9 +5125,9 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1))] "TARGET_NEON") -(define_insn "neon_vst1_x2" - [(set (match_operand:TI 0 "neon_struct_operand" "=Um") - (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") +(define_insn "neon_vst1_x2" + [(set (match_operand:VMEMX2 0 "neon_struct_operand" "=Um") + (unspec:VMEMX2 [(match_operand:VMEMX2 1 "s_register_operand" "w") (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST1))] "TARGET_NEON" diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c new file mode 100644 index 000000000000..4a17a80974b2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c @@ -0,0 +1,69 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +void test_vst1q_u8_x2 (uint8_t * ptr, uint8x16x2_t val) +{ + vst1q_u8_x2 (ptr, val); +} + +void test_vst1q_u16_x2 (uint16_t * ptr, uint16x8x2_t val) +{ + vst1q_u16_x2 (ptr, val); +} + +void test_vst1q_u32_x2 (uint32_t * ptr, uint32x4x2_t val) +{ + vst1q_u32_x2 (ptr, val); +} + +void test_vst1q_u64_x2 (uint64_t * ptr, uint64x2x2_t val) +{ + vst1q_u64_x2 (ptr, val); +} + +void test_vst1q_s8_x2 (int8_t * ptr, int8x16x2_t val) +{ + vst1q_s8_x2 (ptr, val); +} + +void test_vst1q_s16_x2 (int16_t * ptr, int16x8x2_t val) +{ + vst1q_s16_x2 (ptr, val); +} + +void test_vst1q_s32_x2 (int32_t * ptr, int32x4x2_t val) +{ + vst1q_s32_x2 (ptr, val); +} + +void test_vst1q_s64_x2 (int64_t * ptr, 
int64x2x2_t val) +{ + vst1q_s64_x2 (ptr, val); +} + +void test_vst1q_f32_x2 (float32_t * ptr, float32x4x2_t val) +{ + vst1q_f32_x2 (ptr, val); +} + +void test_vst1q_p8_x2 (poly8_t * ptr, poly8x16x2_t val) +{ + vst1q_p8_x2 (ptr, val); +} + +void test_vst1q_p16_x2 (poly16_t * ptr, poly16x8x2_t val) +{ + vst1q_p16_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ + +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ + +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c new file mode 100644 index 000000000000..2a4579f0aaef --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include "arm_neon.h" + +void test_vst1q_bf16_x2 (bfloat16_t * ptr, bfloat16x8x2_t val) +{ + vst1q_bf16_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c new file mode 100644 index 000000000000..61a7e558c48d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon_fp16 } */ + +#include "arm_neon.h" + +void test_vst1q_f16_x2 (float16_t * ptr, float16x8x2_t val) +{ + vst1q_f16_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 
1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c new file mode 100644 index 000000000000..82f3dad293c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +void test_vst1q_p64_x2 (poly64_t * ptr, poly64x2x2_t val) +{ + vst1q_p64_x2 (ptr, val); +} + +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ From 2d58d53c9e0eed83faa9254f8d3ec0ddd54812d8 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:36:51 +0000 Subject: [PATCH 058/311] arm: vst1q_types_x3 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1q intrinsic for the arm port. This patch adds the _x3 variants of the vst1q intrinsic. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1q_u8_x3, vst1q_u16_x3, vst1q_u32_x3, vst1q_u64_x3): New. (vst1q_s8_x3, vst1q_s16_x3, vst1q_s32_x3, vst1q_s64_x3): New. (vst1q_f16_x3, vst1q_f32_x3): New. (vst1q_p8_x3, vst1q_p16_x3, vst1q_p64_x3): New. (vst1q_bf16_x3): New. * config/arm/arm_neon_builtins.def (vst1q_x3): New entries. * config/arm/neon.md (neon_vst1q_x3): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. 
--- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 24 ++++ .../gcc.target/arm/simd/vst1q_base_xN_1.c | 60 +++++++++ .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 6 + .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 6 + .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 6 + 7 files changed, 217 insertions(+) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 1c447b6d42fa..5cec7dd876f0 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11359,6 +11359,38 @@ vst1q_s64_x2 (int64_t * __a, int64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s8_x3 (int8_t * __a, int8x16x3_t __b) +{ + union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s16_x3 (int16_t * __a, int16x8x3_t __b) +{ + union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s32_x3 (int32_t * __a, int32x4x3_t __b) +{ + union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s64_x3 (int64_t * __a, int64x2x3_t __b) +{ + union { int64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, 
int8x8x3_t __b) @@ -11696,6 +11728,14 @@ vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __b) +{ + union { poly64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11759,6 +11799,24 @@ vst1q_f32_x2 (float32_t * __a, float32x4x2_t __b) __builtin_neon_vst1q_x2v4sf (__a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f16_x3 (float16_t * __a, float16x8x3_t __b) +{ + union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v8hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f32_x3 (float32_t * __a, float32x4x3_t __b) +{ + union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v4sf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11819,6 +11877,38 @@ vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __b) +{ + union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __b) +{ + union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __b) +{ + union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __b) +{ + union { uint64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -11849,6 +11939,22 @@ vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __b) __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __b) +{ + union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __b) +{ + union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20533,6 +20639,14 @@ vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __b) __builtin_neon_vst1q_x2v8bf 
(__a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __b) +{ + union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; + __builtin_neon_vst1q_x3v8bf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 696ed72678aa..cc014f9b89ed 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -314,6 +314,7 @@ VAR14 (STORE1, vst1, VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (STORE1, vst1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ce525ccbc392..010cc579f23a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5145,6 +5145,30 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) +(define_insn "neon_vst1q_x3" + [(set (match_operand:CI 0 "neon_struct_operand" "=Um") + (unspec:CI [(match_operand:CI 1 "s_register_operand" "w") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST1))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[4]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + output_asm_insn ("vst1.\t{%P1, %P2, %P3}, %A0", ops); + + ops[1] = gen_rtx_REG (DImode, regno + 6); + ops[2] = gen_rtx_REG (DImode, regno + 8); + ops[3] = 
gen_rtx_REG (DImode, regno + 10); + output_asm_insn ("vst1.\t{%P1, %P2, %P3}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store1_3reg")] +) + (define_insn "neon_vst1_x4" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c index 4a17a80974b2..838da09fee7a 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c @@ -60,10 +60,70 @@ void test_vst1q_p16_x2 (poly16_t * ptr, poly16x8x2_t val) vst1q_p16_x2 (ptr, val); } +void test_vst1q_u8_x3 (uint8_t * ptr, uint8x16x3_t val) +{ + vst1q_u8_x3 (ptr, val); +} + +void test_vst1q_u16_x3 (uint16_t * ptr, uint16x8x3_t val) +{ + vst1q_u16_x3 (ptr, val); +} + +void test_vst1q_u32_x3 (uint32_t * ptr, uint32x4x3_t val) +{ + vst1q_u32_x3 (ptr, val); +} + +void test_vst1q_u64_x3 (uint64_t * ptr, uint64x2x3_t val) +{ + vst1q_u64_x3 (ptr, val); +} + +void test_vst1q_s8_x3 (int8_t * ptr, int8x16x3_t val) +{ + vst1q_s8_x3 (ptr, val); +} + +void test_vst1q_s16_x3 (int16_t * ptr, int16x8x3_t val) +{ + vst1q_s16_x3 (ptr, val); +} + +void test_vst1q_s32_x3 (int32_t * ptr, int32x4x3_t val) +{ + vst1q_s32_x3 (ptr, val); +} + +void test_vst1q_s64_x3 (int64_t * ptr, int64x2x3_t val) +{ + vst1q_s64_x3 (ptr, val); +} + +void test_vst1q_f32_x3 (float32_t * ptr, float32x4x3_t val) +{ + vst1q_f32_x3 (ptr, val); +} + +void test_vst1q_p8_x3 (poly8_t * ptr, poly8x16x3_t val) +{ + vst1q_p8_x3 (ptr, val); +} + +void test_vst1q_p16_x3 (poly16_t * ptr, poly16x8x3_t val) +{ + vst1q_p16_x3 (ptr, val); +} + + /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { 
scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c index 2a4579f0aaef..2593c31c7561 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c @@ -10,4 +10,10 @@ void test_vst1q_bf16_x2 (bfloat16_t * ptr, bfloat16x8x2_t val) vst1q_bf16_x2 (ptr, val); } +void test_vst1q_bf16_x3 (bfloat16_t * ptr, bfloat16x8x3_t val) +{ + vst1q_bf16_x3 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c index 61a7e558c48d..28e949b557a9 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c @@ -10,4 +10,10 @@ void test_vst1q_f16_x2 (float16_t * ptr, float16x8x2_t val) vst1q_f16_x2 (ptr, val); } +void test_vst1q_f16_x3 (float16_t * ptr, float16x8x3_t val) +{ + vst1q_f16_x3 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c index 
82f3dad293c6..7878d936b9f6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c @@ -10,4 +10,10 @@ void test_vst1q_p64_x2 (poly64_t * ptr, poly64x2x2_t val) vst1q_p64_x2 (ptr, val); } +void test_vst1q_p64_x3 (poly64_t * ptr, poly64x2x3_t val) +{ + vst1q_p64_x3 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 4ad77f883c178679f1dbb3a5603f811e022080bb Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:36:52 +0000 Subject: [PATCH 059/311] arm: vst1q_types_x4 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vst1q intrinsic for the arm port. This patch adds the _x4 variants of the vst1q intrinsic. ACLE: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vst1q_u8_x4, vst1q_u16_x4, vst1q_u32_x4, vst1q_u64_x4): New. (vst1q_s8_x4, vst1q_s16_x4, vst1q_s32_x4, vst1q_s64_x4): New. (vst1q_f16_x4, vst1q_f32_x4): New. (vst1q_p8_x4, vst1q_p16_x4, vst1q_p64_x4): New. (vst1q_bf16_x4): New. * config/arm/arm_neon_builtins.def (vst1q_x4): New entries. * config/arm/neon.md (neon_vst1q_x4): New. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. 
--- gcc/config/arm/arm_neon.h | 114 ++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 26 ++++ .../gcc.target/arm/simd/vst1q_base_xN_1.c | 59 +++++++++ .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 8 +- .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 6 + .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 6 + 7 files changed, 219 insertions(+), 1 deletion(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 5cec7dd876f0..af1f747f2622 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11391,6 +11391,38 @@ vst1q_s64_x3 (int64_t * __a, int64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s8_x4 (int8_t * __a, int8x16x4_t __b) +{ + union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s16_x4 (int16_t * __a, int16x8x4_t __b) +{ + union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s32_x4 (int32_t * __a, int32x4x4_t __b) +{ + union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_s64_x4 (int64_t * __a, int64x2x4_t __b) +{ + union { int64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 
(int8_t * __a, int8x8x3_t __b) @@ -11736,6 +11768,14 @@ vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __b) +{ + union { poly64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11817,6 +11857,24 @@ vst1q_f32_x3 (float32_t * __a, float32x4x3_t __b) __builtin_neon_vst1q_x3v4sf (__a, __bu.__o); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f16_x4 (float16_t * __a, float16x8x4_t __b) +{ + union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hf (__a, __bu.__o); +} +#endif + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_f32_x4 (float32_t * __a, float32x4x4_t __b) +{ + union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4sf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11909,6 +11967,38 @@ vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __b) +{ + union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void 
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __b) +{ + union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __b) +{ + union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __b) +{ + union { uint64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -11955,6 +12045,22 @@ vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __b) __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __b) +{ + union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t __b) +{ + union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20647,6 +20753,14 @@ vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __b) 
__builtin_neon_vst1q_x3v8bf (__a, __bu.__o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __b) +{ + union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; + __builtin_neon_vst1q_x4v8bf (__a, __bu.__o); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index cc014f9b89ed..55e097227485 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -316,6 +316,7 @@ VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (STORE1, vst1q_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 010cc579f23a..e069ceb651c9 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5169,6 +5169,32 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) +(define_insn "neon_vst1q_x4" + [(set (match_operand:XI 0 "neon_struct_operand" "=Um") + (unspec:XI [(match_operand:XI 1 "s_register_operand" "w") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VST1))] + "TARGET_NEON" +{ + int regno = REGNO (operands[1]); + rtx ops[5]; + ops[0] = operands[0]; + ops[1] = gen_rtx_REG (DImode, regno); + ops[2] = gen_rtx_REG (DImode, regno + 2); + ops[3] = gen_rtx_REG (DImode, regno + 4); + ops[4] = gen_rtx_REG (DImode, regno + 6); + output_asm_insn ("vst1.\t{%P1, %P2, %P3, %P4}, %A0", ops); + + ops[1] = gen_rtx_REG 
(DImode, regno + 8); + ops[2] = gen_rtx_REG (DImode, regno + 10); + ops[3] = gen_rtx_REG (DImode, regno + 12); + ops[4] = gen_rtx_REG (DImode, regno + 14); + output_asm_insn ("vst1.\t{%P1, %P2, %P3, %P4}, %A0", ops); + return ""; +} + [(set_attr "type" "neon_store1_4reg")] +) + (define_insn "neon_vst1_x4" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c index 838da09fee7a..5a639560de6e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c @@ -115,15 +115,74 @@ void test_vst1q_p16_x3 (poly16_t * ptr, poly16x8x3_t val) vst1q_p16_x3 (ptr, val); } +void test_vst1q_u8_x4 (uint8_t * ptr, uint8x16x4_t val) +{ + vst1q_u8_x4 (ptr, val); +} + +void test_vst1q_u16_x4 (uint16_t * ptr, uint16x8x4_t val) +{ + vst1q_u16_x4 (ptr, val); +} + +void test_vst1q_u32_x4 (uint32_t * ptr, uint32x4x4_t val) +{ + vst1q_u32_x4 (ptr, val); +} + +void test_vst1q_u64_x4 (uint64_t * ptr, uint64x2x4_t val) +{ + vst1q_u64_x4 (ptr, val); +} + +void test_vst1q_s8_x4 (int8_t * ptr, int8x16x4_t val) +{ + vst1q_s8_x4 (ptr, val); +} + +void test_vst1q_s16_x4 (int16_t * ptr, int16x8x4_t val) +{ + vst1q_s16_x4 (ptr, val); +} + +void test_vst1q_s32_x4 (int32_t * ptr, int32x4x4_t val) +{ + vst1q_s32_x4 (ptr, val); +} + +void test_vst1q_s64_x4 (int64_t * ptr, int64x2x4_t val) +{ + vst1q_s64_x4 (ptr, val); +} + +void test_vst1q_f32_x4 (float32_t * ptr, float32x4x4_t val) +{ + vst1q_f32_x4 (ptr, val); +} + +void test_vst1q_p8_x4 (poly8_t * ptr, poly8x16x4_t val) +{ + vst1q_p8_x4 (ptr, val); +} + +void test_vst1q_p16_x4 (poly16_t * ptr, poly16x8x4_t val) +{ + vst1q_p16_x4 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, 
\[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c index 2593c31c7561..84fa8509db8e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c @@ -15,5 +15,11 @@ void test_vst1q_bf16_x3 (bfloat16_t * ptr, bfloat16x8x3_t val) vst1q_bf16_x3 (ptr, val); } +void test_vst1q_bf16_x4 (bfloat16_t * ptr, bfloat16x8x4_t val) +{ + vst1q_bf16_x4 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline 
at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c index 28e949b557a9..5b13edf99987 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c @@ -15,5 +15,11 @@ void test_vst1q_f16_x3 (float16_t * ptr, float16x8x3_t val) vst1q_f16_x3 (ptr, val); } +void test_vst1q_f16_x4 (float16_t * ptr, float16x8x4_t val) +{ + vst1q_f16_x4 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c index 7878d936b9f6..f49917d5ec8b 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c @@ -15,5 +15,11 @@ void test_vst1q_p64_x3 (poly64_t * ptr, poly64x2x3_t val) vst1q_p64_x3 (ptr, val); } +void test_vst1q_p64_x4 (poly64_t * ptr, poly64x2x4_t val) +{ + vst1q_p64_x4 (ptr, val); +} + /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 8fff3f065277f13176c320f22c4ed766a82c5d8e Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:41:04 +0000 Subject: [PATCH 060/311] arm: vld1_types_x2 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1 intrinsic for the arm port. This patch adds the _x2 variants of the vld1 intrinsic. 
The previous vld1_x2 builtin has been renamed to vld1q_x2 to reflect that it operates on 4-word-length (128-bit, Q-register) types. vld1_x2 is now only for 2-word-length (64-bit, D-register) types. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1_u8_x2, vld1_u16_x2, vld1_u32_x2, vld1_u64_x2): New. (vld1_s8_x2, vld1_s16_x2, vld1_s32_x2, vld1_s64_x2): New. (vld1_f16_x2, vld1_f32_x2): New. (vld1_p8_x2, vld1_p16_x2, vld1_p64_x2): New. (vld1_bf16_x2): New. (vld1q_types_x2): Updated to use vld1q_x2 from arm_neon_builtins.def. * config/arm/arm_neon_builtins.def (vld1_x2): Updated entries to the 64-bit modes. (vld1q_x2): New entries, taken over from the old vld1_x2. * config/arm/neon.md (neon_vld1_x2): Generalized to cover both D- and Q-register modes (operands now use VMEMX2 instead of OI, and the dummy unspec iterates over VDQX instead of VQXBF). gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. 
--- gcc/config/arm/arm_neon.h | 156 ++++++++++++++++-- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 +- .../gcc.target/arm/simd/vld1_base_xN_1.c | 66 ++++++++ .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 13 ++ .../gcc.target/arm/simd/vld1_p64_xN_1.c | 13 ++ 7 files changed, 254 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c create mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index af1f747f2622..669b8fffb405 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10307,6 +10307,15 @@ vld1_p64 (const poly64_t * __a) return (poly64x1_t) { *__a }; } +__extension__ extern __inline poly64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x2 (const poly64_t * __a) +{ + union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10336,6 +10345,42 @@ vld1_s64 (const int64_t * __a) return (int64x1_t) { *__a }; } +__extension__ extern __inline int8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x2 (const int8_t * __a) +{ + union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x2 (const int16_t * __a) +{ + union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4hi 
((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x2 (const int32_t * __a) +{ + union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x2 (const int64_t * __a) +{ + union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10352,6 +10397,26 @@ vld1_f32 (const float32_t * __a) return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x2 (const float16_t * __a) +{ + union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x2 (const float32_t * __a) +{ + union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10380,6 +10445,42 @@ vld1_u64 (const uint64_t * __a) return (uint64x1_t) { *__a }; } +__extension__ extern __inline uint8x8x2_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vld1_u8_x2 (const uint8_t * __a) +{ + union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x2 (const uint16_t * __a) +{ + union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x2x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x2 (const uint32_t * __a) +{ + union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x1x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x2 (const uint64_t * __a) +{ + union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10394,6 +10495,24 @@ vld1_p16 (const poly16_t * __a) return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); } +__extension__ extern __inline poly8x8x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x2 (const poly8_t * __a) +{ + union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x2 (const poly16_t * __a) +{ + union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4hi ((const 
__builtin_neon_hi *) __a); + return __rv.__i; +} + #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10408,7 +10527,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x2 (const poly64_t * __a) { union { poly64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10464,7 +10583,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x2 (const int8_t * __a) { union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10473,7 +10592,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x2 (const int16_t * __a) { union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10482,7 +10601,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x2 (const int32_t * __a) { union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x2v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10491,7 +10610,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x2 (const int64_t * __a) { union { int64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10589,7 +10708,7 @@ __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x2 (const float16_t * __a) { union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hf (__a); + __rv.__o = __builtin_neon_vld1q_x2v8hf (__a); return __rv.__i; } #endif @@ -10599,7 +10718,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x2 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1q_x2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10676,7 +10795,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x2 (const uint8_t * __a) { union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10685,7 +10804,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x2 (const uint16_t * __a) { union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10694,7 +10813,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x2 (const uint32_t * __a) { union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x2v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10703,7 +10822,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x2 (const uint64_t * __a) { union { uint64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = 
__builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10798,7 +10917,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x2 (const poly8_t * __a) { union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10807,7 +10926,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x2 (const poly16_t * __a) { union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -20816,6 +20935,15 @@ vld1_bf16 (bfloat16_t const * __ptr) return __builtin_neon_vld1v4bf (__ptr); } +__extension__ extern __inline bfloat16x4x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_bf16_x2 (const bfloat16_t * __ptr) +{ + union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv; + __rv.__o = __builtin_neon_vld1_x2v4bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -20828,7 +20956,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x2 (const bfloat16_t * __ptr) { union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1q_x2v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 55e097227485..07750c03c087 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -301,7 +301,8 @@ VAR1 (TERNOP, vtbx4, v8qi) 
VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) -VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e069ceb651c9..75add42777d8 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4957,11 +4957,11 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_1reg")] ) -(define_insn "neon_vld1_x2" - [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD1))] +(define_insn "neon_vld1_x2" + [(set (match_operand:VMEMX2 0 "s_register_operand" "=w") + (unspec:VMEMX2 [(match_operand:VMEMX2 1 "neon_struct_operand" "Um") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD1))] "TARGET_NEON" "vld1.\t%h0, %A1" [(set_attr "type" "neon_load1_2reg")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c new file mode 100644 index 000000000000..6b0e78d94d7c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c @@ -0,0 +1,66 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" + +uint8x8x2_t test_vld1_u8_x2 (uint8_t * a) +{ + return vld1_u8_x2 (a); +} + +uint16x4x2_t test_vld1_u16_x2 (uint16_t * a) +{ + return vld1_u16_x2 (a); +} + +uint32x2x2_t test_vld1_u32_x2 (uint32_t * a) +{ + return vld1_u32_x2 (a); +} + +uint64x1x2_t test_vld1_u64_x2 (uint64_t * a) +{ + return vld1_u64_x2 (a); +} + +int8x8x2_t test_vld1_s8_x2 
(int8_t * a) +{ + return vld1_s8_x2 (a); +} + +int16x4x2_t test_vld1_s16_x2 (int16_t * a) +{ + return vld1_s16_x2 (a); +} + +int32x2x2_t test_vld1_s32_x2 (int32_t * a) +{ + return vld1_s32_x2 (a); +} + +int64x1x2_t test_vld1_s64_x2 (int64_t * a) +{ + return vld1_s64_x2 (a); +} + +float32x2x2_t test_vld1_f32_x2 (float32_t * a) +{ + return vld1_f32_x2 (a); +} + +poly8x8x2_t test_vld1_p8_x2 (poly8_t * a) +{ + return vld1_p8_x2 (a); +} + +poly16x4x2_t test_vld1_p16_x2 (poly16_t * a) +{ + return vld1_p16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c new file mode 100644 index 000000000000..3ec7a5e19864 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ + +#include "arm_neon.h" + +bfloat16x4x2_t test_vld1_bf16_x2 (bfloat16_t * a) +{ + return vld1_bf16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c new file mode 100644 index 000000000000..c0e5ea491424 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_neon_fp16_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options 
arm_neon_fp16 } */ + +#include "arm_neon.h" + +float16x4x2_t test_vld1_f16_x2 (float16_t * a) +{ + return vld1_f16_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c new file mode 100644 index 000000000000..3ccea520ddc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c @@ -0,0 +1,13 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O2" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +poly64x1x2_t test_vld1_p64_x2 (poly64_t * a) +{ + return vld1_p64_x2 (a); +} + +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ From 8e3ae874b21bdd8da32afefa6f6f60913481564c Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:41:05 +0000 Subject: [PATCH 061/311] arm: vld1_types_x3 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1 intrinsic for the arm port. This patch adds the _x3 variants of the vld1 intrinsic. The previous vld1_x3 has been updated to vld1q_x3 to take into account that it works with 4-word-length types. vld1_x3 is now only for 2-word-length types. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1_u8_x3, vld1_u16_x3, vld1_u32_x3, vld1_u64_x3): New. (vld1_s8_x3, vld1_s16_x3, vld1_s32_x3, vld1_s64_x3): New. (vld1_f16_x3, vld1_f32_x3): New. (vld1_p8_x3, vld1_p16_x3, vld1_p64_x3): New. (vld1_bf16_x3): New. (vld1q_types_x3): Updated to use vld1q_x3 from arm_neon_builtins.def. * config/arm/arm_neon_builtins.def (vld1_x3): Updated entries. (vld1q_x3): New entries, taken over from the old vld1_x3. * config/arm/neon.md (neon_vld1q_x3): Updated from neon_vld1_x3. 
gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. --- gcc/config/arm/arm_neon.h | 156 ++++++++++++++++-- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vld1_base_xN_1.c | 63 ++++++- .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_p64_xN_1.c | 7 +- 7 files changed, 231 insertions(+), 22 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 669b8fffb405..dbc37cafe286 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10316,6 +10316,15 @@ vld1_p64_x2 (const poly64_t * __a) return __rv.__i; } +__extension__ extern __inline poly64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x3 (const poly64_t * __a) +{ + union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10381,6 +10390,42 @@ vld1_s64_x2 (const int64_t * __a) return __rv.__i; } +__extension__ extern __inline int8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x3 (const int8_t * __a) +{ + union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x3 (const int16_t * __a) +{ + union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + 
+__extension__ extern __inline int32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x3 (const int32_t * __a) +{ + union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x3 (const int64_t * __a) +{ + union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10417,6 +10462,26 @@ vld1_f32_x2 (const float32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x3 (const float16_t * __a) +{ + union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x3 (const float32_t * __a) +{ + union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10481,6 +10546,42 @@ vld1_u64_x2 (const uint64_t * __a) return __rv.__i; } +__extension__ extern __inline uint8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u8_x3 (const uint8_t * __a) +{ + union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; + 
__rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x3 (const uint16_t * __a) +{ + union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x2x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x3 (const uint32_t * __a) +{ + union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x1x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x3 (const uint64_t * __a) +{ + union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10513,6 +10614,24 @@ vld1_p16_x2 (const poly16_t * __a) return __rv.__i; } +__extension__ extern __inline poly8x8x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x3 (const poly8_t * __a) +{ + union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x3 (const poly16_t * __a) +{ + union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10536,7 +10655,7 @@ 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x3 (const poly64_t * __a) { union { poly64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10619,7 +10738,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x3 (const uint8_t * __a) { union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10628,7 +10747,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x3 (const uint16_t * __a) { union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10637,7 +10756,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x3 (const int32_t * __a) { union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x3v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10646,7 +10765,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x3 (const int64_t * __a) { union { int64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10728,7 +10847,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x3 (const float16_t * __a) { union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hf (__a); + 
__rv.__o = __builtin_neon_vld1q_x3v8hf (__a); return __rv.__i; } #endif @@ -10738,7 +10857,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x3 (const float32_t * __a) { union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1q_x3v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10831,7 +10950,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x3 (const uint8_t * __a) { union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10840,7 +10959,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x3 (const uint16_t * __a) { union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10849,7 +10968,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x3 (const uint32_t * __a) { union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x3v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10858,7 +10977,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x3 (const uint64_t * __a) { union { uint64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10935,7 +11054,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x3 (const poly8_t * __a) { union { 
poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10944,7 +11063,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x3 (const poly16_t * __a) { union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -20944,6 +21063,15 @@ vld1_bf16_x2 (const bfloat16_t * __ptr) return __rv.__i; } +__extension__ extern __inline bfloat16x4x3_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_bf16_x3 (const bfloat16_t * __ptr) +{ + union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv; + __rv.__o = __builtin_neon_vld1_x3v4bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -20965,7 +21093,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x3 (const bfloat16_t * __ptr) { union { bfloat16x8x3_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1q_x3v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 07750c03c087..c74f0db645bd 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -303,7 +303,8 @@ VAR13 (LOAD1, vld1, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x3, v8qi, v4hi, 
v2si, di, v4hf, v2sf, v4bf) +VAR7 (LOAD1, vld1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 75add42777d8..e67cbc247d9a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4968,6 +4968,16 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_x3" + [(set (match_operand:EI 0 "s_register_operand" "=w") + (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD1))] + "TARGET_NEON" + "vld1.\t%h0, %A1" + [(set_attr "type" "neon_load1_3reg")] +) + +(define_insn "neon_vld1q_x3" [(set (match_operand:CI 0 "s_register_operand" "=w") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c index 6b0e78d94d7c..95314bbe0ded 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c @@ -60,7 +60,62 @@ poly16x4x2_t test_vld1_p16_x2 (poly16_t * a) return vld1_p16_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file +uint8x8x3_t test_vld1_u8_x3 (uint8_t * a) +{ + return vld1_u8_x3 (a); +} + +uint16x4x3_t test_vld1_u16_x3 (uint16_t * a) +{ + return vld1_u16_x3 (a); +} + +uint32x2x3_t test_vld1_u32_x3 (uint32_t * a) +{ + return vld1_u32_x3 (a); +} + +uint64x1x3_t test_vld1_u64_x3 (uint64_t * 
a) +{ + return vld1_u64_x3 (a); +} + +int8x8x3_t test_vld1_s8_x3 (int8_t * a) +{ + return vld1_s8_x3 (a); +} + +int16x4x3_t test_vld1_s16_x3 (int16_t * a) +{ + return vld1_s16_x3 (a); +} + +int32x2x3_t test_vld1_s32_x3 (int32_t * a) +{ + return vld1_s32_x3 (a); +} + +int64x1x3_t test_vld1_s64_x3 (int64_t * a) +{ + return vld1_s64_x3 (a); +} + +float32x2x3_t test_vld1_f32_x3 (float32_t * a) +{ + return vld1_f32_x3 (a); +} + +poly8x8x3_t test_vld1_p8_x3 (poly8_t * a) +{ + return vld1_p8_x3 (a); +} + +poly16x4x3_t test_vld1_p16_x3 (poly16_t * a) +{ + return vld1_p16_x3 (a); +} + +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c index 3ec7a5e19864..c1935da0a4c7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c @@ -10,4 +10,9 @@ bfloat16x4x2_t test_vld1_bf16_x2 (bfloat16_t * a) return vld1_bf16_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ \ No newline at end of file +bfloat16x4x3_t test_vld1_bf16_x3 (bfloat16_t * a) +{ + return vld1_bf16_x3 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c index c0e5ea491424..20363239f5b4 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c @@ -10,4 +10,9 @@ float16x4x2_t test_vld1_f16_x2 
(float16_t * a) return vld1_f16_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ +float16x4x3_t test_vld1_f16_x3 (float16_t * a) +{ + return vld1_f16_x3 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c index 3ccea520ddc2..210de511c716 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c @@ -10,4 +10,9 @@ poly64x1x2_t test_vld1_p64_x2 (poly64_t * a) return vld1_p64_x2 (a); } -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ +poly64x1x3_t test_vld1_p64_x3 (poly64_t * a) +{ + return vld1_p64_x3 (a); +} + +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 656f092cba951fddc1e40468ad71d241ffe98566 Mon Sep 17 00:00:00 2001 From: Ezra Sitorus Date: Thu, 7 Dec 2023 15:41:06 +0000 Subject: [PATCH 062/311] arm: vld1_types_x4 ACLE intrinsics This patch is part of a series of patches implementing the _xN variants of the vld1 intrinsic for the arm port. This patch adds the _x4 variants of the vld1 intrinsic. The previous vld1_x4 has been updated to vld1q_x4 to take into account that it works with 4-word-length types. vld1_x4 is now only for 2-word-length types. ACLE documents: https://developer.arm.com/documentation/ihi0053/latest/ ISA documents: https://developer.arm.com/documentation/ddi0487/latest/ gcc/ChangeLog: * config/arm/arm_neon.h (vld1_u8_x4, vld1_u16_x4, vld1_u32_x4, vld1_u64_x4): New (vld1_s8_x4, vld1_s16_x4, vld1_s32_x4, vld1_s64_x4): New. (vld1_f16_x4, vld1_f32_x4): New. (vld1_p8_x4, vld1_p16_x4, vld1_p64_x4): New. (vld1_bf16_x4): New. (vld1q_types_x4): Updated to use vld1q_x4 from arm_neon_builtins.def * config/arm/arm_neon_builtins.def (vld1_x4): Updated entries. 
(vld1q_x4): New entries, but come from the old vld1_x4. * config/arm/neon.md (neon_vld1q_x4): Updated from neon_vld1_x4. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. --- gcc/config/arm/arm_neon.h | 156 ++++++++++++++++-- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 ++ .../gcc.target/arm/simd/vld1_base_xN_1.c | 63 ++++++- .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_p64_xN_1.c | 7 +- 7 files changed, 231 insertions(+), 22 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index dbc37cafe286..8bcf1d6325e5 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10325,6 +10325,15 @@ vld1_p64_x3 (const poly64_t * __a) return __rv.__i; } +__extension__ extern __inline poly64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p64_x4 (const poly64_t * __a) +{ + union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10426,6 +10435,42 @@ vld1_s64_x3 (const int64_t * __a) return __rv.__i; } +__extension__ extern __inline int8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s8_x4 (const int8_t * __a) +{ + union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s16_x4 (const int16_t * __a) +{ + union { int16x4x4_t __i; 
__builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline int32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s32_x4 (const int32_t * __a) +{ + union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline int64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_s64_x4 (const int64_t * __a) +{ + union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10482,6 +10527,26 @@ vld1_f32_x3 (const float32_t * __a) return __rv.__i; } +#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) +__extension__ extern __inline float16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f16_x4 (const float16_t * __a) +{ + union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4hf (__a); + return __rv.__i; +} +#endif + +__extension__ extern __inline float32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_f32_x4 (const float32_t * __a) +{ + union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2sf ((const __builtin_neon_sf *) __a); + return __rv.__i; +} + __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10582,6 +10647,42 @@ vld1_u64_x3 (const uint64_t * __a) return __rv.__i; } +__extension__ extern __inline uint8x8x4_t +__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +vld1_u8_x4 (const uint8_t * __a) +{ + union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u16_x4 (const uint16_t * __a) +{ + union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint32x2x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u32_x4 (const uint32_t * __a) +{ + union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v2si ((const __builtin_neon_si *) __a); + return __rv.__i; +} + +__extension__ extern __inline uint64x1x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_u64_x4 (const uint64_t * __a) +{ + union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); + return __rv.__i; +} + __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10632,6 +10733,24 @@ vld1_p16_x3 (const poly16_t * __a) return __rv.__i; } +__extension__ extern __inline poly8x8x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p8_x4 (const poly8_t * __a) +{ + union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); + return __rv.__i; +} + +__extension__ extern __inline poly16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_p16_x4 (const poly16_t * __a) +{ + union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); + return __rv.__i; +} + #pragma GCC 
push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10664,7 +10783,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x4 (const poly64_t * __a) { union { poly64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10774,7 +10893,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x4 (const uint8_t * __a) { union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10783,7 +10902,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x4 (const uint16_t * __a) { union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10792,7 +10911,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x4 (const int32_t * __a) { union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x4v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10801,7 +10920,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x4 (const int64_t * __a) { union { int64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10867,7 +10986,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x4 (const 
float16_t * __a) { union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hf (__a); + __rv.__o = __builtin_neon_vld1q_x4v8hf (__a); return __rv.__i; } #endif @@ -10877,7 +10996,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x4 (const float32_t * __a) { union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1q_x4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10986,7 +11105,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x4 (const uint8_t * __a) { union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10995,7 +11114,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x4 (const uint16_t * __a) { union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -11004,7 +11123,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x4 (const uint32_t * __a) { union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1q_x4v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -11013,7 +11132,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x4 (const uint64_t * __a) { union { uint64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1q_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -11072,7 
+11191,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x4 (const poly8_t * __a) { union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -11081,7 +11200,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x4 (const poly16_t * __a) { union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -21072,6 +21191,15 @@ vld1_bf16_x3 (const bfloat16_t * __ptr) return __rv.__i; } +__extension__ extern __inline bfloat16x4x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vld1_bf16_x4 (const bfloat16_t * __ptr) +{ + union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv; + __rv.__o = __builtin_neon_vld1_x4v4bf ((const __builtin_neon_bf *) __ptr); + return __rv.__i; +} + __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -21102,7 +21230,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x4 (const bfloat16_t * __ptr) { union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1q_x4v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index c74f0db645bd..20dfcae7de56 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -305,7 +305,8 @@ VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, 
vld1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) +VAR7 (LOAD1, vld1q_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e67cbc247d9a..30f5bf8e40e8 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5005,6 +5005,16 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_x4" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD1))] + "TARGET_NEON" + "vld1.\t%h0, %A1" + [(set_attr "type" "neon_load1_4reg")] +) + +(define_insn "neon_vld1q_x4" [(set (match_operand:XI 0 "s_register_operand" "=w") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c index 95314bbe0ded..a5686ffac013 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c @@ -115,7 +115,62 @@ poly16x4x3_t test_vld1_p16_x3 (poly16_t * a) return vld1_p16_x3 (a); } -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ \ No newline at end of file +uint8x8x4_t test_vld1_u8_x4 (uint8_t * a) +{ + return vld1_u8_x4 (a); +} + +uint16x4x4_t 
test_vld1_u16_x4 (uint16_t * a) +{ + return vld1_u16_x4 (a); +} + +uint32x2x4_t test_vld1_u32_x4 (uint32_t * a) +{ + return vld1_u32_x4 (a); +} + +uint64x1x4_t test_vld1_u64_x4 (uint64_t * a) +{ + return vld1_u64_x4 (a); +} + +int8x8x4_t test_vld1_s8_x4 (int8_t * a) +{ + return vld1_s8_x4 (a); +} + +int16x4x4_t test_vld1_s16_x4 (int16_t * a) +{ + return vld1_s16_x4 (a); +} + +int32x2x4_t test_vld1_s32_x4 (int32_t * a) +{ + return vld1_s32_x4 (a); +} + +int64x1x4_t test_vld1_s64_x4 (int64_t * a) +{ + return vld1_s64_x4 (a); +} + +float32x2x4_t test_vld1_f32_x4 (float32_t * a) +{ + return vld1_f32_x4 (a); +} + +poly8x8x4_t test_vld1_p8_x4 (poly8_t * a) +{ + return vld1_p8_x4 (a); +} + +poly16x4x4_t test_vld1_p16_x4 (poly16_t * a) +{ + return vld1_p16_x4 (a); +} + +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 6 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c index c1935da0a4c7..7ed17834ccf4 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c @@ -15,4 +15,9 @@ bfloat16x4x3_t test_vld1_bf16_x3 (bfloat16_t * a) return vld1_bf16_x3 (a); } -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file +bfloat16x4x4_t test_vld1_bf16_x4 (bfloat16_t * a) +{ + return vld1_bf16_x4 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c index 
20363239f5b4..82e7211ebbf2 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c @@ -15,4 +15,9 @@ float16x4x3_t test_vld1_f16_x3 (float16_t * a) return vld1_f16_x3 (a); } -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +float16x4x4_t test_vld1_f16_x4 (float16_t * a) +{ + return vld1_f16_x4 (a); +} + +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c index 210de511c716..644371b89ea2 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c @@ -15,4 +15,9 @@ poly64x1x3_t test_vld1_p64_x3 (poly64_t * a) return vld1_p64_x3 (a); } -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +poly64x1x4_t test_vld1_p64_x4 (poly64_t * a) +{ + return vld1_p64_x4 (a); +} + +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 3 } } */ From 9f0f7d802482a8958d6cdc72f1fe0c8549db2182 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 7 Dec 2023 19:41:19 +0000 Subject: [PATCH 063/311] aarch64: Add an early RA for strided registers This pass adds a simple register allocator for FP & SIMD registers. Its main purpose is to make use of SME2's strided LD1, ST1 and LUTI2/4 instructions, which require a very specific grouping structure, and so would be difficult to exploit with general allocation. The allocator is very simple. It gives up on anything that would require spilling, or that it might not handle well for other reasons. The allocator needs to track liveness at the level of individual FPRs. Doing that fixes a lot of the PRs relating to redundant moves caused by structure loads and stores. 
That particular problem is going to be fixed more generally for GCC 15 by Lehua's RA patches. However, the early-RA pass runs before scheduling, so it has a chance to bag a spill-free allocation of vector code before the scheduler moves things around. It could therefore still be useful for non-SME code (e.g. for hand-scheduled ACLE code) even after Lehua's patches are in. The pass is controlled by a tristate switch: - -mearly-ra=all: run on all functions - -mearly-ra=strided: run on functions that have access to strided registers - -mearly-ra=none: don't run on any function The patch makes -mearly-ra=all the default at -O2 and above for now. We can revisit this for GCC 15 once Lehua's patches are in; -mearly-ra=strided might then be more appropriate. As said previously, the pass is very naive. There's much more that we could do, such as handling invariants better. The main focus is on not committing to a bad allocation, rather than on handling as much as possible. gcc/ PR rtl-optimization/106694 PR rtl-optimization/109078 PR rtl-optimization/109391 * config.gcc: Add aarch64-early-ra.o for AArch64 targets. * config/aarch64/t-aarch64 (aarch64-early-ra.o): New rule. * config/aarch64/aarch64-opts.h (aarch64_early_ra_scope): New enum. * config/aarch64/aarch64.opt (mearly_ra): New option. * doc/invoke.texi: Document it. * common/config/aarch64/aarch64-common.cc (aarch_option_optimization_table): Use -mearly-ra=all by default for -O2 and above. * config/aarch64/aarch64-passes.def (pass_aarch64_early_ra): New pass. * config/aarch64/aarch64-protos.h (aarch64_strided_registers_p) (make_pass_aarch64_early_ra): Declare. * config/aarch64/aarch64-sme.md (@aarch64_sme_lut): Add a stride_type attribute. (@aarch64_sme_lut_strided2): New pattern. (@aarch64_sme_lut_strided4): Likewise. * config/aarch64/aarch64-sve-builtins-base.cc (svld1_impl::expand) (svldnt1_impl::expand, svst1_impl::expand, svstn1_impl::expand): Handle new way of defining multi-register loads and stores. 
* config/aarch64/aarch64-sve.md (@aarch64_ld1) (@aarch64_ldnt1, @aarch64_st1) (@aarch64_stnt1): Delete. * config/aarch64/aarch64-sve2.md (@aarch64_) (@aarch64__strided2): New patterns. (@aarch64__strided4): Likewise. (@aarch64_): Likewise. (@aarch64__strided2): Likewise. (@aarch64__strided4): Likewise. * config/aarch64/aarch64.cc (aarch64_strided_registers_p): New function. * config/aarch64/aarch64.md (UNSPEC_LD1_SVE_COUNT): Delete. (UNSPEC_ST1_SVE_COUNT, UNSPEC_LDNT1_SVE_COUNT): Likewise. (UNSPEC_STNT1_SVE_COUNT): Likewise. (stride_type): New attribute. * config/aarch64/constraints.md (Uwd, Uwt): New constraints. * config/aarch64/iterators.md (UNSPEC_LD1_COUNT, UNSPEC_LDNT1_COUNT) (UNSPEC_ST1_COUNT, UNSPEC_STNT1_COUNT): New unspecs. (optab): Handle them. (LD1_COUNT, ST1_COUNT): New iterators. * config/aarch64/aarch64-early-ra.cc: New file. gcc/testsuite/ PR rtl-optimization/106694 PR rtl-optimization/109078 PR rtl-optimization/109391 * gcc.target/aarch64/ldp_stp_16.c (cons4_4_float): Tighten expected output test. * gcc.target/aarch64/sve/shift_1.c: Allow reversed shifts for .s as well as .d. * gcc.target/aarch64/sme/strided_1.c: New test. * gcc.target/aarch64/pr109078.c: Likewise. * gcc.target/aarch64/pr109391.c: Likewise. * gcc.target/aarch64/sve/pr106694.c: Likewise. 
--- gcc/common/config/aarch64/aarch64-common.cc | 1 + gcc/config.gcc | 2 +- gcc/config/aarch64/aarch64-early-ra.cc | 3423 +++++++++++++++++ gcc/config/aarch64/aarch64-opts.h | 11 + gcc/config/aarch64/aarch64-passes.def | 1 + gcc/config/aarch64/aarch64-protos.h | 2 + gcc/config/aarch64/aarch64-sme.md | 70 + .../aarch64/aarch64-sve-builtins-base.cc | 14 +- gcc/config/aarch64/aarch64-sve.md | 44 - gcc/config/aarch64/aarch64-sve2.md | 144 +- gcc/config/aarch64/aarch64.cc | 13 + gcc/config/aarch64/aarch64.md | 24 +- gcc/config/aarch64/aarch64.opt | 18 + gcc/config/aarch64/constraints.md | 8 + gcc/config/aarch64/iterators.md | 12 + gcc/config/aarch64/t-aarch64 | 6 + gcc/doc/invoke.texi | 15 + gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c | 5 +- gcc/testsuite/gcc.target/aarch64/pr109078.c | 59 + gcc/testsuite/gcc.target/aarch64/pr109391.c | 14 + .../gcc.target/aarch64/sme/strided_1.c | 253 ++ .../gcc.target/aarch64/sve/pr106694.c | 28 + .../gcc.target/aarch64/sve/shift_1.c | 6 +- 23 files changed, 4113 insertions(+), 60 deletions(-) create mode 100644 gcc/config/aarch64/aarch64-early-ra.cc create mode 100644 gcc/testsuite/gcc.target/aarch64/pr109078.c create mode 100644 gcc/testsuite/gcc.target/aarch64/pr109391.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/strided_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr106694.c diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc index 20bc4e1291bb..8fb901029ec2 100644 --- a/gcc/common/config/aarch64/aarch64-common.cc +++ b/gcc/common/config/aarch64/aarch64-common.cc @@ -55,6 +55,7 @@ static const struct default_options aarch_option_optimization_table[] = { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, /* Enable redundant extension instructions removal at -O2 and higher. 
*/ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_mearly_ra_, NULL, AARCH64_EARLY_RA_ALL }, #if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1) { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 }, { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1}, diff --git a/gcc/config.gcc b/gcc/config.gcc index 6450448f2f0c..4884aca4dd87 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -349,7 +349,7 @@ aarch64*-*-*) c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" d_target_objs="aarch64-d.o" - extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o" + extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch-bti-insert.o aarch64-cc-fusion.o aarch64-early-ra.o" target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" target_has_targetm_common=yes ;; diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc new file mode 100644 index 000000000000..c065416c5b94 --- /dev/null +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -0,0 +1,3423 @@ +// Early register allocation pass. +// Copyright (C) 2023 Free Software Foundation, Inc. +// +// This file is part of GCC. +// +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. 
+//
+// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with GCC; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// This pass implements a simple form of early register allocation.
+// It is restricted to FP/SIMD registers, and it only allocates
+// a region of FP/SIMD usage if it can do so without any spilling.
+// It punts on anything too complicated, leaving it to the real
+// register allocator.
+//
+// There are two main purposes:
+//
+// (1) The pass runs before scheduling.  It therefore has a chance to
+//     bag a spill-free allocation, if there is one, before scheduling
+//     moves things around.
+//
+// (2) The pass can make use of strided register operations, such as the
+//     strided forms of LD1 and ST1 in SME2.
+//
+// The allocator works at the level of individual FPRs, rather than whole
+// pseudo registers.  It is mostly intended to help optimize ACLE code.
+//
+// The pass is very simplistic.  There are many things that could be improved.
+#define IN_TARGET_CODE 1 + +#define INCLUDE_ALGORITHM +#define INCLUDE_FUNCTIONAL +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "rtl.h" +#include "df.h" +#include "rtl-ssa.h" +#include "tree-pass.h" +#include "target.h" +#include "expr.h" +#include "cfgrtl.h" +#include "print-rtl.h" +#include "insn-attr.h" +#include "insn-opinit.h" +#include "reload.h" + +template +class simple_iterator : public wrapper_iterator +{ +public: + using wrapper_iterator::wrapper_iterator; + + simple_iterator &operator-- () { --this->m_contents; return *this; } + simple_iterator operator-- (int) { return this->m_contents--; } + simple_iterator &operator++ () { ++this->m_contents; return *this; } + simple_iterator operator++ (int) { return this->m_contents++; } +}; + +using namespace rtl_ssa; + +namespace { +const pass_data pass_data_early_ra = +{ + RTL_PASS, // type + "early_ra", // name + OPTGROUP_NONE, // optinfo_flags + TV_NONE, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + TODO_df_finish, // todo_flags_finish +}; + +using allocno_iterator = simple_iterator; + +// Class that represents one run of the pass. +class early_ra +{ +public: + early_ra (function *fn); + ~early_ra (); + void execute (); + +private: + static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits"); + using operand_mask = uint32_t; + + // Points in the function are represented using "program points". + // The program points are allocated in reverse order, with smaller + // numbers indicating later points. These special values indicate + // the start and end of a region. + static constexpr unsigned int START_OF_REGION = ~0U; + static constexpr unsigned int END_OF_REGION = 0U; + + // An invalid allocno index, used to represent no allocno. + static constexpr unsigned int INVALID_ALLOCNO = ~0U; + + // Enumerates the single FPR sizes that matter for register allocation. 
+ // Anything smaller than 64 bits is treated as FPR_D. + enum fpr_size_info + { + FPR_D, + FPR_Q, + FPR_Z + }; + + // A live range for an FPR, containing program points [START_POINT, + // END_POINT]. If ALLOCNO is not INVALID_ALLOCNO, the FPR is known + // to be equal to ALLOCNO for the duration of the live range. + struct fpr_range_info + { + unsigned int start_point; + unsigned int end_point; + unsigned int allocno; + }; + + // Flags used in pseudo_reg_info. + // + // Whether the pseudo register occurs in one instruction alternative that + // matches (respectively) V0-V7, V0-V15, V0-V31 or a non-FP register. + static constexpr unsigned int ALLOWS_FPR8 = 1U << 0; + static constexpr unsigned int ALLOWS_FPR16 = 1U << 1; + static constexpr unsigned int ALLOWS_FPR32 = 1U << 2; + static constexpr unsigned int ALLOWS_NONFPR = 1U << 3; + // + // Likewise whether the register occurs in an instruction that requires + // the associated register type. + static constexpr unsigned int NEEDS_FPR8 = 1U << 4; + static constexpr unsigned int NEEDS_FPR16 = 1U << 5; + static constexpr unsigned int NEEDS_FPR32 = 1U << 6; + static constexpr unsigned int NEEDS_NONFPR = 1U << 7; + // + // Whether the pseudo register is copied to or from a hard FP register. + static constexpr unsigned int HAS_FPR_COPY = 1U << 8; + // + // Whether the pseudo register is copied to or from a hard non-FP register. + static constexpr unsigned int HAS_NONFPR_COPY = 1U << 9; + // + // Whether the pseudo register is used as a multi-register vector operand + // to an instruction that supports strided accesses, and whether it is used + // as a multi-register vector operand in some other non-move instruction. + static constexpr unsigned int HAS_FLEXIBLE_STRIDE = 1U << 10; + static constexpr unsigned int HAS_FIXED_STRIDE = 1U << 11; + + // Flags that should be propagated across moves between pseudo registers. 
+ static constexpr unsigned int PSEUDO_COPY_FLAGS = ~(HAS_FLEXIBLE_STRIDE + | HAS_FIXED_STRIDE); + + // Information about a copy between two registers. + struct reg_copy_info + { + // The two registers, in order. + unsigned int regnos[2]; + + // Index I gives the index of the next reg_copy_info involving REGNOS[I], + // or 0 if none. + unsigned int next_copies[2]; + }; + + // Information about a pseudo register. + struct pseudo_reg_info + { + // Flags describing how the register is used, defined above. + unsigned int flags : 16; + + // The mode of the pseudo register, cached for convenience. + machine_mode mode : 16; + + // The index of the first copy, or 0 if none. + unsigned int first_copy; + }; + + // Information about a group of allocnos that have a fixed offset + // relative to each other. The allocnos in each group must be allocated + // together. + // + // Allocnos that can share the same hard register are eventually + // chained together. These chains represent edges on a graph of + // allocnos, such that two allocnos joined by an edge use the same FPR. + // These chains are formed between individual allocnos rather than + // whole groups, although the system is required to be self-consistent. + // Each clique in the graph has at least one "full-width" allocno group + // that has one allocno for every FPR that needs to be allocated to + // the clique. + // + // One group of allocnos is chosen as the "color representative" of + // each clique in the graph. This group will be a full-width group. + struct allocno_info; + struct allocno_group_info + { + array_slice chain_heads (); + array_slice allocnos (); + allocno_group_info *color_rep (); + allocno_info *allocno (unsigned int); + + // The color representative of the containing clique. + allocno_group_info *m_color_rep; + + // The pseudo register associated with this allocno, or INVALID_REGNUM + // if none. 
+ unsigned int regno; + + // The offset of the first allocno (and thus this group) from the start + // of color_rep. + unsigned int color_rep_offset : 8; + + // The number of allocnos in the group, and thus the number of FPRs + // that need to be allocated. + unsigned int size : 8; + + // The gap between FPRs in the group. This is normally 1, but can be + // higher if we've decided to use strided multi-register accesses. + unsigned int stride : 4; + + // Used temporarily while deciding which allocnos should have non-unit + // strides; see find_strided_accesses for details. + int consecutive_pref : 4; + int strided_polarity : 2; + + // The largest size of FPR needed by references to the allocno group. + fpr_size_info fpr_size : 2; + + // True if all non-move accesses can be converted to strided form. + unsigned int has_flexible_stride : 1; + + // True if we've assigned a color index to this group. + unsigned int has_color : 1; + + // The mask of FPRs that would make valid choices for the first allocno, + // taking the requirements of all the allocnos in the group into account. + unsigned int fpr_candidates; + + // The index of the color that has been assigned to the containing clique. + unsigned int color; + }; + + // Represents a single FPR-sized quantity that needs to be allocated. + // Each allocno is identified by index (for compactness). + // + // Quantities that span multiple FPRs are assigned groups of consecutive + // allocnos. Quantities that occupy a single FPR are assigned their own + // group. + struct allocno_info + { + allocno_group_info *group (); + + // The allocno's unique identifier. + unsigned int id; + + // The offset of this allocno into the containing group. + unsigned int offset : 8; + + // The number of allocnos in the containing group. 
+ unsigned int group_size : 8; + + // If the allocno has an affinity with at least one hard register + // (so that choosing that hard register would avoid a copy), this is + // the number of one such hard register, otherwise it is + // FIRST_PSEUDO_REGISTER. + unsigned int hard_regno : 8; + + // Set to 1 if the allocno has a single definition or 2 if it has more. + unsigned int num_defs : 2; + + // True if, at START_POINT, another allocno is copied to this one. + // See callers of record_copy for what counts as a copy. + unsigned int is_copy_dest : 1; + + // True if, at START_POINT, another allocno is copied to this one, + // and if the allocnos at both ends of the copy chain have an affinity + // with the same hard register. + unsigned int is_strong_copy_dest : 1; + + // True if, at END_POINT, this allocno is copied to another one, + // and both allocnos have an affinity with the same hard register. + unsigned int is_strong_copy_src : 1; + + // True if the allocno is subject to an earlyclobber at END_POINT, + // so that it cannot be tied to the destination of the instruction. + unsigned int is_earlyclobbered : 1; + + // The inclusive range of program points spanned by the allocno. + // START_POINT >= END_POINT. + unsigned int start_point; + unsigned int end_point; + + // If, at END_POINT, this allocno is copied to another allocno, this + // is the index of that allocno, otherwise it is INVALID_ALLOCNO. + // See callers of record_copy for what counts as a copy. + unsigned int copy_dest; + + // If this field is not INVALID_ALLOCNO, this allocno is known to be + // equivalent to EQUIV_ALLOCNO for the whole of this allocno's lifetime. + unsigned int equiv_allocno; + + // The next chained allocno in program order (i.e. at lower program + // points), or INVALID_ALLOCNO if none. + unsigned int chain_next; + + // The previous chained allocno in program order (i.e. at higher + // program points), or INVALID_ALLOCNO if none. 
+ unsigned int chain_prev; + }; + + // Information about a full allocno group or a subgroup of it. + // The subgroup can be empty to indicate "none". + struct allocno_subgroup + { + array_slice allocnos (); + allocno_info *allocno (unsigned int); + + // True if a subgroup is present. + operator bool () const { return count; } + + // The containing group. + allocno_group_info *group; + + // The offset of the subgroup from the start of GROUP. + unsigned int start; + + // The number of allocnos in the subgroup. + unsigned int count; + }; + + // Represents information about a copy between an allocno and an FPR. + // This establishes an affinity between the allocno and the FPR. + struct allocno_copy_info + { + // The allocno involved in the copy. + unsigned int allocno; + + // The FPR involved in the copy, relative to V0_REGNUM. + unsigned int fpr : 16; + + // A measure of how strong the affinity between the allocno and FPR is. + unsigned int weight : 16; + }; + + // Information about a possible allocno chain. + struct chain_candidate_info + { + // The candidate target allocno. + allocno_info *allocno; + + // A rating of the candidate (higher is better). + int score; + }; + + // Information about an allocno color. + struct color_info + { + // The color's unique identifier. + int id; + + // The allocated hard register, when known. + unsigned int hard_regno; + + // The clique's representative group. + allocno_group_info *group; + + // Weights in favor of choosing each FPR as the first register for GROUP. 
+ int8_t fpr_preferences[32]; + }; + + template + T *region_allocate (Ts...); + + allocno_info *chain_prev (allocno_info *); + allocno_info *chain_next (allocno_info *); + + void dump_pseudo_regs (); + void dump_fpr_ranges (); + void dump_copies (); + void dump_allocnos (); + void dump_colors (); + + iterator_range get_group_allocnos (unsigned int); + + void preprocess_move (rtx, rtx); + void process_pseudo_reg_constraints (rtx_insn *); + void preprocess_insns (); + + int fpr_preference (unsigned int); + void propagate_pseudo_reg_info (); + + void choose_fpr_pseudos (); + + void start_new_region (); + + allocno_group_info *create_allocno_group (unsigned int, unsigned int); + allocno_subgroup get_allocno_subgroup (rtx); + void record_fpr_use (unsigned int); + void record_fpr_def (unsigned int); + void record_allocno_use (allocno_info *); + void record_allocno_def (allocno_info *); + void record_copy (rtx, rtx, bool = false); + void record_constraints (rtx_insn *); + void record_artificial_refs (unsigned int); + void record_insn_refs (rtx_insn *); + + bool consider_strong_copy_src_chain (allocno_info *); + int strided_polarity_pref (allocno_info *, allocno_info *); + void find_strided_accesses (); + + template + static int cmp_increasing (const void *, const void *); + bool is_chain_candidate (allocno_info *, allocno_info *); + int rate_chain (allocno_info *, allocno_info *); + static int cmp_chain_candidates (const void *, const void *); + void chain_allocnos (unsigned int &, unsigned int &); + void set_single_color_rep (allocno_info *, allocno_group_info *, + unsigned int); + void set_color_rep (allocno_group_info *, allocno_group_info *, + unsigned int); + bool try_to_chain_allocnos (allocno_info *, allocno_info *); + void create_color (allocno_group_info *); + void form_chains (); + + bool fpr_conflicts_with_allocno_p (unsigned int, allocno_info *); + bool call_in_range_p (unsigned int, unsigned int, unsigned int); + unsigned int partial_fpr_clobbers (unsigned 
int, fpr_size_info); + + void process_copies (); + + static int cmp_decreasing_size (const void *, const void *); + void allocate_colors (); + allocno_info *find_independent_subchain (allocno_info *); + color_info *find_oldest_color (unsigned int, unsigned int); + void broaden_colors (); + void finalize_allocation (); + + bool replace_regs (df_ref); + int try_enforce_constraints (rtx_insn *, vec> &); + void enforce_constraints (rtx_insn *); + bool maybe_convert_to_strided_access (rtx_insn *); + void apply_allocation (); + + void process_region (); + bool is_dead_insn (rtx_insn *); + void process_block (basic_block, bool); + void process_blocks (); + + // ---------------------------------------------------------------------- + + // The function we're operating on. + function *m_fn; + + // Information about each pseudo register, indexed by REGNO. + auto_vec m_pseudo_regs; + + // All recorded register copies. + auto_vec m_pseudo_reg_copies; + + // The set of pseudos that we've decided to allocate an FPR to. + auto_bitmap m_fpr_pseudos; + + // ---------------------------------------------------------------------- + + // An obstack for allocating information that is referenced by the member + // variables below. + obstack m_region_obstack; + void *m_region_alloc_start; + + // ---------------------------------------------------------------------- + + // The basic block that we're currently processing. + basic_block m_current_bb; + + // The program point that we're currently processing (described above). + unsigned int m_current_point; + + // The set of allocnos that are currently live. + auto_bitmap m_live_allocnos; + + // The set of FPRs that are currently live. + unsigned int m_live_fprs; + + // ---------------------------------------------------------------------- + + // A mask of the FPRs that have already been allocated. + unsigned int m_allocated_fprs; + + // A mask of the FPRs that must be at least partially preserved by the + // current function. 
+ unsigned int m_call_preserved_fprs; + + // True if we haven't yet failed to allocate the current region. + bool m_allocation_successful; + + // A map from pseudo registers to the first allocno in their associated + // allocno groups. + hash_map, + allocno_group_info *> m_regno_to_group; + + // All recorded copies between allocnos and FPRs. + auto_vec m_allocno_copies; + + // All allocnos, by index. + auto_vec m_allocnos; + + // All allocnos, by increasing START_POINT. + auto_vec m_sorted_allocnos; + + // All colors, by index. + auto_vec m_colors; + + // The instruction ranges that make up the current region, + // as half-open ranges [LAST, FIRST). + auto_vec> m_insn_ranges; + + // The live ranges of each FPR, in order of increasing program point. + auto_vec m_fpr_ranges[32]; + + // For each function call id, a list of program points at which a call + // to such a function is made. Each list is in order of increasing + // program point. + auto_vec m_call_points[NUM_ABI_IDS]; + + // A list of instructions that can be removed if allocation succeeds. + auto_vec m_dead_insns; +}; + +// True if PAT is something that would typically be treated as a move. +static inline bool +is_move_set (rtx pat) +{ + if (GET_CODE (pat) != SET) + return false; + + rtx dest = SET_DEST (pat); + if (SUBREG_P (dest)) + dest = SUBREG_REG (dest); + if (!OBJECT_P (dest)) + return false; + + rtx src = SET_SRC (pat); + if (SUBREG_P (src)) + src = SUBREG_REG (src); + if (!OBJECT_P (src) && !CONSTANT_P (src)) + return false; + + return true; +} + +// Return true if operand OP is likely to match OP_ALT after register +// allocation. +static bool +likely_operand_match_p (const operand_alternative &op_alt, rtx op) +{ + // Empty constraints match everything. 
+ const char *constraint = op_alt.constraint; + if (constraint[0] == 0 || constraint[0] == ',') + return true; + + for (;;) + { + char c = *constraint; + int len = CONSTRAINT_LEN (c, constraint); + if (c == 0 || c == ',') + break; + + if (c == 'X') + return true; + + auto cn = lookup_constraint (constraint); + if (REG_P (op) || SUBREG_P (op)) + { + if (insn_extra_register_constraint (cn)) + return true; + } + else if (MEM_P (op)) + { + if (insn_extra_memory_constraint (cn)) + return true; + } + else + { + if (!insn_extra_memory_constraint (cn) + && constraint_satisfied_p (op, cn)) + return true; + } + + constraint += len; + } + + if (op_alt.matches >= 0) + { + rtx other = recog_data.operand[op_alt.matches]; + if ((REG_P (other) || SUBREG_P (other)) + && (REG_P (op) || SUBREG_P (op))) + return true; + } + return false; +} + +// Return true if the operands of the current instruction are likely to +// match OP_ALT. +static bool +likely_alternative_match_p (const operand_alternative *op_alt) +{ + for (int i = 0; i < recog_data.n_operands; ++i) + if (!likely_operand_match_p (op_alt[i], recog_data.operand[i])) + return false; + return true; +} + +// Return the sum of how disparaged OP_ALT is. +static int +count_rejects (const operand_alternative *op_alt) +{ + int reject = 0; + for (int opno = 0; opno < recog_data.n_operands; ++opno) + reject += op_alt[opno].reject; + return reject; +} + +// Allocate a T from the region obstack. +template +inline T * +early_ra::region_allocate (Ts... 
args) +{ + static_assert (std::is_trivially_destructible::value, + "destructor won't be called"); + void *addr = obstack_alloc (&m_region_obstack, sizeof (T)); + return new (addr) T (std::forward (args)...); +} + +early_ra::early_ra (function *fn) : m_fn (fn), m_live_fprs (0) +{ + gcc_obstack_init (&m_region_obstack); + m_region_alloc_start = obstack_alloc (&m_region_obstack, 0); + bitmap_tree_view (m_live_allocnos); +} + +early_ra::~early_ra () +{ + obstack_free (&m_region_obstack, nullptr); +} + +// Return an array that, for each allocno A in the group, contains the index +// of the allocno at the head of A's chain (that is, the one with the highest +// START_POINT). The index is INVALID_ALLOCNO if the chain is empty. +inline array_slice +early_ra::allocno_group_info::chain_heads () +{ + auto *start = reinterpret_cast (this + 1); + return { start, size }; +} + +// Return the array of allocnos in the group. +inline array_slice +early_ra::allocno_group_info::allocnos () +{ + gcc_checking_assert (regno != INVALID_REGNUM); + auto *chain_end = reinterpret_cast (this + 1) + size; + auto *allocno_start = reinterpret_cast (chain_end); + return { allocno_start, size }; +} + +// Return the group's color representative. +inline early_ra::allocno_group_info * +early_ra::allocno_group_info::color_rep () +{ + gcc_checking_assert (m_color_rep->m_color_rep == m_color_rep); + return m_color_rep; +} + +// Return the group that contains the allocno. +inline early_ra::allocno_group_info * +early_ra::allocno_info::group () +{ + auto *chain_end = reinterpret_cast (this - offset); + return reinterpret_cast (chain_end - group_size) - 1; +} + +// Return the allocnos in the subgroup. +inline array_slice +early_ra::allocno_subgroup::allocnos () +{ + if (!count) + return {}; + return { &group->allocnos ()[start], count }; +} + +// Return allocno I in the subgroup, with 0 being the first. 
+inline early_ra::allocno_info *
+early_ra::allocno_subgroup::allocno (unsigned int i)
+{
+  return &group->allocnos ()[start + i];
+}
+
+// Return the previous (earlier) allocno in ALLOCNO's chain, or null if none.
+inline early_ra::allocno_info *
+early_ra::chain_prev (allocno_info *allocno)
+{
+  if (allocno->chain_prev != INVALID_ALLOCNO)
+    return m_allocnos[allocno->chain_prev];
+  return nullptr;
+}
+
+// Return the next (later) allocno in ALLOCNO's chain, or null if none.
+inline early_ra::allocno_info *
+early_ra::chain_next (allocno_info *allocno)
+{
+  if (allocno->chain_next != INVALID_ALLOCNO)
+    return m_allocnos[allocno->chain_next];
+  return nullptr;
+}
+
+// Dump the information in m_pseudo_regs.
+void
+early_ra::dump_pseudo_regs ()
+{
+  fprintf (dump_file, "\nPseudos:\n");
+  fprintf (dump_file, " %6s %6s %6s %6s %6s %6s %8s %s\n",
+           "Id", "FPR8", "FPR16", "FPR32", "NONFPR", "Stride",
+           "FPRness", "Copies");
+  pseudo_reg_info unused_reg = {};
+  for (unsigned int regno = FIRST_PSEUDO_REGISTER;
+       regno < m_pseudo_regs.length (); ++regno)
+    {
+      // Skip entries that are still in their zero-initialized state.
+      const auto &reg = m_pseudo_regs[regno];
+      if (memcmp (&reg, &unused_reg, sizeof (reg)) == 0)
+        continue;
+
+      fprintf (dump_file, " %6d %6s %6s %6s %6s %6s %8d", regno,
+               reg.flags & NEEDS_FPR8 ? "Req"
+               : reg.flags & ALLOWS_FPR8 ? "OK" : "-",
+               reg.flags & NEEDS_FPR16 ? "Req"
+               : reg.flags & ALLOWS_FPR16 ? "OK" : "-",
+               reg.flags & NEEDS_FPR32 ? "Req"
+               : reg.flags & ALLOWS_FPR32 ? "OK" : "-",
+               reg.flags & NEEDS_NONFPR ? "Req"
+               : reg.flags & ALLOWS_NONFPR ? "OK" : "-",
+               ~reg.flags & HAS_FLEXIBLE_STRIDE ? "-"
+               : reg.flags & HAS_FIXED_STRIDE ? "Some" : "All",
+               fpr_preference (regno));
+      if (reg.flags & HAS_FPR_COPY)
+        fprintf (dump_file, " FPR");
+      if (reg.flags & HAS_NONFPR_COPY)
+        fprintf (dump_file, " Non-FPR");
+      // Walk REGNO's list of copies, printing the register at the other
+      // end of each one.  A copy index of 0 terminates the list.
+      unsigned int copyi = reg.first_copy;
+      while (copyi)
+        {
+          const auto &copy = m_pseudo_reg_copies[copyi];
+          if (copy.regnos[0] == regno)
+            {
+              fprintf (dump_file, " r%d", copy.regnos[1]);
+              copyi = copy.next_copies[0];
+            }
+          else
+            {
+              fprintf (dump_file, " r%d", copy.regnos[0]);
+              copyi = copy.next_copies[1];
+            }
+        }
+      fprintf (dump_file, "\n");
+    }
+}
+
+// Dump the information in m_fpr_ranges.
+void
+early_ra::dump_fpr_ranges ()
+{
+  fprintf (dump_file, "\nFPR live ranges:\n");
+  for (unsigned int fpr = 0; fpr < 32; ++fpr)
+    {
+      auto &intervals = m_fpr_ranges[fpr];
+      if (intervals.is_empty ())
+        continue;
+
+      fprintf (dump_file, " %2d", fpr);
+      for (unsigned int i = 0; i < intervals.length (); ++i)
+        {
+          auto &interval = intervals[i];
+          // Start a new output line after every fourth range.
+          if (i && (i % 4) == 0)
+            fprintf (dump_file, "\n ");
+          fprintf (dump_file, " [ %6d %6d ]", interval.start_point,
+                   interval.end_point);
+        }
+      fprintf (dump_file, "\n");
+    }
+}
+
+// Dump the information in m_allocno_copies.
+void
+early_ra::dump_copies ()
+{
+  fprintf (dump_file, "\nCopies:\n");
+  fprintf (dump_file, " %8s %3s %6s\n",
+           "Allocno", "FPR", "Weight");
+  for (const auto &copy : m_allocno_copies)
+    fprintf (dump_file, " %8d %3d %6d\n", copy.allocno,
+             copy.fpr, copy.weight);
+}
+
+// Dump the information in m_allocnos.
+void
+early_ra::dump_allocnos ()
+{
+  char buffer[sizeof ("r[:]") + 3 * 3 * sizeof (int) + 1];
+  fprintf (dump_file, "\nAllocno groups:\n");
+  fprintf (dump_file,
+           " %12s %12s %4s %6s %8s %s\n",
+           "Ids", "Regno", "Size", "Stride", "Cands", "Heads");
+  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
+    {
+      // Print each group once, when we reach its first allocno.
+      auto *allocno = m_allocnos[ai];
+      if (allocno->offset != 0)
+        continue;
+      auto *group = allocno->group ();
+      snprintf (buffer, sizeof (buffer), "[%d:%d]", allocno->id,
+                allocno->id + group->size - 1);
+      fprintf (dump_file, " %12s", buffer);
+      snprintf (buffer, sizeof (buffer), "r%d[0:%d]", group->regno,
+                group->size - 1);
+      fprintf (dump_file, " %12s %4s %6d %08x", buffer,
+               group->fpr_size == FPR_D ? "D"
+               : group->fpr_size == FPR_Q ? "Q" : "Z",
+               group->stride,
+               group->fpr_candidates);
+      for (auto head : group->chain_heads ())
+        if (head == INVALID_ALLOCNO)
+          fprintf (dump_file, " -");
+        else
+          fprintf (dump_file, " %d", head);
+      fprintf (dump_file, "\n");
+    }
+
+  fprintf (dump_file, "\nAllocno chains:\n");
+  fprintf (dump_file, " %5s %12s %12s %5s %5s %5s %5s\n",
+           "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "FPR");
+  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
+    {
+      // Start from allocnos that have no predecessor in their chain and
+      // follow the chain_next links from there.
+      auto *allocno = m_allocnos[ai];
+      if (allocno->chain_prev != INVALID_ALLOCNO)
+        continue;
+      const char *prefix = "=>";
+      for (;;)
+        {
+          auto *group = allocno->group ();
+          fprintf (dump_file, " %2s", prefix);
+          fprintf (dump_file, " %5d", allocno->id);
+          snprintf (buffer, sizeof (buffer), "r%d[%d]", group->regno,
+                    allocno->offset);
+          fprintf (dump_file, " %12s", buffer);
+          snprintf (buffer, sizeof (buffer), "[%d,%d]",
+                    allocno->start_point, allocno->end_point);
+          fprintf (dump_file, " %11s%s %5s", buffer,
+                   allocno->is_earlyclobbered ? "*" : " ",
+                   allocno->is_strong_copy_dest ? "Strong"
+                   : allocno->is_copy_dest ? "Yes" : "-");
+          if (allocno->copy_dest == INVALID_ALLOCNO)
+            fprintf (dump_file, " %5s", "-");
+          else
+            fprintf (dump_file, " %5d", allocno->copy_dest);
+          if (allocno->equiv_allocno != INVALID_ALLOCNO)
+            fprintf (dump_file, " %5d", allocno->equiv_allocno);
+          else
+            fprintf (dump_file, " %5s", "-");
+          if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
+            fprintf (dump_file, " %5s", "-");
+          else
+            fprintf (dump_file, " %5s", reg_names[allocno->hard_regno]);
+          fprintf (dump_file, "\n");
+          if (allocno->chain_next == INVALID_ALLOCNO)
+            break;
+          allocno = m_allocnos[allocno->chain_next];
+          prefix = "";
+        }
+    }
+}
+
+// Dump the information in m_colors.
+void
+early_ra::dump_colors ()
+{
+  fprintf (dump_file, "\nColors:\n");
+  for (unsigned int i = 0; i < m_colors.length (); ++i)
+    {
+      auto *color = m_colors[i];
+      if (!color->group)
+        continue;
+
+      fprintf (dump_file, " color %d:\n", i);
+      fprintf (dump_file, " chains:\n");
+      auto heads = color->group->chain_heads ();
+      // Use a distinct index so that the color index I is not shadowed.
+      for (unsigned int chain = 0; chain < color->group->size; ++chain)
+        {
+          fprintf (dump_file, " %2d:", chain);
+          auto ai = heads[chain];
+          while (ai != INVALID_ALLOCNO)
+            {
+              auto *allocno = m_allocnos[ai];
+              fprintf (dump_file, " r%d[%d]", allocno->group ()->regno,
+                       allocno->offset);
+              ai = allocno->chain_next;
+            }
+          fprintf (dump_file, "\n");
+        }
+      fprintf (dump_file, " FPR candidates:");
+      for (unsigned int fpr = 0; fpr < 32; ++fpr)
+        fprintf (dump_file, "%s%c", fpr % 8 ? "" : " ",
+                 color->group->fpr_candidates & (1U << fpr) ? 'Y' : '-');
+      fprintf (dump_file, "\n");
+      fprintf (dump_file, " FPR preferences:");
+      for (unsigned int fpr = 0; fpr < 32; ++fpr)
+        if (color->fpr_preferences[fpr])
+          fprintf (dump_file, " %d(%d)", fpr, color->fpr_preferences[fpr]);
+      fprintf (dump_file, "\n");
+    }
+}
+
+// Record any necessary information about a move from SRC to DEST.
+void
+early_ra::preprocess_move (rtx dest, rtx src)
+{
+  // Look through subregs on both sides.  Only register-to-register moves
+  // are of interest here.
+  if (SUBREG_P (dest))
+    dest = SUBREG_REG (dest);
+  if (!REG_P (dest))
+    return;
+
+  if (SUBREG_P (src))
+    src = SUBREG_REG (src);
+  if (!REG_P (src))
+    return;
+
+  // Sort the registers by increasing REGNO.
+  rtx regs[] = { dest, src };
+  if (REGNO (dest) > REGNO (src))
+    std::swap (regs[0], regs[1]);
+  unsigned int regno0 = REGNO (regs[0]);
+  unsigned int regno1 = REGNO (regs[1]);
+
+  // Ignore moves between hard registers.  REGNO1 is the larger of the
+  // two register numbers, so testing it alone is enough.
+  if (HARD_REGISTER_NUM_P (regno1))
+    return;
+
+  // For moves between hard registers and pseudos, just record the type
+  // of hard register involved.
+  auto &reg1 = m_pseudo_regs[regno1];
+  reg1.mode = GET_MODE (regs[1]);
+  if (HARD_REGISTER_NUM_P (regno0))
+    {
+      reg1.flags |= (FP_REGNUM_P (regno0) ? HAS_FPR_COPY : HAS_NONFPR_COPY);
+      return;
+    }
+
+  // Record a move between two pseudo registers.
+  auto &reg0 = m_pseudo_regs[regno0];
+  reg0.mode = GET_MODE (regs[0]);
+
+  // Each pseudo heads a singly-linked list of the copies that involve it.
+  // The new copy is pushed onto the front of both registers' lists:
+  // next_copies[N] continues the list for regnos[N].
+  reg_copy_info copy;
+  copy.regnos[0] = regno0;
+  copy.regnos[1] = regno1;
+  copy.next_copies[0] = reg0.first_copy;
+  copy.next_copies[1] = reg1.first_copy;
+
+  reg0.first_copy = reg1.first_copy = m_pseudo_reg_copies.length ();
+  m_pseudo_reg_copies.safe_push (copy);
+}
+
+// Return true if INSN has a multi-vector operand and if that operand
+// could be converted to strided form.
+static bool
+is_stride_candidate (rtx_insn *insn)
+{
+  // Unrecognized instructions have no attributes to query.
+  if (recog_memoized (insn) < 0)
+    return false;
+
+  auto stride_type = get_attr_stride_type (insn);
+  return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
+          || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
+          || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
+}
+
+// Extract INSN, go through its constraints, and record any relevant
+// information about pseudo registers.  Matching constraints are treated
+// as if they were moves between the matched operands.
+void
+early_ra::process_pseudo_reg_constraints (rtx_insn *insn)
+{
+  extract_insn (insn);
+  preprocess_constraints (insn);
+
+  // Flags that describe any multi-register vector operands.
+  unsigned int insn_flags = (is_stride_candidate (insn)
+                             ? HAS_FLEXIBLE_STRIDE
+                             : HAS_FIXED_STRIDE);
+
+  auto alts = get_preferred_alternatives (insn);
+
+  // Per-operand results accumulated over all plausible alternatives:
+  // the operand that each operand is matched to (-1 if none) and the
+  // ALLOWS_* flags implied by its register classes.
+  int operand_matches[MAX_RECOG_OPERANDS];
+  unsigned int operand_flags[MAX_RECOG_OPERANDS];
+  for (int i = 0; i < recog_data.n_operands; ++i)
+    {
+      operand_matches[i] = -1;
+      operand_flags[i] = 0;
+    }
+
+  // Extract information from the constraints, considering all plausible
+  // alternatives.
+  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
+    {
+      if (!(alts & ALTERNATIVE_BIT (altno)))
+        continue;
+
+      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
+      if (!likely_alternative_match_p (op_alt))
+        continue;
+
+      // Use SRC_OPNO's constraints to derive information about DEST_OPNO.
+      auto record_operand = [&](int src_opno, int dest_opno)
+        {
+          int matches = op_alt[src_opno].matches;
+          if (matches >= 0)
+            operand_matches[dest_opno] = matches;
+
+          auto cl = alternative_class (op_alt, src_opno);
+          if (cl != NO_REGS)
+            {
+              if (reg_class_subset_p (cl, FP_REGS))
+                operand_flags[dest_opno] |= ALLOWS_FPR32;
+              if (reg_class_subset_p (cl, FP_LO_REGS))
+                operand_flags[dest_opno] |= ALLOWS_FPR16;
+              if (reg_class_subset_p (cl, FP_LO8_REGS))
+                operand_flags[dest_opno] |= ALLOWS_FPR8;
+              if (!reg_classes_intersect_p (cl, FP_REGS))
+                operand_flags[dest_opno] |= ALLOWS_NONFPR;
+            }
+        };
+
+      for (int i = 0; i < recog_data.n_operands; ++i)
+        {
+          record_operand (i, i);
+          // A leading '%' marks operands I and I + 1 as commutative,
+          // so each can also take on the other's constraints.
+          if (recog_data.constraints[i][0] == '%')
+            {
+              record_operand (i, i + 1);
+              record_operand (i + 1, i);
+            }
+        }
+    }
+
+  // Process the information we collected above.
+  for (int i = 0; i < recog_data.n_operands; ++i)
+    {
+      rtx op = recog_data.operand[i];
+      machine_mode orig_mode = GET_MODE (op);
+      if (SUBREG_P (op))
+        op = SUBREG_REG (op);
+
+      // Record the accumulated information in m_pseudo_regs.
+      if (REG_P (op) && !HARD_REGISTER_P (op))
+        {
+          // The flags so far just describe what at least one alternative
+          // would accept.  Calculate the associated NEEDS_* information.
+          auto flags = operand_flags[i];
+          if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
+            flags |= NEEDS_NONFPR;
+          else if ((flags & ALLOWS_FPR32) && !(flags & ALLOWS_NONFPR))
+            {
+              if (flags & ALLOWS_FPR8)
+                flags |= NEEDS_FPR8;
+              if (flags & ALLOWS_FPR16)
+                flags |= NEEDS_FPR16;
+              flags |= NEEDS_FPR32;
+            }
+
+          // Look for multi-register vector operands.
+          if (VECTOR_MODE_P (orig_mode)
+              && targetm.hard_regno_mode_ok (V0_REGNUM, orig_mode)
+              && hard_regno_nregs (V0_REGNUM, orig_mode) > 1)
+            flags |= insn_flags;
+
+          m_pseudo_regs[REGNO (op)].flags |= flags;
+          m_pseudo_regs[REGNO (op)].mode = GET_MODE (op);
+        }
+
+      // Treat matching constraints as equivalent to moves.
+      if (operand_matches[i] >= 0)
+        preprocess_move (recog_data.operand[operand_matches[i]], op);
+    }
+}
+
+// Make one pass through the instructions, collecting information that
+// will be needed later.
+void
+early_ra::preprocess_insns ()
+{
+  m_pseudo_regs.safe_grow_cleared (max_reg_num ());
+  // Reserve copy index 0, so that a first_copy or next_copies value of 0
+  // can mean "no copy".
+  m_pseudo_reg_copies.safe_push (reg_copy_info ());
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      if (!NONDEBUG_INSN_P (insn))
+        continue;
+
+      if (GET_CODE (PATTERN (insn)) == USE
+          || GET_CODE (PATTERN (insn)) == CLOBBER)
+        continue;
+
+      rtx set = single_set (insn);
+      if (set && is_move_set (set))
+        preprocess_move (SET_DEST (set), SET_SRC (set));
+      else
+        process_pseudo_reg_constraints (insn);
+    }
+}
+
+// Return a signed integer that says (roughly) how strong an affinity
+// pseudo register REGNO has with FPRs.  A positive value indicates
+// that we should try to allocate an FPR, a negative value indicates
+// that we shouldn't, and 0 indicates neutrality.
+int
+early_ra::fpr_preference (unsigned int regno)
+{
+  auto mode = m_pseudo_regs[regno].mode;
+  auto flags = m_pseudo_regs[regno].flags;
+  // The tests are ordered from the strongest reason to the weakest;
+  // the magnitude of the result reflects that ordering.
+  if (mode == VOIDmode || !targetm.hard_regno_mode_ok (V0_REGNUM, mode))
+    return -3;
+  else if (flags & HAS_FLEXIBLE_STRIDE)
+    return 3;
+  else if (flags & NEEDS_FPR32)
+    return 2;
+  else if (!(flags & ALLOWS_FPR32))
+    return -2;
+  else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
+    return 1;
+  else if ((flags & HAS_NONFPR_COPY) && !(flags & HAS_FPR_COPY))
+    return -1;
+  else
+    return 0;
+}
+
+// Propagate information about pseudo-registers along copy edges,
+// while doing so doesn't create conflicting FPR preferences.
+void
+early_ra::propagate_pseudo_reg_info ()
+{
+  // A pending visit: continue walking REGNO's list of copies from
+  // copy number COPYI.
+  struct stack_entry { unsigned int regno, copyi; };
+
+  // The first push in each outer iteration is a quick_push, so the vector
+  // needs preallocated storage.
+  // NOTE(review): the inline capacity of 32 is a reconstruction -- confirm.
+  auto_vec<stack_entry, 32> stack;
+  for (unsigned int i = FIRST_PSEUDO_REGISTER;
+       i < m_pseudo_regs.length (); ++i)
+    {
+      // A first_copy of 0 means that the register is not involved
+      // in any recorded copy.
+      auto start = m_pseudo_regs[i].first_copy;
+      if (!start)
+        continue;
+
+      stack.quick_push ({ i, start });
+      while (!stack.is_empty ())
+        {
+          auto entry = stack.pop ();
+          auto &copy = m_pseudo_reg_copies[entry.copyi];
+          // The copy links two registers.  Pick the one that isn't
+          // SRC_REGNO as the destination, and pick the next copy in
+          // SRC_REGNO's own list.
+          auto src_regno = entry.regno;
+          auto dest_regno = (src_regno == copy.regnos[1]
+                             ? copy.regnos[0]
+                             : copy.regnos[1]);
+          auto next_copyi = (src_regno == copy.regnos[1]
+                             ? copy.next_copies[1]
+                             : copy.next_copies[0]);
+          if (next_copyi)
+            stack.safe_push ({ src_regno, next_copyi });
+
+          auto &src_reg = m_pseudo_regs[src_regno];
+          auto &dest_reg = m_pseudo_regs[dest_regno];
+
+          // Only do anything if the source has copyable flags that the
+          // destination lacks.
+          if (src_reg.flags & ~dest_reg.flags & PSEUDO_COPY_FLAGS)
+            {
+              // Propagate only if the two preferences do not have
+              // opposite signs.
+              auto src_preference = fpr_preference (src_regno);
+              auto dest_preference = fpr_preference (dest_regno);
+              if ((src_preference >= 0 && dest_preference >= 0)
+                  || (src_preference <= 0 && dest_preference <= 0))
+                {
+                  dest_reg.flags |= (src_reg.flags & PSEUDO_COPY_FLAGS);
+                  // The destination changed, so revisit all of its copies.
+                  stack.safe_push ({ dest_regno, dest_reg.first_copy });
+                }
+            }
+        }
+    }
+}
+
+// Decide which pseudos should be allocated an FPR, setting m_fpr_pseudos
+// accordingly.
+void
+early_ra::choose_fpr_pseudos ()
+{
+  // Only a strictly positive preference selects a pseudo; neutral (0)
+  // pseudos are left alone.
+  for (unsigned int i = FIRST_PSEUDO_REGISTER;
+       i < m_pseudo_regs.length (); ++i)
+    if (fpr_preference (i) > 0)
+      bitmap_set_bit (m_fpr_pseudos, i);
+}
+
+// Clear out information about the previous CFG region (if any)
+// and set up the data for a new region.
+void
+early_ra::start_new_region ()
+{
+  // Release everything allocated on the region obstack since
+  // m_region_alloc_start, then empty the per-region containers.
+  obstack_free (&m_region_obstack, m_region_alloc_start);
+  m_regno_to_group.empty ();
+  m_allocno_copies.truncate (0);
+  m_allocnos.truncate (0);
+  m_sorted_allocnos.truncate (0);
+  m_colors.truncate (0);
+  m_insn_ranges.truncate (0);
+  for (auto &fpr_ranges : m_fpr_ranges)
+    fpr_ranges.truncate (0);
+  for (auto &call_points : m_call_points)
+    call_points.truncate (0);
+  // Nothing should still be live from the previous region.
+  gcc_assert (bitmap_empty_p (m_live_allocnos) && m_live_fprs == 0);
+  m_dead_insns.truncate (0);
+  m_allocated_fprs = 0;
+  m_call_preserved_fprs = 0;
+  m_allocation_successful = true;
+}
+
+// Create and return an allocno group of size SIZE for register REGNO.
+// REGNO can be INVALID_REGNUM if the group just exists to allow
+// other groups to be chained together, and does not have any new
+// allocnos of its own.
+early_ra::allocno_group_info *
+early_ra::create_allocno_group (unsigned int regno, unsigned int size)
+{
+  // The chain heads (unsigned ints) are laid out between the group and
+  // the allocnos, so the allocnos must not need stricter alignment.
+  static_assert (alignof (unsigned int) == alignof (allocno_info),
+                 "allocno_info alignment");
+  unsigned int num_allocnos = (regno != INVALID_REGNUM ? size : 0);
+
+  // Allocate an allocno_group_info, followed by an array of chain heads,
+  // followed by the allocnos themselves.
+  size_t alloc_size = (sizeof (allocno_group_info)
+                       + size * sizeof (unsigned int)
+                       + num_allocnos * sizeof (allocno_info));
+  void *data = obstack_alloc (&m_region_obstack, alloc_size);
+
+  // Initialize the group.
+  auto *group = reinterpret_cast<allocno_group_info *> (data);
+  memset (group, 0, sizeof (*group));
+  group->m_color_rep = group;
+  group->regno = regno;
+  group->size = size;
+  group->stride = 1;
+  group->fpr_size = FPR_D;
+  group->fpr_candidates = ~0U;
+
+  // Initialize the chain heads.  Each head initially refers to the
+  // group's own allocno at that offset, using the ids assigned below.
+  // A group without allocnos starts with empty chains.
+  auto heads = group->chain_heads ();
+  for (unsigned int i = 0; i < heads.size (); ++i)
+    heads[i] = (i < num_allocnos ? m_allocnos.length () + i : INVALID_ALLOCNO);
+
+  // Initialize the allocnos.
+  if (num_allocnos > 0)
+    {
+      auto allocnos = group->allocnos ();
+      memset (allocnos.begin (), 0, num_allocnos * sizeof (allocno_info));
+      for (unsigned int i = 0; i < num_allocnos; ++i)
+        {
+          auto *allocno = &allocnos[i];
+          allocno->id = m_allocnos.length ();
+          allocno->offset = i;
+          allocno->group_size = size;
+          // FIRST_PSEUDO_REGISTER stands for "no hard register chosen".
+          allocno->hard_regno = FIRST_PSEUDO_REGISTER;
+          // Start with an inverted (empty) live range.  Recording a use
+          // lowers end_point to the use point and sets start_point.
+          allocno->start_point = END_OF_REGION;
+          allocno->end_point = START_OF_REGION;
+          allocno->copy_dest = INVALID_ALLOCNO;
+          allocno->equiv_allocno = INVALID_ALLOCNO;
+          allocno->chain_next = INVALID_ALLOCNO;
+          allocno->chain_prev = INVALID_ALLOCNO;
+          m_allocnos.safe_push (allocno);
+        }
+    }
+  return group;
+}
+
+// If REG refers to a pseudo register that might be allocated to FPRs,
+// return the associated range of allocnos, creating new ones if necessary.
+// Return an empty range otherwise.
+early_ra::allocno_subgroup
+early_ra::get_allocno_subgroup (rtx reg)
+{
+  // For a subreg, find the range for the inner register and then narrow
+  // it to the FPRs that the subreg covers.
+  if (GET_CODE (reg) == SUBREG)
+    {
+      allocno_subgroup inner = get_allocno_subgroup (SUBREG_REG (reg));
+      if (!inner)
+        return {};
+
+      // Give up on the whole region if the inner and outer modes
+      // cannot be tied.
+      if (!targetm.modes_tieable_p (GET_MODE (SUBREG_REG (reg)),
+                                    GET_MODE (reg)))
+        {
+          m_allocation_successful = false;
+          return {};
+        }
+
+      // Work out which FPRs the subreg would occupy if the inner register
+      // were allocated to V0, and give up if that cannot be expressed.
+      subreg_info info;
+      subreg_get_info (V0_REGNUM, GET_MODE (SUBREG_REG (reg)),
+                       SUBREG_BYTE (reg), GET_MODE (reg), &info);
+      if (!info.representable_p)
+        {
+          m_allocation_successful = false;
+          return {};
+        }
+
+      inner.start += info.offset;
+      inner.count = info.nregs;
+      return inner;
+    }
+
+  if (!REG_P (reg) || HARD_REGISTER_P (reg))
+    return {};
+
+  // Only pseudos with a positive FPR preference get allocnos.
+  unsigned int regno = REGNO (reg);
+  if (fpr_preference (regno) <= 0)
+    return {};
+
+  unsigned int count = hard_regno_nregs (V0_REGNUM, GET_MODE (reg));
+  bool existed;
+  auto &entry = m_regno_to_group.get_or_insert (regno, &existed);
+  if (!existed)
+    {
+      // First reference to REGNO in this region: create its group.
+      auto *group = create_allocno_group (regno, count);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          auto allocnos = group->allocnos ();
+          fprintf (dump_file, "Creating allocnos [%d:%d] for r%d\n",
+                   allocnos.front ().id, allocnos.back ().id, regno);
+        }
+
+      auto reg_bits = GET_MODE_BITSIZE (GET_MODE (reg));
+      auto fpr_bits = exact_div (reg_bits, count);
+      auto flags = m_pseudo_regs[regno].flags;
+
+      // Punt for now if there is a choice to be made between using an
+      // FPR and a non-FPR.
+      if ((flags & NEEDS_NONFPR)
+          || ((flags & ALLOWS_NONFPR)
+              && !FLOAT_MODE_P (GET_MODE (reg))
+              && !VECTOR_MODE_P (GET_MODE (reg))))
+        m_allocation_successful = false;
+
+      // Restrict the candidate starting FPRs to the low 8 or low 16
+      // registers when the corresponding flag is set.
+      if (flags & ALLOWS_FPR8)
+        group->fpr_candidates &= 0xff;
+      else if (flags & ALLOWS_FPR16)
+        group->fpr_candidates &= 0xffff;
+      // Clear the top COUNT - 1 candidate bits -- presumably so that a
+      // group of COUNT consecutive registers cannot run past the last FPR.
+      group->fpr_candidates &= ~0U >> (count - 1);
+
+      // The stride is only flexible if no instruction required a
+      // fixed stride for this register.
+      group->has_flexible_stride = ((flags & HAS_FLEXIBLE_STRIDE) != 0
+                                    && (flags & HAS_FIXED_STRIDE) == 0);
+
+      // Classify the per-FPR size: more than 128 bits, more than 64 bits,
+      // or anything smaller.
+      group->fpr_size = (maybe_gt (fpr_bits, 128) ? FPR_Z
+                         : maybe_gt (fpr_bits, 64) ? FPR_Q : FPR_D);
+
+      entry = group;
+    }
+  return { entry, 0, count };
+}
+
+// Record a use of FPR REGNO at the current program point, as part of
+// a backwards walk over a block.
+void
+early_ra::record_fpr_use (unsigned int regno)
+{
+  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
+  unsigned int offset = regno - V0_REGNUM;
+  // In a backwards walk, the first use seen is the end of a live range.
+  // Open a new range that provisionally starts at the start of the region;
+  // record_fpr_def tightens the start if a definition is found.
+  if (!(m_live_fprs & (1U << offset)))
+    {
+      m_fpr_ranges[offset].safe_push ({ START_OF_REGION, m_current_point,
+                                        INVALID_ALLOCNO });
+      m_live_fprs |= 1U << offset;
+    }
+}
+
+// Record a definition of FPR REGNO at the current program point, as part of
+// a backwards walk over a block.
+void
+early_ra::record_fpr_def (unsigned int regno)
+{
+  gcc_assert (IN_RANGE (regno, V0_REGNUM, V31_REGNUM));
+  unsigned int offset = regno - V0_REGNUM;
+
+  // The definition completes the current live range.  If the result
+  // of the definition is used, the live range extends to the last use.
+  // Otherwise the live range is just a momentary blip at the current point.
+  auto &ranges = m_fpr_ranges[offset];
+  if (m_live_fprs & (1U << offset))
+    {
+      ranges.last ().start_point = m_current_point;
+      m_live_fprs &= ~(1U << offset);
+    }
+  else
+    ranges.safe_push ({ m_current_point, m_current_point, INVALID_ALLOCNO });
+}
+
+// Record a use of allocno ALLOCNO at the current program point, as part
+// of a backwards walk over a block.
+void
+early_ra::record_allocno_use (allocno_info *allocno)
+{
+  bitmap_set_bit (m_live_allocnos, allocno->id);
+  // end_point only ever moves down to the current point, whereas
+  // start_point is overwritten by every use.
+  if (allocno->end_point > m_current_point)
+    allocno->end_point = m_current_point;
+  allocno->start_point = m_current_point;
+  // A use invalidates any copy-destination status recorded so far.
+  allocno->is_copy_dest = false;
+  allocno->is_strong_copy_dest = false;
+}
+
+// Record a definition of allocno ALLOCNO at the current program point,
+// as part of a backwards walk over a block.  The allocno is known
+// to be live.
+void
+early_ra::record_allocno_def (allocno_info *allocno)
+{
+  allocno->start_point = m_current_point;
+  // num_defs saturates at 2: only 0, 1 and "more than 1" are distinguished.
+  allocno->num_defs = MIN (allocno->num_defs + 1, 2);
+  gcc_checking_assert (!allocno->is_copy_dest
+                       && !allocno->is_strong_copy_dest);
+  // The allocno must have been live; clearing an already-clear bit
+  // indicates a bookkeeping error.
+  if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
+    gcc_unreachable ();
+}
+
+// Record any relevant allocno-related information for an actual or imagined
+// copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
+// move instruction, false if it represents one way of satisfying the previous
+// instruction's constraints.
+void
+early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
+{
+  auto dest_range = get_allocno_subgroup (dest);
+  auto src_range = get_allocno_subgroup (src);
+  if (from_move_p
+      && dest_range
+      && REG_P (src)
+      && FP_REGNUM_P (REGNO (src)))
+    {
+      // A copy from an FPR to an allocno group.
+      unsigned int fpr = REGNO (src) - V0_REGNUM;
+      m_allocno_copies.safe_push ({ dest_range.allocno (0)->id, fpr,
+                                    dest_range.count });
+
+      // If the allocno at the other end of the chain of copies from DEST
+      // has a copy to the same FPR, record that all intervening copy chains
+      // could become "strong" ones.  This indicates that picking the FPR
+      // avoids a copy at both ends.
+      unsigned int hard_regno = REGNO (src);
+      for (auto &dest_allocno : dest_range.allocnos ())
+        if (dest_allocno.hard_regno == hard_regno++)
+          dest_allocno.is_strong_copy_src = true;
+    }
+  else if (from_move_p
+           && src_range
+           && REG_P (dest)
+           && FP_REGNUM_P (REGNO (dest)))
+    {
+      // A copy from an allocno group to an FPR.
+      unsigned int fpr = REGNO (dest) - V0_REGNUM;
+      m_allocno_copies.safe_push ({ src_range.allocno (0)->id, fpr,
+                                    src_range.count });
+      for (auto &src_allocno : src_range.allocnos ())
+        {
+          // If the copy comes from a move, see whether the destination
+          // FPR is known to be equal to the source allocno for the FPR's
+          // last live range.
+          if (from_move_p && src_allocno.num_defs == 0)
+            {
+              auto &last_range = m_fpr_ranges[fpr].last ();
+              if (last_range.end_point >= src_allocno.end_point)
+                last_range.allocno = src_allocno.id;
+            }
+          // Remember which FPR this allocno is copied to, then move on
+          // to the next FPR for the next allocno in the group.
+          src_allocno.hard_regno = V0_REGNUM + fpr;
+          fpr += 1;
+        }
+    }
+  else if (src_range && dest_range)
+    {
+      // A copy between two allocno groups.  We can only have a mismatched
+      // number of FPRs for imaginary, non-move copies.  In that case
+      // the matching happens on the common lowparts.
+      gcc_assert (!from_move_p || src_range.count == dest_range.count);
+      unsigned int count = std::min (src_range.count, dest_range.count);
+      if (WORDS_BIG_ENDIAN)
+        {
+          src_range.start += src_range.count - count;
+          dest_range.start += dest_range.count - count;
+        }
+      src_range.count = count;
+      dest_range.count = count;
+
+      // Ignore (imaginary non-move) copies if the destination is still live.
+      for (auto &dest_allocno : dest_range.allocnos ())
+        if (bitmap_bit_p (m_live_allocnos, dest_allocno.id))
+          return;
+
+      for (unsigned int i = 0; i < src_range.count; ++i)
+        {
+          auto *dest_allocno = dest_range.allocno (i);
+          auto *src_allocno = src_range.allocno (i);
+          if (src_allocno->end_point > dest_allocno->start_point)
+            {
+              // Link the source to the destination as a copy, and let the
+              // source inherit the destination's hard register affinity.
+              gcc_assert (src_allocno->copy_dest == INVALID_ALLOCNO
+                          || src_allocno->copy_dest == dest_allocno->id);
+              src_allocno->copy_dest = dest_allocno->id;
+              src_allocno->hard_regno = dest_allocno->hard_regno;
+              dest_allocno->is_copy_dest = 1;
+            }
+          else if (from_move_p
+                   && src_allocno->end_point <= dest_allocno->end_point
+                   && src_allocno->num_defs == 0
+                   && dest_allocno->num_defs == 1)
+            // Otherwise, for a real move, record the source as the
+            // destination's equivalent when the source has no definition
+            // so far and this move is the destination's only one.
+            dest_allocno->equiv_allocno = src_allocno->id;
+        }
+    }
+}
+
+// Record any relevant allocno-related information about the constraints
+// on INSN, which has already been extracted.
+void
+early_ra::record_constraints (rtx_insn *insn)
+{
+  preprocess_constraints (insn);
+
+  // For each operand, the operand that it can be tied to (-1 if none).
+  int operand_matches[MAX_RECOG_OPERANDS];
+  for (int i = 0; i < recog_data.n_operands; ++i)
+    operand_matches[i] = -1;
+
+  auto alts = get_preferred_alternatives (insn);
+  // An instruction with no alternatives at all trivially matches.
+  bool any_ok = recog_data.n_alternatives == 0;
+
+  // The set of output operands that are earlyclobber in at least one
+  // alternative.
+  operand_mask earlyclobber_operands = 0;
+
+  // The set of output operands that are matched to inputs in at least
+  // one alternative.
+  operand_mask matched_operands = 0;
+
+  // The set of output operands that are not matched to inputs in at least
+  // one alternative.
+  operand_mask unmatched_operands = 0;
+
+  // The set of input operands that are matched to outputs in at least one
+  // alternative, or that overlap with such an input if the output is not
+  // earlyclobber.  The latter part of the condition copes with things
+  // like y = x * x, where the first x is tied to the destination, and where
+  // y is not earlyclobber.
+  operand_mask matches_operands = 0;
+
+  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
+    {
+      if (!(alts & ALTERNATIVE_BIT (altno)))
+        continue;
+
+      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
+      if (!likely_alternative_match_p (op_alt))
+        continue;
+
+      any_ok = true;
+
+      // Update the information for operand DEST_OPNO based on the constraint
+      // information for operand SRC_OPNO.  The numbers can be different for
+      // swapped commutative operands.
+      auto record_operand = [&](int src_opno, int dest_opno)
+        {
+          int matches = op_alt[src_opno].matches;
+          // A matched earlyclobber cannot be used if the same operand value
+          // occurs in an unmatched operand.  E.g. for y = x * x, a matched
+          // earlyclobber on the first input does not cover the second input.
+          if (matches >= 0)
+            {
+              // Collect the other non-output operands that overlap
+              // operand DEST_OPNO.
+              rtx op = recog_data.operand[dest_opno];
+              operand_mask overlaps = 0;
+              for (int i = 0; i < recog_data.n_operands; ++i)
+                if (i != dest_opno
+                    && !recog_data.is_operator[i]
+                    && recog_data.operand_type[i] != OP_OUT
+                    && reg_overlap_mentioned_p (op, recog_data.operand[i]))
+                  overlaps |= 1U << i;
+              if (!op_alt[matches].earlyclobber || overlaps == 0)
+                {
+                  operand_matches[dest_opno] = matches;
+                  matches_operands |= (1U << dest_opno) | overlaps;
+                }
+            }
+        };
+
+      auto reject = count_rejects (op_alt);
+      for (int opno = 0; opno < recog_data.n_operands; ++opno)
+        {
+          operand_mask op_mask = operand_mask (1) << opno;
+
+          // Classify outputs as matched or unmatched for this alternative.
+          // Matches in alternatives with rejects are not counted.
+          if (recog_data.operand_type[opno] != OP_IN)
+            {
+              if (reject == 0 && op_alt[opno].matched >= 0)
+                matched_operands |= op_mask;
+              else
+                unmatched_operands |= op_mask;
+            }
+
+          if (op_alt[opno].earlyclobber)
+            earlyclobber_operands |= op_mask;
+
+          // Punt for now on scratches.  If we wanted to handle them,
+          // we'd need to create allocnos for them, like IRA does.
+          rtx op = recog_data.operand[opno];
+          if (GET_CODE (op) == SCRATCH
+              && reg_classes_intersect_p (op_alt[opno].cl, FP_REGS))
+            m_allocation_successful = false;
+
+          // Record filter information, which applies to the first register
+          // in the operand.
+          if (auto filters = alternative_register_filters (op_alt, opno))
+            if (auto range = get_allocno_subgroup (recog_data.operand[opno]))
+              for (unsigned int fpr = range.start; fpr < 32; ++fpr)
+                if (!test_register_filters (filters, fpr))
+                  range.group->fpr_candidates &= ~(1U << (fpr - range.start));
+
+          if (reject == 0)
+            {
+              // Record possible matched operands.
+              record_operand (opno, opno);
+              if (recog_data.constraints[opno][0] == '%')
+                {
+                  record_operand (opno, opno + 1);
+                  record_operand (opno + 1, opno);
+                }
+            }
+        }
+    }
+
+  if (!any_ok)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, " -- no match\n");
+      m_allocation_successful = false;
+    }
+
+  // Record if there is an output operand that is never earlyclobber and never
+  // matched to an input.  See the comment below for how this is used.
+  rtx dest_op = NULL_RTX;
+  for (int opno = 0; opno < recog_data.n_operands; ++opno)
+    {
+      auto op_mask = operand_mask (1) << opno;
+      if (recog_data.operand_type[opno] == OP_OUT
+          && (earlyclobber_operands & op_mask) == 0
+          && (matched_operands & op_mask) == 0)
+        {
+          dest_op = recog_data.operand[opno];
+          break;
+        }
+    }
+
+  for (int opno = 0; opno < recog_data.n_operands; ++opno)
+    {
+      auto op_mask = operand_mask (1) << opno;
+      rtx op = recog_data.operand[opno];
+      int matches = operand_matches[opno];
+
+      // Punt for now on operands that already have a fixed choice of
+      // register, since we don't have IRA's ability to find an alternative.
+      // It's better if earlier passes don't create this kind of situation.
+      if (REG_P (op) && FP_REGNUM_P (REGNO (op)))
+        m_allocation_successful = false;
+
+      // Treat input operands as being earlyclobbered if an output is
+      // sometimes earlyclobber and if the input never matches an output.
+      // Do the same if there is an output that is always matched to an
+      // input, and if this operand doesn't match that input.  In both
+      // cases, tying the input and the output would lead to an impossible
+      // combination (or at least one that is difficult to reload).
+      if (recog_data.operand_type[opno] != OP_OUT
+          && ((earlyclobber_operands && matches < 0)
+              || ((matched_operands & ~unmatched_operands)
+                  && !(matches_operands & op_mask))))
+        for (auto &allocno : get_allocno_subgroup (op).allocnos ())
+          if (allocno.end_point + 1 == m_current_point)
+            allocno.is_earlyclobbered = true;
+
+      // Create copies between operands that can be tied.  This (deliberately)
+      // might add several copies to the same destination register; later code
+      // can then choose between them based on other criteria.
+      //
+      // If there is an output operand that is never matched or earlyclobber,
+      // and an input operand that never matches an output operand, create
+      // a tentative copy between them.  This allows hard register preferences
+      // to be transmitted along the copy chains.
+      if (matches >= 0)
+        record_copy (recog_data.operand[matches], op);
+      else if (dest_op && recog_data.operand_type[opno] == OP_IN)
+        record_copy (dest_op, op);
+    }
+}
+
+// If FLAGS is DF_REF_AT_TOP, model the artificial uses and defs at the
+// start of the current basic block, otherwise model the artificial uses
+// and defs at the end of the basic block.  This is done as part of a
+// backwards walk, so defs should be processed before uses.
+void
+early_ra::record_artificial_refs (unsigned int flags)
+{
+  df_ref ref;
+
+  // Defs and uses each get a program point of their own.
+  FOR_EACH_ARTIFICIAL_DEF (ref, m_current_bb->index)
+    if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
+        && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
+      record_fpr_def (DF_REF_REGNO (ref));
+  m_current_point += 1;
+
+  FOR_EACH_ARTIFICIAL_USE (ref, m_current_bb->index)
+    if ((DF_REF_FLAGS (ref) & DF_REF_AT_TOP) == flags
+        && IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
+      record_fpr_use (DF_REF_REGNO (ref));
+  m_current_point += 1;
+}
+
+// Model the register references in INSN as part of a backwards walk.
+void
+early_ra::record_insn_refs (rtx_insn *insn)
+{
+  df_ref ref;
+
+  // Record all definitions, excluding partial call clobbers.
+  FOR_EACH_INSN_DEF (ref, insn)
+    if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
+      record_fpr_def (DF_REF_REGNO (ref));
+    else
+      {
+        auto range = get_allocno_subgroup (DF_REF_REG (ref));
+        for (auto &allocno : range.allocnos ())
+          {
+            // If the destination is unused, record a momentary blip
+            // in its live range.
+            if (!bitmap_bit_p (m_live_allocnos, allocno.id))
+              record_allocno_use (&allocno);
+            record_allocno_def (&allocno);
+          }
+      }
+  m_current_point += 1;
+
+  // Model the call made by a call insn as a separate phase in the
+  // evaluation of the insn.  Any partial call clobbers happen at that
+  // point, rather than in the definition or use phase of the insn.
+  if (auto *call_insn = dyn_cast<rtx_call_insn *> (insn))
+    {
+      function_abi abi = insn_callee_abi (call_insn);
+      m_call_points[abi.id ()].safe_push (m_current_point);
+      m_current_point += 1;
+    }
+
+  // Record all uses.  We can ignore READ_MODIFY_WRITE uses of plain subregs,
+  // since we track the FPR-sized parts of them individually.
+  FOR_EACH_INSN_USE (ref, insn)
+    if (IN_RANGE (DF_REF_REGNO (ref), V0_REGNUM, V31_REGNUM))
+      record_fpr_use (DF_REF_REGNO (ref));
+    else if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE)
+             || DF_REF_FLAGS_IS_SET (ref, DF_REF_STRICT_LOW_PART)
+             || DF_REF_FLAGS_IS_SET (ref, DF_REF_ZERO_EXTRACT))
+      {
+        auto range = get_allocno_subgroup (DF_REF_REG (ref));
+        for (auto &allocno : range.allocnos ())
+          record_allocno_use (&allocno);
+      }
+  m_current_point += 1;
+}
+
+// ALLOCNO->is_strong_copy_src is true.  See whether ALLOCNO heads a
+// natural chain that has an affinity with the same hard register at
+// both ends.
+bool
+early_ra::consider_strong_copy_src_chain (allocno_info *allocno)
+{
+  // First pass: check that every link in the copy chain is contiguous
+  // and shares the same hard register affinity.
+  auto *src_allocno = allocno;
+  while (src_allocno->copy_dest != INVALID_ALLOCNO)
+    {
+      auto *dest_allocno = m_allocnos[src_allocno->copy_dest];
+      if (dest_allocno->start_point > src_allocno->end_point
+          || dest_allocno->hard_regno != src_allocno->hard_regno)
+        return false;
+      gcc_checking_assert (dest_allocno->is_copy_dest);
+      src_allocno = dest_allocno;
+    }
+
+  // Second pass: the whole chain qualifies, so mark every link as strong.
+  while (allocno->copy_dest != INVALID_ALLOCNO)
+    {
+      allocno->is_strong_copy_src = 1;
+      allocno = m_allocnos[allocno->copy_dest];
+      allocno->is_strong_copy_dest = 1;
+    }
+  return true;
+}
+
+// ALLOCNO1 and ALLOCNO2 are linked in some way, and might end up being
+// chained together.  See whether chaining them requires the containing
+// groups to have the same stride, or whether it requires them to have
+// different strides.  Return 1 if they should have the same stride,
+// -1 if they should have different strides, or 0 if it doesn't matter.
+int
+early_ra::strided_polarity_pref (allocno_info *allocno1,
+                                 allocno_info *allocno2)
+{
+  // The allocnos of a group are adjacent in memory, so +1 and -1 step to
+  // the neighboring allocnos in the same group.  Prefer to test the
+  // following pair; fall back to the preceding pair.
+  if (allocno1->offset + 1 < allocno1->group_size
+      && allocno2->offset + 1 < allocno2->group_size)
+    {
+      if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  if (allocno1->offset > 0 && allocno2->offset > 0)
+    {
+      if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
+        return 1;
+      else
+        return -1;
+    }
+
+  return 0;
+}
+
+// Decide which groups should be strided.  Also complete "strong" copy chains.
+void
+early_ra::find_strided_accesses ()
+{
+  // This function forms a graph of allocnos, linked by equivalences and
+  // natural copy chains.  It temporarily uses chain_next to record the
+  // reverse of equivalence edges (equiv_allocno) and chain_prev to record
+  // the reverse of copy edges (copy_dest).
+  unsigned int allocno_info::*links[] = {
+    &allocno_info::chain_next,
+    &allocno_info::chain_prev,
+    &allocno_info::copy_dest,
+    &allocno_info::equiv_allocno
+  };
+
+  // Set up the temporary reverse edges.  Check for strong copy chains.
+  for (unsigned int i = m_allocnos.length (); i-- > 0; )
+    {
+      auto *allocno1 = m_allocnos[i];
+      if (allocno1->copy_dest != INVALID_ALLOCNO)
+        m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
+      if (allocno1->equiv_allocno != INVALID_ALLOCNO)
+        m_allocnos[allocno1->equiv_allocno]->chain_next = allocno1->id;
+
+      // Only the head of a copy chain can start a strong chain.
+      if (allocno1->is_strong_copy_src
+          && (allocno1->is_copy_dest
+              || !consider_strong_copy_src_chain (allocno1)))
+        allocno1->is_strong_copy_src = false;
+    }
+
+  // Partition the graph into cliques based on edges that have the following
+  // properties:
+  //
+  // - the edge joins two allocnos whose groups have a free choice between
+  //   consecutive and strided allocations.
+  //
+  // - the two groups have a relative strided polarity preference (that is
+  //   they should make the same choice between consecutive and strided,
+  //   or they should make opposite choices).
+  //
+  // Assign relative polarities to each group connected in this way.
+  //
+  // The aim is to discover natural move-free striding choices, which will
+  // often exist in carefully written ACLE code.
+  //
+  // Edge number E is link E % ARRAY_SIZE (links) of allocno
+  // E / ARRAY_SIZE (links).
+  unsigned int num_edges = m_allocnos.length () * ARRAY_SIZE (links);
+  auto_sbitmap visited_edges (num_edges);
+  bitmap_clear (visited_edges);
+
+  // The first push in each outer iteration is a quick_push, so the vector
+  // needs preallocated storage.
+  // NOTE(review): the inline capacity of 32 is a reconstruction -- confirm.
+  auto_vec<unsigned int, 32> worklist;
+  for (unsigned int i = 0; i < num_edges; ++i)
+    {
+      if (!bitmap_set_bit (visited_edges, i))
+        continue;
+      worklist.quick_push (i);
+      while (!worklist.is_empty ())
+        {
+          auto ei = worklist.pop ();
+          auto *allocno1 = m_allocnos[ei / ARRAY_SIZE (links)];
+          auto ai2 = allocno1->*links[ei % ARRAY_SIZE (links)];
+          if (ai2 == INVALID_ALLOCNO)
+            continue;
+
+          auto *allocno2 = m_allocnos[ai2];
+          auto *group1 = allocno1->group ();
+          auto *group2 = allocno2->group ();
+          if (!group1->has_flexible_stride || !group2->has_flexible_stride)
+            continue;
+
+          int pref = strided_polarity_pref (allocno1, allocno2);
+          if (pref == 0)
+            continue;
+
+          // Queue every not-yet-visited edge of both groups, so that the
+          // polarity spreads through the whole connected component.
+          for (auto *group : { group1, group2 })
+            for (auto &allocno : group->allocnos ())
+              for (unsigned int j = 0; j < ARRAY_SIZE (links); ++j)
+                {
+                  unsigned int edge = allocno.id * ARRAY_SIZE (links) + j;
+                  if (bitmap_set_bit (visited_edges, edge))
+                    worklist.safe_push (edge);
+                }
+
+          // Derive the unknown polarity from the known one.  PREF is
+          // 1 or -1, so multiplying by it works in both directions.
+          // If neither polarity is known yet, pick one arbitrarily.
+          if (group1->strided_polarity)
+            group2->strided_polarity = group1->strided_polarity * pref;
+          else if (group2->strided_polarity)
+            group1->strided_polarity = group2->strided_polarity * pref;
+          else
+            {
+              group1->strided_polarity = 1;
+              group2->strided_polarity = pref;
+            }
+        }
+    }
+
+  // Now look for edges between allocnos in multi-register groups where:
+  //
+  // - the two groups have a relative strided polarity preference (as above).
+  //
+  // - one group (G1) has a free choice between consecutive and strided
+  //   allocations.
+  //
+  // - the other group (G2) must use consecutive allocations.
+  //
+  // Update G1's individual preference for strided or consecutive allocations
+  // based on G2.  If the previous loop chose a polarity for G1, work out
+  // whether it is better for polarity 1 or -1 to correspond to consecutive
+  // allocation.
+  int consecutive_pref = 0;
+  for (unsigned int i = m_allocnos.length (); i-- > 0; )
+    {
+      auto *allocno1 = m_allocnos[i];
+      for (auto link : links)
+        {
+          auto ai2 = allocno1->*link;
+          if (ai2 == INVALID_ALLOCNO)
+            continue;
+
+          auto *allocno2 = m_allocnos[ai2];
+          auto *group1 = allocno1->group ();
+          auto *group2 = allocno2->group ();
+          // Exactly one of the two groups must be flexible.
+          if (group1->has_flexible_stride == group2->has_flexible_stride)
+            continue;
+
+          int pref = strided_polarity_pref (allocno1, allocno2);
+          if (pref == 0)
+            continue;
+
+          auto *group = (group1->has_flexible_stride ? group1 : group2);
+          consecutive_pref += group->strided_polarity * pref;
+          group->consecutive_pref += pref;
+        }
+    }
+
+  // If it doesn't matter whether polarity 1 or -1 corresponds to consecutive
+  // allocation, arbitrarily pick 1.
+  if (consecutive_pref == 0)
+    consecutive_pref = 1;
+
+  // Record which multi-register groups should use strided allocations.
+  // Clear out the temporary edges.
+  for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
+    {
+      auto *allocno = m_allocnos[ai];
+      allocno->chain_prev = INVALID_ALLOCNO;
+      allocno->chain_next = INVALID_ALLOCNO;
+
+      // Handle each group once, via its first allocno.
+      if (allocno->offset != 0)
+        continue;
+
+      auto *group = allocno->group ();
+      if (!group->has_flexible_stride)
+        continue;
+
+      // Groups with a polarity follow the global choice; others use
+      // their individual preference.
+      bool make_strided = (group->strided_polarity
+                           ? (consecutive_pref * group->strided_polarity) < 0
+                           : group->consecutive_pref < 0);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "Allocno [%d:%d]: strided polarity %d,"
+                 " consecutive pref %d, %s\n",
+                 allocno->id, allocno->id + group->size - 1,
+                 group->strided_polarity, group->consecutive_pref,
+                 make_strided ? "making strided" : "keeping consecutive");
+      if (!make_strided)
+        continue;
+
+      // 2-register groups have a stride of 8 FPRs and must start in
+      // registers matching the mask 0x17.  4-register groups have a stride
+      // of 4 FPRs and must start in registers matching the mask 0x13.
+      group->stride = group->size == 2 ? 8 : 4;
+      gcc_checking_assert (group->fpr_candidates
+                           == (group->size == 2 ? 0x55555555 : 0x11111111));
+      group->fpr_candidates = (group->size == 2 ? 0xff00ff : 0xf000f);
+    }
+}
+
+// Compare the allocnos at *ALLOCNO1_PTR and *ALLOCNO2_PTR and return a <=>
+// result that puts allocnos in order of increasing FIELD.
+// NOTE(review): the template parameter list is a reconstruction; FIELD is
+// used as a pointer to a data member of allocno_info -- confirm its type.
+template<unsigned int early_ra::allocno_info::*field>
+int
+early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
+{
+  auto *allocno1 = *(allocno_info *const *) allocno1_ptr;
+  auto *allocno2 = *(allocno_info *const *) allocno2_ptr;
+
+  if (allocno1->*field != allocno2->*field)
+    return allocno1->*field < allocno2->*field ? -1 : 1;
+  // Break ties by id, so that the order is total.
+  return (allocno1->id < allocno2->id ? -1
+          : allocno1->id == allocno2->id ? 0 : 1);
+}
+
+// Return true if we should consider chaining ALLOCNO1 onto the head
+// of ALLOCNO2.  This is just a local test of the two allocnos; it doesn't
+// guarantee that chaining them would give a self-consistent system.
+bool
+early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
+{
+  // Test ALLOCNO1's equivalent allocno instead, if it has one.
+  if (allocno1->equiv_allocno != INVALID_ALLOCNO)
+    allocno1 = m_allocnos[allocno1->equiv_allocno];
+
+  if (allocno2->start_point >= allocno1->end_point
+      && allocno2->equiv_allocno != allocno1->id)
+    return false;
+
+  if (allocno2->is_strong_copy_dest)
+    {
+      // A strong copy destination can only be chained to its strong source.
+      if (!allocno1->is_strong_copy_src
+          || allocno1->copy_dest != allocno2->id)
+        return false;
+    }
+  else if (allocno2->is_copy_dest)
+    {
+      // A copy destination can only be chained to its copy source.
+      if (allocno1->copy_dest != allocno2->id)
+        return false;
+    }
+  else if (allocno1->is_earlyclobbered)
+    {
+      if (allocno1->end_point == allocno2->start_point + 1)
+        return false;
+    }
+
+  return true;
+}
+
+// We're trying to chain allocno ALLOCNO1 to a later allocno.
+// Rate how good a choice ALLOCNO2 would be, with higher being better.
+int
+early_ra::rate_chain (allocno_info *allocno1, allocno_info *allocno2)
+{
+  int score = 0;
+  if (allocno2->is_strong_copy_dest)
+    score += 256;
+  else if (allocno2->is_copy_dest)
+    score += 128;
+
+  // Prefer well-aligned matches.
+  auto *group1 = allocno1->group ();
+  auto *group2 = allocno2->group ();
+  if (group1->stride == 1 && group2->stride == 1)
+    {
+      // Compare the positions of the two allocnos within their colors,
+      // modulo the size of the smaller color.
+      unsigned int min_size = std::min (group1->color_rep ()->size,
+                                        group2->color_rep ()->size);
+      if ((group1->color_rep_offset + allocno1->offset) % min_size
+          == (group2->color_rep_offset + allocno2->offset) % min_size)
+        score += min_size;
+      else
+        score -= min_size;
+    }
+  return score;
+}
+
+// Sort the chain_candidate_infos at ARG1 and ARG2 in order of decreasing
+// score.
+int
+early_ra::cmp_chain_candidates (const void *arg1, const void *arg2)
+{
+  auto &candidate1 = *(const chain_candidate_info *) arg1;
+  auto &candidate2 = *(const chain_candidate_info *) arg2;
+  if (candidate1.score != candidate2.score)
+    return candidate1.score > candidate2.score ? -1 : 1;
+
+  // Prefer to increase the gap between uses of the allocated register,
+  // to give the scheduler more freedom.
+  auto *allocno1 = candidate1.allocno;
+  auto *allocno2 = candidate2.allocno;
+  if (allocno1->start_point != allocno2->start_point)
+    return allocno1->start_point < allocno2->start_point ? -1 : 1;
+
+  // Break remaining ties by id, so that the order is total.
+  if (allocno1 != allocno2)
+    return allocno1->id < allocno2->id ? -1 : 1;
+
+  return 0;
+}
+
+// Join the chains of allocnos that start at HEADI1 and HEADI2.
+// HEADI1 is either empty or a single allocno.
+// On return both HEADI1 and HEADI2 refer to the head of the joined chain.
+void
+early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
+{
+  // If either side is empty, the other side's head is the joined head.
+  if (headi1 == INVALID_ALLOCNO)
+    {
+      headi1 = headi2;
+      return;
+    }
+  if (headi2 == INVALID_ALLOCNO)
+    {
+      headi2 = headi1;
+      return;
+    }
+
+  auto *first = m_allocnos[headi1];
+  auto *second = m_allocnos[headi2];
+  gcc_checking_assert (first->chain_next == INVALID_ALLOCNO
+                       && first->chain_prev == INVALID_ALLOCNO
+                       && second->chain_prev == INVALID_ALLOCNO);
+
+  // If the allocno that FIRST is equivalent to was copied to SECOND, let
+  // FIRST take over SECOND's role as the destination of that copy.
+  if (first->equiv_allocno != INVALID_ALLOCNO)
+    {
+      auto *equiv = m_allocnos[first->equiv_allocno];
+      if (equiv->copy_dest == headi2)
+        {
+          first->is_copy_dest = second->is_copy_dest;
+          first->is_strong_copy_dest = second->is_strong_copy_dest;
+          equiv->copy_dest = headi1;
+        }
+    }
+
+  // Link FIRST in front of SECOND and make it the head of both chains.
+  first->chain_next = headi2;
+  second->chain_prev = headi1;
+  headi2 = headi1;
+}
+
+// Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
+// ends being at allocno offset REP_OFFSET from the start of REP.
+void
+early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
+                                unsigned int rep_offset)
+{
+  auto *group = allocno->group ();
+  if (group->m_color_rep != rep)
+    {
+      group->m_color_rep = rep;
+
+      // Convert ALLOCNO's offset within its group into REP's allocno units.
+      gcc_checking_assert (multiple_p (group->stride, rep->stride));
+      unsigned int stride_ratio = group->stride / rep->stride;
+      unsigned int scaled_offset = allocno->offset * stride_ratio;
+      gcc_checking_assert (rep_offset >= scaled_offset);
+      group->color_rep_offset = rep_offset - scaled_offset;
+
+      // REP must satisfy the requirements of every group it represents.
+      rep->fpr_size = std::max (rep->fpr_size, group->fpr_size);
+      unsigned int fpr_shift = group->color_rep_offset * rep->stride;
+      rep->fpr_candidates &= (group->fpr_candidates >> fpr_shift);
+    }
+}
+
+// REP1 and REP2 are color representatives.  Change REP1's color representative
+// to REP2, with REP1 starting at allocno offset REP2_OFFSET into REP2.
+void
+early_ra::set_color_rep (allocno_group_info *rep1, allocno_group_info *rep2,
+                         unsigned int rep2_offset)
+{
+  gcc_checking_assert (rep1 != rep2
+                       && rep2->m_color_rep == rep2
+                       && multiple_p (rep1->stride, rep2->stride));
+
+  auto heads1 = rep1->chain_heads ();
+  auto heads2 = rep2->chain_heads ();
+  for (unsigned int i1 = 0; i1 < heads1.size (); ++i1)
+    if (heads1[i1] != INVALID_ALLOCNO)
+      {
+        // The slot of REP2 that corresponds to slot I1 of REP1.
+        unsigned int i2 = rep2_offset + i1 * rep1->stride / rep2->stride;
+        if (heads2[i2] == INVALID_ALLOCNO)
+          heads2[i2] = heads1[i1];
+        else
+          gcc_checking_assert (heads2[i2] == heads1[i1]);
+        set_single_color_rep (m_allocnos[heads1[i1]], rep2, i2);
+      }
+}
+
+// Try to chain ALLOCNO1 to the head of the chain starting at ALLOCNO2.
+// Return true on success.
+bool
+early_ra::try_to_chain_allocnos (allocno_info *allocno1,
+                                 allocno_info *allocno2)
+{
+  auto *group1 = allocno1->group ()->color_rep ();
+  auto *group2 = allocno2->group ()->color_rep ();
+
+  // Avoid trying to tie different subgroups of the same group.  This can
+  // happen if the parts of a register are defined and used piecemeal.
+  if (group1 == group2)
+    return false;
+
+  // The stride (in FPRs) between allocnos of each color representative.
+  auto fpr_stride1 = group1->stride;
+  auto fpr_stride2 = group2->stride;
+
+  // The offset (in FPRs) of each allocno group from its color representative.
+  auto fpr_offset1 = allocno1->group ()->color_rep_offset * fpr_stride1;
+  auto fpr_offset2 = allocno2->group ()->color_rep_offset * fpr_stride2;
+
+  // The offset (in FPRs) of each allocno from its color representative.
+  fpr_offset1 += allocno1->offset * allocno1->group ()->stride;
+  fpr_offset2 += allocno2->offset * allocno2->group ()->stride;
+
+  // The FPR overlap is in multiples of the larger stride.
+  auto max_fpr_stride = std::max (fpr_stride1, fpr_stride2);
+  auto min_fpr_offset = std::min (fpr_offset1, fpr_offset2);
+  auto fpr_overlap_offset = ROUND_DOWN (min_fpr_offset, max_fpr_stride);
+
+  // The offset (in FPRs) of the start of the overlapping region from
+  // each color representative.
+  fpr_offset1 -= fpr_overlap_offset;
+  fpr_offset2 -= fpr_overlap_offset;
+
+  // The number of FPRs in each color representative after the start
+  // of the overlapping region.
+  auto fpr_after1 = (group1->size - 1) * fpr_stride1 - fpr_offset1;
+  auto fpr_after2 = (group2->size - 1) * fpr_stride2 - fpr_offset2;
+
+  auto min_fpr_after = std::min (fpr_after1, fpr_after2);
+
+  // The number of overlapping allocnos.
+  auto allocno_overlap_size = min_fpr_after / max_fpr_stride + 1;
+
+  // The offset (in allocnos) of the overlapping region from the start
+  // of each color representative.
+  auto allocno_offset1 = fpr_offset1 / fpr_stride1;
+  auto allocno_offset2 = fpr_offset2 / fpr_stride2;
+
+  // The stride (in allocnos) between overlapping allocnos.
+  auto allocno_stride1 = max_fpr_stride / fpr_stride1;
+  auto allocno_stride2 = max_fpr_stride / fpr_stride2;
+
+  // Reject combinations that are impossible to allocate.
+  auto fprs1 = group1->fpr_candidates;
+  auto fprs2 = group2->fpr_candidates;
+  if (fpr_offset1 > fpr_offset2)
+    fprs2 >>= (fpr_offset1 - fpr_offset2);
+  else
+    fprs1 >>= (fpr_offset2 - fpr_offset1);
+  if ((fprs1 & fprs2) == 0)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, " - cannot chain %d->%d, no FPRs in common"
+                 " (%08x@%d and %08x@%d)\n", allocno1->id, allocno2->id,
+                 group1->fpr_candidates, fpr_offset1,
+                 group2->fpr_candidates, fpr_offset2);
+      return false;
+    }
+
+  // Check whether the chain can be formed.
+  auto heads1 = group1->chain_heads ();
+  auto heads2 = group2->chain_heads ();
+  for (unsigned int i = 0; i < allocno_overlap_size; ++i)
+    {
+      auto headi1 = heads1[allocno_offset1 + i * allocno_stride1];
+      auto headi2 = heads2[allocno_offset2 + i * allocno_stride2];
+      if (headi1 != INVALID_ALLOCNO && headi2 != INVALID_ALLOCNO)
+        {
+          auto *head1 = m_allocnos[headi1];
+          auto *head2 = m_allocnos[headi2];
+          if (head1->chain_next != INVALID_ALLOCNO)
+            return false;
+          if (head2->equiv_allocno != head1->id
+              && head1->end_point <= head2->start_point)
+            return false;
+        }
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, " - chaining allocnos [");
+      for (unsigned int i = 0; i < allocno_overlap_size; ++i)
+        fprintf (dump_file, "%s%d", i ? "," : "",
+                 heads1[allocno_offset1 + i * allocno_stride1]);
+      fprintf (dump_file, "] and [");
+      for (unsigned int i = 0; i < allocno_overlap_size; ++i)
+        fprintf (dump_file, "%s%d", i ? "," : "",
+                 heads2[allocno_offset2 + i * allocno_stride2]);
+      fprintf (dump_file, "]\n");
+    }
+
+  // Chain the allocnos, updating the chain heads.
+  for (unsigned int i = 0; i < allocno_overlap_size; ++i)
+    chain_allocnos (heads1[allocno_offset1 + i * allocno_stride1],
+                    heads2[allocno_offset2 + i * allocno_stride2]);
+
+  // Pick a color representative for the merged groups.
+  allocno_group_info *new_rep;
+  if (allocno_offset1 == 0
+      && group1->size == allocno_overlap_size * allocno_stride1
+      && multiple_p (fpr_stride1, fpr_stride2))
+    {
+      // The first group fits within the second.
+      set_color_rep (group1, group2, allocno_offset2);
+      new_rep = group2;
+    }
+  else if (allocno_offset2 == 0
+           && group2->size == allocno_overlap_size * allocno_stride2
+           && multiple_p (fpr_stride2, fpr_stride1))
+    {
+      // The second group fits within the first.
+      set_color_rep (group2, group1, allocno_offset1);
+      new_rep = group1;
+    }
+  else
+    {
+      // We need a new group that is big enough to span both groups.
+      // The new group always has an FPR stride of 1.
+      auto max_fpr_offset = std::max (fpr_offset1, fpr_offset2);
+      auto max_fpr_after = std::max (fpr_after1, fpr_after2);
+      auto new_size = max_fpr_offset + max_fpr_after + 1;
+      new_rep = create_allocno_group (INVALID_REGNUM, new_size);
+
+      set_color_rep (group1, new_rep, max_fpr_offset - fpr_offset1);
+      set_color_rep (group2, new_rep, max_fpr_offset - fpr_offset2);
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, " - new frontier [");
+      auto new_heads = new_rep->chain_heads ();
+      for (unsigned int i = 0; i < new_heads.size (); ++i)
+        {
+          if (i)
+            fprintf (dump_file, ",");
+          if (new_heads[i] == INVALID_ALLOCNO)
+            fprintf (dump_file, "-");
+          else
+            fprintf (dump_file, "%d", new_heads[i]);
+        }
+      fprintf (dump_file, "]\n");
+    }
+
+  return true;
+}
+
+// Create a color_info for color representative GROUP.
+void
+early_ra::create_color (allocno_group_info *group)
+{
+  // The hard register stays at the FIRST_PSEUDO_REGISTER placeholder until
+  // allocate_colors assigns a real one.
+  auto *color = region_allocate<color_info> ();
+  color->id = m_colors.length ();
+  color->hard_regno = FIRST_PSEUDO_REGISTER;
+  color->group = group;
+
+  gcc_checking_assert (group->m_color_rep == group);
+  group->has_color = true;
+  group->color = m_colors.length ();
+
+  m_colors.safe_push (color);
+}
+
+// Form allocnos into chains.  Create colors for each resulting clique.
+void
+early_ra::form_chains ()
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\nChaining allocnos:\n");
+
+  // Perform (modified) interval graph coloring.  First sort by
+  // increasing start point.
+  m_sorted_allocnos.reserve (m_allocnos.length ());
+  m_sorted_allocnos.splice (m_allocnos);
+  m_sorted_allocnos.qsort (cmp_increasing<&allocno_info::start_point>);
+
+  // During this phase, color representatives are only correct for
+  // unprocessed allocno groups (where the color representative is
+  // the group itself) and for groups that contain a current chain head.
+  //
+  // TI is the index of the first sorted allocno that might still be a
+  // chain head; everything before it already has a predecessor.
+  unsigned int ti = 0;
+  auto_vec<chain_candidate_info> candidates;
+  for (unsigned int hi = 0; hi < m_sorted_allocnos.length (); ++hi)
+    {
+      auto *allocno1 = m_sorted_allocnos[hi];
+      if (allocno1->chain_next != INVALID_ALLOCNO)
+        continue;
+
+      // Record conflicts with direct uses for FPR hard registers.
+      auto *group1 = allocno1->group ();
+      for (unsigned int fpr = allocno1->offset; fpr < 32; ++fpr)
+        if (fpr_conflicts_with_allocno_p (fpr, allocno1))
+          group1->fpr_candidates &= ~(1U << (fpr - allocno1->offset));
+
+      // Record conflicts due to partially call-clobbered registers.
+      // (Full clobbers are handled by the previous loop.)
+      for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id)
+        if (call_in_range_p (abi_id, allocno1->start_point,
+                             allocno1->end_point))
+          {
+            auto fprs = partial_fpr_clobbers (abi_id, group1->fpr_size);
+            group1->fpr_candidates &= ~fprs >> allocno1->offset;
+          }
+
+      // Find earlier allocnos (in processing order) that could be chained
+      // to this one.
+      candidates.truncate (0);
+      for (unsigned int sci = ti; sci < hi; ++sci)
+        {
+          auto *allocno2 = m_sorted_allocnos[sci];
+          if (allocno2->chain_prev == INVALID_ALLOCNO)
+            {
+              if (!is_chain_candidate (allocno1, allocno2))
+                continue;
+              chain_candidate_info candidate;
+              candidate.allocno = allocno2;
+              candidate.score = rate_chain (allocno1, allocno2);
+              candidates.safe_push (candidate);
+            }
+          else if (sci == ti)
+            ++ti;
+        }
+
+      // Sort the candidates by decreasing score.
+      candidates.qsort (cmp_chain_candidates);
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        {
+          fprintf (dump_file, " Chain candidates for %d:", allocno1->id);
+          for (auto &candidate : candidates)
+            fprintf (dump_file, " %d(%d)", candidate.allocno->id,
+                     candidate.score);
+          fprintf (dump_file, "\n");
+        }
+
+      // Pick the first candidate that works.
+      for (auto &candidate : candidates)
+        if (try_to_chain_allocnos (allocno1, candidate.allocno))
+          break;
+    }
+
+  // Create color_infos for each group.  Make sure that each group's
+  // color representative is up to date.
+  for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
+    {
+      auto *allocno = m_sorted_allocnos[hi];
+      auto *rep = allocno->group ()->color_rep ();
+      if (rep->has_color)
+        continue;
+
+      create_color (rep);
+      auto heads = rep->chain_heads ();
+      for (unsigned int i = 0; i < heads.size (); ++i)
+        {
+          // Walk the whole chain that hangs off head I.
+          unsigned int ai = heads[i];
+          while (ai != INVALID_ALLOCNO)
+            {
+              allocno = m_allocnos[ai];
+              set_single_color_rep (allocno, rep, i * rep->stride);
+              ai = allocno->chain_next;
+            }
+        }
+    }
+}
+
+// Return true if the given FPR (starting at 0) conflicts with allocno
+// ALLOCNO.
+bool
+early_ra::fpr_conflicts_with_allocno_p (unsigned int fpr,
+                                        allocno_info *allocno)
+{
+  // Binary search of FPR's recorded ranges for one that overlaps ALLOCNO.
+  auto &ranges = m_fpr_ranges[fpr];
+  unsigned int start_i = 0;
+  unsigned int end_i = ranges.length ();
+  while (start_i < end_i)
+    {
+      unsigned int mid_i = (start_i + end_i) / 2;
+      auto &range = ranges[mid_i];
+      if (allocno->end_point > range.start_point)
+        start_i = mid_i + 1;
+      else if (allocno->start_point < range.end_point)
+        end_i = mid_i;
+      else
+        {
+          if (range.allocno != allocno->id)
+            return true;
+          // The FPR is equivalent to ALLOCNO for this particular range.
+          // See whether ALLOCNO conflicts with a neighboring range.
+          if (mid_i > 0
+              && ranges[mid_i - 1].start_point >= allocno->end_point)
+            return true;
+          if (mid_i + 1 < ranges.length ()
+              && ranges[mid_i + 1].end_point <= allocno->start_point)
+            return true;
+          return false;
+        }
+    }
+  return false;
+}
+
+// Return true if there is a call with ABI identifier ABI_ID in the inclusive
+// program point range [START_POINT, END_POINT].
+bool
+early_ra::call_in_range_p (unsigned int abi_id, unsigned int start_point,
+                           unsigned int end_point)
+{
+  // Binary search of the call points recorded for ABI_ID.
+  auto &points = m_call_points[abi_id];
+  unsigned int start_i = 0;
+  unsigned int end_i = points.length ();
+  while (start_i < end_i)
+    {
+      unsigned int mid_i = (start_i + end_i) / 2;
+      auto point = points[mid_i];
+      if (end_point > point)
+        start_i = mid_i + 1;
+      else if (start_point < point)
+        end_i = mid_i;
+      else
+        return true;
+    }
+  return false;
+}
+
+// Return the set of FPRs for which a value of size SIZE will be clobbered
+// by a call to a function with ABI identifier ABI_ID, but would not be
+// for some smaller size.  The set therefore excludes FPRs that are
+// fully-clobbered, like V0 in the base ABI.
+unsigned int
+early_ra::partial_fpr_clobbers (unsigned int abi_id, fpr_size_info size)
+{
+  auto &abi = function_abis[abi_id];
+  unsigned int clobbers = 0;
+  // Pick a representative vector mode for SIZE.
+  machine_mode mode = (size == FPR_D ? V8QImode
+                       : size == FPR_Q ? V16QImode : VNx16QImode);
+  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
+    if (!abi.clobbers_full_reg_p (regno)
+        && abi.clobbers_reg_p (mode, regno))
+      clobbers |= 1U << (regno - V0_REGNUM);
+  return clobbers;
+}
+
+// Process copies between pseudo registers and hard registers and update
+// the FPR preferences for the associated colors.
+void
+early_ra::process_copies ()
+{
+  for (auto &copy : m_allocno_copies)
+    {
+      auto *allocno = m_allocnos[copy.allocno];
+      auto *group = allocno->group ();
+      auto offset = group->color_rep_offset + allocno->offset;
+      // The color would have to start below FPR 0 for ALLOCNO to land on
+      // the copied FPR.
+      if (offset > copy.fpr)
+        continue;
+
+      // FPR is where the color would start if ALLOCNO were given copy.fpr.
+      // Accumulate the copy's weight there, saturating at 127.
+      unsigned int fpr = copy.fpr - offset;
+      auto *color = m_colors[group->color_rep ()->color];
+      color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
+                                         + copy.weight, 127);
+    }
+}
+
+// Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
+// result that puts colors in order of decreasing size.
+int
+early_ra::cmp_decreasing_size (const void *color1_ptr, const void *color2_ptr)
+{
+  auto *color1 = *(color_info *const *) color1_ptr;
+  auto *color2 = *(color_info *const *) color2_ptr;
+
+  if (color1->group->size != color2->group->size)
+    return color1->group->size > color2->group->size ? -1 : 1;
+  // Break ties by increasing color id.
+  return (color1->id < color2->id ? -1
+          : color1->id == color2->id ? 0 : 1);
+}
+
+// Allocate a register to each color.  If we run out of registers,
+// give up on doing a full allocation of the FPR-based pseudos in the
+// region.
+void
+early_ra::allocate_colors ()
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\nAllocating registers:\n");
+
+  // Allocate the largest colors first.
+  auto_vec<color_info *> sorted_colors;
+  sorted_colors.safe_splice (m_colors);
+  sorted_colors.qsort (cmp_decreasing_size);
+
+  for (unsigned int i = 0; i < 32; ++i)
+    if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
+      m_call_preserved_fprs |= 1U << i;
+
+  for (auto *color : sorted_colors)
+    {
+      // Bit N of CANDIDATES is set if the color may start at FPR N.
+      // Rule out starting points for which any register of the color
+      // has already been allocated.
+      unsigned int candidates = color->group->fpr_candidates;
+      for (unsigned int i = 0; i < color->group->size; ++i)
+        candidates &= ~(m_allocated_fprs >> i);
+      unsigned int best = INVALID_REGNUM;
+      int best_weight = 0;
+      for (unsigned int fpr = 0; fpr <= 32U - color->group->size; ++fpr)
+        {
+          if ((candidates & (1U << fpr)) == 0)
+            continue;
+          int weight = color->fpr_preferences[fpr];
+          // Account for registers that the current function must preserve.
+          for (unsigned int i = 0; i < color->group->size; ++i)
+            if (m_call_preserved_fprs & (1U << (fpr + i)))
+              weight -= 1;
+          // On a tie, the later (higher-numbered) candidate wins.
+          if (best == INVALID_REGNUM || best_weight <= weight)
+            {
+              best = fpr;
+              best_weight = weight;
+            }
+        }
+
+      if (best == INVALID_REGNUM)
+        {
+          m_allocation_successful = false;
+          return;
+        }
+
+      color->hard_regno = best + V0_REGNUM;
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, " Allocating [v%d:v%d] to color %d\n",
+                 best, best + color->group->size - 1, color->id);
+      m_allocated_fprs |= ((1U << color->group->size) - 1) << best;
+    }
+}
+
+// See if ALLOCNO ends a subchain of single registers that can be split
+// off without affecting the rest of the chain, and without introducing
+// any moves.  Return the start of the chain if so (which might be ALLOCNO
+// itself), otherwise return null.
+early_ra::allocno_info *
+early_ra::find_independent_subchain (allocno_info *allocno)
+{
+  // Make sure ALLOCNO ends a natural subchain.
+  if (auto *next_allocno = chain_next (allocno))
+    if (next_allocno->start_point + 1 >= allocno->end_point)
+      return nullptr;
+
+  // Check the allocnos in the purported subchain and find the other end.
+  for (;;)
+    {
+      auto *group = allocno->group ();
+      if (group->m_color_rep == group)
+        return nullptr;
+      if (group->size != 1)
+        return nullptr;
+
+      auto *prev_allocno = chain_prev (allocno);
+      if (!prev_allocno || allocno->start_point + 1 < prev_allocno->end_point)
+        return allocno;
+
+      allocno = prev_allocno;
+    }
+}
+
+// Search the colors starting at index FIRST_COLOR whose FPRs do not belong
+// to FPR_CONFLICTS.  Return the first such color that has no group.  If all
+// such colors have groups, instead return the color with the latest
+// (smallest) start point.
+early_ra::color_info *
+early_ra::find_oldest_color (unsigned int first_color,
+                             unsigned int fpr_conflicts)
+{
+  color_info *best = nullptr;
+  unsigned int best_start_point = ~0U;
+  for (unsigned int ci = first_color; ci < m_colors.length (); ++ci)
+    {
+      auto *color = m_colors[ci];
+      if (fpr_conflicts & (1U << (color->hard_regno - V0_REGNUM)))
+        continue;
+      // An unused color is always the best choice.
+      if (!color->group)
+        return color;
+      auto chain_head = color->group->chain_heads ()[0];
+      auto start_point = m_allocnos[chain_head]->start_point;
+      if (!best || best_start_point > start_point)
+        {
+          best = color;
+          best_start_point = start_point;
+        }
+    }
+  return best;
+}
+
+// If there are some spare FPRs that can be reused without introducing saves,
+// restores, or moves, use them to "broaden" the allocation, in order to give
+// the scheduler more freedom.  This is particularly useful for forming LDPs
+// and STPs.
+void
+early_ra::broaden_colors ()
+{
+  // Create dummy colors for every leftover FPR that can be used cheaply.
+  unsigned int first_color = m_colors.length ();
+  for (unsigned int fpr = 0; fpr < 32; ++fpr)
+    if (((m_allocated_fprs | m_call_preserved_fprs) & (1U << fpr)) == 0)
+      {
+        auto *color = region_allocate<color_info> ();
+        color->id = m_colors.length ();
+        color->hard_regno = V0_REGNUM + fpr;
+        color->group = nullptr;
+        m_colors.safe_push (color);
+      }
+
+  // Exit early if there are no spare FPRs.
+  if (first_color == m_colors.length ())
+    return;
+
+  // Go through the allocnos in order, seeing if there is a subchain of
+  // single-FPR allocnos that can be split off from the containing clique.
+  // Allocate such subchains to the new colors on an oldest-first basis.
+  for (auto *allocno : m_sorted_allocnos)
+    if (auto *start_allocno = find_independent_subchain (allocno))
+      {
+        // Collect the FPRs that any member of the subchain cannot use.
+        unsigned int fpr_conflicts = 0;
+        auto *member = allocno;
+        for (;;)
+          {
+            fpr_conflicts |= ~member->group ()->fpr_candidates;
+            if (member == start_allocno)
+              break;
+            member = m_allocnos[member->chain_prev];
+          }
+
+        auto *color = find_oldest_color (first_color, fpr_conflicts);
+        if (!color)
+          continue;
+
+        if (!color->group)
+          {
+            // First use of this spare color: ALLOCNO's group becomes the
+            // color's group, with an initially empty chain.
+            auto *group = allocno->group ();
+            color->group = group;
+            group->color = color->id;
+            group->chain_heads ()[0] = INVALID_ALLOCNO;
+          }
+        else
+          {
+            auto chain_head = color->group->chain_heads ()[0];
+            auto start_point = m_allocnos[chain_head]->start_point;
+            if (start_point >= allocno->end_point)
+              // Allocating to COLOR isn't viable, and it was the best
+              // option available.
+              continue;
+
+            auto *next_allocno = chain_next (allocno);
+            if (!next_allocno || next_allocno->start_point <= start_point)
+              // The current allocation gives at least as much scheduling
+              // freedom as COLOR would.
+              continue;
+          }
+
+        // Unlink the chain.
+        if (auto *next_allocno = chain_next (allocno))
+          next_allocno->chain_prev = start_allocno->chain_prev;
+        if (auto *prev_allocno = chain_prev (start_allocno))
+          prev_allocno->chain_next = allocno->chain_next;
+
+        // Make the subchain use COLOR.
+        allocno->chain_next = color->group->chain_heads ()[0];
+        if (dump_file && (dump_flags & TDF_DETAILS))
+          fprintf (dump_file, "Moving to optional color %d (register %s):",
+                   color->id, reg_names[color->hard_regno]);
+        for (;;)
+          {
+            auto *group = allocno->group ();
+            if (dump_file && (dump_flags & TDF_DETAILS))
+              fprintf (dump_file, " r%d", group->regno);
+            group->m_color_rep = color->group;
+            group->color_rep_offset = 0;
+            if (allocno == start_allocno)
+              break;
+            allocno = m_allocnos[allocno->chain_prev];
+          }
+        if (dump_file && (dump_flags & TDF_DETAILS))
+          fprintf (dump_file, "\n");
+        color->group->chain_heads ()[0] = start_allocno->id;
+      }
+}
+
+// Record the final choice of hard register for each allocno.
+void
+early_ra::finalize_allocation ()
+{
+  for (auto *allocno : m_allocnos)
+    {
+      // Start from the color's register, then add the group's offset from
+      // its color representative and the allocno's offset within its group.
+      auto *group = allocno->group ();
+      auto *rep = group->color_rep ();
+      auto rep_regno = m_colors[rep->color]->hard_regno;
+      auto group_regno = rep_regno + group->color_rep_offset;
+      allocno->hard_regno = group_regno + allocno->offset * group->stride;
+    }
+}
+
+// Replace any allocno references in REFS with the allocated register.
+// Return true if at least one reference was replaced.
+bool
+early_ra::replace_regs (df_ref refs)
+{
+  bool changed = false;
+  for (df_ref ref = refs; ref; ref = DF_REF_NEXT_LOC (ref))
+    {
+      auto range = get_allocno_subgroup (DF_REF_REG (ref));
+      if (!range)
+        continue;
+
+      auto new_regno = range.allocno (0)->hard_regno;
+      *DF_REF_LOC (ref) = gen_rtx_REG (GET_MODE (DF_REF_REG (ref)), new_regno);
+      changed = true;
+    }
+  return changed;
+}
+
+// Try to make INSN match its FPR-related constraints.  If this needs
+// a source operand (SRC) to be copied to a destination operand (DEST)
+// before INSN, add the associated (DEST, SRC) pairs to MOVES.
+//
+// Return -1 on failure, otherwise return a ?/!-style reject count.
+// The reject count doesn't model the moves, just the internal alternative
+// preferences.
+int
+early_ra::try_enforce_constraints (rtx_insn *insn,
+                                   vec<std::pair<int, int>> &moves)
+{
+  if (!constrain_operands (0, get_preferred_alternatives (insn)))
+    return -1;
+
+  // Pick the alternative with the lowest cost.
+  int best = -1;
+  auto alts = get_preferred_alternatives (insn);
+  for (int altno = 0; altno < recog_data.n_alternatives; ++altno)
+    {
+      if (!(alts & ALTERNATIVE_BIT (altno)))
+        continue;
+
+      auto *op_alt = &recog_op_alt[altno * recog_data.n_operands];
+      if (!likely_alternative_match_p (op_alt))
+        continue;
+
+      // The (DEST operand, SRC operand) moves this alternative would need.
+      auto_vec<std::pair<int, int>, 4> new_moves;
+      for (int opno = 0; opno < recog_data.n_operands; ++opno)
+        {
+          rtx op = recog_data.operand[opno];
+          if (REG_P (op)
+              && FP_REGNUM_P (REGNO (op))
+              && op_alt[opno].matched >= 0)
+            {
+              rtx old_src = recog_data.operand[op_alt[opno].matched];
+              if (!operands_match_p (op, old_src))
+                {
+                  // Skip the move if OP overlaps any other operand.
+                  for (int i = 0; i < recog_data.n_operands; ++i)
+                    if (i != opno)
+                      {
+                        rtx other = recog_data.operand[i];
+                        if (reg_overlap_mentioned_p (op, other))
+                          {
+                            old_src = NULL_RTX;
+                            break;
+                          }
+                      }
+                  if (!old_src)
+                    continue;
+                  new_moves.safe_push ({ opno, op_alt[opno].matched });
+                }
+            }
+        }
+      // Each required move adds 7 to the alternative's reject count.
+      int cost = count_rejects (op_alt) + new_moves.length () * 7;
+      if (best < 0 || cost < best)
+        {
+          best = cost;
+          moves.truncate (0);
+          moves.safe_splice (new_moves);
+        }
+    }
+  return best;
+}
+
+// Make INSN match its FPR-related constraints.
+void
+early_ra::enforce_constraints (rtx_insn *insn)
+{
+  extract_insn (insn);
+  preprocess_constraints (insn);
+
+  // First try with the operands as they are.
+  auto_vec<std::pair<int, int>, 4> moves;
+  int cost = try_enforce_constraints (insn, moves);
+
+  // Next try taking advantage of commutativity.
+  for (int opno = 0; opno < recog_data.n_operands - 1; ++opno)
+    if (recog_data.constraints[opno][0] == '%')
+      {
+        std::swap (*recog_data.operand_loc[opno],
+                   *recog_data.operand_loc[opno + 1]);
+        std::swap (recog_data.operand[opno],
+                   recog_data.operand[opno + 1]);
+        auto_vec<std::pair<int, int>, 4> swapped_moves;
+        int swapped_cost = try_enforce_constraints (insn, swapped_moves);
+        if (swapped_cost >= 0 && (cost < 0 || swapped_cost < cost))
+          {
+            // Keep the swapped order, since it is strictly better.
+            cost = swapped_cost;
+            moves.truncate (0);
+            moves.safe_splice (swapped_moves);
+          }
+        else
+          {
+            // Undo the swap.
+            std::swap (*recog_data.operand_loc[opno],
+                       *recog_data.operand_loc[opno + 1]);
+            std::swap (recog_data.operand[opno],
+                       recog_data.operand[opno + 1]);
+          }
+      }
+
+  // The allocation should ensure that there is at least one valid combination.
+  // It's too late to back out now if not.
+  gcc_assert (cost >= 0);
+  // Bring duplicated operands into line with any operand that was swapped.
+  for (int i = 0; i < recog_data.n_dups; ++i)
+    {
+      int dup_of = recog_data.dup_num[i];
+      rtx new_op = *recog_data.operand_loc[dup_of];
+      if (new_op != recog_data.operand[dup_of])
+        *recog_data.dup_loc[i] = copy_rtx (new_op);
+    }
+  // Copy each source into its destination register before INSN and make
+  // INSN use that register as the source.
+  for (auto move : moves)
+    {
+      int dest_opno = move.first;
+      int src_opno = move.second;
+      rtx dest = recog_data.operand[dest_opno];
+      rtx old_src = recog_data.operand[src_opno];
+      rtx new_src = lowpart_subreg (GET_MODE (old_src), dest, GET_MODE (dest));
+      emit_insn_before (gen_move_insn (new_src, old_src), insn);
+      *recog_data.operand_loc[src_opno] = new_src;
+    }
+}
+
+// See whether INSN is an instruction that operates on multi-register vectors,
+// and if we have decided to make it use strided rather than consecutive
+// accesses.  Update the pattern and return true if so.
+bool
+early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
+{
+  if (!NONJUMP_INSN_P (insn) || recog_memoized (insn) < 0)
+    return false;
+
+  // Find the multi-register operand, if INSN has one of the handled types.
+  auto stride_type = get_attr_stride_type (insn);
+  rtx pat = PATTERN (insn);
+  rtx op;
+  if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
+      || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+    op = SET_DEST (pat);
+  else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
+    op = XVECEXP (SET_SRC (pat), 0, 1);
+  else
+    return false;
+
+  auto range = get_allocno_subgroup (op);
+  if (!range || range.group->stride == 1)
+    return false;
+
+  // Create one single-vector register for each allocno in the group.
+  //
+  // NOTE(review): the auto_vec template arguments are missing from the
+  // reviewed text.  <rtx, 4> is reconstructed from the uses below
+  // (quick_push of up to four REG rtxes); confirm against the original.
+  gcc_assert (range.start == 0 && range.count == range.group->size);
+  auto elt_mode = GET_MODE_INNER (GET_MODE (op));
+  auto single_mode = aarch64_full_sve_mode (elt_mode).require ();
+  auto_vec<rtx, 4> regs;
+  for (unsigned int i = 0; i < range.count; ++i)
+    regs.quick_push (gen_rtx_REG (single_mode, range.allocno (i)->hard_regno));
+
+  extract_insn (insn);
+  if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+    {
+      auto unspec = XINT (SET_SRC (pat), 1);
+      if (range.count == 2)
+        pat = gen_aarch64_strided2 (unspec, GET_MODE (op), regs[0], regs[1],
+                                    recog_data.operand[1],
+                                    recog_data.operand[2]);
+      else
+        pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
+                                    regs[0], regs[1], regs[2], regs[3],
+                                    recog_data.operand[1],
+                                    recog_data.operand[2]);
+    }
+  else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
+    {
+      auto unspec = XINT (SET_SRC (pat), 1);
+      if (range.count == 2)
+        pat = gen_aarch64_strided2 (unspec, GET_MODE (op),
+                                    recog_data.operand[0],
+                                    recog_data.operand[2], regs[0], regs[1]);
+      else
+        pat = gen_aarch64_strided4 (unspec, GET_MODE (op),
+                                    recog_data.operand[0],
+                                    recog_data.operand[2],
+                                    regs[0], regs[1], regs[2], regs[3]);
+      // Ensure correct sharing for the source memory.
+      //
+      // ??? Why doesn't the generator get this right?
+      XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
+        = *recog_data.dup_loc[0];
+    }
+  else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE)
+    {
+      auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4));
+      if (range.count == 2)
+        pat = gen_aarch64_sme_lut_strided2 (bits, single_mode,
+                                            regs[0], regs[1],
+                                            recog_data.operand[1],
+                                            recog_data.operand[2]);
+      else
+        pat = gen_aarch64_sme_lut_strided4 (bits, single_mode,
+                                            regs[0], regs[1], regs[2], regs[3],
+                                            recog_data.operand[1],
+                                            recog_data.operand[2]);
+    }
+  else
+    gcc_unreachable ();
+  // Install the new pattern and force it to be re-recognized.
+  PATTERN (insn) = pat;
+  INSN_CODE (insn) = -1;
+  df_insn_rescan (insn);
+  return true;
+}
+
+// We've successfully allocated the current region.  Apply the allocation
+// to the instructions.
+void
+early_ra::apply_allocation ()
+{
+  // Read PREV before processing INSN, since enforce_constraints can emit
+  // new instructions before INSN.
+  rtx_insn *prev;
+  for (auto insn_range : m_insn_ranges)
+    for (rtx_insn *insn = insn_range.first;
+         insn != insn_range.second;
+         insn = prev)
+      {
+        prev = PREV_INSN (insn);
+        if (!INSN_P (insn))
+          continue;
+
+        bool changed = maybe_convert_to_strided_access (insn);
+        changed |= replace_regs (DF_INSN_DEFS (insn));
+        changed |= replace_regs (DF_INSN_USES (insn));
+        if (changed && NONDEBUG_INSN_P (insn))
+          {
+            if (GET_CODE (PATTERN (insn)) != USE
+                && GET_CODE (PATTERN (insn)) != CLOBBER
+                && !is_move_set (PATTERN (insn)))
+              enforce_constraints (insn);
+
+            // A REG_EQUIV note establishes an equivalence throughout
+            // the function, but here we're reusing hard registers for
+            // multiple pseudo registers.  We also no longer need REG_EQUIV
+            // notes that record potential spill locations, since we've
+            // allocated the pseudo register without spilling.
+            rtx *ptr = &REG_NOTES (insn);
+            while (*ptr)
+              if (REG_NOTE_KIND (*ptr) == REG_EQUIV)
+                *ptr = XEXP (*ptr, 1);
+              else
+                ptr = &XEXP (*ptr, 1);
+          }
+        changed |= replace_regs (DF_INSN_EQ_USES (insn));
+        if (changed)
+          df_insn_rescan (insn);
+      }
+
+  for (auto *insn : m_dead_insns)
+    delete_insn (insn);
+}
+
+// Try to allocate the current region.  Update the instructions if successful.
+void
+early_ra::process_region ()
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      dump_fpr_ranges ();
+      dump_copies ();
+      dump_allocnos ();
+    }
+
+  find_strided_accesses ();
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_allocnos ();
+
+  form_chains ();
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_allocnos ();
+
+  process_copies ();
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    dump_colors ();
+
+  // Leave the instructions untouched if some color could not be given
+  // a register.
+  allocate_colors ();
+  if (!m_allocation_successful)
+    return;
+
+  broaden_colors ();
+  finalize_allocation ();
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nAllocation successful\nFinal allocation:\n");
+      dump_allocnos ();
+      dump_colors ();
+    }
+
+  apply_allocation ();
+}
+
+// Return true if INSN would become dead if we successfully allocate the
+// current region.
+bool
+early_ra::is_dead_insn (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+  if (!set)
+    return false;
+
+  rtx dest = SET_DEST (set);
+  auto dest_range = get_allocno_subgroup (dest);
+  if (!dest_range)
+    return false;
+
+  // The instruction is needed if any allocno that it sets is live.
+  for (auto &allocno : dest_range.allocnos ())
+    if (bitmap_bit_p (m_live_allocnos, allocno.id))
+      return false;
+
+  if (side_effects_p (set))
+    return false;
+
+  return true;
+}
+
+// Build up information about block BB.  IS_ISOLATED is true if the
+// block is not part of a larger region.
+void
+early_ra::process_block (basic_block bb, bool is_isolated)
+{
+  m_current_bb = bb;
+
+  // Process live-out FPRs.
+  bitmap live_out = df_get_live_out (bb);
+  for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno)
+    if (bitmap_bit_p (live_out, regno))
+      record_fpr_use (regno);
+
+  // Process live-out allocnos.  We don't track individual FPR liveness
+  // across block boundaries, so we have to assume that the whole pseudo
+  // register is live.
+ bitmap_iterator bi; + unsigned int regno; + EXECUTE_IF_AND_IN_BITMAP (df_get_live_out (bb), m_fpr_pseudos, + FIRST_PSEUDO_REGISTER, regno, bi) + { + auto range = get_allocno_subgroup (regno_reg_rtx[regno]); + for (auto &allocno : range.allocnos ()) + record_allocno_use (&allocno); + } + + m_current_point += 1; + + record_artificial_refs (0); + + bool is_first = true; + rtx_insn *start_insn = BB_END (bb); + rtx_insn *insn; + FOR_BB_INSNS_REVERSE (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + // CLOBBERs are used to prevent pseudos from being upwards exposed. + // We can ignore them if allocation is successful. + if (GET_CODE (PATTERN (insn)) == CLOBBER) + { + if (get_allocno_subgroup (XEXP (PATTERN (insn), 0))) + m_dead_insns.safe_push (insn); + continue; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (is_first) + fprintf (dump_file, "\nBlock %d:\n", bb->index); + fprintf (dump_file, "%6d:", m_current_point); + pretty_printer rtl_slim_pp; + rtl_slim_pp.buffer->stream = dump_file; + print_insn (&rtl_slim_pp, insn, 1); + pp_flush (&rtl_slim_pp); + fprintf (dump_file, "\n"); + } + is_first = false; + + if (is_dead_insn (insn)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "%14s -- dead\n", ""); + m_dead_insns.safe_push (insn); + } + else + { + record_insn_refs (insn); + rtx pat = PATTERN (insn); + if (is_move_set (pat)) + record_copy (SET_DEST (pat), SET_SRC (pat), true); + else + { + extract_insn (insn); + record_constraints (insn); + } + } + + // See whether we have a complete region, with no remaining live + // allocnos. 
+ if (is_isolated + && bitmap_empty_p (m_live_allocnos) + && m_live_fprs == 0 + && m_allocation_successful + && !m_allocnos.is_empty ()) + { + rtx_insn *prev_insn = PREV_INSN (insn); + m_insn_ranges.safe_push ({ start_insn, prev_insn }); + process_region (); + start_new_region (); + is_first = true; + start_insn = prev_insn; + } + } + m_insn_ranges.safe_push ({ start_insn, BB_HEAD (bb) }); + + record_artificial_refs (DF_REF_AT_TOP); + + // Process live-in FPRs. + bitmap live_in = df_get_live_in (bb); + for (unsigned int regno = V0_REGNUM; regno <= V31_REGNUM; ++regno) + if (bitmap_bit_p (live_in, regno) + && (m_live_fprs & (1U << (regno - V0_REGNUM)))) + record_fpr_def (regno); + + // Process live-in allocnos. + EXECUTE_IF_AND_IN_BITMAP (live_in, m_fpr_pseudos, + FIRST_PSEUDO_REGISTER, regno, bi) + { + auto range = get_allocno_subgroup (regno_reg_rtx[regno]); + for (auto &allocno : range.allocnos ()) + if (bitmap_bit_p (m_live_allocnos, allocno.id)) + record_allocno_def (&allocno); + } + + m_current_point += 1; + + bitmap_clear (m_live_allocnos); + m_live_fprs = 0; +} + +// Divide the function into regions, such that no edges into or out +// of the region have live "FPR pseudos". +void +early_ra::process_blocks () +{ + auto_sbitmap visited (last_basic_block_for_fn (m_fn)); + auto_sbitmap fpr_pseudos_live_out (last_basic_block_for_fn (m_fn)); + auto_sbitmap fpr_pseudos_live_in (last_basic_block_for_fn (m_fn)); + + bitmap_clear (visited); + bitmap_clear (fpr_pseudos_live_out); + bitmap_clear (fpr_pseudos_live_in); + + // Record which blocks have live FPR pseudos on entry and exit. 
+ basic_block bb; + FOR_EACH_BB_FN (bb, m_fn) + { + if (bitmap_intersect_p (df_get_live_out (bb), m_fpr_pseudos)) + bitmap_set_bit (fpr_pseudos_live_out, bb->index); + if (bitmap_intersect_p (df_get_live_in (bb), m_fpr_pseudos)) + bitmap_set_bit (fpr_pseudos_live_in, bb->index); + } + + struct stack_node { edge_iterator ei; basic_block bb; }; + + auto_vec stack; + auto_vec region; + + // Go through the function in reverse postorder and process the region + // containing each block. + unsigned int n_blocks = df_get_n_blocks (DF_FORWARD); + int *order = df_get_postorder (DF_FORWARD); + for (unsigned int bbi = 0; bbi < n_blocks; ++bbi) + { + basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, order[bbi]); + if (bb->index < NUM_FIXED_BLOCKS) + continue; + + if (!bitmap_set_bit (visited, bb->index)) + continue; + + // Process forward edges before backward edges (so push backward + // edges first). Build the region in an approximation of reverse + // program order. + if (bitmap_bit_p (fpr_pseudos_live_in, bb->index)) + stack.quick_push ({ ei_start (bb->preds), nullptr }); + if (bitmap_bit_p (fpr_pseudos_live_out, bb->index)) + stack.quick_push ({ ei_start (bb->succs), bb }); + else + region.safe_push (bb); + while (!stack.is_empty ()) + { + auto &node = stack.last (); + if (ei_end_p (node.ei)) + { + if (node.bb) + region.safe_push (node.bb); + stack.pop (); + continue; + } + edge e = ei_edge (node.ei); + if (node.bb) + { + // A forward edge from a node that has not yet been added + // to region. + if (bitmap_bit_p (fpr_pseudos_live_in, e->dest->index) + && bitmap_set_bit (visited, e->dest->index)) + { + stack.safe_push ({ ei_start (e->dest->preds), nullptr }); + if (bitmap_bit_p (fpr_pseudos_live_out, e->dest->index)) + stack.safe_push ({ ei_start (e->dest->succs), e->dest }); + else + region.safe_push (e->dest); + } + else + ei_next (&node.ei); + } + else + { + // A backward edge from a node that has already been added + // to the region. 
+ if (bitmap_bit_p (fpr_pseudos_live_out, e->src->index) + && bitmap_set_bit (visited, e->src->index)) + { + if (bitmap_bit_p (fpr_pseudos_live_in, e->src->index)) + stack.safe_push ({ ei_start (e->src->preds), nullptr }); + stack.safe_push ({ ei_start (e->src->succs), e->src }); + } + else + ei_next (&node.ei); + } + } + + m_current_point = 2; + start_new_region (); + + if (region.is_empty ()) + process_block (bb, true); + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nRegion (from %d):", bb->index); + for (unsigned int j = 0; j < region.length (); ++j) + fprintf (dump_file, " %d", region[j]->index); + fprintf (dump_file, "\n"); + } + for (unsigned int j = 0; j < region.length (); ++j) + { + basic_block bb = region[j]; + bool is_isolated + = ((j == 0 && !bitmap_bit_p (fpr_pseudos_live_out, bb->index)) + || (j == region.length () - 1 + && !bitmap_bit_p (fpr_pseudos_live_in, bb->index))); + process_block (bb, is_isolated); + } + } + region.truncate (0); + + if (!m_allocnos.is_empty () && m_allocation_successful) + process_region (); + } +} + +// Run the pass on the current function. +void +early_ra::execute () +{ + df_analyze (); + + preprocess_insns (); + propagate_pseudo_reg_info (); + choose_fpr_pseudos (); + if (bitmap_empty_p (m_fpr_pseudos)) + return; + + if (dump_file && (dump_flags & TDF_DETAILS)) + dump_pseudo_regs (); + + process_blocks (); + df_verify (); +} + +class pass_early_ra : public rtl_opt_pass +{ +public: + pass_early_ra (gcc::context *ctxt) + : rtl_opt_pass (pass_data_early_ra, ctxt) + {} + + // opt_pass methods: + virtual bool gate (function *); + virtual unsigned int execute (function *); +}; + +bool +pass_early_ra::gate (function *) +{ + // Require a vector ISA to be enabled. 
+ if (!TARGET_SIMD && !TARGET_SVE) + return false; + + if (aarch64_early_ra == AARCH64_EARLY_RA_NONE) + return false; + + if (aarch64_early_ra == AARCH64_EARLY_RA_STRIDED + && !TARGET_STREAMING_SME2) + return false; + + return true; +} + +unsigned int +pass_early_ra::execute (function *fn) +{ + early_ra (fn).execute (); + return 0; +} + +} // end namespace + +// Create a new early register allocation pass instance. + +rtl_opt_pass * +make_pass_aarch64_early_ra (gcc::context *ctxt) +{ + return new pass_early_ra (ctxt); +} diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index 01151e93d179..adb19d564ded 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -120,4 +120,15 @@ enum aarch64_ldp_stp_policy { AARCH64_LDP_STP_POLICY_NEVER }; +/* An enum specifying when the early-ra pass should be run: + - AARCH64_EARLY_RA_ALL: for all functions + - AARCH64_EARLY_RA_STRIDED: for functions that have access to strided + multi-register instructions + - AARCH64_EARLY_RA_NONE: for no functions. */ +enum aarch64_early_ra_scope { + AARCH64_EARLY_RA_ALL, + AARCH64_EARLY_RA_STRIDED, + AARCH64_EARLY_RA_NONE +}; + #endif diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def index 662a13fd5e6d..2d792e9aa502 100644 --- a/gcc/config/aarch64/aarch64-passes.def +++ b/gcc/config/aarch64/aarch64-passes.def @@ -18,6 +18,7 @@ along with GCC; see the file COPYING3. If not see . 
*/ +INSERT_PASS_BEFORE (pass_sched, 1, pass_aarch64_early_ra); INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering); INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index f2075a177326..d1af7f40891e 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -891,6 +891,7 @@ bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx); rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); bool aarch64_stepped_int_parallel_p (rtx, int); +bool aarch64_strided_registers_p (rtx *, unsigned int, unsigned int); rtx aarch64_tls_get_addr (void); unsigned aarch64_debugger_regno (unsigned); unsigned aarch64_trampoline_size (void); @@ -1063,6 +1064,7 @@ void aarch64_get_all_extension_candidates (auto_vec *candidates); std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags, aarch64_feature_flags); +rtl_opt_pass *make_pass_aarch64_early_ra (gcc::context *); rtl_opt_pass *make_pass_fma_steering (gcc::context *); rtl_opt_pass *make_pass_track_speculation (gcc::context *); rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *); diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 6cba6ab5f74c..575a10c6f652 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1981,4 +1981,74 @@ "TARGET_STREAMING_SME2 && !( == 4 && == 4 && == 8)" "luti\t%0, zt0, %1[%2]" + [(set_attr "stride_type" "luti_consecutive")] +) + +(define_insn "@aarch64_sme_lut_strided2" + [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:VNx16QI 2 "register_operand" "w") + (match_operand:DI 3 "const_int_operand") + (const_int 
LUTI_BITS) + (const_int 0)] + UNSPEC_SME_LUTI)) + (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_dup 2) + (match_dup 3) + (const_int LUTI_BITS) + (const_int 1)] + UNSPEC_SME_LUTI))] + "TARGET_STREAMING_SME2 + && aarch64_strided_registers_p (operands, 2, 8)" + "luti\t{%0., %1.}, zt0, %2[%3]" + [(set_attr "stride_type" "luti_strided")] +) + +(define_insn "@aarch64_sme_lut_strided4" + [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_operand:VNx16QI 4 "register_operand" "w") + (match_operand:DI 5 "const_int_operand") + (const_int LUTI_BITS) + (const_int 0)] + UNSPEC_SME_LUTI)) + (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_dup 4) + (match_dup 5) + (const_int LUTI_BITS) + (const_int 1)] + UNSPEC_SME_LUTI)) + (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_dup 4) + (match_dup 5) + (const_int LUTI_BITS) + (const_int 2)] + UNSPEC_SME_LUTI)) + (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w") + (unspec:SVE_FULL_BHS + [(reg:V8DI ZT0_REGNUM) + (reg:DI SME_STATE_REGNUM) + (match_dup 4) + (match_dup 5) + (const_int LUTI_BITS) + (const_int 3)] + UNSPEC_SME_LUTI))] + "TARGET_STREAMING_SME2 + && !( == 4 && == 8) + && aarch64_strided_registers_p (operands, 4, 4)" + "luti\t{%0., %1., %2., %3.}, zt0, %4[%5]" + [(set_attr "stride_type" "luti_strided")] ) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 6492da0b383a..4e5a88aa03a9 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -1305,7 +1305,7 @@ public: icode = 
convert_optab_handler (maskload_optab, e.vector_mode (0), e.gp_mode (0)); else - icode = code_for_aarch64_ld1 (e.tuple_mode (0)); + icode = code_for_aarch64 (UNSPEC_LD1_COUNT, e.tuple_mode (0)); return e.use_contiguous_load_insn (icode); } }; @@ -1605,7 +1605,10 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = code_for_aarch64_ldnt1 (e.tuple_mode (0)); + insn_code icode = (e.vectors_per_tuple () == 1 + ? code_for_aarch64_ldnt1 (e.vector_mode (0)) + : code_for_aarch64 (UNSPEC_LDNT1_COUNT, + e.tuple_mode (0))); return e.use_contiguous_load_insn (icode); } }; @@ -2415,7 +2418,7 @@ public: icode = convert_optab_handler (maskstore_optab, e.vector_mode (0), e.gp_mode (0)); else - icode = code_for_aarch64_st1 (e.tuple_mode (0)); + icode = code_for_aarch64 (UNSPEC_ST1_COUNT, e.tuple_mode (0)); return e.use_contiguous_store_insn (icode); } }; @@ -2533,7 +2536,10 @@ public: rtx expand (function_expander &e) const override { - insn_code icode = code_for_aarch64_stnt1 (e.tuple_mode (0)); + insn_code icode = (e.vectors_per_tuple () == 1 + ? code_for_aarch64_stnt1 (e.vector_mode (0)) + : code_for_aarch64 (UNSPEC_STNT1_COUNT, + e.tuple_mode (0))); return e.use_contiguous_store_insn (icode); } }; diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index d911f657871f..fdd14d15096a 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1277,17 +1277,6 @@ "ld1\t%0., %2/z, %1" ) -;; Predicated LD1 (multi), with a count as predicate. -(define_insn "@aarch64_ld1" - [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw") - (unspec:SVE_FULLx24 - [(match_operand:VNx16BI 2 "register_operand" "Uph") - (match_operand:SVE_FULLx24 1 "memory_operand" "m")] - UNSPEC_LD1_SVE_COUNT))] - "TARGET_SME2 && TARGET_STREAMING" - "ld1\t%0, %K2/z, %1" -) - ;; Unpredicated LD[234]. 
(define_expand "vec_load_lanes" [(set (match_operand:SVE_STRUCT 0 "register_operand") @@ -1430,17 +1419,6 @@ "ldnt1\t%0., %2/z, %1" ) -;; Predicated contiguous non-temporal load (multi). -(define_insn "@aarch64_ldnt1" - [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw") - (unspec:SVE_FULLx24 - [(match_operand:VNx16BI 2 "register_operand" "Uph") - (match_operand:SVE_FULLx24 1 "memory_operand" "m")] - UNSPEC_LDNT1_SVE_COUNT))] - "TARGET_SVE" - "ldnt1\t%0, %K2/z, %1" -) - ;; ------------------------------------------------------------------------- ;; ---- Normal gather loads ;; ------------------------------------------------------------------------- @@ -2263,17 +2241,6 @@ "st1\t%1., %2, %0" ) -(define_insn "@aarch64_st1" - [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") - (unspec:SVE_FULLx24 - [(match_operand:VNx16BI 2 "register_operand" "Uph") - (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw") - (match_dup 0)] - UNSPEC_ST1_SVE_COUNT))] - "TARGET_SME2 && TARGET_STREAMING" - "st1\t%1, %K2, %0" -) - ;; Unpredicated ST[234]. This is always a full update, so the dependence ;; on the old value of the memory location (via (match_dup 0)) is redundant. 
;; There doesn't seem to be any obvious benefit to treating the all-true @@ -2373,17 +2340,6 @@ "stnt1\t%1., %2, %0" ) -(define_insn "@aarch64_stnt1" - [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") - (unspec:SVE_FULLx24 - [(match_operand:VNx16BI 2 "register_operand" "Uph") - (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw") - (match_dup 0)] - UNSPEC_STNT1_SVE_COUNT))] - "TARGET_SME2 && TARGET_STREAMING" - "stnt1\t%1, %K2, %0" -) - ;; ------------------------------------------------------------------------- ;; ---- Normal scatter stores ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 29c41ca3c931..619a95f1c311 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -21,8 +21,12 @@ ;; The file is organised into the following sections (search for the full ;; line): ;; -;; == Moves +;; == Loads +;; ---- Multi-register loads predicated by a counter ;; ---- Non-temporal gather loads +;; +;; == Stores +;; ---- Multi-register stores predicated by a counter ;; ---- Non-temporal scatter stores ;; ;; == Predicate manipulation @@ -112,9 +116,85 @@ ;; ---- Optional SM4 extensions ;; ========================================================================= -;; == Moves +;; == Loads ;; ========================================================================= +;; ------------------------------------------------------------------------- +;; ---- Multi-register loads predicated by a counter +;; ------------------------------------------------------------------------- +;; Includes: +;; - LD1B +;; - LD1D +;; - LD1H +;; - LD1W +;; - LDNT1B +;; - LDNT1D +;; - LDNT1H +;; - LDNT1W +;; ------------------------------------------------------------------------- + +;; Predicated LD1 (multi), with a count as predicate. 
+(define_insn "@aarch64_" + [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 2 "register_operand" "Uph") + (match_operand:SVE_FULLx24 1 "memory_operand" "m")] + LD1_COUNT))] + "TARGET_STREAMING_SME2" + "\t%0, %K2/z, %1" + [(set_attr "stride_type" "ld1_consecutive")] +) + +(define_insn "@aarch64__strided2" + [(set (match_operand: 0 "aarch64_simd_register" "=Uwd") + (unspec: + [(match_operand:VNx16BI 3 "register_operand" "Uph") + (match_operand:SVE_FULLx2 2 "memory_operand" "m") + (const_int 0)] + LD1_COUNT)) + (set (match_operand: 1 "aarch64_simd_register" "=w") + (unspec: + [(match_dup 3) + (match_dup 2) + (const_int 1)] + LD1_COUNT))] + "TARGET_STREAMING_SME2 + && aarch64_strided_registers_p (operands, 2, 8)" + "\t{%0., %1.}, %K3/z, %2" + [(set_attr "stride_type" "ld1_strided")] +) + +(define_insn "@aarch64__strided4" + [(set (match_operand: 0 "aarch64_simd_register" "=Uwt") + (unspec: + [(match_operand:VNx16BI 5 "register_operand" "Uph") + (match_operand:SVE_FULLx4 4 "memory_operand" "m") + (const_int 0)] + LD1_COUNT)) + (set (match_operand: 1 "aarch64_simd_register" "=w") + (unspec: + [(match_dup 5) + (match_dup 4) + (const_int 1)] + LD1_COUNT)) + (set (match_operand: 2 "aarch64_simd_register" "=w") + (unspec: + [(match_dup 5) + (match_dup 4) + (const_int 2)] + LD1_COUNT)) + (set (match_operand: 3 "aarch64_simd_register" "=w") + (unspec: + [(match_dup 5) + (match_dup 4) + (const_int 3)] + LD1_COUNT))] + "TARGET_STREAMING_SME2 + && aarch64_strided_registers_p (operands, 4, 4)" + "\t{%0., %1., %2., %3.}, %K5/z, %4" + [(set_attr "stride_type" "ld1_strided")] +) + ;; ------------------------------------------------------------------------- ;; ---- Non-temporal gather loads ;; ------------------------------------------------------------------------- @@ -171,6 +251,66 @@ } ) +;; ========================================================================= +;; == Stores +;; 
========================================================================= + +;; ------------------------------------------------------------------------- +;; ---- Multi-register stores predicated by a counter +;; ------------------------------------------------------------------------- +;; Includes: +;; - ST1B +;; - ST1D +;; - ST1H +;; - ST1W +;; - STNT1B +;; - STNT1D +;; - STNT1H +;; - STNT1W +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_" + [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 2 "register_operand" "Uph") + (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw") + (match_dup 0)] + ST1_COUNT))] + "TARGET_STREAMING_SME2" + "\t%1, %K2, %0" + [(set_attr "stride_type" "st1_consecutive")] +) + +(define_insn "@aarch64__strided2" + [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 1 "register_operand" "Uph") + (match_operand: 2 "aarch64_simd_register" "Uwd") + (match_operand: 3 "aarch64_simd_register" "w") + (match_dup 0)] + ST1_COUNT))] + "TARGET_STREAMING_SME2 + && aarch64_strided_registers_p (operands + 2, 2, 8)" + "\t{%2., %3.}, %K1, %0" + [(set_attr "stride_type" "st1_strided")] +) + +(define_insn "@aarch64__strided4" + [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m") + (unspec:SVE_FULLx24 + [(match_operand:VNx16BI 1 "register_operand" "Uph") + (match_operand: 2 "aarch64_simd_register" "Uwt") + (match_operand: 3 "aarch64_simd_register" "w") + (match_operand: 4 "aarch64_simd_register" "w") + (match_operand: 5 "aarch64_simd_register" "w") + (match_dup 0)] + ST1_COUNT))] + "TARGET_STREAMING_SME2 + && aarch64_strided_registers_p (operands + 2, 4, 4)" + "\t{%2., %3., %4., %5.}, %K1, %0" + [(set_attr "stride_type" "st1_strided")] +) + ;; ------------------------------------------------------------------------- ;; ---- Non-temporal scatter stores ;; 
------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 8f50a70083d7..5cffdabc62e5 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -22113,6 +22113,19 @@ aarch64_stepped_int_parallel_p (rtx op, int step) return true; } +/* Return true if OPERANDS[0] to OPERANDS[NUM_OPERANDS - 1] form a + sequence of strided registers, with the stride being equal to STRIDE. + The operands are already known to be FPRs. */ +bool +aarch64_strided_registers_p (rtx *operands, unsigned int num_operands, + unsigned int stride) +{ + for (unsigned int i = 1; i < num_operands; ++i) + if (REGNO (operands[i]) != REGNO (operands[0]) + i * stride) + return false; + return true; +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). */ void diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index d43f8be6e952..f4d4427a3a07 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -290,13 +290,9 @@ UNSPEC_NZCV UNSPEC_XPACLRI UNSPEC_LD1_SVE - UNSPEC_LD1_SVE_COUNT UNSPEC_ST1_SVE - UNSPEC_ST1_SVE_COUNT UNSPEC_LDNT1_SVE - UNSPEC_LDNT1_SVE_COUNT UNSPEC_STNT1_SVE - UNSPEC_STNT1_SVE_COUNT UNSPEC_LD1RQ UNSPEC_LD1_GATHER UNSPEC_LDFF1_GATHER @@ -531,6 +527,26 @@ ;; may chose to hold the tracking state encoded in SP. (define_attr "speculation_barrier" "true,false" (const_string "false")) +;; This attribute is attached to multi-register instructions that have +;; two forms: one in which the registers are consecutive and one in +;; which they are strided. The consecutive and strided forms have +;; different define_insns, with different operands. The mapping between +;; the RTL of the consecutive form and the RTL of the strided form varies +;; from one type of instruction to another. 
+;; +;; The attribute gives two pieces of information: +;; - does the current instruction have consecutive or strided registers? +;; - what kind of RTL rewrite is needed to move between forms? +;; +;; For example, all consecutive LD*1 instructions have the same basic +;; RTL structure. The same applies to all strided LD*1 instructions. +;; The RTL mapping therefore applies at LD1 granularity, rather than +;; being broken down into individual types of load. +(define_attr "stride_type" + "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided, + luti_consecutive,luti_strided" + (const_string "none")) + ;; ------------------------------------------------------------------- ;; Pipeline descriptions and scheduling ;; ------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index f5a518202a15..df84c662d243 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -237,6 +237,24 @@ Enable the division approximation. Enabling this reduces precision of division results to about 16 bits for single precision and to 32 bits for double precision. +Enum +Name(early_ra_scope) Type(enum aarch64_early_ra_scope) + +EnumValue +Enum(early_ra_scope) String(all) Value(AARCH64_EARLY_RA_ALL) + +EnumValue +Enum(early_ra_scope) String(strided) Value(AARCH64_EARLY_RA_STRIDED) + +EnumValue +Enum(early_ra_scope) String(none) Value(AARCH64_EARLY_RA_NONE) + +mearly-ra= +Target RejectNegative Joined Enum(early_ra_scope) Var(aarch64_early_ra) Init(AARCH64_EARLY_RA_NONE) Save +Specify when to enable an early register allocation pass. The possibilities +are: all functions, functions that have access to strided multi-register +instructions, and no functions. 
+ Enum Name(sve_vector_bits) Type(enum aarch64_sve_vector_bits_enum) The possible SVE vector lengths: diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 8b65cab29fb8..f217fca437bb 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -56,6 +56,14 @@ "4-tuple-aligned floating point and SIMD vector registers." "regno % 4 == 0") +(define_register_constraint "Uwd" "FP_REGS" + "@internal The first register in a tuple of 2 strided FPRs." + "(regno & 0x8) == 0") + +(define_register_constraint "Uwt" "FP_REGS" + "@internal The first register in a tuple of 4 strided FPRs." + "(regno & 0xc) == 0") + (define_register_constraint "Upa" "PR_REGS" "SVE predicate registers p0 - p15.") diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 9bbcacd9d37d..4377188303c3 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -926,6 +926,8 @@ UNSPEC_FMLSLT ; Used in aarch64-sve2.md. UNSPEC_HISTCNT ; Used in aarch64-sve2.md. UNSPEC_HISTSEG ; Used in aarch64-sve2.md. + UNSPEC_LD1_COUNT ; Used in aarch64-sve2.md. + UNSPEC_LDNT1_COUNT ; Used in aarch64-sve2.md. UNSPEC_MATCH ; Used in aarch64-sve2.md. UNSPEC_NMATCH ; Used in aarch64-sve2.md. UNSPEC_PEXT ; Used in aarch64-sve2.md. @@ -994,6 +996,8 @@ UNSPEC_SSUBLTB ; Used in aarch64-sve2.md. UNSPEC_SSUBWB ; Used in aarch64-sve2.md. UNSPEC_SSUBWT ; Used in aarch64-sve2.md. + UNSPEC_ST1_COUNT ; Used in aarch64-sve2.md. + UNSPEC_STNT1_COUNT ; Used in aarch64-sve2.md. UNSPEC_SUBHNB ; Used in aarch64-sve2.md. UNSPEC_SUBHNT ; Used in aarch64-sve2.md. UNSPEC_TBL2 ; Used in aarch64-sve2.md. 
@@ -3163,6 +3167,10 @@ (define_int_attr pred_load [(UNSPEC_PRED_X "_x") (UNSPEC_LD1_SVE "")]) +(define_int_iterator LD1_COUNT [UNSPEC_LD1_COUNT UNSPEC_LDNT1_COUNT]) + +(define_int_iterator ST1_COUNT [UNSPEC_ST1_COUNT UNSPEC_STNT1_COUNT]) + (define_int_iterator SVE2_U32_UNARY [UNSPEC_URECPE UNSPEC_RSQRTE]) (define_int_iterator SVE2_INT_UNARY_NARROWB [UNSPEC_SQXTNB @@ -3578,6 +3586,8 @@ (UNSPEC_FEXPA "fexpa") (UNSPEC_FTSMUL "ftsmul") (UNSPEC_FTSSEL "ftssel") + (UNSPEC_LD1_COUNT "ld1") + (UNSPEC_LDNT1_COUNT "ldnt1") (UNSPEC_PMULLB "pmullb") (UNSPEC_PMULLB_PAIR "pmullb_pair") (UNSPEC_PMULLT "pmullt") @@ -3641,6 +3651,8 @@ (UNSPEC_SQRDCMLAH90 "sqrdcmlah90") (UNSPEC_SQRDCMLAH180 "sqrdcmlah180") (UNSPEC_SQRDCMLAH270 "sqrdcmlah270") + (UNSPEC_ST1_COUNT "st1") + (UNSPEC_STNT1_COUNT "stnt1") (UNSPEC_TRN1Q "trn1q") (UNSPEC_TRN2Q "trn2q") (UNSPEC_UMATMUL "umatmul") diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 0d96ae3d0b29..446a4fb2e910 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -194,6 +194,12 @@ aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/aarch64-cc-fusion.cc +aarch64-early-ra.o: $(srcdir)/config/aarch64/aarch64-early-ra.cc \ + $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \ + $(RTL_SSA_H) tree-pass.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-early-ra.cc + comma=, MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 43341fe6e5e0..9d4a0b2820a6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21224,6 +21224,21 @@ Enable compiler hardening against straight line speculation (SLS). 
In addition, @samp{-mharden-sls=all} enables all SLS hardening while @samp{-mharden-sls=none} disables all SLS hardening. +@opindex mearly-ra +@item -mearly-ra=@var{scope} +Determine when to enable an early register allocation pass. This pass runs +before instruction scheduling and tries to find a spill-free allocation of +floating-point and vector code. It also tries to make use of strided +multi-register instructions, such as SME2's strided LD1 and ST1. + +The possible values of @var{scope} are: @var{all}, which runs the pass on +all functions; @var{strided}, which runs the pass on functions that have +access to strided multi-register instructions; and @var{none}, which +disables the pass. + +@option{-mearly-ra=all} is the default for @option{-O2} and above, and for +@option{-Os}. @option{-mearly-ra=none} is the default otherwise. + @opindex msve-vector-bits @item -msve-vector-bits=@var{bits} Specify the number of bits in an SVE vector register. This option only has diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c index ba14194d0a41..f1f46e051a86 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_16.c @@ -125,8 +125,9 @@ CONS4_FN (2, float); /* ** cons4_4_float: -** ins v([0-9]+)\.s.* -** ... +** ins v[0-9]+\.s[^\n]+ +** ins v[0-9]+\.s[^\n]+ +** zip1 v([0-9]+).4s, [^\n]+ ** stp q\1, q\1, \[x0\] ** stp q\1, q\1, \[x0, #?32\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/pr109078.c b/gcc/testsuite/gcc.target/aarch64/pr109078.c new file mode 100644 index 000000000000..0d7567222393 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr109078.c @@ -0,0 +1,59 @@ +/* { dg-options "-Ofast" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include +#include +#include + +/* +** simple_gemm: +** ... 
+** tbnz [^\n]+ +** ld1 [^\n]+ +** fadd [^\n]+ +** fadd [^\n]+ +** st1 [^\n]+ +** ret +*/ +void simple_gemm( + float* restrict out, + float const* restrict a, + float const* restrict b, + size_t k, bool zero_out +) { + register float32x4x2_t o0; + o0.val[0] = vdupq_n_f32(0.0f); + o0.val[1] = vdupq_n_f32(0.0f); + + // begin dot + { + register float32x4_t a0; + register float32x4x2_t b0; + + while (k >= 1) { + b0 = vld1q_f32_x2(b); + a0 = vdupq_n_f32(a[0]); + + o0.val[0] = vfmaq_f32(o0.val[0], a0, b0.val[0]); + o0.val[1] = vfmaq_f32(o0.val[1], a0, b0.val[1]); + + b += 8; + a += 1; + k -= 1; + } + } // end dot + + // begin writeback + { + if (!zero_out) { + register float32x4x2_t t0; + t0 = vld1q_f32_x2(out); + + o0.val[0] = vaddq_f32(o0.val[0], t0.val[0]); + o0.val[1] = vaddq_f32(o0.val[1], t0.val[1]); + } + + // TODO: both clang and gcc generates redundant mov because of bad register allocation. + vst1q_f32_x2(out, o0); + } // end writeback +} diff --git a/gcc/testsuite/gcc.target/aarch64/pr109391.c b/gcc/testsuite/gcc.target/aarch64/pr109391.c new file mode 100644 index 000000000000..232c43097c67 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr109391.c @@ -0,0 +1,14 @@ +/* { dg-options "-Ofast" } */ + +#include + +int16x8x3_t bsl(const uint16x8x3_t *check, const int16x8x3_t *in1, + const int16x8x3_t *in2) { + int16x8x3_t out; + for (uint32_t j = 0; j < 3; j++) { + out.val[j] = vbslq_s16(check->val[j], in1->val[j], in2->val[j]); + } + return out; +} + +/* { dg-final { scan-assembler-not {\tmov\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c new file mode 100644 index 000000000000..3620fff36687 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c @@ -0,0 +1,253 @@ +// { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } +// { dg-final { check-function-bodies "**" "" } } + +#include + +#pragma GCC target "+sme2" + +// This file deliberately contains nonsense 
code. + +/* +** test1: +** ptrue [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** ret +*/ +void test1(int32_t *dest, int32_t *src) __arm_streaming +{ + svcount_t pg = svptrue_c32(); + svint32x4_t l0 = svld1_vnum_x4(pg, src, 0); + svint32x4_t l1 = svld1_vnum_x4(pg, src, 4); + svint32x4_t l2 = svld1_vnum_x4(pg, src, 8); + svint32x4_t l3 = svld1_vnum_x4(pg, src, 12); + svst1_vnum(pg, dest, 0, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 4, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 8, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 12, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); +} + +/* +** test2: +** ptrue [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** ld1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** st1w [^\n]+ +** ret +*/ +void test2(int32_t *dest, int32_t *src) __arm_streaming +{ + svcount_t pg = svptrue_c32(); + svint32x4_t l0 = svld1_vnum_x4(pg, src, 0); + svint32x4_t l1 = svld1_vnum_x4(pg, src, 4); + svint32x4_t l2 = svld1_vnum_x4(pg, src, 8); + svint32x4_t l3 = svld1_vnum_x4(pg, src, 12); + svst1_vnum(pg, dest, 0, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 4, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 8, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 12, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); + svst1_vnum(pg, dest, 16, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 20, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); 
+ svst1_vnum(pg, dest, 24, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 28, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); +} + +/* +** test3: +** ptrue ([^\n]+)\.s +** ld1w {z16\.s - z19\.s}, \1/z, \[x1\] +** ld1w {z20\.s - z23\.s}, \1/z, \[x1, #4, mul vl\] +** ld1w {z24\.s - z27\.s}, \1/z, \[x1, #8, mul vl\] +** ld1w {z28\.s - z31\.s}, \1/z, \[x1, #12, mul vl\] +** sclamp [^\n]+ +** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #12, mul vl\] +** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0\] +** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #4, mul vl\] +** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #8, mul vl\] +** ret +*/ +void test3(int32_t *dest, int32_t *src) __arm_streaming +{ + svcount_t pg = svptrue_c32(); + svint32x4_t al0 = svld1_vnum_x4(pg, src, 0); + svint32x4_t l1 = svld1_vnum_x4(pg, src, 4); + svint32x4_t l2 = svld1_vnum_x4(pg, src, 8); + svint32x4_t l3 = svld1_vnum_x4(pg, src, 12); + svint32x4_t l0 = svclamp(al0, svget4(l3, 0), svget4(l3, 1)); + svst1_vnum(pg, dest, 12, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); + svst1_vnum(pg, dest, 0, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 4, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 8, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); +} + +/* +** test4: +** ptrue ([^\n]+)\.s +** ld1w {z16\.s - z19\.s}, \1/z, \[x1\] +** ld1w {z20\.s - z23\.s}, \1/z, \[x1, #4, mul vl\] +** ld1w {z24\.s - z27\.s}, \1/z, \[x1, #8, mul vl\] +** ld1w {z28\.s - z31\.s}, \1/z, \[x1, #12, mul vl\] +** sclamp [^\n]+ +** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0\] +** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #4, mul vl\] +** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #8, mul vl\] +** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #12, mul vl\] +** 
st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0, #16, mul vl\] +** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #20, mul vl\] +** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #24, mul vl\] +** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #28, mul vl\] +** ... +** ret +*/ +void test4(int32_t *dest, int32_t *src) __arm_streaming +{ + svcount_t pg = svptrue_c32(); + svint32x4_t l0 = svld1_vnum_x4(pg, src, 0); + svint32x4_t l1 = svld1_vnum_x4(pg, src, 4); + svint32x4_t l2 = svld1_vnum_x4(pg, src, 8); + svint32x4_t l3 = svld1_vnum_x4(pg, src, 12); + l0 = svclamp(l0, svget4(l3, 0), svget4(l3, 1)); + svst1_vnum(pg, dest, 0, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 4, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 8, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 12, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); + svst1_vnum(pg, dest, 16, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 20, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 24, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 28, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); +} + +/* +** test5: +** ptrue [^\n]+ +** ld1b [^\n]+ +** ld1b [^\n]+ +** ptrue ([^\n]+)\.s +** ld1w [^\n]+, \1/z, \[x0\] +** luti4 {z16\.s, z20\.s, z24\.s, z28\.s}, zt0, z[0-9]+\[0\] +** luti4 {z17\.s, z21\.s, z25\.s, z29\.s}, zt0, z[0-9]+\[1\] +** luti4 {z18\.s, z22\.s, z26\.s, z30\.s}, zt0, z[0-9]+\[0\] +** luti4 {z19\.s, z23\.s, z27\.s, z31\.s}, zt0, z[0-9]+\[1\] +** uclamp {z16\.s - z19\.s}, z[0-9]+\.s, z[0-9]+\.s +** uclamp {z20\.s - z23\.s}, z[0-9]+\.s, z[0-9]+\.s +** uclamp {z24\.s - z27\.s}, z[0-9]+\.s, z[0-9]+\.s +** uclamp {z28\.s - z31\.s}, 
z[0-9]+\.s, z[0-9]+\.s +** st1w {z16\.s - z19\.s}, \1, \[x0\] +** st1w {z20\.s - z23\.s}, \1, \[x0, #4, mul vl\] +** st1w {z24\.s - z27\.s}, \1, \[x0, #8, mul vl\] +** st1w {z28\.s - z31\.s}, \1, \[x0, #12, mul vl\] +** ret +*/ +void test5(uint32_t *dest, uint8_t *indices) + __arm_streaming __arm_preserves("za") __arm_inout("zt0") +{ + svuint8_t indices1 = svld1_vnum(svptrue_b8(), indices, 0); + svuint8_t indices2 = svld1_vnum(svptrue_b8(), indices, 2); + + svcount_t pg = svptrue_c32(); + svuint32x4_t bounds = svld1_x4(pg, dest); + + svuint32x4_t x0 = svluti4_lane_zt_u32_x4(0, indices1, 0); + svuint32x4_t x1 = svluti4_lane_zt_u32_x4(0, indices1, 1); + svuint32x4_t x2 = svluti4_lane_zt_u32_x4(0, indices2, 0); + svuint32x4_t x3 = svluti4_lane_zt_u32_x4(0, indices2, 1); + + svuint32x4_t y0 = svcreate4(svget4(x0, 0), svget4(x1, 0), + svget4(x2, 0), svget4(x3, 0)); + svuint32x4_t y1 = svcreate4(svget4(x0, 1), svget4(x1, 1), + svget4(x2, 1), svget4(x3, 1)); + svuint32x4_t y2 = svcreate4(svget4(x0, 2), svget4(x1, 2), + svget4(x2, 2), svget4(x3, 2)); + svuint32x4_t y3 = svcreate4(svget4(x0, 3), svget4(x1, 3), + svget4(x2, 3), svget4(x3, 3)); + + y0 = svclamp(y0, svget4(bounds, 0), svget4(bounds, 1)); + y1 = svclamp(y1, svget4(bounds, 2), svget4(bounds, 3)); + y2 = svclamp(y2, svget4(bounds, 0), svget4(bounds, 1)); + y3 = svclamp(y3, svget4(bounds, 2), svget4(bounds, 3)); + + svst1_vnum(pg, dest, 0, y0); + svst1_vnum(pg, dest, 4, y1); + svst1_vnum(pg, dest, 8, y2); + svst1_vnum(pg, dest, 12, y3); +} + +/* +** test6: +** ptrue [^\n]+ +** ld1h [^\n]+ +** sclamp [^\n]+ +** st1h [^\n]+ +** ret +*/ +void test6(int16_t *ptr) + __arm_streaming __arm_preserves("za") __arm_inout("zt0") +{ + svcount_t pg = svptrue_c16(); + svint16x4_t x0 = svld1_x4(pg, ptr); + x0 = svclamp(x0, svget4(x0, 0), svget4(x0, 3)); + svst1(pg, ptr, x0); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr106694.c b/gcc/testsuite/gcc.target/aarch64/sve/pr106694.c new file mode 100644 index 
000000000000..85a69d0372ca --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr106694.c @@ -0,0 +1,28 @@ +/* { dg-options "-Ofast" } */ + +#include "arm_sve.h" + +int coalesce (svbool_t pg, int64_t* base, int n, int64_t *in1, int64_t *in2, int64_t*out) +{ + svint64x4_t result = svld4_s64 (pg, base); + svint64_t v0 = svget4_s64(result, 0); + svint64_t v1 = svget4_s64(result, 1); + svint64_t v2 = svget4_s64(result, 2); + svint64_t v3 = svget4_s64(result, 3); + + for (int i = 0; i < n; i += 1) + { + svint64_t v18 = svld1_s64(pg, in1); + svint64_t v19 = svld1_s64(pg, in2); + v0 = svmad_s64_z(pg, v0, v18, v19); + v1 = svmad_s64_z(pg, v1, v18, v19); + v2 = svmad_s64_z(pg, v2, v18, v19); + v3 = svmad_s64_z(pg, v3, v18, v19); + } + svst1_s64(pg, out+0,v0); + svst1_s64(pg, out+1,v1); + svst1_s64(pg, out+2,v2); + svst1_s64(pg, out+3,v3); +} + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c index 5ee66da15caf..fe819c47a5ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c @@ -71,9 +71,9 @@ DO_IMMEDIATE_OPS (0, int64_t, 0); DO_IMMEDIATE_OPS (5, int64_t, 5); DO_IMMEDIATE_OPS (63, int64_t, 63); -/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ /* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, 
z[0-9]+\.d\n} 2 } } */ /* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ From 1395c573c523762957bde8c2a08832c5f4350815 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Wed, 6 Dec 2023 17:21:29 +0000 Subject: [PATCH 064/311] libstdc++: Fix recent changes to __glibcxx_assert [PR112882] The changes in r14-6198-g5e8a30d8b8f4d7 were broken, as I used _GLIBCXX17_CONSTEXPR for the 'if _GLIBCXX17_CONSTEXPR (true)' condition, forgetting that it would also be used for the is_constant_evaluated() check. Using 'if constexpr (std::is_constant_evaluated())' is a bug. Additionally, relying on __glibcxx_assert_fail to give a "not a constant expression" error is a problem because at -O0 an undefined reference to __glibcxx_assert_fail is present in the compiled code. This means you can't use libstdc++ headers without also linking to libstdc++ for the symbol definition. This fix rewrites the __glibcxx_assert macro again. This still avoids doing the duplicate checks, once for constexpr and once at runtime (if _GLIBCXX_ASSERTIONS is defined). When _GLIBCXX_ASSERTIONS is defined we still rely on __glibcxx_assert_fail to give a "not a constant expression" error during constant evaluation (because when assertions are defined it's not a problem to emit a reference to the symbol). But when that macro is not defined, we use a new inline (but not constexpr) overload of __glibcxx_assert_fail to cause compilation to fail. That inline function doesn't cause an undefined reference to a symbol in the library (and will be optimized away anyway). We can also add always_inline to the __is_constant_evaluated function, although this doesn't actually matter for -O0 and it's always inlined with any optimization enabled. libstdc++-v3/ChangeLog: PR libstdc++/112882 * include/bits/c++config (__is_constant_evaluated): Add always_inline attribute. (_GLIBCXX_DO_ASSERT): Remove macro. 
(__glibcxx_assert): Define separately for assertions-enabled and constexpr-only cases. --- libstdc++-v3/include/bits/c++config | 33 ++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config index 284d24d933f1..25d37428fc1b 100644 --- a/libstdc++-v3/include/bits/c++config +++ b/libstdc++-v3/include/bits/c++config @@ -538,6 +538,7 @@ namespace std // This can be used without checking if the compiler supports the feature. // The macro _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED can be used to check if // the compiler support is present to make this function work as expected. + __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR inline bool __is_constant_evaluated() _GLIBCXX_NOEXCEPT { @@ -598,19 +599,31 @@ namespace std #endif #if defined(_GLIBCXX_ASSERTIONS) -# define _GLIBCXX_DO_ASSERT true -#elif _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED -# define _GLIBCXX_DO_ASSERT std::__is_constant_evaluated() -#else -# define _GLIBCXX_DO_ASSERT false -#endif - +// Enable runtime assertion checks, and also check in constant expressions. # define __glibcxx_assert(cond) \ do { \ - if _GLIBCXX17_CONSTEXPR (_GLIBCXX_DO_ASSERT) \ - if (__builtin_expect(!bool(cond), false)) \ - _GLIBCXX_ASSERT_FAIL(cond); \ + if (__builtin_expect(!bool(cond), false)) \ + _GLIBCXX_ASSERT_FAIL(cond); \ } while (false) +#elif _GLIBCXX_HAVE_IS_CONSTANT_EVALUATED +// Only check assertions during constant evaluation. +namespace std +{ + __attribute__((__always_inline__,__visibility__("default"))) + inline void + __glibcxx_assert_fail() + { } +} +# define __glibcxx_assert(cond) \ + do { \ + if (std::__is_constant_evaluated()) \ + if (__builtin_expect(!bool(cond), false)) \ + std::__glibcxx_assert_fail(); \ + } while (false) +#else +// Don't check any assertions. +# define __glibcxx_assert(cond) +#endif // Macro indicating that TSAN is in use. 
#if __SANITIZE_THREAD__ From 2f512f6fcdd55296daff3e01a250d866491014e6 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 7 Dec 2023 11:00:02 +0000 Subject: [PATCH 065/311] libstdc++: Use <cstdint> instead of <stdint.h> in <bits/atomic_wait.h> In r14-5922-g6c8f2d3a08bc01 I added <stdint.h> to <bits/atomic_wait.h>, so that uintptr_t is declared if that header is compiled as a header unit. I used <stdint.h> because that's what <atomic> already includes, so it seemed simpler to be consistent. However, this means that name lookup for uintptr_t in <bits/atomic_wait.h> depends on whether <cstdint> has been included by another header first. Whether name lookup finds std::uintptr_t or ::uintptr_t will depend on include order. This causes problems when compiling modules with Clang: bits/atomic_wait.h:251:7: error: 'std::__detail::__waiter_pool_base' has different definitions in different modules; first difference is defined here found method '_S_for' with body _S_for(const void* __addr) noexcept ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bits/atomic_wait.h:251:7: note: but in 'tm.' found method '_S_for' with different body _S_for(const void* __addr) noexcept ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ By including <cstdint> we would ensure that name lookup always finds the name in namespace std. Alternatively, we can stop including <cstdint> for those types, so that we don't declare the entire contents of <cstdint> when we only need a couple of types from it. This patch does the former, which is appropriate for backporting. libstdc++-v3/ChangeLog: * include/bits/atomic_wait.h: Include <cstdint> instead of <stdint.h>. 
--- libstdc++-v3/include/bits/atomic_wait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/atomic_wait.h b/libstdc++-v3/include/bits/atomic_wait.h index 1460b1d8d5cd..8e01a9c518d8 100644 --- a/libstdc++-v3/include/bits/atomic_wait.h +++ b/libstdc++-v3/include/bits/atomic_wait.h @@ -35,7 +35,7 @@ #include #if __glibcxx_atomic_wait -#include +#include #include #include #include From cab0083dc72dfd22a1b2016b068f9313beb7f091 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 7 Dec 2023 12:40:18 +0000 Subject: [PATCH 066/311] libstdc++: Fix misleading typedef name in <format> This local typedef for uintptr_t was accidentally named uint64_t, probably from a careless code completion shortcut. We don't need the typedef at all since it's only used once. Just use __UINTPTR_TYPE__ directly instead. libstdc++-v3/ChangeLog: * include/std/format (_Iter_sink): Remove uint64_t local type. --- libstdc++-v3/include/std/format | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index 01f0a58392a3..04d03e0ceb70 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -2809,7 +2809,6 @@ namespace __format requires same_as, _CharT> class _Iter_sink<_CharT, _OutIter> : public _Sink<_CharT> { - using uint64_t = __UINTPTR_TYPE__; _OutIter _M_first; iter_difference_t<_OutIter> _M_max = -1; protected: @@ -2883,7 +2882,7 @@ namespace __format return {__ptr, __bytes / sizeof(_CharT)}; #endif // Avoid forming a pointer to a different memory page. 
- uint64_t __off = reinterpret_cast(__ptr) % 1024; + const auto __off = reinterpret_cast<__UINTPTR_TYPE__>(__ptr) % 1024; __n = (1024 - __off) / sizeof(_CharT); if (__n > 0) [[likely]] return {__ptr, static_cast(__n)}; From 0832cf42a6912f876086cb14b92f788d8406f393 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 7 Dec 2023 16:36:23 -0500 Subject: [PATCH 067/311] libstdc++: Simplify ranges::to closure objects We can use the existing _Partial range adaptor closure object for ranges::to instead of essentially reimplementing it. libstdc++-v3/ChangeLog: * include/std/ranges (__detail::_ToClosure): Replace with ... (__detail::_To): ... this. (__detail::_ToClosure2): Replace with ... (__detail::To2): ... this. (to): Simplify using the existing _Partial range adaptor closure object. --- libstdc++-v3/include/std/ranges | 128 +++++++------------------------- 1 file changed, 26 insertions(+), 102 deletions(-) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index afd0a38e0cfe..fb9df3d3e79b 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -1007,6 +1007,7 @@ namespace views::__adaptor // Invoke _Adaptor with arguments __r, _M_args... according to the // value category of this _Partial object. + // TODO: use explicit object functions ("deducing this"). template requires __adaptor_invocable<_Adaptor, _Range, const _Args&...> constexpr auto @@ -1137,6 +1138,7 @@ namespace views::__adaptor // Invoke _M_rhs(_M_lhs(__r)) according to the value category of this // range adaptor closure object. + // TODO: use explicit object functions ("deducing this"). template requires __pipe_invocable constexpr auto @@ -9387,58 +9389,15 @@ namespace __detail /// @cond undocumented namespace __detail { - template - class _ToClosure - : public views::__adaptor::_RangeAdaptorClosure<_ToClosure<_Cont, _Args...>> + template + struct _To { - tuple...> _M_bound_args; - - public: - constexpr - _ToClosure(_Args&&... 
__args) - : _M_bound_args(std::forward<_Args>(__args)...) - { } - - // TODO: use explicit object functions ("deducing this"). - - template + template constexpr auto - operator()(_Rg&& __r) & + operator()(_Range&& __r, _Args&&... __args) const { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, _M_bound_args); - } - - template - constexpr auto - operator()(_Rg&& __r) const & - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, _M_bound_args); - } - - template - constexpr auto - operator()(_Rg&& __r) && - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, std::move(_M_bound_args)); - } - - template - constexpr auto - operator()(_Rg&& __r) const && - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, std::move(_M_bound_args)); + return ranges::to<_Cont>(std::forward<_Range>(__r), + std::forward<_Args>(__args)...); } }; } // namespace __detail @@ -9461,65 +9420,26 @@ namespace __detail */ template requires (!view<_Cont>) - constexpr __detail::_ToClosure<_Cont, _Args...> + constexpr auto to [[nodiscard]] (_Args&&... __args) - { return {std::forward<_Args>(__args)...}; } + { + using __detail::_To; + using views::__adaptor::_Partial; + return _Partial<_To<_Cont>, decay_t<_Args>...>{std::forward<_Args>(__args)...}; + } /// @cond undocumented namespace __detail { - template typename _Cont, typename... _Args> - class _ToClosure2 - : public views::__adaptor::_RangeAdaptorClosure<_ToClosure2<_Cont, _Args...>> + template typename _Cont> + struct _To2 { - tuple...> _M_bound_args; - - public: - constexpr - _ToClosure2(_Args&&... __args) - : _M_bound_args(std::forward<_Args>(__args)...) 
- { } - - // TODO: use explicit object functions ("deducing this"). - - template + template constexpr auto - operator()(_Rg&& __r) & + operator()(_Range&& __r, _Args&&... __args) const { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, _M_bound_args); - } - - template - constexpr auto - operator()(_Rg&& __r) const & - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, _M_bound_args); - } - - template - constexpr auto - operator()(_Rg&& __r) && - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, std::move(_M_bound_args)); - } - - template - constexpr auto - operator()(_Rg&& __r) const && - { - return std::apply([&__r](_Tp&&... __args) { - return ranges::to<_Cont>(std::forward<_Rg>(__r), - std::forward<_Tp>(__args)...); - }, std::move(_M_bound_args)); + return ranges::to<_Cont>(std::forward<_Range>(__r), + std::forward<_Args>(__args)...); } }; } // namespace __detail @@ -9543,9 +9463,13 @@ namespace __detail * `r | std::ranges::to(an_allocator)`. */ template typename _Cont, typename... _Args> - constexpr __detail::_ToClosure2<_Cont, _Args...> + constexpr auto to [[nodiscard]] (_Args&&... __args) - { return {std::forward<_Args>(__args)...}; } + { + using __detail::_To2; + using views::__adaptor::_Partial; + return _Partial<_To2<_Cont>, decay_t<_Args>...>{std::forward<_Args>(__args)...}; + } } // namespace ranges #endif // __cpp_lib_ranges_to_container From 71a5ac6703d1b7a0409936fcdec4e592d7cc06b0 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Fri, 8 Dec 2023 06:09:10 +0800 Subject: [PATCH 068/311] RISC-V: Support interleave vector with different step sequence This patch fixes 64 ICEs in full coverage testing since they happens due to same reason. 
Before this patch: internal compiler error: in expand_const_vector, at config/riscv/riscv-v.cc:1270 appears 400 times in full coverage testing report. The root cause is we didn't support interleave vector with different steps. Here is the story: We already supported interleave with single same step, that is: e.g. v = { 0, 100, 2, 102, 4, 104, ... } This sequence can be interpreted as interleave vector by 2 separate sequences: sequence1 = { 0, 2, 4, ... } and sequence2 = { 100, 102, 104, ... }. Their steps are both 2. However, we didn't support interleave vector when they have different steps, which causes ICE in such situations. This patch supports different steps interleaved vector for the following 2 situations: 1. When vector can be extended EEW: Case 1: { 0, 0, 1, 0, 2, 0, ... } It's interleaved by sequence1 = { 0, 1, 2, ... } and sequence2 = { 0, 0, 0, ... } Suppose the original vector can be extended EEW, e.g. mode = RVVM1SImode. Then such interleaved vector can be achieved with { 0, 1, 2, ... } with RVVM1DImode. So, for this situation the codegen is pretty efficient and clean: .MASK_LEN_STORE (&s, 32B, { -1, ... }, 16, 0, { 0, 0, 1, 0, 2, 0, ... }); -> vsetvli a5,zero,e64,m8,ta,ma vid.v v8 vsetivli zero,16,e32,m8,ta,ma vse32.v v8,0(a4) Case 2: { 0, 100, 1, 100, 2, 100, ... } .MASK_LEN_STORE (&s, 32B, { -1, ... }, 16, 0, { 0, 100, 1, 100, 2, 100, ... }); -> vsetvli a1,zero,e64,m8,ta,ma vid.v v8 li a7,100 vand.vx v8,v8,a4 vsetivli zero,16,e32,m8,ta,ma vse32.v v8,0(a5) 2. When vector can't be extended EEW: Since we can't use EEW = 64, for example, RVVM1SImode in -march=rv32gc_zve32f, we use vmerge to combine the sequence. .MASK_LEN_STORE (&s, 32B, { -1, ... }, 16, 0, { 200, 100, 201, 103, 202, 106, ... }); 1. Generate sequence1 = { 200, 200, 201, 201, 202, 202, ... } and sequence2 = { 100, 100, 103, 103, 106, 106, ... } 2. Merge sequence1 and sequence2 with mask { 0, 1, 0, 1, ... } gcc/ChangeLog: * config/riscv/riscv-protos.h (expand_vec_series): Adapt function. 
* config/riscv/riscv-v.cc (rvv_builder::double_steps_npatterns_p): New function. (expand_vec_series): Adapt function. (expand_const_vector): Support new interleave vector with different step. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/slp-interleave-1.c: New test. * gcc.target/riscv/rvv/autovec/slp-interleave-2.c: New test. * gcc.target/riscv/rvv/autovec/slp-interleave-3.c: New test. * gcc.target/riscv/rvv/autovec/slp-interleave-4.c: New test. --- gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-v.cc | 148 ++++++++++++++++-- .../riscv/rvv/autovec/slp-interleave-1.c | 17 ++ .../riscv/rvv/autovec/slp-interleave-2.c | 18 +++ .../riscv/rvv/autovec/slp-interleave-3.c | 19 +++ .../riscv/rvv/autovec/slp-interleave-4.c | 19 +++ 6 files changed, 211 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-3.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-4.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index bfbd2bf0d18b..a6f204f3066f 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -543,7 +543,7 @@ void expand_tuple_move (rtx *); bool expand_block_move (rtx, rtx, rtx); machine_mode preferred_simd_mode (scalar_mode); machine_mode get_mask_mode (machine_mode); -void expand_vec_series (rtx, rtx, rtx); +void expand_vec_series (rtx, rtx, rtx, rtx = 0); void expand_vec_init (rtx, rtx); void expand_vec_perm (rtx, rtx, rtx, rtx); void expand_select_vl (rtx *); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 71cb7567f1a5..9b99d0aca844 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -432,6 +432,7 @@ public: bool single_step_npatterns_p () const; bool npatterns_all_equal_p () const; + bool 
interleaved_stepped_npatterns_p () const; machine_mode new_mode () const { return m_new_mode; } scalar_mode inner_mode () const { return m_inner_mode; } @@ -668,6 +669,27 @@ rvv_builder::single_step_npatterns_p () const return true; } +/* Return true if the permutation consists of two + interleaved patterns with a constant step each. + TODO: We currently only support NPATTERNS = 2. */ +bool +rvv_builder::interleaved_stepped_npatterns_p () const +{ + if (npatterns () != 2 || nelts_per_pattern () != 3) + return false; + for (unsigned int i = 0; i < npatterns (); i++) + { + poly_int64 ele0 = rtx_to_poly_int64 (elt (i)); + poly_int64 ele1 = rtx_to_poly_int64 (elt (npatterns () + i)); + poly_int64 ele2 = rtx_to_poly_int64 (elt (npatterns () * 2 + i)); + poly_int64 diff1 = ele1 - ele0; + poly_int64 diff2 = ele2 - ele1; + if (maybe_ne (diff1, diff2)) + return false; + } + return true; +} + /* Return true if all elements of NPATTERNS are equal. E.g. NPATTERNS = 4: @@ -955,10 +977,15 @@ get_repeating_sequence_dup_machine_mode (const rvv_builder &builder, return get_vector_mode (inner_mode, dup_nunit).require (); } -/* Expand series const vector. */ +/* Expand series const vector. If VID is NULL_RTX, we use vid.v + instructions to generate sequence for VID: + + VID = { 0, 1, 2, 3, ... } + + Otherwise, we use the VID argument directly. */ void -expand_vec_series (rtx dest, rtx base, rtx step) +expand_vec_series (rtx dest, rtx base, rtx step, rtx vid) { machine_mode mode = GET_MODE (dest); poly_int64 nunits_m1 = GET_MODE_NUNITS (mode) - 1; @@ -968,14 +995,18 @@ expand_vec_series (rtx dest, rtx base, rtx step) /* VECT_IV = BASE + I * STEP. */ /* Step 1: Generate I = { 0, 1, 2, ... } by vid.v. 
*/ - rtx vid = gen_reg_rtx (mode); - rtx op[] = {vid}; - emit_vlmax_insn (code_for_pred_series (mode), NULLARY_OP, op); + bool reverse_p = !vid && rtx_equal_p (step, constm1_rtx) + && poly_int_rtx_p (base, &value) + && known_eq (nunits_m1, value); + if (!vid) + { + vid = gen_reg_rtx (mode); + rtx op[] = {vid}; + emit_vlmax_insn (code_for_pred_series (mode), NULLARY_OP, op); + } rtx step_adj; - if (rtx_equal_p (step, constm1_rtx) - && poly_int_rtx_p (base, &value) - && known_eq (nunits_m1, value)) + if (reverse_p) { /* Special case: {nunits - 1, nunits - 2, ... , 0}. @@ -1246,13 +1277,108 @@ expand_const_vector (rtx target, rtx src) BINARY_OP, add_ops); } } + else if (builder.interleaved_stepped_npatterns_p ()) + { + rtx base1 = builder.elt (0); + rtx base2 = builder.elt (1); + poly_int64 step1 + = rtx_to_poly_int64 (builder.elt (builder.npatterns ())) + - rtx_to_poly_int64 (base1); + poly_int64 step2 + = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1)) + - rtx_to_poly_int64 (base2); + + /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW + integer vector mode to generate such vector efficiently. + + E.g. EEW = 16, { 2, 0, 4, 0, ... } + + can be interpreted into: + + EEW = 32, { 2, 4, ... } */ + unsigned int new_smode_bitsize = builder.inner_bits_size () * 2; + scalar_int_mode new_smode; + machine_mode new_mode; + poly_uint64 new_nunits + = exact_div (GET_MODE_NUNITS (builder.mode ()), 2); + if (int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode) + && get_vector_mode (new_smode, new_nunits).exists (&new_mode)) + { + rtx tmp = gen_reg_rtx (new_mode); + base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode); + expand_vec_series (tmp, base1, gen_int_mode (step1, new_smode)); + + if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0)) + /* { 1, 0, 2, 0, ... }. */ + emit_move_insn (target, gen_lowpart (mode, tmp)); + else if (known_eq (step2, 0)) + { + /* { 1, 1, 2, 1, ... }. 
*/ + rtx scalar = expand_simple_binop ( + new_smode, ASHIFT, + gen_int_mode (rtx_to_poly_int64 (base2), new_smode), + gen_int_mode (builder.inner_bits_size (), new_smode), + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp2 = gen_reg_rtx (new_mode); + rtx and_ops[] = {tmp2, tmp, scalar}; + emit_vlmax_insn (code_for_pred_scalar (AND, new_mode), + BINARY_OP, and_ops); + emit_move_insn (target, gen_lowpart (mode, tmp2)); + } + else + { + /* { 1, 3, 2, 6, ... }. */ + rtx tmp2 = gen_reg_rtx (new_mode); + base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode); + expand_vec_series (tmp2, base2, + gen_int_mode (step1, new_smode)); + rtx shifted_tmp2 = expand_simple_binop ( + new_mode, ASHIFT, tmp2, + gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX, + false, OPTAB_DIRECT); + rtx tmp3 = gen_reg_rtx (new_mode); + rtx ior_ops[] = {tmp3, tmp, shifted_tmp2}; + emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, + ior_ops); + emit_move_insn (target, gen_lowpart (mode, tmp3)); + } + } + else + { + rtx vid = gen_reg_rtx (mode); + expand_vec_series (vid, const0_rtx, const1_rtx); + /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */ + rtx shifted_vid + = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx, + NULL_RTX, false, OPTAB_DIRECT); + rtx tmp1 = gen_reg_rtx (mode); + rtx tmp2 = gen_reg_rtx (mode); + expand_vec_series (tmp1, base1, + gen_int_mode (step1, builder.inner_mode ()), + shifted_vid); + expand_vec_series (tmp2, base2, + gen_int_mode (step2, builder.inner_mode ()), + shifted_vid); + + /* Transform into { 0, 1, 0, 1, 0, 1, ... }. 
*/ + rtx and_vid = gen_reg_rtx (mode); + rtx and_ops[] = {and_vid, vid, const1_rtx}; + emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, + and_ops); + rtx mask = gen_reg_rtx (builder.mask_mode ()); + expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode)); + + rtx ops[] = {target, tmp1, tmp2, mask}; + emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops); + } + } else if (npatterns == 1 && nelts_per_pattern == 3) { /* Generate the following CONST_VECTOR: { base0, base1, base1 + step, base1 + step * 2, ... } */ - rtx base0 = CONST_VECTOR_ELT (src, 0); - rtx base1 = CONST_VECTOR_ELT (src, 1); - rtx step = CONST_VECTOR_ELT (src, 2); + rtx base0 = builder.elt (0); + rtx base1 = builder.elt (1); + rtx step = builder.elt (2); /* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */ rtx tmp = gen_reg_rtx (mode); expand_vec_series (tmp, base1, step); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-1.c new file mode 100644 index 000000000000..9f371436fe17 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl1024b -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-lmul=m8 -O3 -fdump-tree-optimized-details" } */ + +struct S { int a, b; } s[8]; + +void +foo () +{ + int i; + for (i = 0; i < 8; i++) + { + s[i].b = 0; + s[i].a = i; + } +} + +/* { dg-final { scan-tree-dump-times "\{ 0, 0, 1, 0, 2, 0, ... 
\}" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-2.c new file mode 100644 index 000000000000..6cc390c0b348 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl1024b -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-lmul=m8 -O3 -fdump-tree-optimized-details" } */ + +struct S { int a, b; } s[8]; + +void +foo () +{ + int i; + for (i = 0; i < 8; i++) + { + s[i].b = 1; + s[i].a = i; + } +} + +/* { dg-final { scan-tree-dump-times "\{ 0, 1, 1, 1, 2, 1, ... \}" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times {slli\t[a-x0-9]+,\s*[a-x0-9]+,\s*32} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-3.c new file mode 100644 index 000000000000..326d66e25594 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl1024b -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-lmul=m8 -O3 -fdump-tree-optimized-details" } */ + +struct S { int a, b; } s[8]; + +void +foo () +{ + int i; + for (i = 0; i < 8; i++) + { + s[i].b = i*3 + 100; + s[i].a = i + 200; + } +} + +/* { dg-final { scan-tree-dump-times "\{ 200, 100, 201, 103, 202, 106, ... 
\}" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times {vsll\.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vor\.vv} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-4.c new file mode 100644 index 000000000000..2bb73ebcfd10 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-interleave-4.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zve32f_zvl1024b -mabi=lp64d -fno-vect-cost-model --param=riscv-autovec-lmul=m8 -O3 -fdump-tree-optimized-details" } */ + +struct S { int a, b; } s[8]; + +void +foo () +{ + int i; + for (i = 0; i < 8; i++) + { + s[i].b = i*3 + 100; + s[i].a = i + 200; + } +} + +/* { dg-final { scan-tree-dump-times "\{ 200, 100, 201, 103, 202, 106, ... \}" 1 "optimized" } } */ +/* { dg-final { scan-assembler-times {vand\.vi} 1 } } */ +/* { dg-final { scan-assembler-times {vmseq\.vi} 1 } } */ From 08f89e5e7f466cf18387293c6a9d1d5b8308b83d Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 8 Dec 2023 00:17:33 +0000 Subject: [PATCH 069/311] Daily bump. --- gcc/ChangeLog | 575 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 28 ++ gcc/c-family/ChangeLog | 7 + gcc/m2/ChangeLog | 26 ++ gcc/testsuite/ChangeLog | 427 +++++++++++++++++++++++++++++ libgcc/ChangeLog | 6 + libstdc++-v3/ChangeLog | 38 +++ 8 files changed, 1108 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1f38282e0c96..6a402c8859b0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,578 @@ +2023-12-07 Juzhe-Zhong + + * config/riscv/riscv-protos.h (expand_vec_series): Adapt function. + * config/riscv/riscv-v.cc (rvv_builder::double_steps_npatterns_p): New function. + (expand_vec_series): Adapt function. + (expand_const_vector): Support new interleave vector with different step. 
+ +2023-12-07 Richard Sandiford + + PR rtl-optimization/106694 + PR rtl-optimization/109078 + PR rtl-optimization/109391 + * config.gcc: Add aarch64-early-ra.o for AArch64 targets. + * config/aarch64/t-aarch64 (aarch64-early-ra.o): New rule. + * config/aarch64/aarch64-opts.h (aarch64_early_ra_scope): New enum. + * config/aarch64/aarch64.opt (mearly_ra): New option. + * doc/invoke.texi: Document it. + * common/config/aarch64/aarch64-common.cc + (aarch_option_optimization_table): Use -mearly-ra=strided by + default for -O2 and above. + * config/aarch64/aarch64-passes.def (pass_aarch64_early_ra): New pass. + * config/aarch64/aarch64-protos.h (aarch64_strided_registers_p) + (make_pass_aarch64_early_ra): Declare. + * config/aarch64/aarch64-sme.md (@aarch64_sme_lut): + Add a stride_type attribute. + (@aarch64_sme_lut_strided2): New pattern. + (@aarch64_sme_lut_strided4): Likewise. + * config/aarch64/aarch64-sve-builtins-base.cc (svld1_impl::expand) + (svldnt1_impl::expand, svst1_impl::expand, svstn1_impl::expand): Handle + new way of defining multi-register loads and stores. + * config/aarch64/aarch64-sve.md (@aarch64_ld1) + (@aarch64_ldnt1, @aarch64_st1) + (@aarch64_stnt1): Delete. + * config/aarch64/aarch64-sve2.md (@aarch64_) + (@aarch64__strided2): New patterns. + (@aarch64__strided4): Likewise. + (@aarch64_): Likewise. + (@aarch64__strided2): Likewise. + (@aarch64__strided4): Likewise. + * config/aarch64/aarch64.cc (aarch64_strided_registers_p): New + function. + * config/aarch64/aarch64.md (UNSPEC_LD1_SVE_COUNT): Delete. + (UNSPEC_ST1_SVE_COUNT, UNSPEC_LDNT1_SVE_COUNT): Likewise. + (UNSPEC_STNT1_SVE_COUNT): Likewise. + (stride_type): New attribute. + * config/aarch64/constraints.md (Uwd, Uwt): New constraints. + * config/aarch64/iterators.md (UNSPEC_LD1_COUNT, UNSPEC_LDNT1_COUNT) + (UNSPEC_ST1_COUNT, UNSPEC_STNT1_COUNT): New unspecs. + (optab): Handle them. + (LD1_COUNT, ST1_COUNT): New iterators. + * config/aarch64/aarch64-early-ra.cc: New file. 
+ +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x4, vld1_u16_x4, vld1_u32_x4, vld1_u64_x4): New + (vld1_s8_x4, vld1_s16_x4, vld1_s32_x4, vld1_s64_x4): New. + (vld1_f16_x4, vld1_f32_x4): New. + (vld1_p8_x4, vld1_p16_x4, vld1_p64_x4): New. + (vld1_bf16_x4): New. + (vld1q_types_x4): Updated to use vld1q_x4 + from arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x4): Updated entries. + (vld1q_x4): New entries, but comes from the old vld1_x2 + * config/arm/neon.md (neon_vld1q_x4): + Updated from neon_vld1_x4. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x3, vld1_u16_x3, vld1_u32_x3, vld1_u64_x3): New + (vld1_s8_x3, vld1_s16_x3, vld1_s32_x3, vld1_s64_x3): New. + (vld1_f16_x3, vld1_f32_x3): New. + (vld1_p8_x3, vld1_p16_x3, vld1_p64_x3): New. + (vld1_bf16_x3): New. + (vld1q_types_x3): Updated to use vld1q_x3 from + arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x3): Updated entries. + (vld1q_x3): New entries, but comes from the old vld1_x2 + * config/arm/neon.md (neon_vld1q_x3): Updated from + neon_vld1_x3. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x2, vld1_u16_x2, vld1_u32_x2, vld1_u64_x2): New + (vld1_s8_x2, vld1_s16_x2, vld1_s32_x2, vld1_s64_x2): New. + (vld1_f16_x2, vld1_f32_x2): New. + (vld1_p8_x2, vld1_p16_x2, vld1_p64_x2): New. + (vld1_bf16_x2): New. + (vld1q_types_x2): Updated to use vld1q_x2 from + arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x2): Updated entries. + (vld1q_x2): New entries, but comes from the old vld1_x2 + * config/arm/neon.md + (neon_vld1_x2): Updated + from neon_vld1_x2. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x4, vst1q_u16_x4, vst1q_u32_x4, vst1q_u64_x4): New. + (vst1q_s8_x4, vst1q_s16_x4, vst1q_s32_x4, vst1q_s64_x4): New. + (vst1q_f16_x4, vst1q_f32_x4): New. + (vst1q_p8_x4, vst1q_p16_x4, vst1q_p64_x4): New. + (vst1q_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vst1q_x4): New entries. 
+ * config/arm/neon.md (neon_vst1q_x4): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x3, vst1q_u16_x3, vst1q_u32_x3, vst1q_u64_x3): New. + (vst1q_s8_x3, vst1q_s16_x3, vst1q_s32_x3, vst1q_s64_x3): New. + (vst1q_f16_x3, vst1q_f32_x3): New. + (vst1q_p8_x3, vst1q_p16_x3, vst1q_p64_x3): New. + (vst1q_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vst1q_x3): New entries. + * config/arm/neon.md (neon_vst1q_x3): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x2, vst1q_u16_x2, vst1q_u32_x2, vst1q_u64_x2): New. + (vst1q_s8_x2, vst1q_s16_x2, vst1q_s32_x2, vst1q_s64_x2): New. + (vst1q_f16_x2, vst1q_f32_x2): New. + (vst1q_p8_x2, vst1q_p16_x2, vst1q_p64_x2): New. + (vst1q_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vst1q_x2): New entries. + * config/arm/neon.md + (neon_vst1_x2): Updated from + neon_vst1_x2. + * config/arm/iterators.md (VMEMX2): New mode iterator. + (VMEMX2_q): New mode attribute. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x4, vst1_u16_x4, vst1_u32_x4, vst1_u64_x4): New. + (vst1_s8_x4, vst1_s16_x4, vst1_s32_x4, vst1_s64_x4): New. + (vst1_f16_x4, vst1_f32_x4): New. + (vst1_p8_x4, vst1_p16_x4, vst1_p64_x4): New. + (vst1_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vst1_x4): New entries. + * config/arm/neon.md (vst1_x4): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x3, vst1_u16_x3, vst1_u32_x3, vst1_u64_x3): New. + (vst1_s8_x3, vst1_s16_x3, vst1_s32_x3, vst1_s64_x3): New. + (vst1_f16_x3, vst1_f32_x3): New. + (vst1_p8_x3, vst1_p16_x3, vst1_p64_x3): New. + (vst1_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vst1_x3): New entries. + * config/arm/neon.md (vst1_x3): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x2, vst1_u16_x2, vst1_u32_x2, vst1_u64_x2): New. + (vst1_s8_x2, vst1_s16_x2, vst1_s32_x2, vst1_s64_x2): New. + (vst1_f16_x2, vst1_f32_x2): New. + (vst1_p8_x2, vst1_p16_x2, vst1_p64_x2): New. 
+ (vst1_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vst1_x2): New entries. + * config/arm/neon.md (vst1_x2): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x4, vld1q_u16_x4, vld1q_u32_x4, vld1q_u64_x4): New. + (vld1q_s8_x4, vld1q_s16_x4, vld1q_s32_x4, vld1q_s64_x4): New. + (vld1q_f16_x4, vld1q_f32_x4): New. + (vld1q_p8_x4, vld1q_p16_x4, vld1q_p64_x4): New. + (vld1q_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vld1_x4): New entries. + * config/arm/neon.md (vld1_x4): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x3, vld1q_u16_x3, vld1q_u32_x3, vld1q_u64_x3): New. + (vld1q_s8_x3, vld1q_s16_x3, vld1q_s32_x3, vld1q_s64_x3): New. + (vld1q_f16_x3, vld1q_f32_x3): New. + (vld1q_p8_x3, vld1q_p16_x3, vld1q_p64_x3): New. + (vld1q_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vld1_x3): New entries. + * config/arm/neon.md (vld1_x3): New. + +2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x2, vld1q_u16_x2, vld1q_u32_x2, vld1q_u64_x2): New. + (vld1q_s8_x2, vld1q_s16_x2, vld1q_s32_x2, vld1q_s64_x2): New. + (vld1q_f16_x2, vld1q_f32_x2): New. + (vld1q_p8_x2, vld1q_p16_x2, vld1q_p64_x2): New. + (vld1q_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vld1_x2): New entries. + * config/arm/neon.md (vld1_x2): New. + +2023-12-07 Stefan Schulze Frielinghaus + + * config/s390/vecintrin.h (vec_step): Expand vec_step to + __builtin_s390_vec_step. + +2023-12-07 Alexandre Oliva + + * target.def (have_strub_support_for): New hook. + * doc/tm.texi.in: Document it. + * doc/tm.texi: Rebuild. + * ipa-strub.cc: Include target.h. + (strub_target_support_p): New. + (can_strub_p): Call it. Test for no flag_split_stack. + (pass_ipa_strub::adjust_at_calls_call): Check for target + support. + * config/nvptx/nvptx.cc (TARGET_HAVE_STRUB_SUPPORT_FOR): + Disable. + * doc/sourcebuild.texi (strub): Document new effective + target. 
+ +2023-12-07 Juzhe-Zhong + + * config/riscv/riscv-avlprop.cc (simplify_replace_avl): New function. + (simplify_replace_vlmax_avl): Fix bug. + * config/riscv/t-riscv: Add a new include file. + +2023-12-07 Christoph Müllner + + * config/riscv/thead.cc (th_memidx_classify_address_index): + Require TARGET_XTHEADMEMIDX for FP modes. + * config/riscv/thead.md: Require TARGET_XTHEADMEMIDX for all + XTheadFMemIdx pattern. + +2023-12-07 Jakub Jelinek + + PR middle-end/112881 + * expr.cc (count_type_elements): Handle BITINT_TYPE like INTEGER_TYPE. + +2023-12-07 Jakub Jelinek + + PR tree-optimization/112880 + * tree-ssa-dce.cc (maybe_optimize_arith_overflow): Use + unsigned_type_for instead of conditionally calling + build_nonstandard_integer_type. + +2023-12-07 Victor Do Nascimento + + * config/aarch64/arm_neon.h (vldap1_lane_u64): New. + (vldap1q_lane_u64): Likewise. + (vldap1_lane_s64): Likewise. + (vldap1q_lane_s64): Likewise. + (vldap1_lane_f64): Likewise. + (vldap1q_lane_f64): Likewise. + (vldap1_lane_p64): Likewise. + (vldap1q_lane_p64): Likewise. + (vstl1_lane_u64): Likewise. + (vstl1q_lane_u64): Likewise. + (vstl1_lane_s64): Likewise. + (vstl1q_lane_s64): Likewise. + (vstl1_lane_f64): Likewise. + (vstl1q_lane_f64): Likewise. + (vstl1_lane_p64): Likewise. + (vstl1q_lane_p64): Likewise. + +2023-12-07 Victor Do Nascimento + + * config/aarch64/aarch64-simd-builtins.def + (vec_ldap1_lane): New. + (vec_stl1_lane): Likewise. + * config/aarch64/aarch64-simd.md + (aarch64_vec_stl1_lanes_lane): New. + (aarch64_vec_stl1_lane): Likewise. + (aarch64_vec_ldap1_lanes_lane): Likewise. + (aarch64_vec_ldap1_lane): Likewise. + * config/aarch64/aarch64.md (UNSPEC_LDAP1_LANE): New. + (UNSPEC_STL1_LANE): Likewise. + +2023-12-07 Victor Do Nascimento + + * config/aarch64/iterators.md (V12DIF): New. + (V12DUP): Likewise. + (VEL): Add support for all V12DIF-associated modes. + (Vetype): Add support for V1DI and V1DF. + (Vel): Likewise. 
+ +2023-12-07 Victor Do Nascimento + + * config/aarch64/aarch64-option-extensions.def (rcpc3): New. + * config/aarch64/aarch64.h (AARCH64_ISA_RCPC3): Likewise. + (TARGET_RCPC3): Likewise. + * doc/invoke.texi (rcpc3): Document feature in AArch64 Options. + +2023-12-07 Hongyu Wang + + * config/i386/i386-expand.cc (ix86_split_ashl_ndd): New + function to split NDD form lshift. + (ix86_split_rshift_ndd): Likewise for l/ashiftrt. + * config/i386/i386-protos.h (ix86_split_ashl_ndd): New + prototype. + (ix86_split_rshift_ndd): Likewise. + * config/i386/i386.md (ashl3_doubleword): Add NDD + alternative, call ndd split function when operands[0] + not equal to operands[1]. + (define_split for doubleword lshift): Likewise. + (define_peephole for doubleword lshift): Likewise. + (3_doubleword): Likewise for l/ashiftrt. + (define_split for doubleword l/ashiftrt): Likewise. + (define_peephole for doubleword l/ashiftrt): Likewise. + +2023-12-07 Hongyu Wang + + * config/i386/i386.md (*movcc_noc): Extend with new constraints + to support NDD. + (*movsicc_noc_zext): Likewise. + (*movsicc_noc_zext_1): Likewise. + (*movqicc_noc): Likewise. + +2023-12-07 Hongyu Wang + + * config/i386/i386.md (x86_64_shld_ndd): New define_insn. + (x86_64_shld_ndd_1): Likewise. + (*x86_64_shld_ndd_2): Likewise. + (x86_shld_ndd): Likewise. + (x86_shld_ndd_1): Likewise. + (*x86_shld_ndd_2): Likewise. + (x86_64_shrd_ndd): Likewise. + (x86_64_shrd_ndd_1): Likewise. + (*x86_64_shrd_ndd_2): Likewise. + (x86_shrd_ndd): Likewise. + (x86_shrd_ndd_1): Likewise. + (*x86_shrd_ndd_2): Likewise. + (*x86_64_shld_shrd_1_nozext): Adjust codegen under TARGET_APX_NDD. + (*x86_shld_shrd_1_nozext): Likewise. + (*x86_64_shrd_shld_1_nozext): Likewise. + (*x86_shrd_shld_1_nozext): Likewise. + +2023-12-07 Hongyu Wang + + * config/i386/i386.md (*3_1): Extend with a new + alternative to support NDD for SI/DI rotate, and adjust output + template. + (*si3_1_zext): Likewise. + (*3_1): Likewise for QI/HI modes. 
+ (rcrsi2): Likewise, and use nonimmediate_operand for operands[1] + to accept memory input for NDD alternative. + (rcrdi2): Likewise. + +2023-12-07 Hongyu Wang + + * config/i386/i386.md (ashr3_cvt): Extend with new + alternatives to support NDD, and adjust output templates. + (*ashr3_1): Likewise for SI/DI mode. + (*lshr3_1): Likewise. + (*si3_1_zext): Likewise. + (*ashr3_1): Likewise for QI/HI mode. + (*lshrqi3_1): Likewise. + (*lshrhi3_1): Likewise. + (3_cmp): Likewise. + (*3_cconly): Likewise. + (*ashrsi3_cvt_zext): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + (*highpartdisi2): Likewise. + (*si3_cmp_zext): Likewise. + (3_carry): Likewise. + +2023-12-07 Hongyu Wang + + * config/i386/i386.md (*ashl3_1): Extend with new + alternatives to support NDD, limit the new alternative to + generate sal only, and adjust output template for NDD. + (*ashlsi3_1_zext): Likewise. + (*ashlhi3_1): Likewise. + (*ashlqi3_1): Likewise. + (*ashl3_cmp): Likewise. + (*ashlsi3_cmp_zext): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + (*ashl3_cconly): Likewise. + (*ashl3_doubleword_highpart): Adjust codegen for NDD. + +2023-12-07 Kong Lingling + + * config/i386/i386.md (3): Add new alternative for NDD + and adjust output templates. + (*_1): Likewise. + (*qi_1): Likewise. + (*notxor_1): Likewise. + (*si_1_zext): Likewise. + (*notxorqi_1): Likewise. + (*_2): Likewise. + (*si_2_zext): Likewise. + (*si_2_zext_imm): Likewise. + (*si_1_zext_imm): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + (*one_cmplsi2_2_zext): Likewise. + (define_split for *one_cmplsi2_2_zext): Use nonimmediate_operand for + operands[3]. + (*3_doubleword): Add NDD constraints, adopt '&' to NDD dest + and emit move for optimized case if operands[0] != operands[1] or + operands[4] != operands[5]. 
+ (define_split for QI highpart OR/XOR): Prohibit splitter to split NDD + form OR/XOR insn to qi_ext_3. + (define_split for QI strict_lowpart optimization): Prohibit splitter to + split NDD form AND insn to *3_1_slp. + +2023-12-07 Kong Lingling + + * config/i386/i386.md (and3): Add NDD alternatives and adjust + output template. + (*anddi_1): Likewise. + (*and_1): Likewise. + (*andqi_1): Likewise. + (*andsi_1_zext): Likewise. + (*anddi_2): Likewise. + (*andsi_2_zext): Likewise. + (*andqi_2_maybe_si): Likewise. + (*and_2): Likewise. + (*and3_doubleword): Add NDD alternative, adopt '&' to NDD dest and + emit move for optimized case if operands[0] not equal to operands[1]. + (define_split for QI highpart AND): Prohibit splitter to split NDD + form AND insn to qi_ext_3. + (define_split for QI strict_lowpart optimization): Prohibit splitter to + split NDD form AND insn to *3_1_slp. + (define_split for zero_extend and optimization): Prohibit splitter to + split NDD form AND insn to zero_extend insn. + +2023-12-07 Kong Lingling + + * config/i386/i386.md (one_cmpl2): Add new constraints for NDD + and adjust output template. + (*one_cmpl2_1): Likewise. + (*one_cmplqi2_1): Likewise. + (*one_cmpl2_doubleword): Likewise, and adopt '&' to NDD dest. + (*one_cmpl2_2): Likewise. + (*one_cmplsi2_1_zext): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + +2023-12-07 Kong Lingling + + * config/i386/i386-expand.cc (ix86_expand_unary_operator): Add use_ndd + parameter and adjust for NDD. + * config/i386/i386-protos.h: Add use_ndd parameter for + ix86_unary_operator_ok and ix86_expand_unary_operator. + * config/i386/i386.cc (ix86_unary_operator_ok): Add use_ndd parameter + and adjust for NDD. + * config/i386/i386.md (neg2): Add new constraint for NDD and + adjust output template. + (*neg_1): Likewise. + (*neg2_doubleword): Likewise and adopt '&' to NDD dest. + (*neg_2): Likewise. + (*neg_ccc_1): Likewise. + (*neg_ccc_2): Likewise. 
+ (*negsi_1_zext): Likewise, and use nonimmediate_operand for operands[1] + to accept memory input for NDD alternatives. + (*negsi_2_zext): Likewise. + +2023-12-07 Kong Lingling + + * config/i386/i386.md (*sub3_doubleword): Add new alternative for + NDD, adopt '&' modifier to NDD dest and emit move when operands[0] not + equal to operands[1]. + (*sub3_doubleword_zext): Likewise. + (*subv4_doubleword): Likewise. + (*subv4_doubleword_1): Likewise. + (*subv4_overflow_1): Add NDD alternatives and adjust output + templates. + (*subv4_overflow_2): Likewise. + (@sub3_carry): Likewise. + (*addsi3_carry_zext_0r): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + (*subsi3_carry_zext): Likewise. + (subborrow): Parse TARGET_APX_NDD to ix86_binary_operator_ok. + (subborrow_0): Likewise. + (*sub3_eq): Likewise. + (*sub3_ne): Likewise. + (*sub3_eq_1): Likewise. + +2023-12-07 Kong Lingling + + * config/i386/i386-expand.cc (ix86_fixup_binary_operands_no_copy): + Add use_ndd parameter and parse it. + * config/i386/i386-protos.h (ix86_fixup_binary_operands_no_copy): + Change define. + * config/i386/i386.md (sub3): Add new alternatives for NDD + and adjust output templates. + (*sub_1): Likewise. + (*sub_2): Likewise. + (subv4): Likewise. + (*subv4): Likewise. + (subv4_1): Likewise. + (usubv4): Likewise. + (*sub_3): Likewise. + (*subsi_1_zext): Likewise, and use nonimmediate_operand for operands[1] + to accept memory input for NDD alternatives. + (*subsi_2_zext): Likewise. + (*subsi_3_zext): Likewise. + +2023-12-07 Kong Lingling + + * config/i386/i386.md (*add3_doubleword): Add ndd alternatives, + adopt '&' to ndd dest and move operands[1] to operands[0] when they are + not equal. + (*add3_doubleword_cc_overflow_1): Likewise. + (*addv4_doubleword): Likewise. + (*addv4_doubleword_1): Likewise. + (*add3_doubleword_zext): Likewise. + (addv4_overflow_1): Add ndd alternatives. + (*addv4_overflow_2): Likewise. + (@add3_carry): Likewise. 
+ (*add3_carry_0): Likewise. + (*addsi3_carry_zext): Likewise. + (addcarry): Likewise. + (addcarry_0): Likewise. + (*addcarry_1): Likewise. + (*add3_eq): Likewise. + (*add3_ne): Likewise. + (*addsi3_carry_zext_0): Likewise, and use nonimmediate_operand for + operands[1] to accept memory input for NDD alternative. + +2023-12-07 Hongyu Wang + + * config/i386/constraints.md (je): New constraint. + * config/i386/i386-protos.h (x86_poff_operand_p): New function to + check any *POFF constant in operand. + * config/i386/i386.cc (x86_poff_operand_p): New prototype. + * config/i386/i386.md (*add_1): Split out je alternative for add. + +2023-12-07 Kong Lingling + + * config/i386/i386.md: (addsi_1_zext): Add new alternatives for + NDD and adjust output templates. + (*add_2): Likewise. + (*addsi_2_zext): Likewise. + (*add_3): Likewise. + (*addsi_3_zext): Likewise. + (*adddi_4): Likewise. + (*add_4): Likewise. + (*add_5): Likewise. + (*addv4): Likewise. + (*addv4_1): Likewise. + (*add3_cconly_overflow_1): Likewise. + (*add3_cc_overflow_1): Likewise. + (*addsi3_zext_cc_overflow_1): Likewise. + (*add3_cconly_overflow_2): Likewise. + (*add3_cc_overflow_2): Likewise. + (*addsi3_zext_cc_overflow_2): Likewise. + +2023-12-07 Kong Lingling + + * config/i386/i386-expand.cc (ix86_fixup_binary_operands): Add + new use_ndd flag to check whether ndd can be used for this binop + and adjust operand emit. + (ix86_binary_operator_ok): Likewise. + (ix86_expand_binary_operator): Likewise, and void postreload + expand generate lea pattern when use_ndd is explicit parsed. + * config/i386/i386-options.cc (ix86_option_override_internal): + Prohibit apx subfeatures when not in 64bit mode. + * config/i386/i386-protos.h (ix86_binary_operator_ok): + Add use_ndd flag. + (ix86_fixup_binary_operand): Likewise. + (ix86_expand_binary_operand): Likewise. + * config/i386/i386.md (*add_1): Extend with new alternatives + to support NDD, and adjust output template. + (*addhi_1): Likewise. + (*addqi_1): Likewise. 
+ +2023-12-07 David Malcolm + + PR analyzer/103546 + PR analyzer/112850 + * doc/invoke.texi: Add -Wanalyzer-symbol-too-complex. + 2023-12-06 Juzhe-Zhong * config/riscv/riscv-vsetvl.cc (extract_single_source): new function. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index af1f8a0d7194..9e296399fe4b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231207 +20231208 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 8dd3709b7aae..296acce9d25d 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,31 @@ +2023-12-07 Alexandre Oliva + + * region-model.cc (has_nondefault_case_for_value_p): Take + enumerate type as a parameter. + (region_model::apply_constraints_for_gswitch): Cope with + integral promotion type casts. + +2023-12-07 David Malcolm + + PR analyzer/103546 + PR analyzer/112850 + * analyzer.opt (-param=analyzer-max-svalue-depth=): Increase from + 12 to 18. + (Wanalyzer-symbol-too-complex): New. + * diagnostic-manager.cc + (null_assignment_sm_context::clear_all_per_svalue_state): New. + * engine.cc (impl_sm_context::clear_all_per_svalue_state): New. + * program-state.cc (sm_state_map::clear_all_per_svalue_state): + New. + * program-state.h (sm_state_map::clear_all_per_svalue_state): New + decl. + * region-model-manager.cc + (region_model_manager::reject_if_too_complex): Add + -Wanalyzer-symbol-too-complex. + * sm-taint.cc (taint_state_machine::on_condition): Handle + comparisons against UNKNOWN. + * sm.h (sm_context::clear_all_per_svalue_state): New. + 2023-12-06 David Malcolm * engine.cc (dump_analyzer_json): Use diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index fa9576ffab33..4cae882759db 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,10 @@ +2023-12-07 Andrew Pinski + Jakub Jelinek + + PR preprocessor/111965 + * c-opts.cc (c_common_handle_option) : Set + cpp_opts->debug to value rather than 1. 
+ 2023-12-06 David Malcolm * c-opts.cc (c_diagnostic_finalizer): Make "diagnostic" param diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index 486ca569145e..c0ce63deb97d 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,29 @@ +2023-12-07 Gaius Mulley + + PR modula2/112893 + * gm2-compiler/M2Base.mod (Ass): Extend array to include proc row + and column. Allow PIM to assign cardinal variables to address + variables. + (Expr): Ditto. + (Comp): Ditto. + * gm2-compiler/M2Check.mod (getSType): New procedure function. + Replace all occurances of GetSType with getSType. + * gm2-compiler/M2GenGCC.mod (CodeParam): Rewrite format specifier + error message. + * gm2-compiler/M2Quads.mod (CheckProcTypeAndProcedure): Add tokno + parameter. + * gm2-compiler/M2Range.def (InitTypesParameterCheck): Add tokno + parameter. + (InitParameterRangeCheck): Add tokno parameter. + Remove EXPORT QUALIFIED list. + (InitParameterRangeCheck): Add tokno parameter. + * gm2-compiler/M2Range.mod (InitTypesParameterCheck): Add tokno + parameter and pass tokno to PutRangeParam. + (InitParameterRangeCheck): Add tokno parameter and pass tokno to + PutRangeParam. + (PutRangeParam): Add tokno parameter and assign to tokenNo. + (FoldTypeParam): Rewrite format string. + 2023-12-06 Thomas Schwinge * lang.opt (-isysroot): New. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3b1c0c7f9661..ea17860858b7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,430 @@ +2023-12-07 Juzhe-Zhong + + * gcc.target/riscv/rvv/autovec/slp-interleave-1.c: New test. + * gcc.target/riscv/rvv/autovec/slp-interleave-2.c: New test. + * gcc.target/riscv/rvv/autovec/slp-interleave-3.c: New test. + * gcc.target/riscv/rvv/autovec/slp-interleave-4.c: New test. + +2023-12-07 Richard Sandiford + + PR rtl-optimization/106694 + PR rtl-optimization/109078 + PR rtl-optimization/109391 + * gcc.target/aarch64/ldp_stp_16.c (cons4_4_float): Tighten expected + output test. 
+ * gcc.target/aarch64/sve/shift_1.c: Allow reversed shifts for .s + as well as .d. + * gcc.target/aarch64/sme/strided_1.c: New test. + * gcc.target/aarch64/pr109078.c: Likewise. + * gcc.target/aarch64/pr109391.c: Likewise. + * gcc.target/aarch64/sve/pr106694.c: Likewise. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. 
+ * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. + +2023-12-07 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new test. + +2023-12-07 Marek Polacek + + * gcc.target/aarch64/ldp_stp_unaligned_2.c: Use -fno-stack-protector. + * gcc.target/aarch64/shadow_call_stack_5.c: Likewise. + * gcc.target/aarch64/shadow_call_stack_6.c: Likewise. + * gcc.target/aarch64/shadow_call_stack_7.c: Likewise. + * gcc.target/aarch64/shadow_call_stack_8.c: Likewise. + * gcc.target/aarch64/stack-check-12.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-11.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-12.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-13.c: Likewise. 
+ * gcc.target/aarch64/stack-check-prologue-14.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-15.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-17.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-18.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-19.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-2.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-5.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-6.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-8.c: Likewise. + * gcc.target/aarch64/stack-check-prologue-9.c: Likewise. + * gcc.target/aarch64/sve/struct_vect_24.c: Likewise. + * gcc.target/aarch64/test_frame_1.c: Likewise. + * gcc.target/aarch64/test_frame_10.c: Likewise. + * gcc.target/aarch64/test_frame_11.c: Likewise. + * gcc.target/aarch64/test_frame_13.c: Likewise. + * gcc.target/aarch64/test_frame_15.c: Likewise. + * gcc.target/aarch64/test_frame_2.c: Likewise. + * gcc.target/aarch64/test_frame_4.c: Likewise. + * gcc.target/aarch64/test_frame_6.c: Likewise. + * gcc.target/aarch64/test_frame_7.c: Likewise. + * gcc.target/aarch64/test_frame_8.c: Likewise. + +2023-12-07 Alexandre Oliva + + * c-c++-common/strub-split-stack.c: New. + * c-c++-common/strub-unsupported.c: New. + * c-c++-common/strub-unsupported-2.c: New. + * c-c++-common/strub-unsupported-3.c: New. + * lib/target-supports.exp (check_effective_target_strub): New. + * c-c++-common/strub-O0.c: Require effective target strub. + * c-c++-common/strub-O1.c: Likewise. + * c-c++-common/strub-O2.c: Likewise. + * c-c++-common/strub-O2fni.c: Likewise. + * c-c++-common/strub-O3.c: Likewise. + * c-c++-common/strub-O3fni.c: Likewise. + * c-c++-common/strub-Og.c: Likewise. + * c-c++-common/strub-Os.c: Likewise. + * c-c++-common/strub-all1.c: Likewise. + * c-c++-common/strub-all2.c: Likewise. + * c-c++-common/strub-apply1.c: Likewise. + * c-c++-common/strub-apply2.c: Likewise. + * c-c++-common/strub-apply3.c: Likewise. 
+ * c-c++-common/strub-apply4.c: Likewise. + * c-c++-common/strub-at-calls1.c: Likewise. + * c-c++-common/strub-at-calls2.c: Likewise. + * c-c++-common/strub-defer-O1.c: Likewise. + * c-c++-common/strub-defer-O2.c: Likewise. + * c-c++-common/strub-defer-O3.c: Likewise. + * c-c++-common/strub-defer-Os.c: Likewise. + * c-c++-common/strub-internal1.c: Likewise. + * c-c++-common/strub-internal2.c: Likewise. + * c-c++-common/strub-parms1.c: Likewise. + * c-c++-common/strub-parms2.c: Likewise. + * c-c++-common/strub-parms3.c: Likewise. + * c-c++-common/strub-relaxed1.c: Likewise. + * c-c++-common/strub-relaxed2.c: Likewise. + * c-c++-common/strub-short-O0-exc.c: Likewise. + * c-c++-common/strub-short-O0.c: Likewise. + * c-c++-common/strub-short-O1.c: Likewise. + * c-c++-common/strub-short-O2.c: Likewise. + * c-c++-common/strub-short-O3.c: Likewise. + * c-c++-common/strub-short-Os.c: Likewise. + * c-c++-common/strub-strict1.c: Likewise. + * c-c++-common/strub-strict2.c: Likewise. + * c-c++-common/strub-tail-O1.c: Likewise. + * c-c++-common/strub-tail-O2.c: Likewise. + * c-c++-common/strub-var1.c: Likewise. + * c-c++-common/torture/strub-callable1.c: Likewise. + * c-c++-common/torture/strub-callable2.c: Likewise. + * c-c++-common/torture/strub-const1.c: Likewise. + * c-c++-common/torture/strub-const2.c: Likewise. + * c-c++-common/torture/strub-const3.c: Likewise. + * c-c++-common/torture/strub-const4.c: Likewise. + * c-c++-common/torture/strub-data1.c: Likewise. + * c-c++-common/torture/strub-data2.c: Likewise. + * c-c++-common/torture/strub-data3.c: Likewise. + * c-c++-common/torture/strub-data4.c: Likewise. + * c-c++-common/torture/strub-data5.c: Likewise. + * c-c++-common/torture/strub-indcall1.c: Likewise. + * c-c++-common/torture/strub-indcall2.c: Likewise. + * c-c++-common/torture/strub-indcall3.c: Likewise. + * c-c++-common/torture/strub-inlinable1.c: Likewise. + * c-c++-common/torture/strub-inlinable2.c: Likewise. + * c-c++-common/torture/strub-ptrfn1.c: Likewise. 
+ * c-c++-common/torture/strub-ptrfn2.c: Likewise. + * c-c++-common/torture/strub-ptrfn3.c: Likewise. + * c-c++-common/torture/strub-ptrfn4.c: Likewise. + * c-c++-common/torture/strub-pure1.c: Likewise. + * c-c++-common/torture/strub-pure2.c: Likewise. + * c-c++-common/torture/strub-pure3.c: Likewise. + * c-c++-common/torture/strub-pure4.c: Likewise. + * c-c++-common/torture/strub-run1.c: Likewise. + * c-c++-common/torture/strub-run2.c: Likewise. + * c-c++-common/torture/strub-run3.c: Likewise. + * c-c++-common/torture/strub-run4.c: Likewise. + * c-c++-common/torture/strub-run4c.c: Likewise. + * c-c++-common/torture/strub-run4d.c: Likewise. + * c-c++-common/torture/strub-run4i.c: Likewise. + * g++.dg/strub-run1.C: Likewise. + * g++.dg/torture/strub-init1.C: Likewise. + * g++.dg/torture/strub-init2.C: Likewise. + * g++.dg/torture/strub-init3.C: Likewise. + * gnat.dg/strub_attr.adb: Likewise. + * gnat.dg/strub_ind.adb: Likewise. + * gnat.dg/strub_access.adb: Likewise. + * gnat.dg/strub_access1.adb: Likewise. + * gnat.dg/strub_disp.adb: Likewise. + * gnat.dg/strub_disp1.adb: Likewise. + * gnat.dg/strub_ind1.adb: Likewise. + * gnat.dg/strub_ind2.adb: Likewise. + * gnat.dg/strub_intf.adb: Likewise. + * gnat.dg/strub_intf1.adb: Likewise. + * gnat.dg/strub_intf2.adb: Likewise. + * gnat.dg/strub_renm.adb: Likewise. + * gnat.dg/strub_renm1.adb: Likewise. + * gnat.dg/strub_renm2.adb: Likewise. + * gnat.dg/strub_var.adb: Likewise. + * gnat.dg/strub_var1.adb: Likewise. + +2023-12-07 Marc Poulhiès + + * gcc.target/i386/pr106910-1.c: Disable for newlib. + +2023-12-07 Marc Poulhiès + + * gcc.dg/analyzer/fd-4.c: Fix for newlib. + +2023-12-07 Marc Poulhiès + + * gcc.dg/vect/vect-ifcvt-18.c: Add dep on avx_runtime. + * gcc.dg/vect/vect-simd-clone-16f.c: Likewise. + * gcc.dg/vect/vect-simd-clone-18f.c: Likewise. + +2023-12-07 Gaius Mulley + + PR modula2/112893 + * gm2/iso/fail/proccard.mod: New test. + * gm2/pim/pass/proccard.mod: New test. 
+ +2023-12-07 Juzhe-Zhong + + * gcc.target/riscv/rvv/vsetvl/avl_prop-2.c: New test. + +2023-12-07 Christoph Müllner + + * gcc.target/riscv/xtheadmemidx-inline-asm-1.c: New test. + +2023-12-07 Christoph Müllner + + * gcc.target/riscv/xtheadfmemidx-without-xtheadmemidx.c: New test. + +2023-12-07 Jakub Jelinek + + PR target/111068 + * gcc.target/i386/pr111068.c: New test. + +2023-12-07 Andrew Pinski + Jakub Jelinek + + PR preprocessor/111965 + * gcc.dg/cpp/pr111965-1.c: New test. + * gcc.dg/cpp/pr111965-2.c: New test. + +2023-12-07 Jakub Jelinek + + PR middle-end/112881 + * gcc.dg/bitint-50.c: New test. + +2023-12-07 Jakub Jelinek + + PR tree-optimization/112880 + * gcc.dg/bitint-49.c: New test. + +2023-12-07 Jakub Jelinek + + PR target/96127 + * gcc.target/s390/pr96127.c (c1): Add casts to long int *. + +2023-12-07 Alexandre Oliva + + * gcc.dg/analyzer/switch-short-enum-1.c: New. + * gcc.dg/analyzer/switch-no-short-enum-1.c: New. + +2023-12-07 Victor Do Nascimento + + * gcc.target/aarch64/acle/rcpc3.c: New. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd-ti-shift.c: New test. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd-cmov.c: New test. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd-shld-shrd.c: New test. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd.c: Add test for left/right rotate. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd.c: Add l/ashiftrt tests. + +2023-12-07 Hongyu Wang + + * gcc.target/i386/apx-ndd.c: Add tests for sal. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add or and xor test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add and test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add not test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add neg test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd-sbb.c: New test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add test for ndd sub. 
+ +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd-adc.c: New test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: Add more test. + +2023-12-07 Kong Lingling + + * gcc.target/i386/apx-ndd.c: New test. + +2023-12-07 David Malcolm + + PR analyzer/103546 + PR analyzer/112850 + * c-c++-common/analyzer/call-summaries-pr107158-2.c: Add + -Wno-analyzer-symbol-too-complex. + * c-c++-common/analyzer/call-summaries-pr107158.c: Likewise. + * c-c++-common/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c: + Likewise. + * c-c++-common/analyzer/feasibility-3.c: Add + -Wno-analyzer-too-complex and -Wno-analyzer-symbol-too-complex. + * c-c++-common/analyzer/flex-with-call-summaries.c: Add + -Wno-analyzer-symbol-too-complex. Remove fail for + PR analyzer/103546 leak false positive. + * c-c++-common/analyzer/flex-without-call-summaries.c: Remove + xfail for PR analyzer/103546 leak false positive. + * c-c++-common/analyzer/infinite-recursion-3.c: Add + -Wno-analyzer-symbol-too-complex. + * c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c: + Likewise. + * c-c++-common/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c: + Likewise. + * c-c++-common/analyzer/null-deref-pr108400-SoftEtherVPN-WebUi.c: + Likewise. + * c-c++-common/analyzer/null-deref-pr108806-qemu.c: Likewise. + * c-c++-common/analyzer/null-deref-pr108830.c: Likewise. + * c-c++-common/analyzer/pr94596.c: Likewise. + * c-c++-common/analyzer/strtok-2.c: Likewise. + * c-c++-common/analyzer/strtok-4.c: Add -Wno-analyzer-too-complex + and -Wno-analyzer-symbol-too-complex. + * c-c++-common/analyzer/strtok-cppreference.c: Likewise. + * gcc.dg/analyzer/analyzer.exp: Add -Wanalyzer-symbol-too-complex + to DEFAULT_CFLAGS. + * gcc.dg/analyzer/attr-const-3.c: Add + -Wno-analyzer-symbol-too-complex. + * gcc.dg/analyzer/call-summaries-pr107072.c: Likewise. + * gcc.dg/analyzer/doom-s_sound-pr108867.c: Likewise. + * gcc.dg/analyzer/explode-4.c: Likewise. 
+ * gcc.dg/analyzer/null-deref-pr102671-1.c: Likewise. + * gcc.dg/analyzer/null-deref-pr105755.c: Likewise. + * gcc.dg/analyzer/out-of-bounds-curl.c: Likewise. + * gcc.dg/analyzer/pr101503.c: Likewise. + * gcc.dg/analyzer/pr103892.c: Add -Wno-analyzer-too-complex and + -Wno-analyzer-symbol-too-complex. + * gcc.dg/analyzer/pr94851-4.c: Add + -Wno-analyzer-symbol-too-complex. + * gcc.dg/analyzer/pr96860-1.c: Likewise. + * gcc.dg/analyzer/pr96860-2.c: Likewise. + * gcc.dg/analyzer/pr98918.c: Likewise. + * gcc.dg/analyzer/pr99044-2.c: Likewise. + * gcc.dg/analyzer/uninit-pr108806-qemu.c: Likewise. + * gcc.dg/analyzer/use-after-free.c: Add -Wno-analyzer-too-complex + and -Wno-analyzer-symbol-too-complex. + * gcc.dg/plugin/plugin.exp: Add new tests for + analyzer_kernel_plugin.c. + * gcc.dg/plugin/taint-CVE-2011-0521-4.c: Update expected results. + * gcc.dg/plugin/taint-CVE-2011-0521-5.c: Likewise. + * gcc.dg/plugin/taint-CVE-2011-0521-6.c: Likewise. + * gcc.dg/plugin/taint-CVE-2011-0521-5-fixed.c: Remove xfail. + * gcc.dg/plugin/taint-pr112850-precise.c: New test. + * gcc.dg/plugin/taint-pr112850-too-complex.c: New test. + * gcc.dg/plugin/taint-pr112850-unsanitized.c: New test. + * gcc.dg/plugin/taint-pr112850.c: New test. + 2023-12-06 Victor Do Nascimento * gcc.target/aarch64/acle/rwsr.c (get_rsr128): New. diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 74bad754cc82..8dc9785ccb11 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2023-12-07 Alexandre Oliva + + * configure.ac: Check for strub support. + * configure: Rebuilt. + * Makefile.in: Compile strub.c conditionally. + 2023-12-06 Thomas Schwinge Jakub Jelinek diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 43b161ccb3df..4ffaa9d558b2 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,41 @@ +2023-12-07 Patrick Palka + + * include/std/ranges (__detail::_ToClosure): Replace with ... + (__detail::_To): ... this. 
+ (__detail::_ToClosure2): Replace with ... + (__detail::To2): ... this. + (to): Simplify using the existing _Partial range adaptor + closure object. + +2023-12-07 Jonathan Wakely + + * include/std/format (_Iter_sink): + Remove uint64_t local type. + +2023-12-07 Jonathan Wakely + + * include/bits/atomic_wait.h: Include instead of + . + +2023-12-07 Jonathan Wakely + + PR libstdc++/112882 + * include/bits/c++config (__is_constant_evaluated): Add + always_inline attribute. + (_GLIBCXX_DO_ASSERT): Remove macro. + (__glibcxx_assert): Define separately for assertions-enabled and + constexpr-only cases. + +2023-12-07 Alexandre Oliva + + PR libstdc++/112858 + * config/os/gnu-linux/os_defines.h + (_GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL): Define. + * libsupc++/atexit_thread.cc [__GXX_WEAK__ && + _GLIBCXX_MAY_HAVE___CXA_THREAD_ATEXIT_IMPL] + (__cxa_thread_atexit): Add dynamic detection of + __cxa_thread_atexit_impl. + 2023-12-06 Jason Merrill DR 532 From 775aeabcb870b74e4d0986341c7a39add9bbb06d Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 7 Dec 2023 19:42:45 -0500 Subject: [PATCH 070/311] analyzer: fix ICE for 2 bits before the start of base region [PR112889] Concrete bindings were using -1 and -2 in the offset field to signify deleted and empty hash slots, but these are valid values, leading to assertion failures inside hash_map::put on a debug build, and probable bugs in a release build. (gdb) call k.dump(true) start: -2, size: 1, next: -1 (gdb) p k.is_empty() $6 = true Fix by using the size field rather than the offset. gcc/analyzer/ChangeLog: PR analyzer/112889 * store.h (concrete_binding::concrete_binding): Strengthen assertion to require size to be positive, rather than just non-zero. (concrete_binding::mark_deleted): Use size rather than start bit offset. (concrete_binding::mark_empty): Likewise. (concrete_binding::is_deleted): Likewise. (concrete_binding::is_empty): Likewise.
gcc/testsuite/ChangeLog: PR analyzer/112889 * c-c++-common/analyzer/ice-pr112889.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/store.h | 10 +++++----- .../c-c++-common/analyzer/ice-pr112889.c | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index cf10fa3b0108..d75d69d0b7f4 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -377,7 +377,7 @@ public: concrete_binding (bit_offset_t start_bit_offset, bit_size_t size_in_bits) : m_bit_range (start_bit_offset, size_in_bits) { - gcc_assert (!m_bit_range.empty_p ()); + gcc_assert (m_bit_range.m_size_in_bits > 0); } bool concrete_p () const final override { return true; } @@ -419,10 +419,10 @@ public: static int cmp_ptr_ptr (const void *, const void *); - void mark_deleted () { m_bit_range.m_start_bit_offset = -1; } - void mark_empty () { m_bit_range.m_start_bit_offset = -2; } - bool is_deleted () const { return m_bit_range.m_start_bit_offset == -1; } - bool is_empty () const { return m_bit_range.m_start_bit_offset == -2; } + void mark_deleted () { m_bit_range.m_size_in_bits = -1; } + void mark_empty () { m_bit_range.m_size_in_bits = -2; } + bool is_deleted () const { return m_bit_range.m_size_in_bits == -1; } + bool is_empty () const { return m_bit_range.m_size_in_bits == -2; } private: bit_range m_bit_range; diff --git a/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c b/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c new file mode 100644 index 000000000000..e90a53e79baf --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/ice-pr112889.c @@ -0,0 +1,17 @@ +typedef unsigned char __u8; +struct sk_buff +{ + unsigned char *data; +}; +struct cpl_pass_accept_req +{ + __u8 : 6; + __u8 sack : 1; +}; +void build_cpl_pass_accept_req(struct sk_buff* skb) +{ + struct cpl_pass_accept_req* req; + skb->data -= sizeof(*req); + req = (struct 
cpl_pass_accept_req *)skb->data; + req->sack = 1; +} From 9f7ad5eff3bf1e42aac0825b37d2c9ab43eaafd2 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Thu, 7 Dec 2023 17:51:08 -0700 Subject: [PATCH 071/311] [PATCH 1/5][V3][ifcvt] optimize x=c ? (y op z) : y by RISC-V Zicond like insns op=[PLUS, MINUS, IOR, XOR] Conditional op, if zero rd = (rc == 0) ? (rs1 op rs2) : rs1 --> czero.nez rd, rs2, rc op rd, rs1, rd Conditional op, if non-zero rd = (rc != 0) ? (rs1 op rs2) : rs1 --> czero.eqz rd, rs2, rc op rd, rs1, rd gcc/ChangeLog: * ifcvt.cc (noce_try_cond_zero_arith): New function. (noce_emit_czero, get_base_reg): Likewise. (noce_cond_zero_binary_op_supported): Likewise. (noce_bbs_ok_for_cond_zero_arith): Likewise. (noce_process_if_block): Use noce_try_cond_zero_arith. Co-authored-by: Xiao Zeng --- gcc/ifcvt.cc | 187 ++++++ .../gcc.target/riscv/zicond_ifcvt_opt.c | 566 ++++++++++++++++++ 2 files changed, 753 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index be42609a848f..e4eda1a68375 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -787,6 +787,7 @@ static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **); static bool noce_try_minmax (struct noce_if_info *); static bool noce_try_abs (struct noce_if_info *); static bool noce_try_sign_mask (struct noce_if_info *); +static int noce_try_cond_zero_arith (struct noce_if_info *); /* Return the comparison code for reversed condition for IF_INFO, or UNKNOWN if reversing the condition is not possible. */ @@ -1831,6 +1832,35 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, return NULL_RTX; } +/* Emit a conditional zero, returning TARGET or NULL_RTX upon failure. + IF_INFO describes the if-conversion scenario under consideration. + CZERO_CODE selects the condition (EQ/NE). + NON_ZERO_OP is the nonzero operand of the conditional move + TARGET is the desired output register. 
*/ + +static rtx +noce_emit_czero (struct noce_if_info *if_info, enum rtx_code czero_code, + rtx non_zero_op, rtx target) +{ + machine_mode mode = GET_MODE (target); + rtx cond_op0 = XEXP (if_info->cond, 0); + rtx czero_cond + = gen_rtx_fmt_ee (czero_code, GET_MODE (cond_op0), cond_op0, const0_rtx); + rtx if_then_else + = gen_rtx_IF_THEN_ELSE (mode, czero_cond, const0_rtx, non_zero_op); + rtx set = gen_rtx_SET (target, if_then_else); + + rtx_insn *insn = make_insn_raw (set); + + if (recog_memoized (insn) >= 0) + { + add_insn (insn); + return target; + } + + return NULL_RTX; +} + /* Try only simple constants and registers here. More complex cases are handled in noce_try_cmove_arith after noce_try_store_flag_arith has had a go at it. */ @@ -2880,6 +2910,160 @@ noce_try_sign_mask (struct noce_if_info *if_info) return true; } +/* Check if OP is supported by conditional zero based if conversion, + returning TRUE if satisfied otherwise FALSE. + + OP is the operation to check. */ + +static bool +noce_cond_zero_binary_op_supported (rtx op) +{ + enum rtx_code opcode = GET_CODE (op); + + if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR) + return true; + + return false; +} + +/* Helper function to return REG itself, + otherwise NULL_RTX for other RTX_CODE. */ + +static rtx +get_base_reg (rtx exp) +{ + if (REG_P (exp)) + return exp; + + return NULL_RTX; +} + +/* Check if IF-BB and THEN-BB satisfy the condition for conditional zero + based if conversion, returning TRUE if satisfied otherwise FALSE. + + IF_INFO describes the if-conversion scenario under consideration. + COMMON_PTR points to the common REG of canonicalized IF_INFO->A and + IF_INFO->B. + CZERO_CODE_PTR points to the comparison code to use in czero RTX. + A_PTR points to the A expression of canonicalized IF_INFO->A. + TO_REPLACE points to the RTX to be replaced by czero RTX destination.
*/ + +static bool +noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr, + enum rtx_code *czero_code_ptr, rtx *a_ptr, + rtx **to_replace) +{ + rtx common = NULL_RTX; + rtx cond = if_info->cond; + rtx a = copy_rtx (if_info->a); + rtx b = copy_rtx (if_info->b); + rtx bin_op1 = NULL_RTX; + enum rtx_code czero_code = UNKNOWN; + bool reverse = false; + rtx op0, op1, bin_exp; + + if (!noce_simple_bbs (if_info)) + return false; + + /* COND must be EQ or NE comparison of a reg and 0. */ + if (GET_CODE (cond) != NE && GET_CODE (cond) != EQ) + return false; + if (!REG_P (XEXP (cond, 0)) || !rtx_equal_p (XEXP (cond, 1), const0_rtx)) + return false; + + /* Canonicalize x = y : (y op z) to x = (y op z) : y. */ + if (REG_P (a) && noce_cond_zero_binary_op_supported (b)) + { + std::swap (a, b); + reverse = !reverse; + } + + /* Check if x = (y op z) : y is supported by czero based ifcvt. */ + if (!(noce_cond_zero_binary_op_supported (a) && REG_P (b))) + return false; + + bin_exp = a; + + /* Canonicalize x = (z op y) : y to x = (y op z) : y */ + op1 = get_base_reg (XEXP (bin_exp, 1)); + if (op1 && rtx_equal_p (op1, b) && COMMUTATIVE_ARITH_P (bin_exp)) + std::swap (XEXP (bin_exp, 0), XEXP (bin_exp, 1)); + + op0 = get_base_reg (XEXP (bin_exp, 0)); + if (op0 && rtx_equal_p (op0, b)) + { + common = b; + bin_op1 = XEXP (bin_exp, 1); + czero_code = reverse + ? noce_reversed_cond_code (if_info) + : GET_CODE (cond); + } + else + return false; + + if (czero_code == UNKNOWN) + return false; + + if (REG_P (bin_op1)) + *to_replace = &XEXP (bin_exp, 1); + else + return false; + + *common_ptr = common; + *czero_code_ptr = czero_code; + *a_ptr = a; + + return true; +} + +/* Try to convert if-then-else with conditional zero, + returning TRUE on success or FALSE on failure. + IF_INFO describes the if-conversion scenario under consideration.
*/ + +static int +noce_try_cond_zero_arith (struct noce_if_info *if_info) +{ + rtx target, a; + rtx_insn *seq; + machine_mode mode = GET_MODE (if_info->x); + rtx common = NULL_RTX; + enum rtx_code czero_code = UNKNOWN; + rtx non_zero_op = NULL_RTX; + rtx *to_replace = NULL; + + if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &czero_code, &a, + &to_replace)) + return false; + + non_zero_op = *to_replace; + + start_sequence (); + + /* If x is used in both input and out like x = c ? x + z : x, + use a new reg to avoid modifying x */ + if (common && rtx_equal_p (common, if_info->x)) + target = gen_reg_rtx (mode); + else + target = if_info->x; + + target = noce_emit_czero (if_info, czero_code, non_zero_op, target); + if (!target || !to_replace) + { + end_sequence (); + return false; + } + + *to_replace = target; + noce_emit_move_insn (if_info->x, a); + + seq = end_ifcvt_sequence (if_info); + if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info)) + return false; + + emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a)); + if_info->transform_name = "noce_try_cond_zero_arith"; + return true; +} /* Optimize away "if (x & C) x |= C" and similar bit manipulation transformations. 
*/ @@ -3937,6 +4121,9 @@ noce_process_if_block (struct noce_if_info *if_info) goto success; if (noce_try_store_flag_mask (if_info)) goto success; + if (HAVE_conditional_move + && noce_try_cond_zero_arith (if_info)) + goto success; if (HAVE_conditional_move && noce_try_cmove_arith (if_info)) goto success; diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c new file mode 100644 index 000000000000..dcb21c15d1a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c @@ -0,0 +1,566 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb_zicond -mabi=lp64d -O2 " } */ +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-Os" "-Og" "-O3" "-Oz" "-flto"} } */ + +long +test_ADD_ceqz (long x, long y, long z, long c) +{ + if (c) + x = y + z; + else + x = y; + return x; +} + +long +test_ADD_ceqz_x (long x, long z, long c) +{ + if (c) + x = x + z; + + return x; +} + +long +test_ADD_nez (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = y + z; + return x; +} + +long +test_ADD_nez_x (long x, long z, long c) +{ + if (c) + { + } + else + x = x + z; + return x; +} + +long +test_ADD_nez_2 (long x, long y, long z, long c) +{ + if (!c) + x = y + z; + else + x = y; + return x; +} + +long +test_ADD_nez_x_2 (long x, long z, long c) +{ + if (!c) + x = x + z; + + return x; +} + +long +test_ADD_eqz_2 (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = y + z; + return x; +} + +long +test_ADD_eqz_x_2 (long x, long z, long c) +{ + if (!c) + { + } + else + x = x + z; + return x; +} + +long +test_SUB_ceqz (long x, long y, long z, long c) +{ + if (c) + x = y - z; + else + x = y; + return x; +} + +long +test_SUB_ceqz_x (long x, long z, long c) +{ + if (c) + x = x - z; + + return x; +} + +long +test_SUB_nez (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = y - z; + return x; +} + +long +test_SUB_nez_x (long x, long z, long c) +{ + if (c) + { + } + else + x = x - z; + return x; 
+} + +long +test_SUB_nez_2 (long x, long y, long z, long c) +{ + if (!c) + x = y - z; + else + x = y; + return x; +} + +long +test_SUB_nez_x_2 (long x, long z, long c) +{ + if (!c) + x = x - z; + + return x; +} + +long +test_SUB_eqz_2 (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = y - z; + return x; +} + +long +test_SUB_eqz_x_2 (long x, long z, long c) +{ + if (!c) + { + } + else + x = x - z; + return x; +} + +long +test_IOR_ceqz (long x, long y, long z, long c) +{ + if (c) + x = y | z; + else + x = y; + return x; +} + +long +test_IOR_ceqz_x (long x, long z, long c) +{ + if (c) + x = x | z; + + return x; +} + +long +test_IOR_nez (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = y | z; + return x; +} + +long +test_IOR_nez_x (long x, long z, long c) +{ + if (c) + { + } + else + x = x | z; + return x; +} + +long +test_IOR_nez_2 (long x, long y, long z, long c) +{ + if (!c) + x = y | z; + else + x = y; + return x; +} + +long +test_IOR_nez_x_2 (long x, long z, long c) +{ + if (!c) + x = x | z; + + return x; +} + +long +test_IOR_eqz_2 (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = y | z; + return x; +} + +long +test_IOR_eqz_x_2 (long x, long z, long c) +{ + if (!c) + { + } + else + x = x | z; + return x; +} + +long +test_XOR_ceqz (long x, long y, long z, long c) +{ + if (c) + x = y ^ z; + else + x = y; + return x; +} + +long +test_XOR_ceqz_x (long x, long z, long c) +{ + if (c) + x = x ^ z; + + return x; +} + +long +test_XOR_nez (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = y ^ z; + return x; +} + +long +test_XOR_nez_x (long x, long z, long c) +{ + if (c) + { + } + else + x = x ^ z; + return x; +} + +long +test_XOR_nez_2 (long x, long y, long z, long c) +{ + if (!c) + x = y ^ z; + else + x = y; + return x; +} + +long +test_XOR_nez_x_2 (long x, long z, long c) +{ + if (!c) + x = x ^ z; + + return x; +} + +long +test_XOR_eqz_2 (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = 
y ^ z; + return x; +} + +long +test_XOR_eqz_x_2 (long x, long z, long c) +{ + if (!c) + { + } + else + x = x ^ z; + return x; +} + +long +test_ADD_ceqz_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = z + y; + else + x = y; + return x; +} + +long +test_ADD_ceqz_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + x = z + x; + + return x; +} + +long +test_ADD_nez_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = z + y; + return x; +} + +long +test_ADD_nez_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + { + } + else + x = z + x; + return x; +} + +long +test_ADD_nez_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = z + y; + else + x = y; + return x; +} + +long +test_ADD_nez_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + x = z + x; + + return x; +} + +long +test_ADD_eqz_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = z + y; + return x; +} + +long +test_ADD_eqz_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + { + } + else + x = z + x; + return x; +} + +long +test_IOR_ceqz_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = z | y; + else + x = y; + return x; +} + +long +test_IOR_ceqz_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + x = z | x; + + return x; +} + +long +test_IOR_nez_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = z | y; + return x; +} + +long +test_IOR_nez_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + { + } + else + x = z | x; + return x; +} + +long +test_IOR_nez_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = z | y; + else + x = y; + return x; +} + +long +test_IOR_nez_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + x = z | x; + + return x; +} + +long +test_IOR_eqz_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = z | 
y; + return x; +} + +long +test_IOR_eqz_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + { + } + else + x = z | x; + return x; +} + +long +test_XOR_ceqz_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = z ^ y; + else + x = y; + return x; +} + +long +test_XOR_ceqz_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + x = z ^ x; + + return x; +} + +long +test_XOR_nez_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = z ^ y; + return x; +} + +long +test_XOR_nez_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + { + } + else + x = z ^ x; + return x; +} + +long +test_XOR_nez_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = z ^ y; + else + x = y; + return x; +} + +long +test_XOR_nez_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + x = z ^ x; + + return x; +} + +long +test_XOR_eqz_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = z ^ y; + return x; +} + +long +test_XOR_eqz_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + { + } + else + x = z ^ x; + return x; +} + +/* { dg-final { scan-assembler-times {czero\.eqz} 28 } } */ +/* { dg-final { scan-assembler-times {czero\.nez} 28 } } */ From 2efe3a7de0107618397264017fb045f237764cc7 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Wed, 6 Dec 2023 14:52:19 +0800 Subject: [PATCH 072/311] tree-optimization/112774: extend the SCEV CHREC tree with a nonwrapping flag The flag is defined as CHREC_NOWRAP(tree), and will be dumped from "{offset, +, 1}_1" to "{offset, +, 1}<nw>_1" (nw is short for nonwrapping). Two SCEV interfaces record_nonwrapping_chrec and nonwrapping_chrec_p are added to set and check the flag respectively. As resetting the SCEV cache (i.e., the chrec trees) may not reset the loop->estimate_state, free_numbers_of_iterations_estimates is called explicitly in loop vectorization to make sure the flag can be calculated appropriately by niter.
gcc/ChangeLog: PR tree-optimization/112774 * tree-pretty-print.cc: if nonwrapping flag is set, chrec will be printed with additional info. * tree-scalar-evolution.cc: add record_nonwrapping_chrec and nonwrapping_chrec_p to set and check the new flag respectively. * tree-scalar-evolution.h: Likewise. * tree-ssa-loop-niter.cc (idx_infer_loop_bounds, infer_loop_bounds_from_pointer_arith, infer_loop_bounds_from_signedness, scev_probably_wraps_p): call record_nonwrapping_chrec before record_nonwrapping_iv, call nonwrapping_chrec_p to check the flag is set and return false from scev_probably_wraps_p. * tree-vect-loop.cc (vect_analyze_loop): call free_numbers_of_iterations_estimates explicitly. * tree-core.h: document the nothrow_flag usage in CHREC_NOWRAP * tree.h: add CHREC_NOWRAP(NODE), base.nothrow_flag is used to represent the nonwrapping info. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/scev-16.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 18 ++++++++++++++++++ gcc/tree-core.h | 3 +++ gcc/tree-pretty-print.cc | 2 +- gcc/tree-scalar-evolution.cc | 24 ++++++++++++++++++++++++ gcc/tree-scalar-evolution.h | 2 ++ gcc/tree-ssa-loop-niter.cc | 21 ++++++++++++++++----- gcc/tree-vect-loop.cc | 4 ++++ gcc/tree.h | 8 +++++--- 8 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c new file mode 100644 index 000000000000..120f40c0b6ce --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ + +int A[1024 * 2]; + +int foo (unsigned offset, unsigned N) +{ + int sum = 0; + + for (unsigned i = 0; i < N; i++) + sum += A[i + offset]; + + return sum; +} + +/* Loop can be vectorized by referring "i + offset" is nonwrapping from array. 
*/ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { ! { avr-*-* msp430-*-* pru-*-* } } } } } */ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 65e51b939a28..04c04cf2f375 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -1387,6 +1387,9 @@ struct GTY(()) tree_base { DECL_NONALIASED in VAR_DECL + CHREC_NOWRAP in + POLYNOMIAL_CHREC + deprecated_flag: TREE_DEPRECATED in diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc index 1fadd752d059..0dabb6d1580d 100644 --- a/gcc/tree-pretty-print.cc +++ b/gcc/tree-pretty-print.cc @@ -3488,7 +3488,7 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, dump_generic_node (pp, CHREC_LEFT (node), spc, flags, false); pp_string (pp, ", +, "); dump_generic_node (pp, CHREC_RIGHT (node), spc, flags, false); - pp_string (pp, "}_"); + pp_string (pp, !CHREC_NOWRAP (node) ? "}_" : "}<nw>_"); pp_scalar (pp, "%u", CHREC_VARIABLE (node)); is_stmt = false; break; diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index 385fc64ab8ca..94250b143b36 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -2050,6 +2050,30 @@ analyze_scalar_evolution (class loop *loop, tree var) return res; } +/* If CHREC doesn't overflow, set the nonwrapping flag. */ + +void record_nonwrapping_chrec (tree chrec) +{ + CHREC_NOWRAP(chrec) = 1; + + if (dump_file && (dump_flags & TDF_SCEV)) + { + fprintf (dump_file, "(record_nonwrapping_chrec: "); + print_generic_expr (dump_file, chrec); + fprintf (dump_file, ")\n"); + } +} + +/* Return true if CHREC's nonwrapping flag is set. */ + +bool nonwrapping_chrec_p (tree chrec) +{ + if (!chrec || TREE_CODE(chrec) != POLYNOMIAL_CHREC) + return false; + + return CHREC_NOWRAP(chrec); +} + /* Analyzes and returns the scalar evolution of VAR address in LOOP.
*/ static tree diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h index a64ed78fe635..f57fde12ee22 100644 --- a/gcc/tree-scalar-evolution.h +++ b/gcc/tree-scalar-evolution.h @@ -43,6 +43,8 @@ extern bool simple_iv (class loop *, class loop *, tree, struct affine_iv *, bool); extern bool iv_can_overflow_p (class loop *, tree, tree, tree); extern tree compute_overall_effect_of_inner_loop (class loop *, tree); +extern void record_nonwrapping_chrec (tree); +extern bool nonwrapping_chrec_p (tree); /* Returns the basic block preceding LOOP, or the CFG entry block when the loop is function's body. */ diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index 2098bef9a970..d465e0ed7e17 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -4206,11 +4206,15 @@ idx_infer_loop_bounds (tree base, tree *idx, void *dta) /* If access is not executed on every iteration, we must ensure that overlow may not make the access valid later. */ - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt)) - && scev_probably_wraps_p (NULL_TREE, - initial_condition_in_loop_num (ev, loop->num), - step, data->stmt, loop, true)) - upper = false; + if (!dominated_by_p (CDI_DOMINATORS, loop->latch, gimple_bb (data->stmt))) + { + if (scev_probably_wraps_p (NULL_TREE, + initial_condition_in_loop_num (ev, loop->num), + step, data->stmt, loop, true)) + upper = false; + } + else + record_nonwrapping_chrec (ev); record_nonwrapping_iv (loop, init, step, data->stmt, low, high, false, upper); return true; @@ -4324,6 +4328,7 @@ infer_loop_bounds_from_pointer_arith (class loop *loop, gimple *stmt) if (flag_delete_null_pointer_checks && int_cst_value (low) == 0) low = build_int_cstu (TREE_TYPE (low), TYPE_ALIGN_UNIT (TREE_TYPE (type))); + record_nonwrapping_chrec (scev); record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true); } @@ -4371,6 +4376,7 @@ infer_loop_bounds_from_signedness (class loop *loop, gimple *stmt) high 
= wide_int_to_tree (type, r.upper_bound ()); } + record_nonwrapping_chrec (scev); record_nonwrapping_iv (loop, base, step, stmt, low, high, false, true); } @@ -5505,6 +5511,11 @@ scev_probably_wraps_p (tree var, tree base, tree step, if (loop_exits_before_overflow (base, step, at_stmt, loop)) return false; + /* Check the nonwrapping flag, which may be set by niter analysis (e.g., the + above loop exits before overflow). */ + if (var && nonwrapping_chrec_p (analyze_scalar_evolution (loop, var))) + return false; + /* At this point we still don't have a proof that the iv does not overflow: give up. */ return true; diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index dd584ab4a426..6261cd1be1dd 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3570,6 +3570,10 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) analysis are done under the assumptions. */ loop_constraint_set (loop, LOOP_C_FINITE); } + else + /* Clear the existing niter information to make sure the nonwrapping flag + will be calculated and set propriately. */ + free_numbers_of_iterations_estimates (loop); auto_vector_modes vector_modes; /* Autodetect first vector size we try. */ diff --git a/gcc/tree.h b/gcc/tree.h index 086b55f03754..59af8920f024 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1438,9 +1438,11 @@ class auto_suppress_location_wrappers #define COND_EXPR_ELSE(NODE) (TREE_OPERAND (COND_EXPR_CHECK (NODE), 2)) /* Accessors for the chains of recurrences. 
*/ -#define CHREC_LEFT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 0) -#define CHREC_RIGHT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 1) -#define CHREC_VARIABLE(NODE) POLYNOMIAL_CHREC_CHECK (NODE)->base.u.chrec_var +#define CHREC_LEFT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 0) +#define CHREC_RIGHT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 1) +#define CHREC_VARIABLE(NODE) POLYNOMIAL_CHREC_CHECK (NODE)->base.u.chrec_var +/* Nonzero if this chrec doesn't overflow (i.e., nonwrapping). */ +#define CHREC_NOWRAP(NODE) POLYNOMIAL_CHREC_CHECK (NODE)->base.nothrow_flag /* LABEL_EXPR accessor. This gives access to the label associated with the given label expression. */ From b241d91f1e344a7324f66b216f11b40bbb65cf2e Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Fri, 8 Dec 2023 14:33:22 +0800 Subject: [PATCH 073/311] RISC-V: Remove redundant check of better_main_loop_than_p in COST model Since loop vectorizer won't call better_main_loop_than_p if !flag_vect_cost_model. Committed as it is obvious. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (costs::better_main_loop_than_p): Remove redundant check. 
--- gcc/config/riscv/riscv-vector-costs.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 8036c9c40d75..c062c12a2633 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -630,9 +630,6 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const { auto other = static_cast<const costs *> (uncast_other); - if (!flag_vect_cost_model) - return vector_costs::better_main_loop_than_p (other); - if (riscv_autovec_lmul == RVV_DYNAMIC) { bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS); From 642190b4160da74c5d382c1cea71c6e617c557cb Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Fri, 10 Nov 2023 10:03:37 +0800 Subject: [PATCH 074/311] i386: Mark Xeon Phi ISAs as deprecated Since Knights Landing and Knights Mill microarchitectures are EOL, we would like to remove their support in GCC 15. In GCC 14, we will first emit a warning for the usage. gcc/ChangeLog: * config/i386/driver-i386.cc (host_detect_local_cpu): Do not append "-mno-" for Xeon Phi ISAs. * config/i386/i386-options.cc (ix86_option_override_internal): Emit a warning for KNL/KNM targets. * config/i386/i386.opt: Emit a warning for Xeon Phi ISAs. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Adjust testcases. * g++.dg/other/i386-3.C: Ditto. * g++.dg/pr80481.C: Ditto. * gcc.dg/pr71279.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fmaddps-1.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fmaddps-2.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fmaddss-1.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c: Ditto. * gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c: Ditto. * gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c: Ditto.
* gcc.target/i386/avx512er-vexp2pd-1.c: Ditto. * gcc.target/i386/avx512er-vexp2pd-2.c: Ditto. * gcc.target/i386/avx512er-vexp2ps-1.c: Ditto. * gcc.target/i386/avx512er-vexp2ps-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28pd-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28pd-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-3.c: Ditto. * gcc.target/i386/avx512er-vrcp28ps-4.c: Ditto. * gcc.target/i386/avx512er-vrcp28sd-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28sd-2.c: Ditto. * gcc.target/i386/avx512er-vrcp28ss-1.c: Ditto. * gcc.target/i386/avx512er-vrcp28ss-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28pd-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28pd-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-3.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-4.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-5.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ps-6.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28sd-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ss-1.c: Ditto. * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto. * gcc.target/i386/avx512f-gather-1.c: Ditto. * gcc.target/i386/avx512f-gather-2.c: Ditto. * gcc.target/i386/avx512f-gather-3.c: Ditto. * gcc.target/i386/avx512f-gather-4.c: Ditto. * gcc.target/i386/avx512f-gather-5.c: Ditto. * gcc.target/i386/avx512f-i32gatherd512-1.c: Ditto. * gcc.target/i386/avx512f-i32gatherd512-2.c: Ditto. * gcc.target/i386/avx512f-i32gatherpd512-1.c: Ditto. * gcc.target/i386/avx512f-i32gatherpd512-2.c: Ditto. * gcc.target/i386/avx512f-i32gatherps512-1.c: Ditto. * gcc.target/i386/avx512f-vect-perm-1.c: Ditto. * gcc.target/i386/avx512f-vect-perm-2.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Ditto. 
* gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0dps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf0qps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1dpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1dps-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1qpd-1.c: Ditto. * gcc.target/i386/avx512pf-vscatterpf1qps-1.c: Ditto. * gcc.target/i386/funcspec-56.inc: Ditto. * gcc.target/i386/pr103404.c: Ditto. * gcc.target/i386/pr104448.c: Ditto. * gcc.target/i386/pr107934.c: Ditto. * gcc.target/i386/pr64387.c: Ditto. * gcc.target/i386/pr70728.c: Ditto. * gcc.target/i386/pr71346.c: Ditto. * gcc.target/i386/pr82941-2.c: Ditto. * gcc.target/i386/pr82942-1.c: Ditto. * gcc.target/i386/pr82942-2.c: Ditto. * gcc.target/i386/pr82990-1.c: Ditto. * gcc.target/i386/pr82990-3.c: Ditto. * gcc.target/i386/pr82990-4.c: Ditto. * gcc.target/i386/pr82990-6.c: Ditto. * gcc.target/i386/pr88713-3.c: Ditto. * gcc.target/i386/pr89523-5.c: Ditto. * gcc.target/i386/pr89523-6.c: Ditto. * gcc.target/i386/pr91033.c: Ditto. * gcc.target/i386/pr94561.c: Ditto. * gcc.target/i386/prefetchwt1-1.c: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-26.c: Ditto. * gcc.target/i386/pr69471-3.c: Removed. 
--- gcc/config/i386/driver-i386.cc | 9 ++++-- gcc/config/i386/i386-options.cc | 25 +++++++++++++++++ gcc/config/i386/i386.opt | 10 +++---- gcc/testsuite/g++.dg/other/i386-2.C | 5 ++++ gcc/testsuite/g++.dg/other/i386-3.C | 5 ++++ gcc/testsuite/g++.dg/pr80481.C | 2 +- gcc/testsuite/gcc.dg/pr71279.c | 2 +- .../i386/avx5124fmadd-v4fmaddps-1.c | 1 + .../i386/avx5124fmadd-v4fmaddps-2.c | 1 + .../i386/avx5124fmadd-v4fmaddss-1.c | 1 + .../i386/avx5124fmadd-v4fnmaddps-1.c | 1 + .../i386/avx5124fmadd-v4fnmaddps-2.c | 1 + .../i386/avx5124fmadd-v4fnmaddss-1.c | 1 + .../i386/avx5124vnniw-vp4dpwssd-1.c | 1 + .../i386/avx5124vnniw-vp4dpwssd-2.c | 1 + .../i386/avx5124vnniw-vp4dpwssds-1.c | 1 + .../i386/avx5124vnniw-vp4dpwssds-2.c | 1 + .../gcc.target/i386/avx512er-vexp2pd-1.c | 1 + .../gcc.target/i386/avx512er-vexp2pd-2.c | 1 + .../gcc.target/i386/avx512er-vexp2ps-1.c | 1 + .../gcc.target/i386/avx512er-vexp2ps-2.c | 1 + .../gcc.target/i386/avx512er-vrcp28pd-1.c | 1 + .../gcc.target/i386/avx512er-vrcp28pd-2.c | 1 + .../gcc.target/i386/avx512er-vrcp28ps-1.c | 1 + .../gcc.target/i386/avx512er-vrcp28ps-2.c | 1 + .../gcc.target/i386/avx512er-vrcp28ps-3.c | 1 + .../gcc.target/i386/avx512er-vrcp28ps-4.c | 1 + .../gcc.target/i386/avx512er-vrcp28sd-1.c | 1 + .../gcc.target/i386/avx512er-vrcp28sd-2.c | 1 + .../gcc.target/i386/avx512er-vrcp28ss-1.c | 1 + .../gcc.target/i386/avx512er-vrcp28ss-2.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28pd-1.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28pd-2.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-1.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-2.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-3.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-4.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-5.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ps-6.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28sd-1.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28sd-2.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ss-1.c | 1 + .../gcc.target/i386/avx512er-vrsqrt28ss-2.c | 1 + 
.../gcc.target/i386/avx512f-gather-1.c | 2 +- .../gcc.target/i386/avx512f-gather-2.c | 2 +- .../gcc.target/i386/avx512f-gather-3.c | 2 +- .../gcc.target/i386/avx512f-gather-4.c | 2 +- .../gcc.target/i386/avx512f-gather-5.c | 2 +- .../gcc.target/i386/avx512f-i32gatherd512-1.c | 2 +- .../gcc.target/i386/avx512f-i32gatherd512-2.c | 2 +- .../i386/avx512f-i32gatherpd512-1.c | 2 +- .../i386/avx512f-i32gatherpd512-2.c | 2 +- .../i386/avx512f-i32gatherps512-1.c | 2 +- .../gcc.target/i386/avx512f-vect-perm-1.c | 28 +++++++++---------- .../gcc.target/i386/avx512f-vect-perm-2.c | 28 +++++++++---------- .../i386/avx512pf-vgatherpf0dpd-1.c | 1 + .../i386/avx512pf-vgatherpf0dps-1.c | 1 + .../i386/avx512pf-vgatherpf0qpd-1.c | 1 + .../i386/avx512pf-vgatherpf0qps-1.c | 1 + .../i386/avx512pf-vgatherpf1dpd-1.c | 1 + .../i386/avx512pf-vgatherpf1dps-1.c | 1 + .../i386/avx512pf-vgatherpf1qpd-1.c | 1 + .../i386/avx512pf-vgatherpf1qps-1.c | 1 + .../i386/avx512pf-vscatterpf0dpd-1.c | 1 + .../i386/avx512pf-vscatterpf0dps-1.c | 1 + .../i386/avx512pf-vscatterpf0qpd-1.c | 1 + .../i386/avx512pf-vscatterpf0qps-1.c | 1 + .../i386/avx512pf-vscatterpf1dpd-1.c | 1 + .../i386/avx512pf-vscatterpf1dps-1.c | 1 + .../i386/avx512pf-vscatterpf1qpd-1.c | 1 + .../i386/avx512pf-vscatterpf1qps-1.c | 1 + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 12 -------- gcc/testsuite/gcc.target/i386/pr103404.c | 2 +- gcc/testsuite/gcc.target/i386/pr104448.c | 1 + gcc/testsuite/gcc.target/i386/pr107934.c | 2 +- gcc/testsuite/gcc.target/i386/pr64387.c | 2 +- gcc/testsuite/gcc.target/i386/pr69471-3.c | 11 -------- gcc/testsuite/gcc.target/i386/pr70728.c | 2 +- gcc/testsuite/gcc.target/i386/pr71346.c | 2 +- gcc/testsuite/gcc.target/i386/pr82941-2.c | 1 + gcc/testsuite/gcc.target/i386/pr82942-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr82942-2.c | 2 ++ gcc/testsuite/gcc.target/i386/pr82990-1.c | 1 + gcc/testsuite/gcc.target/i386/pr82990-3.c | 1 + gcc/testsuite/gcc.target/i386/pr82990-4.c | 2 +- 
gcc/testsuite/gcc.target/i386/pr82990-6.c | 1 + gcc/testsuite/gcc.target/i386/pr88713-3.c | 2 +- gcc/testsuite/gcc.target/i386/pr89523-5.c | 1 + gcc/testsuite/gcc.target/i386/pr89523-6.c | 1 + gcc/testsuite/gcc.target/i386/pr91033.c | 1 + gcc/testsuite/gcc.target/i386/pr94561.c | 2 +- gcc/testsuite/gcc.target/i386/prefetchwt1-1.c | 1 + gcc/testsuite/gcc.target/i386/sse-12.c | 5 ++++ gcc/testsuite/gcc.target/i386/sse-13.c | 5 ++++ gcc/testsuite/gcc.target/i386/sse-14.c | 5 ++++ gcc/testsuite/gcc.target/i386/sse-26.c | 1 + 96 files changed, 175 insertions(+), 79 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/i386/pr69471-3.c diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 204600e128a1..0cfb2884d654 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -897,8 +897,13 @@ const char *host_detect_local_cpu (int argc, const char **argv) } /* Never push -mno-avx10.1-{256,512} under -march=native to avoid unnecessary warnings when building librarys. 
*/ - else if ((isa_names_table[i].feature != FEATURE_AVX10_1_256) - && (isa_names_table[i].feature != FEATURE_AVX10_1_512) + else if (isa_names_table[i].feature != FEATURE_AVX10_1_256 + && isa_names_table[i].feature != FEATURE_AVX10_1_512 + && isa_names_table[i].feature != FEATURE_AVX512PF + && isa_names_table[i].feature != FEATURE_AVX512ER + && isa_names_table[i].feature != FEATURE_AVX5124FMAPS + && isa_names_table[i].feature != FEATURE_AVX5124VNNIW + && isa_names_table[i].feature != FEATURE_PREFETCHWT1 && check_avx512_features (cpu_model, cpu_features2, isa_names_table[i].feature)) options = concat (options, neg_option, diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 7d0a253e07f9..588a0878c0df 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2098,6 +2098,18 @@ ix86_option_override_internal (bool main_args_p, : G_("%<target(\"tune=x86-64\")%> is deprecated; use " "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>" " instead as appropriate")); + else if (!strcmp (opts->x_ix86_tune_string, "knl")) + warning (OPT_Wdeprecated, + main_args_p + ? G_("%<-mtune=knl%> support will be removed in GCC 15") + : G_("%<target(\"tune=knl\")%> support will be removed in " + "GCC 15")); + else if (!strcmp (opts->x_ix86_tune_string, "knm")) + warning (OPT_Wdeprecated, + main_args_p + ? G_("%<-mtune=knm%> support will be removed in GCC 15") + : G_("%<target(\"tune=knm\")%> support will be removed in " + "GCC 15")); } else { @@ -2300,6 +2312,19 @@ ix86_option_override_internal (bool main_args_p, return false; } + if (!strcmp (opts->x_ix86_arch_string, "knl")) + warning (OPT_Wdeprecated, + main_args_p + ? G_("%<-march=knl%> support will be removed in GCC 15") + : G_("%<target(\"arch=knl\")%> support will be removed in " + "GCC 15")); + else if (!strcmp (opts->x_ix86_arch_string, "knm")) + warning (OPT_Wdeprecated, + main_args_p + ?
G_("%<-march=knm%> support will be removed in GCC 15") + : G_("%<target(\"arch=knm\")%> support will be removed in " + "GCC 15")); + ix86_schedule = processor_alias_table[i].schedule; ix86_arch = processor_alias_table[i].processor; diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index b2edfac0b2ae..f0199585f9c5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -738,11 +738,11 @@ Target Mask(ISA_AVX512F) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation. mavx512pf -Target Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save +Target Mask(ISA_AVX512PF) Var(ix86_isa_flags) Save Warn(AVX512PF support will be removed in GCC 15) Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512PF built-in functions and code generation. mavx512er -Target Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save +Target Mask(ISA_AVX512ER) Var(ix86_isa_flags) Save Warn(AVX512ER support will be removed in GCC 15) Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512ER built-in functions and code generation. mavx512cd @@ -770,11 +770,11 @@ Target Mask(ISA_AVX512VBMI) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VBMI built-in functions and code generation. mavx5124fmaps -Target Mask(ISA2_AVX5124FMAPS) Var(ix86_isa_flags2) Save +Target Mask(ISA2_AVX5124FMAPS) Var(ix86_isa_flags2) Save Warn(AVX5124FMAPS support will be removed in GCC 15) Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124FMAPS built-in functions and code generation. mavx5124vnniw -Target Mask(ISA2_AVX5124VNNIW) Var(ix86_isa_flags2) Save +Target Mask(ISA2_AVX5124VNNIW) Var(ix86_isa_flags2) Save Warn(AVX5124VNNIW support will be removed in GCC 15) Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124VNNIW built-in functions and code generation.
mavx512vpopcntdq @@ -969,7 +969,7 @@ Target Mask(ISA_F16C) Var(ix86_isa_flags) Save Support F16C built-in functions and code generation. mprefetchwt1 -Target Mask(ISA_PREFETCHWT1) Var(ix86_isa_flags) Save +Target Mask(ISA_PREFETCHWT1) Var(ix86_isa_flags) Save Warn(PREFETCHWT1 support will be removed in GCC 15) Support PREFETCHWT1 built-in functions and code generation. mfentry diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 7d68967488d9..b1c59579455a 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,10 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, 
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 9b775c33ab46..f7b6d18a20e2 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,10 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/pr80481.C b/gcc/testsuite/g++.dg/pr80481.C index 3a8869914634..c2931100906f 100644 --- a/gcc/testsuite/g++.dg/pr80481.C +++ b/gcc/testsuite/g++.dg/pr80481.C @@ -1,7 +1,7 @@ // { dg-do compile { target { 
i?86-*-* x86_64-*-* } && { ! *-*-solaris* } } } // -fopenmp implies -pthread // { dg-require-effective-target pthread } -// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" } +// { dg-options "-Ofast -funroll-loops -fopenmp -march=skylake-avx512" } // Disabling epilogues until we find a better way to deal with scans. // { dg-additional-options "--param vect-epilogues-nomask=0" } diff --git a/gcc/testsuite/gcc.dg/pr71279.c b/gcc/testsuite/gcc.dg/pr71279.c index 4ecc84b64259..46c7a95aea75 100644 --- a/gcc/testsuite/gcc.dg/pr71279.c +++ b/gcc/testsuite/gcc.dg/pr71279.c @@ -1,7 +1,7 @@ /* PR middle-end/71279 */ /* { dg-do compile } */ /* { dg-options "-O3" } */ -/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */ +/* { dg-additional-options "-march=skylake-avx512" { target { i?86-*-* x86_64-*-* } } } */ extern int a, b; long c[1][1][1]; diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-1.c b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-1.c index 1035f253f066..3c3c9f619ad6 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124fmaps" } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-2.c b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-2.c index f977b65dad01..ad453975b382 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddps-2.c @@ 
-1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-O2 -mavx5124fmaps" } */ /* { dg-require-effective-target avx5124fmaps } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ #define ESP_FLOAT 1.0 diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddss-1.c b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddss-1.c index 2f1a558a1a2e..53830420afad 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fmaddss-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124fmaps" } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c index 45bd7dace483..acb666b30930 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124fmaps" } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c 
b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c index 3c75fcf2d15f..5dcdfe48c0ab 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-O2 -mavx5124fmaps" } */ /* { dg-require-effective-target avx5124fmaps } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ #define ESP_FLOAT 1.0 diff --git a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c index 1755afb9332c..bfeb46dc9a1e 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124fmaps" } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c index a234fddf54a9..bcabfac416f5 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124vnniw" } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { 
dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c index a0a6825e7c99..9044ce5ea198 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-O2 -mavx5124vnniw" } */ /* { dg-require-effective-target avx5124vnniw } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ #define AVX5124VNNIW #include "avx512f-helper.h" diff --git a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c index d1bed37d1c0c..e3e46c533682 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c +++ b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mavx5124vnniw" } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c index e1e5536558c2..193477c98645 100644 --- a/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c +++ b/gcc/testsuite/gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-options "-O2 -mavx5124vnniw" } */ /* { dg-require-effective-target avx5124vnniw } */ +/* { dg-warning "AVX5124VNNIW support will be removed in 
GCC 15" "" { target *-*-* } 0 } */ #define DEFAULT_VALUE 0x7ffffffe diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c index 7e1eb6bf63ea..93766d87451c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vexp2pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c index ce4e86c1f956..20457e1fcd28 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2pd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c index e5de38087fb3..130a2e001c7b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vexp2ps\[ 
\\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vexp2ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c index ab911c017a33..6294dfffee73 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vexp2ps-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c index f00b42ea50a2..8959c0544a2b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c index 609aeaa31c69..aaab1dc0eb22 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28pd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 
} */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c index 15f4a50cc1e0..f0da889361d3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c index 4059e0e7f522..241327143e06 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c index e08bea41c3e3..c0b1f7b31027 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-3.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" diff --git 
a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c index 2c76d9671847..b042849cba4c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ps-4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-vrcp28ps-3.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-1.c index 03e75cc5f3b7..f70042580b6d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c index 93d370d0d785..b434c3c26d92 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-1.c index 87a8ac3026fe..c819c1abdaea 100644 --- 
a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+\[^\{\]*(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrcp28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c index 4ffa92c66eee..f3b894b2f47c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c index a9dfd6a2cff2..67822d152964 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28pd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git 
a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c index 84a66addd556..3227df84b2ee 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28pd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c index 415980d21baa..63ff4eab7240 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c index a92472e61917..911683f5e54a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-3.c 
b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-3.c index 40aefb508444..a8ab49ed6c38 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-3.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-3.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include #include "avx512er-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-4.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-4.c index 2f5f73fd781d..4a793a6b0ae4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-4.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-vrsqrt28ps-3.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-5.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-5.c index 498f4d50aa52..9a8a88ae2b57 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-5.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-5.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include #include "avx512er-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-6.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-6.c index 77c5cba1cef7..f1f013572f7a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-6.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ps-6.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */ +/* { dg-warning "AVX512ER support will be 
removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-vrsqrt28ps-5.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-1.c index ca549062b754..28f6b193b3b0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c index 2606191b97bf..319102105710 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-1.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-1.c index c97376ed746d..29a9736f89c9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512er -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { 
dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vrsqrt28ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c index fa1c19b9f6bd..f9c4488facc0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c @@ -1,6 +1,7 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512er } */ /* { dg-options "-O2 -mavx512er" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "avx512er-check.h" #include "avx512f-mask-type.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c index eb9cbc1a6351..305d402a8466 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-1.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512f } */ -/* { dg-options "-O3 -mavx512f -mtune=knl" } */ +/* { dg-options "-O3 -mavx512f -march=sapphirerapids -mprefer-vector-width=512" } */ #include "avx512f-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c index df1b915fdd1f..ef424b09b8e0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ /* PR59617 */ -/* { dg-options "-O3 -mavx512f -fdump-tree-vect-details -mtune=knl" } */ +/* { dg-options "-O3 -mavx512f -march=sapphirerapids -fdump-tree-vect-details -mprefer-vector-width=512" } */ /* Disabling epilogues until we find a better way to deal with scans. 
*/ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c index 2054a949325e..51012c7dcdf5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-3.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512f } */ -/* { dg-options "-O3 -mavx512f -ffast-math -mtune=knl" } */ +/* { dg-options "-O3 -mavx512f -march=sapphirerapids -ffast-math -mprefer-vector-width=512" } */ #include "avx512f-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c index b500ca1fd484..10264c63757a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-4.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target avx512f } */ -/* { dg-options "-O3 -mavx512f -mtune=knl" } */ +/* { dg-options "-O3 -mavx512f -march=sapphirerapids -mprefer-vector-width=512" } */ #include "avx512f-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c index 686da706edf7..ad8cba58d968 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -mavx512f -mtune=knl" } */ +/* { dg-options "-O3 -mavx512f -march=sapphirerapids -mprefer-vector-width=512" } */ /* Disabling epilogues until we find a better way to deal with scans. 
*/ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c index b27d6c9f8e21..55b8d3985226 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -O2 -mtune=knl" } */ +/* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vpgatherdd\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c index faf96b6e6f79..d89ef048d82e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherd512-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-mavx512f -O2 -mtune=knl" } */ +/* { dg-options "-mavx512f -O2" } */ /* { dg-require-effective-target avx512f } */ #include "avx512f-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c index b94de200e67d..cf8e36905e56 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -O2 -mtune=knl" } */ +/* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vgatherdpd\[ \\t\]+\[^\{\n\]*ymm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c index d697ec22c8ed..3af491548ba4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherpd512-2.c @@ -1,5 +1,5 @@ /* { 
dg-do run } */ -/* { dg-options "-mavx512f -O2 -mtune=knl" } */ +/* { dg-options "-mavx512f -O2" } */ /* { dg-require-effective-target avx512f } */ #include "avx512f-check.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c index 9f3e65520fc2..1f1fab38d31f 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-i32gatherps512-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -O2 -mtune=knl" } */ +/* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vgatherdps\[ \\t\]+\[^\{\n\]*zmm\[0-9\]\[^\n\]*zmm\[0-9\]{%k\[1-7\]}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c index ea6760d481cd..a5b7abaedcdd 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-perm-1.c @@ -1,14 +1,14 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */ -/* { dg-final { scan-assembler-times "vpermps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ - -#define N 1024 -float f1[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); -float f2[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); - -void foo () -{ - int j; - for (j=0; j diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c index 0953769697ed..9376c1d6f380 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ 
#include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c index 1fb853431cad..13cfac5c7e69 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c index 7ccba277b2a4..79fc548619d3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c index 5dffec17fbfd..1949fe16f69a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c index 0ffbc9cfbd9e..a0fc183c91ea 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c index 22450fff7d4d..e3cedbb881c6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c index ddc4eacce3fe..6ed7297376f1 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c index 5a153ea3d4cd..3fc2c024ef34 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { 
scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c index d1173a2b7f37..5689e3a51517 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf0dps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c index 67529e7be83e..cf5596377fe8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c index 9ff580fea4d9..eeb65d42f0ff 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf0qps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git 
a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c index 73a029d10a1d..39b36d1bbe74 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c index 439bc8534850..edcde856d8aa 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf1dps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c index 3ae16cd2e197..c4cdaa8c590e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c index 35cd7d3b5d38..95ab72eebe23 100644 --- 
a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qps-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-times "vscatterpf1qps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 210da1d1a8a5..e910e1f92116 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -1,8 +1,6 @@ /* Common 32-bit and 64-bit function specific options. */ extern void test_sgx (void) __attribute__((__target__("sgx"))); -extern void test_avx5124fmaps(void) __attribute__((__target__("avx5124fmaps"))); -extern void test_avx5124vnniw(void) __attribute__((__target__("avx5124vnniw"))); extern void test_avx512vpopcntdq(void) __attribute__((__target__("avx512vpopcntdq"))); extern void test_avx512vbmi (void) __attribute__((__target__("avx512vbmi"))); @@ -10,8 +8,6 @@ extern void test_avx512ifma (void) __attribute__((__target__("avx512ifma"))); extern void test_avx512vl (void) __attribute__((__target__("avx512vl"))); extern void test_avx512bw (void) __attribute__((__target__("avx512bw"))); extern void test_avx512dq (void) __attribute__((__target__("avx512dq"))); -extern void test_avx512er (void) __attribute__((__target__("avx512er"))); -extern void test_avx512pf (void) __attribute__((__target__("avx512pf"))); extern void test_avx512cd (void) __attribute__((__target__("avx512cd"))); extern void test_avx512f (void) __attribute__((__target__("avx512f"))); extern void test_avx2 (void) __attribute__((__target__("avx2"))); @@ -38,7 +34,6 @@ extern void test_rtm (void) __attribute__((__target__("rtm"))); extern void test_prfchw (void) __attribute__((__target__("prfchw"))); extern void 
test_rdseed (void) __attribute__((__target__("rdseed"))); extern void test_adx (void) __attribute__((__target__("adx"))); -extern void test_prefetchwt1 (void) __attribute__((__target__("prefetchwt1"))); extern void test_clflushopt (void) __attribute__((__target__("clflushopt"))); extern void test_xsaves (void) __attribute__((__target__("xsaves"))); extern void test_xsavec (void) __attribute__((__target__("xsavec"))); @@ -95,8 +90,6 @@ extern void test_sm4 (void) __attribute__((__target__("sm4") extern void test_user_msr (void) __attribute__((__target__("usermsr"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); -extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); -extern void test_no_avx5124vnniw(void) __attribute__((__target__("no-avx5124vnniw"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); extern void test_no_avx512vbmi (void) __attribute__((__target__("no-avx512vbmi"))); @@ -104,8 +97,6 @@ extern void test_no_avx512ifma (void) __attribute__((__target__("no-avx512ifma" extern void test_no_avx512vl (void) __attribute__((__target__("no-avx512vl"))); extern void test_no_avx512bw (void) __attribute__((__target__("no-avx512bw"))); extern void test_no_avx512dq (void) __attribute__((__target__("no-avx512dq"))); -extern void test_no_avx512er (void) __attribute__((__target__("no-avx512er"))); -extern void test_no_avx512pf (void) __attribute__((__target__("no-avx512pf"))); extern void test_no_avx512cd (void) __attribute__((__target__("no-avx512cd"))); extern void test_no_avx512f (void) __attribute__((__target__("no-avx512f"))); extern void test_no_avx2 (void) __attribute__((__target__("no-avx2"))); @@ -132,7 +123,6 @@ extern void test_no_rtm (void) __attribute__((__target__("no-rtm"))); extern void test_no_prfchw (void) __attribute__((__target__("no-prfchw"))); extern void test_no_rdseed (void) __attribute__((__target__("no-rdseed"))); extern void test_no_adx (void) 
__attribute__((__target__("no-adx"))); -extern void test_no_prefetchwt1 (void) __attribute__((__target__("no-prefetchwt1"))); extern void test_no_clflushopt (void) __attribute__((__target__("no-clflushopt"))); extern void test_no_xsaves (void) __attribute__((__target__("no-xsaves"))); extern void test_no_xsavec (void) __attribute__((__target__("no-xsavec"))); @@ -200,8 +190,6 @@ extern void test_arch_tremont (void) __attribute__((__target__("arch=tremont")) extern void test_arch_sierraforest (void) __attribute__((__target__("arch=sierraforest"))); extern void test_arch_grandridge (void) __attribute__((__target__("arch=grandridge"))); extern void test_arch_clearwaterforest (void) __attribute__((__target__("arch=clearwaterforest"))); -extern void test_arch_knl (void) __attribute__((__target__("arch=knl"))); -extern void test_arch_knm (void) __attribute__((__target__("arch=knm"))); extern void test_arch_skylake (void) __attribute__((__target__("arch=skylake"))); extern void test_arch_skylake_avx512 (void) __attribute__((__target__("arch=skylake-avx512"))); extern void test_arch_cannonlake (void) __attribute__((__target__("arch=cannonlake"))); diff --git a/gcc/testsuite/gcc.target/i386/pr103404.c b/gcc/testsuite/gcc.target/i386/pr103404.c index 66f33645301d..3e970ef49663 100644 --- a/gcc/testsuite/gcc.target/i386/pr103404.c +++ b/gcc/testsuite/gcc.target/i386/pr103404.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-additional-options "-Og -fcse-follow-jumps -fno-dce -fno-early-inlining -fgcse -fharden-conditional-branches -frerun-cse-after-loop -fno-tree-ccp -mavx5124fmaps -std=c99 -w" } */ +/* { dg-additional-options "-Og -fcse-follow-jumps -fno-dce -fno-early-inlining -fgcse -fharden-conditional-branches -frerun-cse-after-loop -fno-tree-ccp -mavx512f -std=c99 -w" } */ typedef unsigned __attribute__((__vector_size__ (4))) U; typedef unsigned __attribute__((__vector_size__ (16))) V; diff --git a/gcc/testsuite/gcc.target/i386/pr104448.c 
b/gcc/testsuite/gcc.target/i386/pr104448.c index b10345afc483..3cf913e94eaa 100644 --- a/gcc/testsuite/gcc.target/i386/pr104448.c +++ b/gcc/testsuite/gcc.target/i386/pr104448.c @@ -1,6 +1,7 @@ /* PR target/104448 */ /* { dg-do compile { target { *-*-linux* && lp64 } } } */ /* { dg-options "-mavx5124vnniw -mno-xsave -mabi=ms" } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ int main () diff --git a/gcc/testsuite/gcc.target/i386/pr107934.c b/gcc/testsuite/gcc.target/i386/pr107934.c index 59106b29159e..67786cd26afc 100644 --- a/gcc/testsuite/gcc.target/i386/pr107934.c +++ b/gcc/testsuite/gcc.target/i386/pr107934.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mtune=knl -ffinite-math-only -msse2" } */ +/* { dg-options "-O2 -mtune=icelake-server -ffinite-math-only -msse2" } */ int foo (__bf16 bf) diff --git a/gcc/testsuite/gcc.target/i386/pr64387.c b/gcc/testsuite/gcc.target/i386/pr64387.c index dd381425a279..332a639871c5 100644 --- a/gcc/testsuite/gcc.target/i386/pr64387.c +++ b/gcc/testsuite/gcc.target/i386/pr64387.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -ffloat-store -mavx512er" } */ +/* { dg-options "-O2 -ftree-vectorize -ffloat-store -mavx2" } */ float x[256]; diff --git a/gcc/testsuite/gcc.target/i386/pr69471-3.c b/gcc/testsuite/gcc.target/i386/pr69471-3.c deleted file mode 100644 index 3826f9690900..000000000000 --- a/gcc/testsuite/gcc.target/i386/pr69471-3.c +++ /dev/null @@ -1,11 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=native -march=knl" } */ - -/* NB: We want to verify that -march=native -march=processor is the same - as -march=processor. Since it is very unlikely that GCC will be built - on KNL, -march=native will have -mno-avx512er and -march=knl should - enable AVX512ER. 
*/ - -#ifndef __AVX512ER__ -# error __AVX512ER__ is not defined -#endif diff --git a/gcc/testsuite/gcc.target/i386/pr70728.c b/gcc/testsuite/gcc.target/i386/pr70728.c index 89c140dde200..ff0e8873d006 100644 --- a/gcc/testsuite/gcc.target/i386/pr70728.c +++ b/gcc/testsuite/gcc.target/i386/pr70728.c @@ -1,6 +1,6 @@ /* PR target/70728 */ /* { dg-do compile } */ -/* { dg-options "-S -Ofast -march=knl" } */ +/* { dg-options "-S -Ofast -march=skylake-avx512" } */ short a = -15726; int b = (int)-7003557328690506537LL; diff --git a/gcc/testsuite/gcc.target/i386/pr71346.c b/gcc/testsuite/gcc.target/i386/pr71346.c index 0a15869155a3..d17e3306cb2f 100644 --- a/gcc/testsuite/gcc.target/i386/pr71346.c +++ b/gcc/testsuite/gcc.target/i386/pr71346.c @@ -1,6 +1,6 @@ /* PR target/71346 */ /* { dg-do compile { target { ! ia32 } } } */ -/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=knl" } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -march=skylake-avx512 -mno-avx512vl" } */ typedef int rvec[3]; int a; diff --git a/gcc/testsuite/gcc.target/i386/pr82941-2.c b/gcc/testsuite/gcc.target/i386/pr82941-2.c index db2f8589ab69..cafa1bf23ca7 100644 --- a/gcc/testsuite/gcc.target/i386/pr82941-2.c +++ b/gcc/testsuite/gcc.target/i386/pr82941-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=knl" } */ +/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82942-1.c b/gcc/testsuite/gcc.target/i386/pr82942-1.c index 9cdf81a9d603..b65b73d23ac5 100644 --- a/gcc/testsuite/gcc.target/i386/pr82942-1.c +++ b/gcc/testsuite/gcc.target/i386/pr82942-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mno-avx512er -O2" } */ +/* { dg-options "-mavx512f -O2" } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82942-2.c b/gcc/testsuite/gcc.target/i386/pr82942-2.c index ddb4e689659b..55572b03c9b2 100644 --- 
a/gcc/testsuite/gcc.target/i386/pr82942-2.c +++ b/gcc/testsuite/gcc.target/i386/pr82942-2.c @@ -1,5 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -mavx512er -mtune=knl -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "'-mtune=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82990-1.c b/gcc/testsuite/gcc.target/i386/pr82990-1.c index ff1d6d40eb26..a87288987c91 100644 --- a/gcc/testsuite/gcc.target/i386/pr82990-1.c +++ b/gcc/testsuite/gcc.target/i386/pr82990-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -march=knl -mvzeroupper" } */ +/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr82990-3.c b/gcc/testsuite/gcc.target/i386/pr82990-3.c index 201fa98d8d41..0c902cdf91ba 100644 --- a/gcc/testsuite/gcc.target/i386/pr82990-3.c +++ b/gcc/testsuite/gcc.target/i386/pr82990-3.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -mavx512er -mvzeroupper -O2" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82990-4.c b/gcc/testsuite/gcc.target/i386/pr82990-4.c index 09f161c7291d..4ee1fbcbcf75 100644 --- a/gcc/testsuite/gcc.target/i386/pr82990-4.c +++ b/gcc/testsuite/gcc.target/i386/pr82990-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mno-avx512er -mno-vzeroupper -O2" } */ +/* { dg-options "-mavx512f -mno-vzeroupper -O2" } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr82990-6.c b/gcc/testsuite/gcc.target/i386/pr82990-6.c index 063a61c111d7..41fd1cb17b6c 100644 --- a/gcc/testsuite/gcc.target/i386/pr82990-6.c +++ b/gcc/testsuite/gcc.target/i386/pr82990-6.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 
-march=skylake-avx512 -mtune=knl" } */ +/* { dg-warning "'-mtune=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "pr82941-1.c" diff --git a/gcc/testsuite/gcc.target/i386/pr88713-3.c b/gcc/testsuite/gcc.target/i386/pr88713-3.c index 85b6cf87a029..bd16d5e1d496 100644 --- a/gcc/testsuite/gcc.target/i386/pr88713-3.c +++ b/gcc/testsuite/gcc.target/i386/pr88713-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-Ofast -mno-avx512er -march=skylake-avx512" } */ +/* { dg-options "-Ofast -march=skylake-avx512" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/pr89523-5.c b/gcc/testsuite/gcc.target/i386/pr89523-5.c index 6a769c7a2495..cdf3190c1b32 100644 --- a/gcc/testsuite/gcc.target/i386/pr89523-5.c +++ b/gcc/testsuite/gcc.target/i386/pr89523-5.c @@ -1,6 +1,7 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-require-effective-target maybe_x32 } */ /* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler "\tvgather" } } */ /* { dg-final { scan-assembler-not "addr32 vgather" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89523-6.c b/gcc/testsuite/gcc.target/i386/pr89523-6.c index 82f795e085c0..d69206d1115b 100644 --- a/gcc/testsuite/gcc.target/i386/pr89523-6.c +++ b/gcc/testsuite/gcc.target/i386/pr89523-6.c @@ -1,6 +1,7 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-require-effective-target maybe_x32 } */ /* { dg-options "-mx32 -O2 -mavx512pf" } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler-not "\tvgather" } } */ /* { dg-final { scan-assembler "addr32 vgather" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr91033.c b/gcc/testsuite/gcc.target/i386/pr91033.c index 43d99d5a7dce..3b52c66380fc 100644 --- a/gcc/testsuite/gcc.target/i386/pr91033.c +++ b/gcc/testsuite/gcc.target/i386/pr91033.c @@ -1,6 +1,7 @@ /* PR tree-optimization/91033 */ /* { dg-do compile { target pthread } } */ /* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } */ +/* { dg-warning "'-march=knl' support will be removed in GCC 15" "" { target *-*-* } 0 } */ #define N 1024 int a[N]; diff --git a/gcc/testsuite/gcc.target/i386/pr94561.c b/gcc/testsuite/gcc.target/i386/pr94561.c index 49fdf7e687d0..9dbfc2d3d69d 100644 --- a/gcc/testsuite/gcc.target/i386/pr94561.c +++ b/gcc/testsuite/gcc.target/i386/pr94561.c @@ -1,6 +1,6 @@ /* PR target/94561 */ /* { dg-do compile } */ -/* { dg-options "-march=knl -O3 -funroll-loops" } */ +/* { dg-options "-mavx512f -O3 -funroll-loops" } */ struct xi { long int mg; diff --git a/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c b/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c index 1b88516ede38..742a32030696 100644 --- a/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c +++ b/gcc/testsuite/gcc.target/i386/prefetchwt1-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mprefetchwt1 -O2" } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ /* { dg-final { scan-assembler "\[ \\t\]+prefetchwt1\[ \\t\]+" } } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index a553a5202d19..765fd4de7789 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -4,6 +4,11 @@ with -O 
-std=c89 -pedantic-errors. */ /* { dg-do compile } */ /* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 946182f0e76f..2972b9f25444 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,6 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma 
-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */ /* { dg-add-options bind_pic_locally } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 0d07aadc7f8f..1eb369697a3f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,6 +1,11 @@ /* { dg-do compile } */ /* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4" } */ /* { 
dg-add-options bind_pic_locally } */ +/* { dg-warning "AVX512ER support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX512PF support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124FMAPS support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "AVX5124VNNIW support will be removed in GCC 15" "" { target *-*-* } 0 } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-26.c b/gcc/testsuite/gcc.target/i386/sse-26.c index 04ffe10f42a1..5e044b639c48 100644 --- a/gcc/testsuite/gcc.target/i386/sse-26.c +++ b/gcc/testsuite/gcc.target/i386/sse-26.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse2 -mmmx -mno-sse3 -mno-3dnow -mno-fma -mno-fxsr -mno-xsave -mno-rtm -mno-prfchw -mno-rdseed -mno-adx -mno-prefetchwt1 -mno-clflushopt -mno-xsavec -mno-xsaves -mno-clwb -mno-mwaitx -mno-clzero -mno-pku -mno-rdpid -mno-gfni -mno-shstk -mno-vaes -mno-vpclmulqdq" } */ /* { dg-add-options bind_pic_locally } */ +/* { dg-warning "PREFETCHWT1 support will be removed in GCC 15" "" { target *-*-* } 0 } */ #include "sse-13.c" From 39a1ab9c33b6067b1cc843408886e7ba709fbb62 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 08:29:44 +0100 Subject: [PATCH 075/311] Add IntegerRange for -param=min-nondebug-insn-uid= and fix vector growing in LRA and vec [PR112411] As documented, --param min-nondebug-insn-uid= is very useful in debugging -fcompare-debug issues in RTL dumps, without it it is really hard to find differences. With it, DEBUG_INSNs generally use low INSN_UIDs (1+) and non-DEBUG_INSNs use INSN_UIDs from the parameter up. For good results, the parameter should be larger than the number of DEBUG_INSNs in all or at least problematic functions, so I typically use --param min-nondebug-insn-uid=10000 or --param min-nondebug-insn-uid=1000. 
The PR is about using --param min-nondebug-insn-uid=2147483647 or similar behavior can be achieved with that minus some epsilon, INSN_UIDs for the non-debug insns then wrap around and as they are signed, all kinds of things break. Obviously, that can happen even without that option, but functions containing more than 2147483647 insns usually fail to compile much earlier due to running out of memory. As it is a debugging option, I'd prefer not to impose any drastically small limits on it because if a function has a lot of DEBUG_INSNs, it is useful to start still above them, otherwise the allocation of uids will DTRT even for DEBUG_INSNs but there will be then differences in non-DEBUG_INSN allocations. So, the following patch uses 0x40000000 limit, half the maximum amount for DEBUG_INSNs and half for non-DEBUG_INSNs, it will still result in very unlikely overflows in real world. Note, using large min-nondebug-insn-uid is very expensive for compile time memory and compile time, because DF as well as various RTL passes use arrays indexed by INSN_UIDs, e.g. LRA with sizeof (void *) elements, ditto df (df->insns). Now, in LRA I've run into ICEs already with --param min-nondebug-insn-uid=0x2aaaaaaa on 64-bit host. It uses a custom vector management and wants to grow allocation 1.5x when growing, but all this computation is done in int, so already 0x2aaaaaab * 3 / 2 + 1 overflows to negative value. And unlike vec.cc growing which also uses unsigned int type for the above (and the + 1 is not there), it also doesn't make sure if there is an overflow that it allocates at least as much as needed, vec.cc does if ... else /* Grow slower when large. */ alloc = (alloc * 3 / 2); /* If this is still too small, set it to the right size.
*/ if (alloc < desired) alloc = desired; so even if there is overflow during the * 1.5 computation, but desired is still representable in the range of the alloced counter (31-bits in both vec.h and LRA), it doesn't grow exponentially but at least works for the current value. The patch now uses there lra_insn_recog_data_len = index * 3U / 2; if (lra_insn_recog_data_len <= index) lra_insn_recog_data_len = index + 1; basically do what vec.cc does. I thought we could do better for both vec.cc and LRA on 64-bit hosts even without growing the allocated counters, but now that I look at it again, perhaps we can't. The above overflows already with original alloc or lra_insn_recog_data_len 0x55555556, where 0x55555555 * 3U / 2 is still 0x7fffffff and so representable in the 32-bit, but 0x55555556 * 3U / 2 is 1. I thought that we could use alloc * (size_t) 3 / 2 so that on 64-bit hosts it wouldn't overflow that quickly, but 0x55555556 * (size_t) 3 / 2 there is 0x80000001 which is still ok in unsigned, but given that vec.h then stores the counter into unsigned m_alloc:31; bit-field, it is too much. With the lra.cc change, one can actually compile simple function with -O0 on 64-bit host with --param min-nondebug-insn-uid=0x40000000 (i.e. the new limit), but already needed quite a big part of my 32GB RAM + 24GB swap. The patch adds a dg-skip-if for that case though, because such option is way too much for 32-bit hosts even at -O0 and empty function, and with -O3 on a longer function it is too much for average 64-bit host as well. Without the dg-skip-if I got on 64-bit host: cc1: out of memory allocating 571230784744 bytes after a total of 2772992 bytes and cc1: out of memory allocating 1388 bytes after a total of 2002944 bytes on 32-bit host. A test requiring more than 532GB of RAM on 64-bit hosts is just too much for our testsuite. 2023-12-08 Jakub Jelinek PR middle-end/112411 * params.opt (-param=min-nondebug-insn-uid=): Add IntegerRange(0, 1073741824).
* lra.cc (check_and_expand_insn_recog_data): Use 3U rather than 3 in * 3 / 2 computation and if the result is smaller or equal to index, use index + 1. * gcc.dg/params/blocksort-part.c: Add dg-skip-if for --param min-nondebug-insn-uid=1073741824. --- gcc/lra.cc | 4 +++- gcc/params.opt | 2 +- gcc/testsuite/gcc.dg/params/blocksort-part.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gcc/lra.cc b/gcc/lra.cc index 29e2a3506e15..69081a8e0255 100644 --- a/gcc/lra.cc +++ b/gcc/lra.cc @@ -768,7 +768,9 @@ check_and_expand_insn_recog_data (int index) if (lra_insn_recog_data_len > index) return; old = lra_insn_recog_data_len; - lra_insn_recog_data_len = index * 3 / 2 + 1; + lra_insn_recog_data_len = index * 3U / 2; + if (lra_insn_recog_data_len <= index) + lra_insn_recog_data_len = index + 1; lra_insn_recog_data = XRESIZEVEC (lra_insn_recog_data_t, lra_insn_recog_data, lra_insn_recog_data_len); diff --git a/gcc/params.opt b/gcc/params.opt index f1202abc00d0..f18765965d08 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -779,7 +779,7 @@ Common Joined UInteger Var(param_min_loop_cond_split_prob) Init(30) IntegerRange The minimum threshold for probability of semi-invariant condition statement to trigger loop split. -param=min-nondebug-insn-uid= -Common Joined UInteger Var(param_min_nondebug_insn_uid) Param +Common Joined UInteger Var(param_min_nondebug_insn_uid) Param IntegerRange(0, 1073741824) The minimum UID to be used for a nondebug insn. -param=min-size-for-stack-sharing= diff --git a/gcc/testsuite/gcc.dg/params/blocksort-part.c b/gcc/testsuite/gcc.dg/params/blocksort-part.c index 1e677878e7bd..cc15223c0de2 100644 --- a/gcc/testsuite/gcc.dg/params/blocksort-part.c +++ b/gcc/testsuite/gcc.dg/params/blocksort-part.c @@ -1,4 +1,5 @@ /* { dg-skip-if "AArch64 does not support these bounds." 
{ aarch64*-*-* } { "--param stack-clash-protection-*" } } */ +/* { dg-skip-if "For 32-bit hosts such param is too much and even for 64-bit might require hundreds of GB of RAM" { *-*-* } { "--param min-nondebug-insn-uid=1073741824" } } */ /*-------------------------------------------------------------*/ /*--- Block sorting machinery ---*/ From 2b2a0599e221786c2f019f1e8c02c80d23c71430 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 1 Dec 2023 10:09:33 +0800 Subject: [PATCH 076/311] LoongArch: Switch loongarch-def from C to C++ to make it possible. We'll use HOST_WIDE_INT in LoongArch static properties in following patches. To keep the same readability as C99 designated initializers, create a std::array like data structure with position setter function, and add field setter functions for structs used in loongarch-def.cc. Remove unneeded guards #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) in loongarch-def.h and loongarch-opts.h. gcc/ChangeLog: * config/loongarch/loongarch-def.h: Remove extern "C". (loongarch_isa_base_strings): Declare as loongarch_def_array instead of plain array. (loongarch_isa_ext_strings): Likewise. (loongarch_abi_base_strings): Likewise. (loongarch_abi_ext_strings): Likewise. (loongarch_cmodel_strings): Likewise. (loongarch_cpu_strings): Likewise. (loongarch_cpu_default_isa): Likewise. (loongarch_cpu_issue_rate): Likewise. (loongarch_cpu_multipass_dfa_lookahead): Likewise. (loongarch_cpu_cache): Likewise. (loongarch_cpu_align): Likewise. (loongarch_cpu_rtx_cost_data): Likewise. (loongarch_isa): Add a constructor and field setter functions. * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not include for target libraries. * config/loongarch/loongarch-opts.cc: Comment code that doesn't run and causes compilation errors. * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. (struct loongarch_rtx_cost_data): Likewise. (struct loongarch_cache): Likewise. (struct loongarch_align): Likewise. 
* config/loongarch/t-loongarch: Compile loongarch-def.cc with the C++ compiler. * config/loongarch/loongarch-def-array.h: New file for a std::array-like data structure with position setter function. * config/loongarch/loongarch-def.c: Rename to ... * config/loongarch/loongarch-def.cc: ... here. (loongarch_cpu_strings): Define as loongarch_def_array instead of plain array. (loongarch_cpu_default_isa): Likewise. (loongarch_cpu_cache): Likewise. (loongarch_cpu_align): Likewise. (loongarch_cpu_rtx_cost_data): Likewise. (loongarch_cpu_issue_rate): Likewise. (loongarch_cpu_multipass_dfa_lookahead): Likewise. (loongarch_isa_base_strings): Likewise. (loongarch_isa_ext_strings): Likewise. (loongarch_abi_base_strings): Likewise. (loongarch_abi_ext_strings): Likewise. (loongarch_cmodel_strings): Likewise. (abi_minimal_isa): Likewise. (loongarch_rtx_cost_optimize_size): Use field setter functions instead of designated initializers. (loongarch_rtx_cost_data): Implement default constructor. --- gcc/config/loongarch/loongarch-def-array.h | 40 ++++ gcc/config/loongarch/loongarch-def.c | 227 --------------------- gcc/config/loongarch/loongarch-def.cc | 187 +++++++++++++++++ gcc/config/loongarch/loongarch-def.h | 55 ++--- gcc/config/loongarch/loongarch-opts.cc | 7 + gcc/config/loongarch/loongarch-opts.h | 5 +- gcc/config/loongarch/loongarch-tune.h | 123 ++++++++++- gcc/config/loongarch/t-loongarch | 4 +- 8 files changed, 390 insertions(+), 258 deletions(-) create mode 100644 gcc/config/loongarch/loongarch-def-array.h delete mode 100644 gcc/config/loongarch/loongarch-def.c create mode 100644 gcc/config/loongarch/loongarch-def.cc diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h new file mode 100644 index 000000000000..bdb3e9c6a2bd --- /dev/null +++ b/gcc/config/loongarch/loongarch-def-array.h @@ -0,0 +1,40 @@ +/* A std::array like data structure for LoongArch static properties. + Copyright (C) 2023 Free Software Foundation, Inc.
+ +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef _LOONGARCH_DEF_ARRAY_H +#define _LOONGARCH_DEF_ARRAY_H 1 + +template +class loongarch_def_array { +private: + T arr[N]; +public: + loongarch_def_array () : arr{} {} + + T &operator[] (int n) { return arr[n]; } + const T &operator[] (int n) const { return arr[n]; } + + loongarch_def_array set (int idx, T &&value) + { + (*this)[idx] = value; + return *this; + } +}; + +#endif diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c deleted file mode 100644 index f22d488acb24..000000000000 --- a/gcc/config/loongarch/loongarch-def.c +++ /dev/null @@ -1,227 +0,0 @@ -/* LoongArch static properties. - Copyright (C) 2021-2023 Free Software Foundation, Inc. - Contributed by Loongson Ltd. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -. 
*/ - -#include "loongarch-def.h" -#include "loongarch-str.h" - -/* CPU property tables. */ -const char* -loongarch_cpu_strings[N_TUNE_TYPES] = { - [CPU_NATIVE] = STR_CPU_NATIVE, - [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT, - [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64, - [CPU_LA464] = STR_CPU_LA464, - [CPU_LA664] = STR_CPU_LA664, -}; - -struct loongarch_isa -loongarch_cpu_default_isa[N_ARCH_TYPES] = { - [CPU_LOONGARCH64] = { - .base = ISA_BASE_LA64V100, - .fpu = ISA_EXT_FPU64, - .simd = 0, - }, - [CPU_LA464] = { - .base = ISA_BASE_LA64V100, - .fpu = ISA_EXT_FPU64, - .simd = ISA_EXT_SIMD_LASX, - }, - [CPU_LA664] = { - .base = ISA_BASE_LA64V110, - .fpu = ISA_EXT_FPU64, - .simd = ISA_EXT_SIMD_LASX, - }, -}; - -struct loongarch_cache -loongarch_cpu_cache[N_TUNE_TYPES] = { - [CPU_LOONGARCH64] = { - .l1d_line_size = 64, - .l1d_size = 64, - .l2d_size = 256, - .simultaneous_prefetches = 4, - }, - [CPU_LA464] = { - .l1d_line_size = 64, - .l1d_size = 64, - .l2d_size = 256, - .simultaneous_prefetches = 4, - }, - [CPU_LA664] = { - .l1d_line_size = 64, - .l1d_size = 64, - .l2d_size = 256, - .simultaneous_prefetches = 4, - }, -}; - -struct loongarch_align -loongarch_cpu_align[N_TUNE_TYPES] = { - [CPU_LOONGARCH64] = { - .function = "32", - .label = "16", - }, - [CPU_LA464] = { - .function = "32", - .label = "16", - }, - [CPU_LA664] = { - .function = "32", - .label = "16", - }, -}; - - -/* Default RTX cost initializer. */ -#define COSTS_N_INSNS(N) ((N) * 4) -#define DEFAULT_COSTS \ - .fp_add = COSTS_N_INSNS (1), \ - .fp_mult_sf = COSTS_N_INSNS (2), \ - .fp_mult_df = COSTS_N_INSNS (4), \ - .fp_div_sf = COSTS_N_INSNS (6), \ - .fp_div_df = COSTS_N_INSNS (8), \ - .int_mult_si = COSTS_N_INSNS (1), \ - .int_mult_di = COSTS_N_INSNS (1), \ - .int_div_si = COSTS_N_INSNS (4), \ - .int_div_di = COSTS_N_INSNS (6), \ - .branch_cost = 6, \ - .memory_latency = 4 - -/* The following properties cannot be looked up directly using "cpucfg". 
- So it is necessary to provide a default value for "unknown native" - tune targets (i.e. -mtune=native while PRID does not correspond to - any known "-mtune" type). */ - -struct loongarch_rtx_cost_data -loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = { - [CPU_NATIVE] = { - DEFAULT_COSTS - }, - [CPU_LOONGARCH64] = { - DEFAULT_COSTS - }, - [CPU_LA464] = { - DEFAULT_COSTS - }, - [CPU_LA664] = { - DEFAULT_COSTS - }, -}; - -/* RTX costs to use when optimizing for size. */ -const struct loongarch_rtx_cost_data -loongarch_rtx_cost_optimize_size = { - .fp_add = 4, - .fp_mult_sf = 4, - .fp_mult_df = 4, - .fp_div_sf = 4, - .fp_div_df = 4, - .int_mult_si = 4, - .int_mult_di = 4, - .int_div_si = 4, - .int_div_di = 4, - .branch_cost = 6, - .memory_latency = 4, -}; - -int -loongarch_cpu_issue_rate[N_TUNE_TYPES] = { - [CPU_NATIVE] = 4, - [CPU_LOONGARCH64] = 4, - [CPU_LA464] = 4, - [CPU_LA664] = 6, -}; - -int -loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = { - [CPU_NATIVE] = 4, - [CPU_LOONGARCH64] = 4, - [CPU_LA464] = 4, - [CPU_LA664] = 6, -}; - -/* Wiring string definitions from loongarch-str.h to global arrays - with standard index values from loongarch-opts.h, so we can - print config-related messages and do ABI self-spec filtering - from the driver in a self-consistent manner. 
*/ - -const char* -loongarch_isa_base_strings[N_ISA_BASE_TYPES] = { - [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100, - [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110, -}; - -const char* -loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = { - [ISA_EXT_NONE] = STR_NONE, - [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32, - [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64, - [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX, - [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX, -}; - -const char* -loongarch_abi_base_strings[N_ABI_BASE_TYPES] = { - [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D, - [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F, - [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S, -}; - -const char* -loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = { - [ABI_EXT_BASE] = STR_ABI_EXT_BASE, -}; - -const char* -loongarch_cmodel_strings[] = { - [CMODEL_NORMAL] = STR_CMODEL_NORMAL, - [CMODEL_TINY] = STR_CMODEL_TINY, - [CMODEL_TINY_STATIC] = STR_CMODEL_TS, - [CMODEL_MEDIUM] = STR_CMODEL_MEDIUM, - [CMODEL_LARGE] = STR_CMODEL_LARGE, - [CMODEL_EXTREME] = STR_CMODEL_EXTREME, -}; - - -/* ABI-related definitions. */ -const struct loongarch_isa -abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = { - [ABI_BASE_LP64D] = { - [ABI_EXT_BASE] = { - .base = ISA_BASE_LA64V100, - .fpu = ISA_EXT_FPU64, - .simd = 0 - }, - }, - [ABI_BASE_LP64F] = { - [ABI_EXT_BASE] = { - .base = ISA_BASE_LA64V100, - .fpu = ISA_EXT_FPU32, - .simd = 0 - }, - }, - [ABI_BASE_LP64S] = { - [ABI_EXT_BASE] = { - .base = ISA_BASE_LA64V100, - .fpu = ISA_EXT_NONE, - .simd = 0 - }, - }, -}; diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc new file mode 100644 index 000000000000..6990c86c2c49 --- /dev/null +++ b/gcc/config/loongarch/loongarch-def.cc @@ -0,0 +1,187 @@ +/* LoongArch static properties. + Copyright (C) 2021-2023 Free Software Foundation, Inc. + Contributed by Loongson Ltd. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "loongarch-def.h" +#include "loongarch-str.h" + +template +using array = loongarch_def_array; + +template +using array_tune = array; + +template +using array_arch = array; + +/* CPU property tables. */ +array_tune loongarch_cpu_strings = array_tune () + .set (CPU_NATIVE, STR_CPU_NATIVE) + .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT) + .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64) + .set (CPU_LA464, STR_CPU_LA464) + .set (CPU_LA664, STR_CPU_LA664); + +array_arch loongarch_cpu_default_isa = + array_arch () + .set (CPU_LOONGARCH64, + loongarch_isa () + .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64)) + .set (CPU_LA464, + loongarch_isa () + .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)) + .set (CPU_LA664, + loongarch_isa () + .base_ (ISA_BASE_LA64V110) + .fpu_ (ISA_EXT_FPU64) + .simd_ (ISA_EXT_SIMD_LASX)); + +static inline loongarch_cache la464_cache () +{ + return loongarch_cache () + .l1d_line_size_ (64) + .l1d_size_ (64) + .l2d_size_ (256) + .simultaneous_prefetches_ (4); +} + +array_tune loongarch_cpu_cache = + array_tune () + .set (CPU_LOONGARCH64, la464_cache ()) + .set (CPU_LA464, la464_cache ()) + .set (CPU_LA664, la464_cache ()); + +static inline loongarch_align la464_align () +{ + return loongarch_align ().function_ ("32").label_ ("16"); +} + +array_tune loongarch_cpu_align = + array_tune () + .set (CPU_LOONGARCH64, la464_align ()) + .set 
(CPU_LA464, la464_align ()) + .set (CPU_LA664, la464_align ()); + +#define COSTS_N_INSNS(N) ((N) * 4) + +/* Default RTX cost initializer. */ +loongarch_rtx_cost_data::loongarch_rtx_cost_data () + : fp_add (COSTS_N_INSNS (1)), + fp_mult_sf (COSTS_N_INSNS (2)), + fp_mult_df (COSTS_N_INSNS (4)), + fp_div_sf (COSTS_N_INSNS (6)), + fp_div_df (COSTS_N_INSNS (8)), + int_mult_si (COSTS_N_INSNS (1)), + int_mult_di (COSTS_N_INSNS (1)), + int_div_si (COSTS_N_INSNS (4)), + int_div_di (COSTS_N_INSNS (6)), + branch_cost (6), + memory_latency (4) {} + +/* The following properties cannot be looked up directly using "cpucfg". + So it is necessary to provide a default value for "unknown native" + tune targets (i.e. -mtune=native while PRID does not correspond to + any known "-mtune" type). Currently all numbers are default. */ +array_tune loongarch_cpu_rtx_cost_data = + array_tune (); + +/* RTX costs to use when optimizing for size. */ +const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size = + loongarch_rtx_cost_data () + .fp_add_ (4) + .fp_mult_sf_ (4) + .fp_mult_df_ (4) + .fp_div_sf_ (4) + .fp_div_df_ (4) + .int_mult_si_ (4) + .int_mult_di_ (4) + .int_div_si_ (4) + .int_div_di_ (4); + +array_tune loongarch_cpu_issue_rate = array_tune () + .set (CPU_NATIVE, 4) + .set (CPU_LOONGARCH64, 4) + .set (CPU_LA464, 4) + .set (CPU_LA664, 6); + +array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () + .set (CPU_NATIVE, 4) + .set (CPU_LOONGARCH64, 4) + .set (CPU_LA464, 4) + .set (CPU_LA664, 6); + +/* Wiring string definitions from loongarch-str.h to global arrays + with standard index values from loongarch-opts.h, so we can + print config-related messages and do ABI self-spec filtering + from the driver in a self-consistent manner. 
*/ + +array loongarch_isa_base_strings = + array () + .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) + .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); + +array loongarch_isa_ext_strings = + array () + .set (ISA_EXT_NONE, STR_NONE) + .set (ISA_EXT_FPU32, STR_ISA_EXT_FPU32) + .set (ISA_EXT_FPU64, STR_ISA_EXT_FPU64) + .set (ISA_EXT_SIMD_LSX, STR_ISA_EXT_LSX) + .set (ISA_EXT_SIMD_LASX, STR_ISA_EXT_LASX); + +array loongarch_abi_base_strings = + array () + .set (ABI_BASE_LP64D, STR_ABI_BASE_LP64D) + .set (ABI_BASE_LP64F, STR_ABI_BASE_LP64F) + .set (ABI_BASE_LP64S, STR_ABI_BASE_LP64S); + +array loongarch_abi_ext_strings = + array () + .set (ABI_EXT_BASE, STR_ABI_EXT_BASE); + +array loongarch_cmodel_strings = + array () + .set (CMODEL_NORMAL, STR_CMODEL_NORMAL) + .set (CMODEL_TINY, STR_CMODEL_TINY) + .set (CMODEL_TINY_STATIC, STR_CMODEL_TS) + .set (CMODEL_MEDIUM, STR_CMODEL_MEDIUM) + .set (CMODEL_LARGE, STR_CMODEL_LARGE) + .set (CMODEL_EXTREME, STR_CMODEL_EXTREME); + +array, N_ABI_BASE_TYPES> + abi_minimal_isa = array, + N_ABI_BASE_TYPES> () + .set (ABI_BASE_LP64D, + array () + .set (ABI_EXT_BASE, + loongarch_isa () + .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU64))) + .set (ABI_BASE_LP64F, + array () + .set (ABI_EXT_BASE, + loongarch_isa () + .base_ (ISA_BASE_LA64V100) + .fpu_ (ISA_EXT_FPU32))) + .set (ABI_BASE_LP64S, + array () + .set (ABI_EXT_BASE, + loongarch_isa ().base_ (ISA_BASE_LA64V100))); diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h index 851ff864eb25..68a9a461e549 100644 --- a/gcc/config/loongarch/loongarch-def.h +++ b/gcc/config/loongarch/loongarch-def.h @@ -50,20 +50,18 @@ along with GCC; see the file COPYING3. If not see #include #endif +#include "loongarch-def-array.h" #include "loongarch-tune.h" -#ifdef __cplusplus -extern "C" { -#endif - /* enum isa_base */ -extern const char* loongarch_isa_base_strings[]; /* LoongArch V1.00. */ #define ISA_BASE_LA64V100 0 /* LoongArch V1.10. 
*/ #define ISA_BASE_LA64V110 1 #define N_ISA_BASE_TYPES 2 +extern loongarch_def_array + loongarch_isa_base_strings; #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) /* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is @@ -72,7 +70,6 @@ extern int64_t loongarch_isa_base_features[]; #endif /* enum isa_ext_* */ -extern const char* loongarch_isa_ext_strings[]; #define ISA_EXT_NONE 0 #define ISA_EXT_FPU32 1 #define ISA_EXT_FPU64 2 @@ -80,13 +77,16 @@ extern const char* loongarch_isa_ext_strings[]; #define ISA_EXT_SIMD_LSX 3 #define ISA_EXT_SIMD_LASX 4 #define N_ISA_EXT_TYPES 5 +extern loongarch_def_array + loongarch_isa_ext_strings; /* enum abi_base */ -extern const char* loongarch_abi_base_strings[]; #define ABI_BASE_LP64D 0 #define ABI_BASE_LP64F 1 #define ABI_BASE_LP64S 2 #define N_ABI_BASE_TYPES 3 +extern loongarch_def_array + loongarch_abi_base_strings; #define TO_LP64_ABI_BASE(C) (C) @@ -99,12 +99,12 @@ extern const char* loongarch_abi_base_strings[]; /* enum abi_ext */ -extern const char* loongarch_abi_ext_strings[]; #define ABI_EXT_BASE 0 #define N_ABI_EXT_TYPES 1 +extern loongarch_def_array + loongarch_abi_ext_strings; /* enum cmodel */ -extern const char* loongarch_cmodel_strings[]; #define CMODEL_NORMAL 0 #define CMODEL_TINY 1 #define CMODEL_TINY_STATIC 2 @@ -112,6 +112,8 @@ extern const char* loongarch_cmodel_strings[]; #define CMODEL_LARGE 4 #define CMODEL_EXTREME 5 #define N_CMODEL_TYPES 6 +extern loongarch_def_array + loongarch_cmodel_strings; /* enum explicit_relocs */ #define EXPLICIT_RELOCS_AUTO 0 @@ -126,7 +128,6 @@ extern const char* loongarch_cmodel_strings[]; #define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET) -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) /* Internal representation of the target. */ struct loongarch_isa { @@ -139,6 +140,13 @@ struct loongarch_isa Using int64_t instead of HOST_WIDE_INT for C compatibility. 
*/ int64_t evolution; + + loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {} + loongarch_isa base_ (int _base) { base = _base; return *this; } + loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; } + loongarch_isa simd_ (int _simd) { simd = _simd; return *this; } + loongarch_isa evolution_ (int64_t _evolution) + { evolution = _evolution; return *this; } }; struct loongarch_abi @@ -156,9 +164,6 @@ struct loongarch_target int cmodel; /* CMODEL_ */ }; -extern struct loongarch_isa loongarch_cpu_default_isa[]; -#endif - /* CPU properties. */ /* index */ #define CPU_NATIVE 0 @@ -170,15 +175,19 @@ extern struct loongarch_isa loongarch_cpu_default_isa[]; #define N_TUNE_TYPES 5 /* parallel tables. */ -extern const char* loongarch_cpu_strings[]; -extern int loongarch_cpu_issue_rate[]; -extern int loongarch_cpu_multipass_dfa_lookahead[]; +extern loongarch_def_array + loongarch_cpu_strings; +extern loongarch_def_array + loongarch_cpu_default_isa; +extern loongarch_def_array + loongarch_cpu_issue_rate; +extern loongarch_def_array + loongarch_cpu_multipass_dfa_lookahead; +extern loongarch_def_array + loongarch_cpu_cache; +extern loongarch_def_array + loongarch_cpu_align; +extern loongarch_def_array + loongarch_cpu_rtx_cost_data; -extern struct loongarch_cache loongarch_cpu_cache[]; -extern struct loongarch_align loongarch_cpu_align[]; -extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[]; - -#ifdef __cplusplus -} -#endif #endif /* LOONGARCH_DEF_H */ diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc index b5836f198c0b..7b63ef57a2a6 100644 --- a/gcc/config/loongarch/loongarch-opts.cc +++ b/gcc/config/loongarch/loongarch-opts.cc @@ -163,6 +163,7 @@ loongarch_config_target (struct loongarch_target *target, int follow_multilib_list_p) { struct loongarch_target t; + if (!target) return; @@ -657,12 +658,18 @@ abi_str (struct loongarch_abi abi) strlen (loongarch_abi_base_strings[abi.base])); else { + /* This 
situation has not yet occurred, so in order to avoid the + -Warray-bounds warning during C++ syntax checking, this part + of the code is commented first. */ + /* APPEND_STRING (loongarch_abi_base_strings[abi.base]) APPEND1 ('/') APPEND_STRING (loongarch_abi_ext_strings[abi.ext]) APPEND1 ('\0') return XOBFINISH (&msg_obstack, const char *); + */ + gcc_unreachable (); } } diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h index fa3773223bca..e1ec702335de 100644 --- a/gcc/config/loongarch/loongarch-opts.h +++ b/gcc/config/loongarch/loongarch-opts.h @@ -21,7 +21,10 @@ along with GCC; see the file COPYING3. If not see #ifndef LOONGARCH_OPTS_H #define LOONGARCH_OPTS_H +/* This is a C++ header and it shouldn't be used by target libraries. */ +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) #include "loongarch-def.h" +#endif /* Target configuration */ extern struct loongarch_target la_target; @@ -33,7 +36,6 @@ struct loongarch_flags { int sx[2]; }; -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) /* Initialize loongarch_target from separate option variables. */ void @@ -54,7 +56,6 @@ void loongarch_update_gcc_opt_status (struct loongarch_target *target, struct gcc_options *opts, struct gcc_options *opts_set); -#endif /* Macros for common conditional expressions used in loongarch.{c,h,md} */ diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h index 5c03262daffa..4aa01c54c08d 100644 --- a/gcc/config/loongarch/loongarch-tune.h +++ b/gcc/config/loongarch/loongarch-tune.h @@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see #ifndef LOONGARCH_TUNE_H #define LOONGARCH_TUNE_H +#include "loongarch-def-array.h" + /* RTX costs of various operations on the different architectures. 
*/ struct loongarch_rtx_cost_data { @@ -35,6 +37,76 @@ struct loongarch_rtx_cost_data unsigned short int_div_di; unsigned short branch_cost; unsigned short memory_latency; + + /* Default RTX cost initializer, implemented in loongarch-def.cc. */ + loongarch_rtx_cost_data (); + + loongarch_rtx_cost_data fp_add_ (unsigned short _fp_add) + { + fp_add = _fp_add; + return *this; + } + + loongarch_rtx_cost_data fp_mult_sf_ (unsigned short _fp_mult_sf) + { + fp_mult_sf = _fp_mult_sf; + return *this; + } + + loongarch_rtx_cost_data fp_mult_df_ (unsigned short _fp_mult_df) + { + fp_mult_df = _fp_mult_df; + return *this; + } + + loongarch_rtx_cost_data fp_div_sf_ (unsigned short _fp_div_sf) + { + fp_div_sf = _fp_div_sf; + return *this; + } + + loongarch_rtx_cost_data fp_div_df_ (unsigned short _fp_div_df) + { + fp_div_df = _fp_div_df; + return *this; + } + + loongarch_rtx_cost_data int_mult_si_ (unsigned short _int_mult_si) + { + int_mult_si = _int_mult_si; + return *this; + } + + loongarch_rtx_cost_data int_mult_di_ (unsigned short _int_mult_di) + { + int_mult_di = _int_mult_di; + return *this; + } + + loongarch_rtx_cost_data int_div_si_ (unsigned short _int_div_si) + { + int_div_si = _int_div_si; + return *this; + } + + loongarch_rtx_cost_data int_div_di_ (unsigned short _int_div_di) + { + int_div_di = _int_div_di; + return *this; + } + + loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost) + { + branch_cost = _branch_cost; + return *this; + } + + loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency) + { + memory_latency = _memory_latency; + return *this; + } + }; /* Costs to use when optimizing for size. */ @@ -42,10 +114,39 @@ extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size; /* Cache size record of known processor models. 
*/ struct loongarch_cache { - int l1d_line_size; /* bytes */ - int l1d_size; /* KiB */ - int l2d_size; /* kiB */ - int simultaneous_prefetches; /* number of parallel prefetch */ + int l1d_line_size; /* bytes */ + int l1d_size; /* KiB */ + int l2d_size; /* kiB */ + int simultaneous_prefetches; /* number of parallel prefetch */ + + loongarch_cache () : l1d_line_size (0), + l1d_size (0), + l2d_size (0), + simultaneous_prefetches (0) {} + + loongarch_cache l1d_line_size_ (int _l1d_line_size) + { + l1d_line_size = _l1d_line_size; + return *this; + } + + loongarch_cache l1d_size_ (int _l1d_size) + { + l1d_size = _l1d_size; + return *this; + } + + loongarch_cache l2d_size_ (int _l2d_size) + { + l2d_size = _l2d_size; + return *this; + } + + loongarch_cache simultaneous_prefetches_ (int _simultaneous_prefetches) + { + simultaneous_prefetches = _simultaneous_prefetches; + return *this; + } }; /* Alignment for functions and labels for best performance. For new uarchs @@ -54,6 +155,20 @@ struct loongarch_cache { struct loongarch_align { const char *function; /* default value for -falign-functions */ const char *label; /* default value for -falign-labels */ + + loongarch_align () : function (nullptr), label (nullptr) {} + + loongarch_align function_ (const char *_function) + { + function = _function; + return *this; + } + + loongarch_align label_ (const char *_label) + { + label = _label; + return *this; + } }; #endif /* LOONGARCH_TUNE_H */ diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch index 7e65bb6e2a85..10a984f3cb12 100644 --- a/gcc/config/loongarch/t-loongarch +++ b/gcc/config/loongarch/t-loongarch @@ -64,8 +64,8 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< -loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H) - $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $< +loongarch-def.o: 
$(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \ From f6cc6eb5b6c7833385f66dbc878e8f6ec1ebdb8f Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Fri, 1 Dec 2023 11:51:51 +0800 Subject: [PATCH 077/311] LoongArch: Remove the definition of ISA_BASE_LA64V110 from the code. The instructions defined in the LoongArch Reference Manual v1.1 do not constitute a v1.1 version of the instruction set: a CPU defined later may support only some of the instructions in the LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and related definitions are removed here. gcc/ChangeLog: * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. * config/loongarch/genopts/loongarch.opt.in: Likewise. * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. (fill_native_cpu_config): Define a new variable hw_isa_evolution to record the extended instruction set support read from cpucfg. * config/loongarch/loongarch-def.cc: Set evolution at initialization. * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. (ISA_BASE_LA64V110): Likewise. (N_ISA_BASE_TYPES): Likewise. (defined): Likewise. * config/loongarch/loongarch-opts.cc: Likewise. * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. (ISA_BASE_IS_LA64V110): Likewise. * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. * config/loongarch/loongarch.opt: Regenerate.
--- .../loongarch/genopts/loongarch-strings | 1 - gcc/config/loongarch/genopts/loongarch.opt.in | 3 --- gcc/config/loongarch/loongarch-cpu.cc | 23 +++++-------------- gcc/config/loongarch/loongarch-def.cc | 14 +++++++---- gcc/config/loongarch/loongarch-def.h | 12 ++-------- gcc/config/loongarch/loongarch-opts.cc | 3 --- gcc/config/loongarch/loongarch-opts.h | 4 +--- gcc/config/loongarch/loongarch-str.h | 1 - gcc/config/loongarch/loongarch.opt | 3 --- 9 files changed, 19 insertions(+), 45 deletions(-) diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings index b2070c83ed00..7bc4824007e2 100644 --- a/gcc/config/loongarch/genopts/loongarch-strings +++ b/gcc/config/loongarch/genopts/loongarch-strings @@ -30,7 +30,6 @@ STR_CPU_LA664 la664 # Base architecture STR_ISA_BASE_LA64V100 la64 -STR_ISA_BASE_LA64V110 la64v1.1 # -mfpu OPTSTR_ISA_EXT_FPU fpu diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in index 8af6cc6f532e..483b185b059b 100644 --- a/gcc/config/loongarch/genopts/loongarch.opt.in +++ b/gcc/config/loongarch/genopts/loongarch.opt.in @@ -32,9 +32,6 @@ Basic ISAs of LoongArch: EnumValue Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100) -EnumValue -Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110) - ;; ISA extensions / adjustments Enum Name(isa_ext_fpu) Type(int) diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc index 622df47916f3..4033320d0e1e 100644 --- a/gcc/config/loongarch/loongarch-cpu.cc +++ b/gcc/config/loongarch/loongarch-cpu.cc @@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "tm.h" #include "diagnostic-core.h" #include "loongarch-def.h" @@ -32,19 +31,6 @@ along with GCC; see the file COPYING3. 
If not see #include "loongarch-cpucfg-map.h" #include "loongarch-str.h" -/* loongarch_isa_base_features defined here instead of loongarch-def.c - because we need to use options.h. Pay attention on the order of elements - in the initializer becaue ISO C++ does not allow C99 designated - initializers! */ - -#define ISA_BASE_LA64V110_FEATURES \ - (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \ - | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS) - -int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = { - /* [ISA_BASE_LA64V100] = */ 0, - /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES, -}; /* Native CPU detection with "cpucfg" */ static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 }; @@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt) /* Use the native value anyways. */ preset.simd = tmp; + + int64_t hw_isa_evolution = 0; + /* Features added during ISA evolution. */ for (const auto &entry: cpucfg_map) if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit) - preset.evolution |= entry.isa_evolution_bit; + hw_isa_evolution |= entry.isa_evolution_bit; if (native_cpu_type != CPU_NATIVE) { /* Check if the local CPU really supports the features of the base ISA of probed native_cpu_type. If any feature is not detected, either GCC or the hardware is buggy. 
*/ - auto base_isa_feature = loongarch_isa_base_features[preset.base]; - if ((preset.evolution & base_isa_feature) != base_isa_feature) + if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution) warning (0, "detected base architecture %qs, but some of its " "features are not detected; the detected base " @@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt) "features will be enabled", loongarch_isa_base_strings[preset.base]); } + preset.evolution = hw_isa_evolution; } if (tune_native_p) diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index 6990c86c2c49..bc6997e45b5c 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + #include "loongarch-def.h" #include "loongarch-str.h" @@ -51,9 +56,11 @@ array_arch loongarch_cpu_default_isa = .simd_ (ISA_EXT_SIMD_LASX)) .set (CPU_LA664, loongarch_isa () - .base_ (ISA_BASE_LA64V110) + .base_ (ISA_BASE_LA64V100) .fpu_ (ISA_EXT_FPU64) - .simd_ (ISA_EXT_SIMD_LASX)); + .simd_ (ISA_EXT_SIMD_LASX) + .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA + | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); static inline loongarch_cache la464_cache () { @@ -136,8 +143,7 @@ array_tune loongarch_cpu_multipass_dfa_lookahead = array_tune () array loongarch_isa_base_strings = array () - .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100) - .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110); + .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100); array loongarch_isa_ext_strings = array () diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h index 68a9a461e549..a2c86df5bd94 100644 --- a/gcc/config/loongarch/loongarch-def.h +++ b/gcc/config/loongarch/loongarch-def.h @@ -56,19 +56,11 
@@ along with GCC; see the file COPYING3. If not see /* enum isa_base */ /* LoongArch V1.00. */ -#define ISA_BASE_LA64V100 0 -/* LoongArch V1.10. */ -#define ISA_BASE_LA64V110 1 -#define N_ISA_BASE_TYPES 2 +#define ISA_BASE_LA64V100 0 +#define N_ISA_BASE_TYPES 1 extern loongarch_def_array loongarch_isa_base_strings; -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) -/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is - we cannot use the C++ header options.h in loongarch-def.c. */ -extern int64_t loongarch_isa_base_features[]; -#endif - /* enum isa_ext_* */ #define ISA_EXT_NONE 0 #define ISA_EXT_FPU32 1 diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc index 7b63ef57a2a6..8689f2dab051 100644 --- a/gcc/config/loongarch/loongarch-opts.cc +++ b/gcc/config/loongarch/loongarch-opts.cc @@ -285,9 +285,6 @@ config_target_isa: /* Get default ISA from "-march" or its default value. */ t.isa = loongarch_cpu_default_isa[t.cpu_arch]; - if (t.cpu_arch != CPU_NATIVE) - t.isa.evolution |= loongarch_isa_base_features[t.isa.base]; - /* Apply incremental changes. */ /* "-march=native" overrides the default FPU type. 
*/ diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h index e1ec702335de..651c1c18ca84 100644 --- a/gcc/config/loongarch/loongarch-opts.h +++ b/gcc/config/loongarch/loongarch-opts.h @@ -77,8 +77,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, #define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64) #define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D) -#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \ - || la_target.isa.base == ISA_BASE_LA64V110) +#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100) #define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \ || la_target.abi.base == ABI_BASE_LP64F \ || la_target.abi.base == ABI_BASE_LP64S) @@ -90,7 +89,6 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target, /* TARGET_ macros for use in *.md template conditionals */ #define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464) #define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664) -#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110) /* Note: optimize_size may vary across functions, while -m[no]-memcpy imposes a global constraint. */ diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h index 0384493765c5..7c78d1443d5f 100644 --- a/gcc/config/loongarch/loongarch-str.h +++ b/gcc/config/loongarch/loongarch-str.h @@ -33,7 +33,6 @@ along with GCC; see the file COPYING3. 
If not see #define STR_CPU_LA664 "la664" #define STR_ISA_BASE_LA64V100 "la64" -#define STR_ISA_BASE_LA64V110 "la64v1.1" #define OPTSTR_ISA_EXT_FPU "fpu" #define STR_NONE "none" diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index 4d36e3ec4de7..41e6424e8616 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -40,9 +40,6 @@ Basic ISAs of LoongArch: EnumValue Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100) -EnumValue -Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110) - ;; ISA extensions / adjustments Enum Name(isa_ext_fpu) Type(int) From 8f60f5499e10d19218cada082e0909516ebf0e74 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 08:56:33 +0100 Subject: [PATCH 078/311] haifa-sched: Avoid overflows in extend_h_i_d [PR112411] On Thu, Dec 07, 2023 at 09:36:23AM +0100, Jakub Jelinek wrote: > Without the dg-skip-if I got on 64-bit host with > -O3 --param min-nondebug-insn-uid=0x40000000: > cc1: out of memory allocating 571230784744 bytes after a total of 2772992 bytes I've looked at this and the problem is in haifa-sched.cc: 9047 h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true); get_max_uid () is 0x4000024d with the --param min-nondebug-insn-uid=0x40000000 and so 3 * get_max_uid () / 2 actually overflows to -536870028 but as vec.h then treats the value as unsigned, it attempts to allocate 0xe0000374U * 152UL bytes, i.e. those 532GB. If the above is fixed to do 3U * get_max_uid () / 2 instead, it will get slightly better and will only need 0x60000373U * 152UL bytes, i.e. 228GB. Perhaps more could be helped by making the vector indirect (contain pointers to haifa_insn_data_def rather than the structures themselves) and pool allocate those, but the more important question is how sparse are uids in normal compilations without those large --param min-nondebug-insn-uid= parameters. 
Because if they aren't enough, such a change would increase compile time memory just to help the unusual case. 2023-12-08 Jakub Jelinek PR middle-end/112411 * haifa-sched.cc (extend_h_i_d): Use 3U instead of 3 in 3 * get_max_uid () / 2 calculation. --- gcc/haifa-sched.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/haifa-sched.cc b/gcc/haifa-sched.cc index 8e8add709b3a..35c2c9f2bdc7 100644 --- a/gcc/haifa-sched.cc +++ b/gcc/haifa-sched.cc @@ -9044,7 +9044,7 @@ extend_h_i_d (void) if (reserve > 0 && ! h_i_d.space (reserve)) { - h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true); + h_i_d.safe_grow_cleared (3U * get_max_uid () / 2, true); sched_extend_target (); } } From b5cfbb8f4cceb621d6812eec3e0fb876b648241c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 09:02:15 +0100 Subject: [PATCH 079/311] vr-values: Avoid ICEs on large _BitInt cast to floating point [PR112901] For casts from integers to floating point, simplify_float_conversion_using_ranges uses SCALAR_INT_TYPE_MODE and queries optabs on the optimization it wants to make. That doesn't really work for large/huge BITINT_TYPE, those have BLKmode which is not scalar int mode. Querying an optab is not useful for that either. I think it is best to just skip this optimization for those bitints, after all, bitint lowering uses ranges already to determine minimum precision for bitint operands of the integer to float casts. 2023-12-08 Jakub Jelinek PR tree-optimization/112901 * vr-values.cc (simplify_using_ranges::simplify_float_conversion_using_ranges): Return false if rhs1 has BITINT_TYPE type with BLKmode TYPE_MODE. * gcc.dg/bitint-51.c: New test. 
--- gcc/testsuite/gcc.dg/bitint-51.c | 14 ++++++++++++++ gcc/vr-values.cc | 5 +++++ 2 files changed, 19 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/bitint-51.c diff --git a/gcc/testsuite/gcc.dg/bitint-51.c b/gcc/testsuite/gcc.dg/bitint-51.c new file mode 100644 index 000000000000..057407099e40 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-51.c @@ -0,0 +1,14 @@ +/* PR tree-optimization/112901 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-O2" } */ + +float f; +#if __BITINT_MAXWIDTH__ >= 256 +_BitInt(256) i; + +void +foo (void) +{ + f *= 4 * i; +} +#endif diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc index ecb294131b06..711538912aa8 100644 --- a/gcc/vr-values.cc +++ b/gcc/vr-values.cc @@ -1656,6 +1656,11 @@ simplify_using_ranges::simplify_float_conversion_using_ranges || vr.undefined_p ()) return false; + /* The code below doesn't work for large/huge _BitInt, nor is really + needed for those, bitint lowering does use ranges already. */ + if (TREE_CODE (TREE_TYPE (rhs1)) == BITINT_TYPE + && TYPE_MODE (TREE_TYPE (rhs1)) == BLKmode) + return false; /* First check if we can use a signed type in place of an unsigned. */ scalar_int_mode rhs_mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs1)); if (TYPE_UNSIGNED (TREE_TYPE (rhs1)) From f32e49add80cb3a22969b12034509d326aa69c5d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 09:03:18 +0100 Subject: [PATCH 080/311] lower-bitint: Avoid merging non-mergeable stmt with cast and mergeable stmt [PR112902] Before bitint lowering, the IL has: b.0_1 = b; _2 = -b.0_1; _3 = (unsigned _BitInt(512)) _2; a.1_4 = a; a.2_5 = (unsigned _BitInt(512)) a.1_4; _6 = _3 * a.2_5; on the first function. Now, gimple_lower_bitint has an optimization (when not -O0) that it avoids assigning underlying VAR_DECLs for certain SSA_NAMEs where it is possible to lower it in a single loop (or straight line code) rather than in multiple loops. So, e.g. 
the multiplication above uses handle_operand_addr, which can deal with INTEGER_CST arguments, loads but also casts, so it is fine not to assign an underlying VAR_DECL for SSA_NAMEs a.1_4 and a.2_5, as the multiplication can handle it fine. The more problematic case is the other multiplication operand. It is again a result of a (in this case narrowing) cast, so it is fine not to assign VAR_DECL for _3. Normally we can merge the load (b.0_1) with the negation (_2) and even with the following cast (_3). If _3 was used in a mergeable operation like addition, subtraction, negation, &|^ or equality comparison, all of b.0_1, _2 and _3 could be without underlying VAR_DECLs. The problem is that the current code does that even when the cast is used by a non-mergeable operation, and handle_operand_addr certainly can't handle the mergeable operations feeding the rhs1 of the cast, for multiplication we don't emit any loop in which it could appear, for other operations like shifts or non-equality comparisons we emit loops, but either in the reverse direction or with unpredictable indexes (for shifts). So, in order to lower the above correctly, we need to have an underlying VAR_DECL for either _2 or _3; if we choose _2, then the load and negation would be done in one loop and extension handled as part of the multiplication, if we choose _3, then the load, negation and cast are done in one loop and the multiplication just uses the underlying VAR_DECL computed by that. It is far easier to do this for _3, which is what the following patch implements. It actually already had code for most of it, just it did that for widening casts only (optimize unless the cast rhs1 is not SSA_NAME, or is SSA_NAME defined in some other bb, or with more than one use, etc.). This falls through into such code even for the narrowing or same precision casts, unless the cast is used in a mergeable operation. 
2023-12-08 Jakub Jelinek PR tree-optimization/112902 * gimple-lower-bitint.cc (gimple_lower_bitint): For a narrowing or same precision cast don't set SSA_NAME_VERSION in m_names only if use_stmt is mergeable_op or fall through into the check that use is a store or rhs1 is not mergeable or other reasons prevent merging. * gcc.dg/bitint-52.c: New test. --- gcc/gimple-lower-bitint.cc | 13 ++++++++----- gcc/testsuite/gcc.dg/bitint-52.c | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/bitint-52.c diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index d2026f6c9057..c55c32fb40d6 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -5989,10 +5989,11 @@ gimple_lower_bitint (void) { if (TREE_CODE (TREE_TYPE (rhs1)) != BITINT_TYPE || (bitint_precision_kind (TREE_TYPE (rhs1)) - < bitint_prec_large) - || (TYPE_PRECISION (TREE_TYPE (rhs1)) - >= TYPE_PRECISION (TREE_TYPE (s))) - || mergeable_op (SSA_NAME_DEF_STMT (s))) + < bitint_prec_large)) + continue; + if ((TYPE_PRECISION (TREE_TYPE (rhs1)) + >= TYPE_PRECISION (TREE_TYPE (s))) + && mergeable_op (use_stmt)) continue; /* Prevent merging a widening non-mergeable cast on result of some narrower mergeable op @@ -6011,7 +6012,9 @@ gimple_lower_bitint (void) || !mergeable_op (SSA_NAME_DEF_STMT (rhs1)) || gimple_store_p (use_stmt)) continue; - if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1))) + if ((TYPE_PRECISION (TREE_TYPE (rhs1)) + < TYPE_PRECISION (TREE_TYPE (s))) + && gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1))) { /* Another exception is if the widening cast is from mergeable same precision cast from something diff --git a/gcc/testsuite/gcc.dg/bitint-52.c b/gcc/testsuite/gcc.dg/bitint-52.c new file mode 100644 index 000000000000..befd19e8d2a3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-52.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/112902 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options 
"-std=c23 -O2" } */ + +double c; +#if __BITINT_MAXWIDTH__ >= 2048 +_BitInt (512) a; +_BitInt (2048) b; + +void +foo (void) +{ + b = __builtin_mul_overflow_p (40, (_BitInt (512)) (-b * a), 0); +} + + +void +bar (void) +{ + c -= (unsigned _BitInt (512)) (a | a << b); +} +#endif From bf3ff057f62971ee9de6e3051c3e295be55eb62d Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Fri, 17 Nov 2023 17:00:21 +0800 Subject: [PATCH 081/311] LoongArch: Add support for xorsign. This patch adds support for xorsign pattern to scalar fp and vector. With the new expands, uniformly using vector bitwise logical operations to handle xorsign. On LoongArch64, floating-point registers and vector registers share the same register, so this patch also allows conversion between LSX vector mode and scalar fp mode to avoid unnecessary instruction generation. gcc/ChangeLog: * config/loongarch/lasx.md (xorsign3): New expander. * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow conversion between LSX vector mode and scalar fp mode. * config/loongarch/loongarch.md (@xorsign3): New expander. * config/loongarch/lsx.md (@xorsign3): Ditto. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. * gcc.target/loongarch/xorsign-run.c: New test. * gcc.target/loongarch/xorsign.c: New test. 
--- gcc/config/loongarch/lasx.md | 22 +++++-- gcc/config/loongarch/loongarch.cc | 5 ++ gcc/config/loongarch/loongarch.md | 17 ++++++ gcc/config/loongarch/lsx.md | 23 +++++-- .../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++ .../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++ .../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++ .../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++ .../gcc.target/loongarch/xorsign-run.c | 25 ++++++++ gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++ 10 files changed, 260 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 116b30c07745..de7c88f14187 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -1065,10 +1065,10 @@ (set_attr "mode" "")]) (define_insn "xor3" - [(set (match_operand:ILASX 0 "register_operand" "=f,f,f") - (xor:ILASX - (match_operand:ILASX 1 "register_operand" "f,f,f") - (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + [(set (match_operand:LASX 0 "register_operand" "=f,f,f") + (xor:LASX + (match_operand:LASX 1 "register_operand" "f,f,f") + (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] "ISA_HAS_LASX" "@ xvxor.v\t%u0,%u1,%u2 @@ -3061,6 +3061,20 @@ operands[5] = gen_reg_rtx (mode); }) +(define_expand "xorsign3" + [(set (match_dup 4) + (and:FLASX (match_dup 3) + (match_operand:FLASX 2 "register_operand"))) + (set (match_operand:FLASX 0 "register_operand") + (xor:FLASX (match_dup 4) + 
(match_operand:FLASX 1 "register_operand")))] + "ISA_HAS_LASX" +{ + operands[3] = loongarch_build_signbit_mask (mode, 1, 0); + + operands[4] = gen_reg_rtx (mode); +}) + (define_insn "absv4df2" [(set (match_operand:V4DF 0 "register_operand" "=f") diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 3545e66a10e4..f140d6992383 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -6707,6 +6707,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to, if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)) return true; + /* Allow conversion between LSX vector mode and scalar fp mode. */ + if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to)) + || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to)))) + return true; + return !reg_classes_intersect_p (FP_REGS, rclass); } diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 7a101dd64b73..b79ca752b644 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1164,6 +1164,23 @@ "fcopysign.\t%0,%1,%2" [(set_attr "type" "fcopysign") (set_attr "mode" "")]) + +(define_expand "@xorsign3" + [(match_operand:ANYF 0 "register_operand") + (match_operand:ANYF 1 "register_operand") + (match_operand:ANYF 2 "register_operand")] + "ISA_HAS_LSX" +{ + machine_mode lsx_mode + = mode == SFmode ? V4SFmode : V2DFmode; + rtx tmp = gen_reg_rtx (lsx_mode); + rtx op1 = lowpart_subreg (lsx_mode, operands[1], mode); + rtx op2 = lowpart_subreg (lsx_mode, operands[2], mode); + emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2)); + emit_move_insn (operands[0], + lowpart_subreg (mode, tmp, lsx_mode)); + DONE; +}) ;; ;; .................... 
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 232399934045..ce6ec6d69c95 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -957,10 +957,10 @@ (set_attr "mode" "")]) (define_insn "xor3" - [(set (match_operand:ILSX 0 "register_operand" "=f,f,f") - (xor:ILSX - (match_operand:ILSX 1 "register_operand" "f,f,f") - (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] + [(set (match_operand:LSX 0 "register_operand" "=f,f,f") + (xor:LSX + (match_operand:LSX 1 "register_operand" "f,f,f") + (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))] "ISA_HAS_LSX" "@ vxor.v\t%w0,%w1,%w2 @@ -2786,6 +2786,21 @@ operands[5] = gen_reg_rtx (mode); }) +(define_expand "@xorsign3" + [(set (match_dup 4) + (and:FLSX (match_dup 3) + (match_operand:FLSX 2 "register_operand"))) + (set (match_operand:FLSX 0 "register_operand") + (xor:FLSX (match_dup 4) + (match_operand:FLSX 1 "register_operand")))] + "ISA_HAS_LSX" +{ + operands[3] = loongarch_build_signbit_mask (mode, 1, 0); + + operands[4] = gen_reg_rtx (mode); +}) + + (define_insn "absv2df2" [(set (match_operand:V2DF 0 "register_operand" "=f") (abs:V2DF (match_operand:V2DF 1 "register_operand" "f")))] diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c new file mode 100644 index 000000000000..2295503d4a10 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ +/* { dg-require-effective-target loongarch_asx_hw } */ + +#include "lasx-xorsign.c" + +extern void abort (); + +#define N 16 +float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, + -12.5f, -15.6f, -18.7f, -21.8f, + 24.9f, 27.1f, 30.2f, 33.3f, + 36.4f, 39.5f, 42.6f, 45.7f}; +float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 
11.0f}; +float r[N]; + +double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, + -12.5d, -15.6d, -18.7d, -21.8d, + 24.9d, 27.1d, 30.2d, 33.3d, + 36.4d, 39.5d, 42.6d, 45.7d}; +double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, + -9.0d, 1.0d, -2.0d, 3.0d, + -4.0d, -5.0d, 6.0d, 7.0d, + -8.0d, -9.0d, 10.0d, 11.0d}; +double rd[N]; + +void +__attribute__ ((optimize ("-O0"))) +check_xorsignf (void) +{ + for (int i = 0; i < N; i++) + if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) + abort (); +} + +void +__attribute__ ((optimize ("-O0"))) +check_xorsign (void) +{ + for (int i = 0; i < N; i++) + if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) + abort (); +} + +int +main (void) +{ + my_xorsignf (r, a, b, N); + /* check results: */ + check_xorsignf (); + + my_xorsign (rd, ad, bd, N); + /* check results: */ + check_xorsign (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c new file mode 100644 index 000000000000..190a9239b313 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mlasx" } */ +/* { dg-final { scan-assembler "xvand\\.v" } } */ +/* { dg-final { scan-assembler "xvxor\\.v" } } */ +/* { dg-final { scan-assembler-not "xvfmul" } } */ + +double +my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i] * __builtin_copysign (1.0d, c[i]); +} + +float +my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c new file mode 100644 index 000000000000..22c5c03cc7de --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c @@ -0,0 
+1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ +/* { dg-require-effective-target loongarch_sx_hw } */ + +#include "lsx-xorsign.c" + +extern void abort (); + +#define N 16 +float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, + -12.5f, -15.6f, -18.7f, -21.8f, + 24.9f, 27.1f, 30.2f, 33.3f, + 36.4f, 39.5f, 42.6f, 45.7f}; +float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f}; +float r[N]; + +double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d, + -12.5d, -15.6d, -18.7d, -21.8d, + 24.9d, 27.1d, 30.2d, 33.3d, + 36.4d, 39.5d, 42.6d, 45.7d}; +double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d, + -9.0d, 1.0d, -2.0d, 3.0d, + -4.0d, -5.0d, 6.0d, 7.0d, + -8.0d, -9.0d, 10.0d, 11.0d}; +double rd[N]; + +void +__attribute__ ((optimize ("-O0"))) +check_xorsignf (void) +{ + for (int i = 0; i < N; i++) + if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) + abort (); +} + +void +__attribute__ ((optimize ("-O0"))) +check_xorsign (void) +{ + for (int i = 0; i < N; i++) + if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) + abort (); +} + +int +main (void) +{ + my_xorsignf (r, a, b, N); + /* check results: */ + check_xorsignf (); + + my_xorsign (rd, ad, bd, N); + /* check results: */ + check_xorsign (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c new file mode 100644 index 000000000000..c2694c11e795 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -mlsx" } */ +/* { dg-final { scan-assembler "vand\\.v" } } */ +/* { dg-final { scan-assembler "vxor\\.v" } } */ +/* { dg-final { scan-assembler-not "vfmul" } } */ + +double +my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i] * __builtin_copysign (1.0d, c[i]); +} + +float 
+my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n) +{ + for (int i = 0; i < n; i++) + a[i] = b[i] * __builtin_copysignf (1.0f, c[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c new file mode 100644 index 000000000000..b4f28adf8c8f --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mlsx" } */ +/* { dg-require-effective-target loongarch_sx_hw } */ + +extern void abort(void); + +static double x = 2.0; +static float y = 2.0; + +int main() +{ + if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5) + abort(); + + if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5) + abort(); + + if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5) + abort(); + + if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5) + abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign.c b/gcc/testsuite/gcc.target/loongarch/xorsign.c new file mode 100644 index 000000000000..ca80603d48b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/xorsign.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx" } */ +/* { dg-final { scan-assembler "vand\\.v" } } */ +/* { dg-final { scan-assembler "vxor\\.v" } } */ +/* { dg-final { scan-assembler-not "fcopysign" } } */ +/* { dg-final { scan-assembler-not "fmul" } } */ + +double +my_xorsign (double a, double b) +{ + return a * __builtin_copysign (1.0d, b); +} + +float +my_xorsignf (float a, float b) +{ + return a * __builtin_copysignf (1.0f, b); +} From 51b8259212791dbea846706bc5e9db5310f1fc10 Mon Sep 17 00:00:00 2001 From: Pan Li Date: Fri, 8 Dec 2023 14:48:48 +0800 Subject: [PATCH 082/311] RISC-V: Fix ICE for incorrect mode attr in V_F2DI_CONVERT_BRIDGE The mode attr V_F2DI_CONVERT_BRIDGE converts the floating-point mode to the widened floating-point mode by design. However, (RVVM1HF "RVVM2SI") was used by mistake.
This patch fixes it by replacing (RVVM1HF "RVVM2SI") with (RVVM1HF "RVVM2SF"), as designed. gcc/ChangeLog: * config/riscv/vector-iterators.md: Replace RVVM2SI with RVVM2SF for mode attr V_F2DI_CONVERT_BRIDGE. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/vector-iterators.md | 2 +- .../riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 56080ed1f5f8..5f5f7b5b986f 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -3267,7 +3267,7 @@ ]) (define_mode_attr V_F2DI_CONVERT_BRIDGE [ - (RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SI") (RVVMF2HF "RVVM1SF") + (RVVM2HF "RVVM4SF") (RVVM1HF "RVVM2SF") (RVVMF2HF "RVVM1SF") (RVVMF4HF "RVVMF2SF") (RVVM4SF "VOID") (RVVM2SF "VOID") (RVVM1SF "VOID") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c new file mode 100644 index 000000000000..5fb61c7b44c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c @@ -0,0 +1,7 @@ +/* Test that we do not have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "--param=riscv-autovec-lmul=m4 -march=rv64gcv_zvfh_zfh -mabi=lp64d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */ + +#include "test-math.h" + +TEST_UNARY_CALL_CVT (_Float16, long, __builtin_lroundf16) From 63a541a0905867cbd5762f55a928778724ed0ec7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 6 Dec 2023 11:33:10 +0100 Subject: [PATCH 083/311] Shrink out-of-SSA dump The following removes the second GIMPLE function dump after
remove_ssa_form which used to rewrite the IL with the coalescing result but has not done so for a long time now. * tree-outof-ssa.cc (rewrite_out_of_ssa): Dump GIMPLE once only, after final IL adjustments. --- gcc/tree-outof-ssa.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcc/tree-outof-ssa.cc b/gcc/tree-outof-ssa.cc index 767623ab8eac..5dc58f1b8081 100644 --- a/gcc/tree-outof-ssa.cc +++ b/gcc/tree-outof-ssa.cc @@ -1352,8 +1352,5 @@ rewrite_out_of_ssa (struct ssaexpand *sa) remove_ssa_form (flag_tree_ter, sa); - if (dump_file && (dump_flags & TDF_DETAILS)) - gimple_dump_cfg (dump_file, dump_flags & ~TDF_DETAILS); - return 0; } From 61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 6 Dec 2023 15:04:49 +0800 Subject: [PATCH 084/311] LoongArch: Add support for LoongArch V1.1 approximate instructions. This patch adds define_insn/builtins/intrinsics for these instructions, and adds the option -mfrecipe to control instruction generation. gcc/ChangeLog: * config/loongarch/genopts/isa-evolution.in (frecipe): Add. * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. (__frecipe_d): Ditto. (__frsqrte_s): Ditto. (__frsqrte_d): Ditto. * config/loongarch/lasx.md (lasx_xvfrecipe_): New insn pattern. (lasx_xvfrsqrte_): Ditto. * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. (__lasx_xvfrecipe_d): Ditto. (__lasx_xvfrsqrte_s): Ditto. (__lasx_xvfrsqrte_d): Ditto. * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. (LSX_EXT_BUILTIN): New macro. (LASX_EXT_BUILTIN): Ditto. * config/loongarch/loongarch-cpucfg-map.h: Regenerate. * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". * config/loongarch/loongarch-def.cc: Regenerate. * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. * config/loongarch/loongarch.md (loongarch_frecipe_): New insn pattern.
(loongarch_frsqrte_): Ditto. * config/loongarch/loongarch.opt: Regenerate. * config/loongarch/lsx.md (lsx_vfrecipe_): New insn pattern. (lsx_vfrsqrte_): Ditto. * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. (__lsx_vfrecipe_d): Ditto. (__lsx_vfrsqrte_s): Ditto. (__lsx_vfrsqrte_d): Ditto. * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. gcc/testsuite/ChangeLog: * gcc.target/loongarch/larch-frecipe-builtin.c: New test. * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. --- gcc/config/loongarch/genopts/isa-evolution.in | 1 + gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++ gcc/config/loongarch/lasx.md | 24 +++++++++++ gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++ gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++ gcc/config/loongarch/loongarch-c.cc | 3 ++ gcc/config/loongarch/loongarch-cpucfg-map.h | 1 + gcc/config/loongarch/loongarch-def.cc | 3 +- gcc/config/loongarch/loongarch-str.h | 1 + gcc/config/loongarch/loongarch.cc | 1 + gcc/config/loongarch/loongarch.md | 35 +++++++++++++++- gcc/config/loongarch/loongarch.opt | 4 ++ gcc/config/loongarch/lsx.md | 24 +++++++++++ gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++ gcc/doc/extend.texi | 35 ++++++++++++++++ .../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++ .../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++ .../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++ 18 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in index a6bc3f87f201..11a198b649f6 100644 --- 
a/gcc/config/loongarch/genopts/isa-evolution.in +++ b/gcc/config/loongarch/genopts/isa-evolution.in @@ -1,3 +1,4 @@ +2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions. 2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. 2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions. 2 28 lamcas Support amcas[_db].{b/h/w/d} instructions. diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h index e571ed27b374..bb1cda831eb7 100644 --- a/gcc/config/loongarch/larchintrin.h +++ b/gcc/config/loongarch/larchintrin.h @@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2) } #endif +#ifdef __loongarch_frecipe +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: SF, SF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frecipe_s (float _1) +{ + __builtin_loongarch_frecipe_s ((float) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: DF, DF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frecipe_d (double _1) +{ + __builtin_loongarch_frecipe_d ((double) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: SF, SF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frsqrte_s (float _1) +{ + __builtin_loongarch_frsqrte_s ((float) _1); +} + +/* Assembly instruction format: fd, fj. */ +/* Data types in instruction templates: DF, DF. */ +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__frsqrte_d (double _1) +{ + __builtin_loongarch_frsqrte_d ((double) _1); +} +#endif + /* Assembly instruction format: ui15. */ /* Data types in instruction templates: USI. 
*/ #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1)) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index de7c88f14187..b1416f6c370a 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -40,8 +40,10 @@ UNSPEC_LASX_XVFCVTL UNSPEC_LASX_XVFLOGB UNSPEC_LASX_XVFRECIP + UNSPEC_LASX_XVFRECIPE UNSPEC_LASX_XVFRINT UNSPEC_LASX_XVFRSQRT + UNSPEC_LASX_XVFRSQRTE UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SEQ UNSPEC_LASX_XVFCMP_SLE @@ -1633,6 +1635,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lasx_xvfrecipe_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRECIPE))] + "ISA_HAS_LASX && TARGET_FRECIPE" + "xvfrecipe.\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + (define_insn "lasx_xvfrsqrt_" [(set (match_operand:FLASX 0 "register_operand" "=f") (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] @@ -1642,6 +1655,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) +;; Approximate Reciprocal Square Root Instructions. 
+ +(define_insn "lasx_xvfrsqrte_" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRTE))] + "ISA_HAS_LASX && TARGET_FRECIPE" + "xvfrsqrte.\t%u0,%u1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + (define_insn "lasx_xvftint_u__" [(set (match_operand: 0 "register_operand" "=f") (unspec: [(match_operand:FLASX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h index 7bce2c757f19..5e65e76e74c0 100644 --- a/gcc/config/loongarch/lasxintrin.h +++ b/gcc/config/loongarch/lasxintrin.h @@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1) return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1); } +#if defined(__loongarch_frecipe) +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrecipe_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrecipe_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V8SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_xvfrsqrte_s (__m256 _1) +{ + return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1); +} + +/* Assembly instruction format: xd, xj. */ +/* Data types in instruction templates: V4DF, V4DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_xvfrsqrte_d (__m256d _1) +{ + return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1); +} +#endif + /* Assembly instruction format: xd, xj. */ /* Data types in instruction templates: V8SF, V8SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 5d037ab7f10a..507fc953c721 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -120,6 +120,9 @@ struct loongarch_builtin_description AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI) AVAIL_ALL (lsx, ISA_HAS_LSX) AVAIL_ALL (lasx, ISA_HAS_LASX) +AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI) +AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE) +AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) /* Construct a loongarch_builtin_description from the given arguments. @@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ FUNCTION_TYPE, loongarch_builtin_avail_lsx } + /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_ + for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description + field. AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + { CODE_FOR_lsx_ ## INSN, \ + "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \ + FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } + /* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_ for instruction CODE_FOR_lsx_. FUNCTION_TYPE is a builtin_description @@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX) "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ FUNCTION_TYPE, loongarch_builtin_avail_lasx } +/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_ + for instruction CODE_FOR_lasx_. 
FUNCTION_TYPE is a builtin_description + field. AVAIL is the name of the availability predicate, without the leading + loongarch_builtin_avail_. */ +#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \ + { CODE_FOR_lasx_ ## INSN, \ + "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \ + FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL } + /* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_ for instruction CODE_FOR_lasx_. FUNCTION_TYPE is a builtin_description field. */ @@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default), DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default), + /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */ + + DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe), + DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe), + DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe), + DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe), + + /* Built-in functions for new LSX instructions. */ + + LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe), + LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe), + + /* Built-in functions for new LASX instructions. */ + + LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe), + LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe), + /* Built-in functions for LSX. 
*/ LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI), LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI), diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc index fbc33a10351f..44f52245c783 100644 --- a/gcc/config/loongarch/loongarch-c.cc +++ b/gcc/config/loongarch/loongarch-c.cc @@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile) else builtin_define ("__loongarch_frlen=0"); + if (TARGET_HARD_FLOAT && TARGET_FRECIPE) + builtin_define ("__loongarch_frecipe"); + if (ISA_HAS_LSX) { builtin_define ("__loongarch_simd"); diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h index 02ff16712551..148333c249cb 100644 --- a/gcc/config/loongarch/loongarch-cpucfg-map.h +++ b/gcc/config/loongarch/loongarch-cpucfg-map.h @@ -29,6 +29,7 @@ static constexpr struct { unsigned int cpucfg_bit; HOST_WIDE_INT isa_evolution_bit; } cpucfg_map[] = { + { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE }, { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index bc6997e45b5c..c41804a180e2 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -60,7 +60,8 @@ array_arch loongarch_cpu_default_isa = .fpu_ (ISA_EXT_FPU64) .simd_ (ISA_EXT_SIMD_LASX) .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA - | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)); + | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS + | OPTION_MASK_ISA_FRECIPE)); static inline loongarch_cache la464_cache () { diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h index 7c78d1443d5f..4d1bfd675e83 100644 --- a/gcc/config/loongarch/loongarch-str.h +++ b/gcc/config/loongarch/loongarch-str.h @@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. 
If not see #define STR_EXPLICIT_RELOCS_NONE "none" #define STR_EXPLICIT_RELOCS_ALWAYS "always" +#define OPTSTR_FRECIPE "frecipe" #define OPTSTR_DIV32 "div32" #define OPTSTR_LAM_BH "lam-bh" #define OPTSTR_LAMCAS "lamcas" diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index f140d6992383..d9faf84df538 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -11508,6 +11508,7 @@ loongarch_asm_code_end (void) loongarch_cpu_strings [la_target.cpu_tune]); fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START, loongarch_isa_base_strings [la_target.isa.base]); + DUMP_FEATURE (TARGET_FRECIPE); DUMP_FEATURE (TARGET_DIV32); DUMP_FEATURE (TARGET_LAM_BH); DUMP_FEATURE (TARGET_LAMCAS); diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index b79ca752b644..b16dbd6e5123 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -59,6 +59,12 @@ ;; Stack tie UNSPEC_TIE + ;; RSQRT + UNSPEC_RSQRTE + + ;; RECIP + UNSPEC_RECIPE + ;; CRC UNSPEC_CRC UNSPEC_CRCC @@ -220,6 +226,7 @@ ;; fmadd floating point multiply-add ;; fdiv floating point divide ;; frdiv floating point reciprocal divide +;; frecipe floating point approximate reciprocal ;; fabs floating point absolute value ;; flogb floating point exponent extract ;; fneg floating point negation @@ -229,6 +236,7 @@ ;; fscaleb floating point scale ;; fsqrt floating point square root ;; frsqrt floating point reciprocal square root +;; frsqrte floating point approximate reciprocal square root ;; multi multiword sequence (or user asm statements) ;; atomic atomic memory update instruction ;; syncloop memory atomic operation implemented as a sync loop @@ -238,8 +246,8 @@ "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore, prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical, shift,slt,signext,clz,trap,imul,idiv,move, - 
fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt, - fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost, + fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt, + fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost, simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd, simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp, simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill, @@ -908,6 +916,18 @@ [(set_attr "type" "frdiv") (set_attr "mode" "")]) +;; Approximate Reciprocal Instructions. + +(define_insn "loongarch_frecipe_" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RECIPE))] + "TARGET_FRECIPE" + "frecipe.\t%0,%1" + [(set_attr "type" "frecipe") + (set_attr "mode" "") + (set_attr "insn_count" "1")]) + ;; Integer division and modulus. (define_expand "3" [(set (match_operand:GPR 0 "register_operand") @@ -1133,6 +1153,17 @@ [(set_attr "type" "frsqrt") (set_attr "mode" "") (set_attr "insn_count" "1")]) + +;; Approximate Reciprocal Square Root Instructions. + +(define_insn "loongarch_frsqrte_" + [(set (match_operand:ANYF 0 "register_operand" "=f") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRTE))] + "TARGET_FRECIPE" + "frsqrte.\t%0,%1" + [(set_attr "type" "frsqrte") + (set_attr "mode" "")]) ;; ;; .................... diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index 41e6424e8616..cdd59ae4fcf2 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -260,6 +260,10 @@ default value is 4. Variable HOST_WIDE_INT isa_evolution = 0 +mfrecipe +Target Mask(ISA_FRECIPE) Var(isa_evolution) +Support frecipe.{s/d} and frsqrte.{s/d} instructions. 
+ mdiv32 Target Mask(ISA_DIV32) Var(isa_evolution) Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index ce6ec6d69c95..37bdc6910d9e 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -42,8 +42,10 @@ UNSPEC_LSX_VFCVTL UNSPEC_LSX_VFLOGB UNSPEC_LSX_VFRECIP + UNSPEC_LSX_VFRECIPE UNSPEC_LSX_VFRINT UNSPEC_LSX_VFRSQRT + UNSPEC_LSX_VFRSQRTE UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SLE @@ -1546,6 +1548,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) +;; Approximate Reciprocal Instructions. + +(define_insn "lsx_vfrecipe_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRECIPE))] + "ISA_HAS_LSX && TARGET_FRECIPE" + "vfrecipe.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + (define_insn "lsx_vfrsqrt_" [(set (match_operand:FLSX 0 "register_operand" "=f") (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] @@ -1555,6 +1568,17 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) +;; Approximate Reciprocal Square Root Instructions. 
+ +(define_insn "lsx_vfrsqrte_" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRTE))] + "ISA_HAS_LSX && TARGET_FRECIPE" + "vfrsqrte.\t%w0,%w1" + [(set_attr "type" "simd_fdiv") + (set_attr "mode" "")]) + (define_insn "lsx_vftint_u__" [(set (match_operand: 0 "register_operand" "=f") (unspec: [(match_operand:FLSX 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h index 29553c093fab..57a6fc40a8f1 100644 --- a/gcc/config/loongarch/lsxintrin.h +++ b/gcc/config/loongarch/lsxintrin.h @@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1) return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1); } +#if defined(__loongarch_frecipe) +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrecipe_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrecipe_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lsx_vfrsqrte_s (__m128 _1) +{ + return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1); +} + +/* Assembly instruction format: vd, vj. */ +/* Data types in instruction templates: V2DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lsx_vfrsqrte_d (__m128d _1) +{ + return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1); +} +#endif + /* Assembly instruction format: vd, vj. 
*/ /* Data types in instruction templates: V4SF, V4SF. */ extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index af782b3f228c..c074b360f78f 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -17339,6 +17339,14 @@ The intrinsics provided are listed below: void __builtin_loongarch_break (imm0_32767) @end smallexample +These intrinsic functions are available by using @option{-mfrecipe}. +@smallexample + float __builtin_loongarch_frecipe_s (float); + double __builtin_loongarch_frecipe_d (double); + float __builtin_loongarch_frsqrte_s (float); + double __builtin_loongarch_frsqrte_d (double); +@end smallexample + @emph{Note:}Since the control register is divided into 32-bit and 64-bit, but the access instruction is not distinguished. So GCC renames the control instructions when implementing intrinsics. @@ -17411,6 +17419,15 @@ function you need to include @code{larchintrin.h}. void __break (imm0_32767) @end smallexample +These intrinsic functions are available by including @code{larchintrin.h} and +using @option{-mfrecipe}. +@smallexample + float __frecipe_s (float); + double __frecipe_d (double); + float __frsqrte_s (float); + double __frsqrte_d (double); +@end smallexample + Additional built-in functions are available for LoongArch family processors to efficiently use 128-bit floating-point (__float128) values. @@ -18251,6 +18268,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255); __m128i __lsx_vxor_v (__m128i, __m128i); @end smallexample +These intrinsic functions are available by including @code{lsxintrin.h} and +using @option{-mfrecipe} and @option{-mlsx}.
+@smallexample +__m128d __lsx_vfrecipe_d (__m128d); +__m128 __lsx_vfrecipe_s (__m128); +__m128d __lsx_vfrsqrte_d (__m128d); +__m128 __lsx_vfrsqrte_s (__m128); +@end smallexample + @node LoongArch ASX Vector Intrinsics @subsection LoongArch ASX Vector Intrinsics @@ -19090,6 +19116,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255); __m256i __lasx_xvxor_v (__m256i, __m256i); @end smallexample +These intrinsic functions are available by including @code{lasxintrin.h} and +using @option{-mfrecipe} and @option{-mlasx}. +@smallexample +__m256d __lasx_xvfrecipe_d (__m256d); +__m256 __lasx_xvfrecipe_s (__m256); +__m256d __lasx_xvfrsqrte_d (__m256d); +__m256 __lasx_xvfrsqrte_s (__m256); +@end smallexample + @node MIPS DSP Built-in Functions @subsection MIPS DSP Built-in Functions diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c new file mode 100644 index 000000000000..b9329f346765 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c @@ -0,0 +1,28 @@ +/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mfrecipe" } */ +/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */ + +float +test_frecipe_s (float _1) +{ + return __builtin_loongarch_frecipe_s (_1); +} +double +test_frecipe_d (double _1) +{ + return __builtin_loongarch_frecipe_d (_1); +} +float +test_frsqrte_s (float _1) +{ + return __builtin_loongarch_frsqrte_s (_1); +} +double +test_frsqrte_d (double _1) +{ + return __builtin_loongarch_frsqrte_d (_1); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c new file mode 100644 index 000000000000..522535b45a32 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c @@ -0,0 +1,30 @@ +/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mlasx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */ + +#include + +v8f32 +__lasx_xvfrecipe_s (v8f32 _1) +{ + return __builtin_lasx_xvfrecipe_s (_1); +} +v4f64 +__lasx_xvfrecipe_d (v4f64 _1) +{ + return __builtin_lasx_xvfrecipe_d (_1); +} +v8f32 +__lasx_xvfrsqrte_s (v8f32 _1) +{ + return __builtin_lasx_xvfrsqrte_s (_1); +} +v4f64 +__lasx_xvfrsqrte_d (v4f64 _1) +{ + return __builtin_lasx_xvfrsqrte_d (_1); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c new file mode 100644 index 000000000000..4ad0cb0ffd6d --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c @@ -0,0 +1,30 @@ +/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */ +/* { dg-do compile } */ +/* { dg-options "-mlsx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */ +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */ + +#include + +v4f32 +__lsx_vfrecipe_s 
(v4f32 _1) +{ + return __builtin_lsx_vfrecipe_s (_1); +} +v2f64 +__lsx_vfrecipe_d (v2f64 _1) +{ + return __builtin_lsx_vfrecipe_d (_1); +} +v4f32 +__lsx_vfrsqrte_s (v4f32 _1) +{ + return __builtin_lsx_vfrsqrte_s (_1); +} +v2f64 +__lsx_vfrsqrte_d (v2f64 _1) +{ + return __builtin_lsx_vfrsqrte_d (_1); +} From cd2f1d911c766926c1e690476966d6d0b8089623 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 6 Dec 2023 15:04:50 +0800 Subject: [PATCH 085/311] LoongArch: Use standard pattern name for xvfrsqrt/vfrsqrt instructions. Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt2 to align with standard pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab. gcc/ChangeLog: * config/loongarch/lasx.md (lasx_xvfrsqrt_): Renamed to .. (rsqrt2): .. this. * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. (CODE_FOR_lsx_vfrsqrt_s): Ditto. (CODE_FOR_lasx_xvfrsqrt_d): Ditto. (CODE_FOR_lasx_xvfrsqrt_s): Ditto. * config/loongarch/loongarch.cc (use_rsqrt_p): New function. (loongarch_optab_supported_p): Ditto. (TARGET_OPTAB_SUPPORTED_P): New hook. * config/loongarch/loongarch.md (*rsqrta): Remove. (*rsqrt2): New insn pattern. (*rsqrtb): Remove. * config/loongarch/lsx.md (lsx_vfrsqrt_): Renamed to .. (rsqrt2): .. this. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. 
--- gcc/config/loongarch/lasx.md | 6 ++--- gcc/config/loongarch/loongarch-builtins.cc | 4 +++ gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++ gcc/config/loongarch/loongarch.md | 24 +++++------------ gcc/config/loongarch/lsx.md | 6 ++--- .../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++ .../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++ 7 files changed, 96 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index b1416f6c370a..3a4a1fe51cbf 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -1646,10 +1646,10 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) -(define_insn "lasx_xvfrsqrt_" +(define_insn "rsqrt2" [(set (match_operand:FLASX 0 "register_operand" "=f") - (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] - UNSPEC_LASX_XVFRSQRT))] + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRT))] "ISA_HAS_LASX" "xvfrsqrt.\t%u0,%u1" [(set_attr "type" "simd_fdiv") diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 507fc953c721..ba8686d4cebe 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d +#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 +#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 /* LoongArch ASX define CODE_FOR_lasx_mxxx */ #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 @@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) #define 
CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du +#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 +#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 static const struct loongarch_builtin_description loongarch_builtins[] = { #define LARCH_MOVFCSR2GR 0 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index d9faf84df538..d5a920d12f13 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -11492,6 +11492,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode, is_packed); } +static bool +use_rsqrt_p (void) +{ + return (flag_finite_math_only + && !flag_trapping_math + && flag_unsafe_math_optimizations); +} + +/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ + +static bool +loongarch_optab_supported_p (int op, machine_mode, machine_mode, + optimization_type opt_type) +{ + switch (op) + { + case rsqrt_optab: + return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); + + default: + return true; + } +} + /* If -fverbose-asm, dump some info for debugging. 
*/ static void loongarch_asm_code_end (void) @@ -11630,6 +11654,9 @@ loongarch_asm_code_end (void) #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary +#undef TARGET_OPTAB_SUPPORTED_P +#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p + #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index b16dbd6e5123..ba82ca73263a 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -60,6 +60,7 @@ UNSPEC_TIE ;; RSQRT + UNSPEC_RSQRT UNSPEC_RSQRTE ;; RECIP @@ -1134,25 +1135,14 @@ (set_attr "mode" "") (set_attr "insn_count" "1")]) -(define_insn "*rsqrta" +(define_insn "*rsqrt2" [(set (match_operand:ANYF 0 "register_operand" "=f") - (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") - (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))] - "flag_unsafe_math_optimizations" - "frsqrt.\t%0,%2" + (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] + UNSPEC_RSQRT))] + "TARGET_HARD_FLOAT" + "frsqrt.\t%0,%1" [(set_attr "type" "frsqrt") - (set_attr "mode" "") - (set_attr "insn_count" "1")]) - -(define_insn "*rsqrtb" - [(set (match_operand:ANYF 0 "register_operand" "=f") - (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "") - (match_operand:ANYF 2 "register_operand" "f"))))] - "flag_unsafe_math_optimizations" - "frsqrt.\t%0,%2" - [(set_attr "type" "frsqrt") - (set_attr "mode" "") - (set_attr "insn_count" "1")]) + (set_attr "mode" "")]) ;; Approximate Reciprocal Square Root Instructions. 
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 37bdc6910d9e..cb4a448e7433 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -1559,10 +1559,10 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) -(define_insn "lsx_vfrsqrt_" +(define_insn "rsqrt2" [(set (match_operand:FLSX 0 "register_operand" "=f") - (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] - UNSPEC_LSX_VFRSQRT))] + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRT))] "ISA_HAS_LSX" "vfrsqrt.\t%w0,%w1" [(set_attr "type" "simd_fdiv") diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c new file mode 100644 index 000000000000..24316944d4e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlasx -ffast-math" } */ +/* { dg-final { scan-assembler "xvfrsqrt.s" } } */ +/* { dg-final { scan-assembler "xvfrsqrt.d" } } */ + +extern float sqrtf (float); + +float a[8], b[8]; + +void +foo1(void) +{ + for (int i = 0; i < 8; i++) + a[i] = 1 / sqrtf (b[i]); +} + +extern double sqrt (double); + +double da[4], db[4]; + +void +foo2(void) +{ + for (int i = 0; i < 4; i++) + da[i] = 1 / sqrt (db[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c new file mode 100644 index 000000000000..519cc47644c3 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx -ffast-math" } */ +/* { dg-final { scan-assembler "vfrsqrt.s" } } */ +/* { dg-final { scan-assembler "vfrsqrt.d" } } */ + +extern float sqrtf (float); + +float a[4], b[4]; + +void +foo1(void) +{ + for (int i = 0; i < 4; i++) + a[i] = 1 / sqrtf (b[i]); +} + +extern double sqrt (double); + +double da[2], db[2]; + +void 
+foo2(void) +{ + for (int i = 0; i < 2; i++) + da[i] = 1 / sqrt (db[i]); +} From 276c7618bf41bc2f03cf4b22c24b23c8ba29eca4 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 6 Dec 2023 15:04:51 +0800 Subject: [PATCH 086/311] LoongArch: Redefine pattern for xvfrecip/vfrecip instructions. Redefine pattern for [x]vfrecip instructions use rtx code instead of unspec, and enable [x]vfrecip instructions to be generated during auto-vectorization. gcc/ChangeLog: * config/loongarch/lasx.md (lasx_xvfrecip_): Renamed to .. (recip3): .. this. * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine to new pattern name. (CODE_FOR_lsx_vfrecip_s): Ditto. (CODE_FOR_lasx_xvfrecip_d): Ditto. (CODE_FOR_lasx_xvfrecip_s): Ditto. (loongarch_expand_builtin_direct): For the vector recip instructions, construct a temporary parameter const1_vector. * config/loongarch/lsx.md (lsx_vfrecip_): Renamed to .. (recip3): .. this. * config/loongarch/predicates.md (const_vector_1_operand): New predicate. 
--- gcc/config/loongarch/lasx.md | 8 ++++---- gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++ gcc/config/loongarch/lsx.md | 8 ++++---- gcc/config/loongarch/predicates.md | 4 ++++ 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 3a4a1fe51cbf..ad49a3ffbd52 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -1626,12 +1626,12 @@ [(set_attr "type" "simd_fminmax") (set_attr "mode" "")]) -(define_insn "lasx_xvfrecip_" +(define_insn "recip3" [(set (match_operand:FLASX 0 "register_operand" "=f") - (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] - UNSPEC_LASX_XVFRECIP))] + (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "") + (match_operand:FLASX 2 "register_operand" "f")))] "ISA_HAS_LASX" - "xvfrecip.\t%u0,%u1" + "xvfrecip.\t%u0,%u2" [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index ba8686d4cebe..c77394176db9 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2 #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2 +#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3 +#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3 /* LoongArch ASX define CODE_FOR_lasx_mxxx */ #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3 @@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE) #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2 #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2 +#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3 +#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3 static const struct 
loongarch_builtin_description loongarch_builtins[] = { #define LARCH_MOVFCSR2GR 0 @@ -3024,6 +3028,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, if (has_target_p) create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp))); + /* For the vector reciprocal instructions, we need to construct a temporary + parameter const1_vector. */ + switch (icode) + { + case CODE_FOR_recipv8sf3: + case CODE_FOR_recipv4df3: + case CODE_FOR_recipv4sf3: + case CODE_FOR_recipv2df3: + loongarch_prepare_builtin_arg (&ops[2], exp, 0); + create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode); + return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); + + default: + break; + } + /* Map the arguments to the other operands. */ gcc_assert (opno + call_expr_nargs (exp) == insn_data[icode].n_generator_args); diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index cb4a448e7433..f2774f021871 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -1539,12 +1539,12 @@ [(set_attr "type" "simd_fminmax") (set_attr "mode" "")]) -(define_insn "lsx_vfrecip_" +(define_insn "recip3" [(set (match_operand:FLSX 0 "register_operand" "=f") - (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] - UNSPEC_LSX_VFRECIP))] + (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "") + (match_operand:FLSX 2 "register_operand" "f")))] "ISA_HAS_LSX" - "vfrecip.\t%w0,%w1" + "vfrecip.\t%w0,%w2" [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index d02e846cb123..f7796da10b29 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -227,6 +227,10 @@ (and (match_code "const_int,const_wide_int,const_double,const_vector") (match_test "op == CONST1_RTX (GET_MODE (op))"))) +(define_predicate "const_vector_1_operand" + (and (match_code "const_vector") + (match_test "op == 
CONST1_RTX (GET_MODE (op))"))) + (define_predicate "reg_or_1_operand" (ior (match_operand 0 "const_1_operand") (match_operand 0 "register_operand"))) From 9a07bc477e197ef935679bacd0a923a98c006b2a Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 6 Dec 2023 15:04:52 +0800 Subject: [PATCH 087/311] LoongArch: New options -mrecip and -mrecip= with ffast-math. When both the -mrecip and -mfrecipe options are enabled, use approximate reciprocal instructions and approximate reciprocal square root instructions with additional Newton-Raphson steps to implement single precision floating-point division, square root and reciprocal square root operations, for a better performance. gcc/ChangeLog: * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. (-mrecip, -mrecip): New options. * config/loongarch/lasx.md (div3): New expander. (*div3): Rename. (sqrt2): New expander. (*sqrt2): Rename. (rsqrt2): New expander. * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. (loongarch_emit_swdivsf): Ditto. * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set recip_mask for -mrecip and -mrecip= options. (loongarch_emit_swrsqrtsf): New function. (loongarch_emit_swdivsf): Ditto. * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT RECIP_MASK_ALL): New bitmasks. (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. * config/loongarch/loongarch.md (sqrt2): New expander. (*sqrt2): Rename. (rsqrt2): New expander. * config/loongarch/loongarch.opt (recip_mask): New variable. (-mrecip, -mrecip): New options. * config/loongarch/lsx.md (div3): New expander. (*div3): Rename. (sqrt2): New expander. (*sqrt2): Rename. (rsqrt2): New expander. * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. 
* doc/invoke.texi (LoongArch Options): Document new options. gcc/testsuite/ChangeLog: * gcc.target/loongarch/divf.c: New test. * gcc.target/loongarch/recip-divf.c: New test. * gcc.target/loongarch/recip-sqrtf.c: New test. * gcc.target/loongarch/sqrtf.c: New test. * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. --- gcc/config/loongarch/genopts/loongarch.opt.in | 11 + gcc/config/loongarch/lasx.md | 53 ++++- gcc/config/loongarch/loongarch-protos.h | 2 + gcc/config/loongarch/loongarch.cc | 188 ++++++++++++++++++ gcc/config/loongarch/loongarch.h | 18 ++ gcc/config/loongarch/loongarch.md | 49 ++++- gcc/config/loongarch/loongarch.opt | 11 + gcc/config/loongarch/lsx.md | 53 ++++- gcc/config/loongarch/predicates.md | 4 + gcc/doc/invoke.texi | 54 +++++ gcc/testsuite/gcc.target/loongarch/divf.c | 10 + .../gcc.target/loongarch/recip-divf.c | 9 + .../gcc.target/loongarch/recip-sqrtf.c | 23 +++ gcc/testsuite/gcc.target/loongarch/sqrtf.c | 24 +++ .../loongarch/vector/lasx/lasx-divf.c | 13 ++ .../loongarch/vector/lasx/lasx-recip-divf.c | 12 ++ .../loongarch/vector/lasx/lasx-recip-sqrtf.c | 28 +++ .../loongarch/vector/lasx/lasx-recip.c | 24 +++ .../loongarch/vector/lasx/lasx-sqrtf.c | 29 +++ .../loongarch/vector/lsx/lsx-divf.c | 13 ++ .../loongarch/vector/lsx/lsx-recip-divf.c | 12 ++ .../loongarch/vector/lsx/lsx-recip-sqrtf.c | 28 +++ .../loongarch/vector/lsx/lsx-recip.c | 24 +++ .../loongarch/vector/lsx/lsx-sqrtf.c | 29 +++ 24 files 
changed, 711 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/sqrtf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in index 483b185b059b..c3848d02fd39 100644 --- a/gcc/config/loongarch/genopts/loongarch.opt.in +++ b/gcc/config/loongarch/genopts/loongarch.opt.in @@ -23,6 +23,9 @@ config/loongarch/loongarch-opts.h HeaderInclude config/loongarch/loongarch-str.h +TargetVariable +unsigned int recip_mask = 0 + ; ISA related options ;; Base ISA Enum @@ -194,6 +197,14 @@ mexplicit-relocs Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) Use %reloc() assembly operators (for backward compatibility). +mrecip +Target RejectNegative Var(loongarch_recip) +Generate approximate reciprocal divide and square root for better throughput. + +mrecip= +Target RejectNegative Joined Var(loongarch_recip_name) +Control generation of reciprocal estimates. + ; The code model option names for -mcmodel. 
Enum Name(cmodel) Type(int) diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index ad49a3ffbd52..eeac8cd984ba 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -1194,7 +1194,25 @@ [(set_attr "type" "simd_fmul") (set_attr "mode" "")]) -(define_insn "div3" +(define_expand "div3" + [(set (match_operand:FLASX 0 "register_operand") + (div:FLASX (match_operand:FLASX 1 "reg_or_vecotr_1_operand") + (match_operand:FLASX 2 "register_operand")))] + "ISA_HAS_LASX" +{ + if (mode == V8SFmode + && TARGET_RECIP_VEC_DIV + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + loongarch_emit_swdivsf (operands[0], operands[1], + operands[2], V8SFmode); + DONE; + } +}) + +(define_insn "*div3" [(set (match_operand:FLASX 0 "register_operand" "=f") (div:FLASX (match_operand:FLASX 1 "register_operand" "f") (match_operand:FLASX 2 "register_operand" "f")))] @@ -1223,7 +1241,23 @@ [(set_attr "type" "simd_fmadd") (set_attr "mode" "")]) -(define_insn "sqrt2" +(define_expand "sqrt2" + [(set (match_operand:FLASX 0 "register_operand") + (sqrt:FLASX (match_operand:FLASX 1 "register_operand")))] + "ISA_HAS_LASX" +{ + if (mode == V8SFmode + && TARGET_RECIP_VEC_SQRT + && flag_unsafe_math_optimizations + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math) + { + loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 0); + DONE; + } +}) + +(define_insn "*sqrt2" [(set (match_operand:FLASX 0 "register_operand" "=f") (sqrt:FLASX (match_operand:FLASX 1 "register_operand" "f")))] "ISA_HAS_LASX" @@ -1646,7 +1680,20 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) -(define_insn "rsqrt2" +(define_expand "rsqrt2" + [(set (match_operand:FLASX 0 "register_operand" "=f") + (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] + UNSPEC_LASX_XVFRSQRT))] + "ISA_HAS_LASX" + { + if (mode == V8SFmode && TARGET_RECIP_VEC_RSQRT) + { + 
loongarch_emit_swrsqrtsf (operands[0], operands[1], V8SFmode, 1); + DONE; + } +}) + +(define_insn "*rsqrt2" [(set (match_operand:FLASX 0 "register_operand" "=f") (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")] UNSPEC_LASX_XVFRSQRT))] diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index cb8fc36b0863..f2ff93b5e10e 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -220,5 +220,7 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int); extern tree loongarch_build_builtin_va_list (void); extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool); +extern void loongarch_emit_swrsqrtsf (rtx, rtx, machine_mode, bool); +extern void loongarch_emit_swdivsf (rtx, rtx, rtx, machine_mode); extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type); #endif /* ! GCC_LOONGARCH_PROTOS_H */ diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index d5a920d12f13..838fc1019c67 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -7552,6 +7552,71 @@ loongarch_option_override_internal (struct gcc_options *opts, /* Function to allocate machine-dependent function status. */ init_machine_status = &loongarch_init_machine_status; + + /* -mrecip options. */ + static struct + { + const char *string; /* option name. */ + unsigned int mask; /* mask bits to set. 
*/ + } + const recip_options[] = { + { "all", RECIP_MASK_ALL }, + { "none", RECIP_MASK_NONE }, + { "div", RECIP_MASK_DIV }, + { "sqrt", RECIP_MASK_SQRT }, + { "rsqrt", RECIP_MASK_RSQRT }, + { "vec-div", RECIP_MASK_VEC_DIV }, + { "vec-sqrt", RECIP_MASK_VEC_SQRT }, + { "vec-rsqrt", RECIP_MASK_VEC_RSQRT }, + }; + + if (loongarch_recip_name) + { + char *p = ASTRDUP (loongarch_recip_name); + char *q; + unsigned int mask, i; + bool invert; + + while ((q = strtok (p, ",")) != NULL) + { + p = NULL; + if (*q == '!') + { + invert = true; + q++; + } + else + invert = false; + + if (!strcmp (q, "default")) + mask = RECIP_MASK_ALL; + else + { + for (i = 0; i < ARRAY_SIZE (recip_options); i++) + if (!strcmp (q, recip_options[i].string)) + { + mask = recip_options[i].mask; + break; + } + + if (i == ARRAY_SIZE (recip_options)) + { + error ("unknown option for %<-mrecip=%s%>", q); + invert = false; + mask = RECIP_MASK_NONE; + } + } + + if (invert) + recip_mask &= ~mask; + else + recip_mask |= mask; + } + } + if (loongarch_recip) + recip_mask |= RECIP_MASK_ALL; + if (!TARGET_FRECIPE) + recip_mask = RECIP_MASK_NONE; } @@ -11475,6 +11540,126 @@ loongarch_build_signbit_mask (machine_mode mode, bool vect, bool invert) return force_reg (vec_mode, v); } +/* Use rsqrte instruction and Newton-Raphson to compute the approximation of + a single precision floating point [reciprocal] square root. 
*/ + +void loongarch_emit_swrsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) +{ + rtx x0, e0, e1, e2, mhalf, monehalf; + REAL_VALUE_TYPE r; + int unspec; + + x0 = gen_reg_rtx (mode); + e0 = gen_reg_rtx (mode); + e1 = gen_reg_rtx (mode); + e2 = gen_reg_rtx (mode); + + real_arithmetic (&r, ABS_EXPR, &dconsthalf, NULL); + mhalf = const_double_from_real_value (r, SFmode); + + real_arithmetic (&r, PLUS_EXPR, &dconsthalf, &dconst1); + monehalf = const_double_from_real_value (r, SFmode); + unspec = UNSPEC_RSQRTE; + + if (VECTOR_MODE_P (mode)) + { + mhalf = loongarch_build_const_vector (mode, true, mhalf); + monehalf = loongarch_build_const_vector (mode, true, monehalf); + unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRSQRTE + : UNSPEC_LSX_VFRSQRTE; + } + + /* rsqrt(a) = rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) + sqrt(a) = a * rsqrte(a) * (1.5 - 0.5 * a * rsqrte(a) * rsqrte(a)) */ + + a = force_reg (mode, a); + + /* x0 = rsqrt(a) estimate. */ + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), + unspec))); + + /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). 
*/ + if (!recip) + { + rtx zero = force_reg (mode, CONST0_RTX (mode)); + + if (VECTOR_MODE_P (mode)) + { + machine_mode imode = related_int_vector_mode (mode).require (); + rtx mask = gen_reg_rtx (imode); + emit_insn (gen_rtx_SET (mask, gen_rtx_NE (imode, a, zero))); + emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, + gen_lowpart (mode, mask)))); + } + else + { + rtx target = emit_conditional_move (x0, { GT, a, zero, mode }, + x0, zero, mode, 0); + if (target != x0) + emit_move_insn (x0, target); + } + } + + /* e0 = x0 * a */ + emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); + /* e1 = e0 * x0 */ + emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); + + /* e2 = 1.5 - e1 * 0.5 */ + mhalf = force_reg (mode, mhalf); + monehalf = force_reg (mode, monehalf); + emit_insn (gen_rtx_SET (e2, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, e1), + mhalf, monehalf))); + + if (recip) + /* res = e2 * x0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, x0, e2))); + else + /* res = e2 * e0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e0))); +} + +/* Use recipe instruction and Newton-Raphson to compute the approximation of + a single precision floating point divide. */ + +void loongarch_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) +{ + rtx x0, e0, mtwo; + REAL_VALUE_TYPE r; + x0 = gen_reg_rtx (mode); + e0 = gen_reg_rtx (mode); + int unspec = UNSPEC_RECIPE; + + real_arithmetic (&r, ABS_EXPR, &dconst2, NULL); + mtwo = const_double_from_real_value (r, SFmode); + + if (VECTOR_MODE_P (mode)) + { + mtwo = loongarch_build_const_vector (mode, true, mtwo); + unspec = GET_MODE_SIZE (mode) == 32 ? UNSPEC_LASX_XVFRECIPE + : UNSPEC_LSX_VFRECIPE; + } + + mtwo = force_reg (mode, mtwo); + + /* a / b = a * recipe(b) * (2.0 - b * recipe(b)) */ + + /* x0 = 1./b estimate. 
*/ + emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), + unspec))); + /* 2.0 - b * x0 */ + emit_insn (gen_rtx_SET (e0, gen_rtx_FMA (mode, + gen_rtx_NEG (mode, b), x0, mtwo))); + + /* x0 = a * x0 */ + if (a != CONST1_RTX (mode)) + emit_insn (gen_rtx_SET (x0, gen_rtx_MULT (mode, a, x0))); + + /* res = e0 * x0 */ + emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e0, x0))); +} + static bool loongarch_builtin_support_vector_misalignment (machine_mode mode, const_tree type, @@ -11670,6 +11855,9 @@ loongarch_asm_code_end (void) #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ loongarch_autovectorize_vector_modes +#undef TARGET_OPTAB_SUPPORTED_P +#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS loongarch_init_builtins #undef TARGET_BUILTIN_DECL diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index fa8a3f5582fd..f1350b6048f8 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -702,6 +702,24 @@ enum reg_class && (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT \ || GET_MODE_CLASS (MODE) == MODE_VECTOR_FLOAT)) +#define RECIP_MASK_NONE 0x00 +#define RECIP_MASK_DIV 0x01 +#define RECIP_MASK_SQRT 0x02 +#define RECIP_MASK_RSQRT 0x04 +#define RECIP_MASK_VEC_DIV 0x08 +#define RECIP_MASK_VEC_SQRT 0x10 +#define RECIP_MASK_VEC_RSQRT 0x20 +#define RECIP_MASK_ALL (RECIP_MASK_DIV | RECIP_MASK_SQRT \ + | RECIP_MASK_RSQRT | RECIP_MASK_VEC_SQRT \ + | RECIP_MASK_VEC_DIV | RECIP_MASK_VEC_RSQRT) + +#define TARGET_RECIP_DIV ((recip_mask & RECIP_MASK_DIV) != 0 || TARGET_uARCH_LA664) +#define TARGET_RECIP_SQRT ((recip_mask & RECIP_MASK_SQRT) != 0 || TARGET_uARCH_LA664) +#define TARGET_RECIP_RSQRT ((recip_mask & RECIP_MASK_RSQRT) != 0 || TARGET_uARCH_LA664) +#define TARGET_RECIP_VEC_DIV ((recip_mask & RECIP_MASK_VEC_DIV) != 0 || TARGET_uARCH_LA664) +#define TARGET_RECIP_VEC_SQRT ((recip_mask & RECIP_MASK_VEC_SQRT) != 0 || TARGET_uARCH_LA664) 
+#define TARGET_RECIP_VEC_RSQRT ((recip_mask & RECIP_MASK_VEC_RSQRT) != 0 || TARGET_uARCH_LA664) + /* 1 if N is a possible register number for function argument passing. We have no FP argument registers when soft-float. */ diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index ba82ca73263a..afbf201d4d06 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -893,9 +893,21 @@ ;; Float division and modulus. (define_expand "div3" [(set (match_operand:ANYF 0 "register_operand") - (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") - (match_operand:ANYF 2 "register_operand")))] - "") + (div:ANYF (match_operand:ANYF 1 "reg_or_1_operand") + (match_operand:ANYF 2 "register_operand")))] + "" +{ + if (mode == SFmode + && TARGET_RECIP_DIV + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + loongarch_emit_swdivsf (operands[0], operands[1], + operands[2], SFmode); + DONE; + } +}) (define_insn "*div3" [(set (match_operand:ANYF 0 "register_operand" "=f") @@ -1126,7 +1138,23 @@ ;; ;; .................... 
-(define_insn "sqrt2" +(define_expand "sqrt2" + [(set (match_operand:ANYF 0 "register_operand") + (sqrt:ANYF (match_operand:ANYF 1 "register_operand")))] + "" + { + if (mode == SFmode + && TARGET_RECIP_SQRT + && flag_unsafe_math_optimizations + && !optimize_insn_for_size_p () + && flag_finite_math_only && !flag_trapping_math) + { + loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 0); + DONE; + } + }) + +(define_insn "*sqrt2" [(set (match_operand:ANYF 0 "register_operand" "=f") (sqrt:ANYF (match_operand:ANYF 1 "register_operand" "f")))] "" @@ -1135,6 +1163,19 @@ (set_attr "mode" "") (set_attr "insn_count" "1")]) +(define_expand "rsqrt2" + [(set (match_operand:ANYF 0 "register_operand") + (unspec:ANYF [(match_operand:ANYF 1 "register_operand")] + UNSPEC_RSQRT))] + "TARGET_HARD_FLOAT" +{ + if (mode == SFmode && TARGET_RECIP_RSQRT) + { + loongarch_emit_swrsqrtsf (operands[0], operands[1], SFmode, 1); + DONE; + } +}) + (define_insn "*rsqrt2" [(set (match_operand:ANYF 0 "register_operand" "=f") (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")] diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index cdd59ae4fcf2..61d25130ea98 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -31,6 +31,9 @@ config/loongarch/loongarch-opts.h HeaderInclude config/loongarch/loongarch-str.h +TargetVariable +unsigned int recip_mask = 0 + ; ISA related options ;; Base ISA Enum @@ -202,6 +205,14 @@ mexplicit-relocs Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET) Use %reloc() assembly operators (for backward compatibility). +mrecip +Target RejectNegative Var(loongarch_recip) +Generate approximate reciprocal divide and square root for better throughput. + +mrecip= +Target RejectNegative Joined Var(loongarch_recip_name) +Control generation of reciprocal estimates. + ; The code model option names for -mcmodel. 
Enum Name(cmodel) Type(int) diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index f2774f021871..dbdb423011bb 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -1083,7 +1083,25 @@ [(set_attr "type" "simd_fmul") (set_attr "mode" "")]) -(define_insn "div3" +(define_expand "div3" + [(set (match_operand:FLSX 0 "register_operand") + (div:FLSX (match_operand:FLSX 1 "reg_or_vecotr_1_operand") + (match_operand:FLSX 2 "register_operand")))] + "ISA_HAS_LSX" +{ + if (mode == V4SFmode + && TARGET_RECIP_VEC_DIV + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math + && flag_unsafe_math_optimizations) + { + loongarch_emit_swdivsf (operands[0], operands[1], + operands[2], V4SFmode); + DONE; + } +}) + +(define_insn "*div3" [(set (match_operand:FLSX 0 "register_operand" "=f") (div:FLSX (match_operand:FLSX 1 "register_operand" "f") (match_operand:FLSX 2 "register_operand" "f")))] @@ -1112,7 +1130,23 @@ [(set_attr "type" "simd_fmadd") (set_attr "mode" "")]) -(define_insn "sqrt2" +(define_expand "sqrt2" + [(set (match_operand:FLSX 0 "register_operand") + (sqrt:FLSX (match_operand:FLSX 1 "register_operand")))] + "ISA_HAS_LSX" +{ + if (mode == V4SFmode + && TARGET_RECIP_VEC_SQRT + && flag_unsafe_math_optimizations + && optimize_insn_for_speed_p () + && flag_finite_math_only && !flag_trapping_math) + { + loongarch_emit_swrsqrtsf (operands[0], operands[1], V4SFmode, 0); + DONE; + } +}) + +(define_insn "*sqrt2" [(set (match_operand:FLSX 0 "register_operand" "=f") (sqrt:FLSX (match_operand:FLSX 1 "register_operand" "f")))] "ISA_HAS_LSX" @@ -1559,7 +1593,20 @@ [(set_attr "type" "simd_fdiv") (set_attr "mode" "")]) -(define_insn "rsqrt2" +(define_expand "rsqrt2" + [(set (match_operand:FLSX 0 "register_operand" "=f") + (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] + UNSPEC_LSX_VFRSQRT))] + "ISA_HAS_LSX" +{ + if (mode == V4SFmode && TARGET_RECIP_VEC_RSQRT) + { + loongarch_emit_swrsqrtsf (operands[0], 
operands[1], V4SFmode, 1); + DONE; + } +}) + +(define_insn "*rsqrt2" [(set (match_operand:FLSX 0 "register_operand" "=f") (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")] UNSPEC_LSX_VFRSQRT))] diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index f7796da10b29..9e9ce58cb53f 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -235,6 +235,10 @@ (ior (match_operand 0 "const_1_operand") (match_operand 0 "register_operand"))) +(define_predicate "reg_or_vecotr_1_operand" + (ior (match_operand 0 "const_vector_1_operand") + (match_operand 0 "register_operand"))) + ;; These are used in vec_merge, hence accept bitmask as const_int. (define_predicate "const_exp_2_operand" (and (match_code "const_int") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9d4a0b2820a6..d4e689b64c01 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1056,6 +1056,7 @@ Objective-C and Objective-C++ Dialects}. -mexplicit-relocs=@var{style} -mexplicit-relocs -mno-explicit-relocs -mdirect-extern-access -mno-direct-extern-access -mcmodel=@var{code-model} -mrelax -mpass-mrelax-to-as} +-mrecip -mrecip=@var{opt} @emph{M32R/D Options} @gccoptlist{-m32r2 -m32rx -m32r @@ -26757,6 +26758,59 @@ detecting corresponding assembler support: This option is mostly useful for debugging, or interoperation with assemblers different from the build-time one. +@opindex mrecip +@item -mrecip +This option enables use of the reciprocal estimate and reciprocal square +root estimate instructions with additional Newton-Raphson steps to increase +precision instead of doing a divide or square root and divide for +floating-point arguments. +These instructions are generated only when @option{-funsafe-math-optimizations} +is enabled together with @option{-ffinite-math-only} and +@option{-fno-trapping-math}. +This option is off by default. 
Before you can use this option, you must make sure the +target CPU supports the frecipe and frsqrte instructions. +Note that while the throughput of the sequence is higher than the throughput of +the non-reciprocal instruction, the precision of the sequence can be decreased +by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994). + +@opindex mrecip=opt +@item -mrecip=@var{opt} +This option controls which reciprocal estimate instructions +may be used. @var{opt} is a comma-separated list of options, which may +be preceded by a @samp{!} to invert the option: + +@table @samp +@item all +Enable all estimate instructions. + +@item default +Enable the default instructions, equivalent to @option{-mrecip}. + +@item none +Disable all estimate instructions, equivalent to @option{-mno-recip}. + +@item div +Enable the approximation for scalar division. + +@item vec-div +Enable the approximation for vectorized division. + +@item sqrt +Enable the approximation for scalar square root. + +@item vec-sqrt +Enable the approximation for vectorized square root. + +@item rsqrt +Enable the approximation for scalar reciprocal square root. + +@item vec-rsqrt +Enable the approximation for vectorized reciprocal square root. +@end table + +So, for example, @option{-mrecip=all,!sqrt} enables +all of the reciprocal approximations, except for scalar square root. + @item loongarch-vect-unroll-limit The vectorizer will use available tuning information to determine whether it would be beneficial to unroll the main vectorized loop and by how much. 
This diff --git a/gcc/testsuite/gcc.target/loongarch/divf.c b/gcc/testsuite/gcc.target/loongarch/divf.c new file mode 100644 index 000000000000..6c831817c9e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/divf.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */ +/* { dg-final { scan-assembler "fdiv.s" } } */ +/* { dg-final { scan-assembler-not "frecipe.s" } } */ + +float +foo(float a, float b) +{ + return a / b; +} diff --git a/gcc/testsuite/gcc.target/loongarch/recip-divf.c b/gcc/testsuite/gcc.target/loongarch/recip-divf.c new file mode 100644 index 000000000000..db5e3e488886 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/recip-divf.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */ +/* { dg-final { scan-assembler "frecipe.s" } } */ + +float +foo(float a, float b) +{ + return a / b; +} diff --git a/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c new file mode 100644 index 000000000000..7f45db6cdea1 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/recip-sqrtf.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe" } */ +/* { dg-final { scan-assembler-times "frsqrte.s" 3 } } */ + +extern float sqrtf (float); + +float +foo1 (float a, float b) +{ + return a/sqrtf(b); +} + +float +foo2 (float a, float b) +{ + return sqrtf(a/b); +} + +float +foo3 (float a) +{ + return sqrtf(a); +} diff --git a/gcc/testsuite/gcc.target/loongarch/sqrtf.c b/gcc/testsuite/gcc.target/loongarch/sqrtf.c new file mode 100644 index 000000000000..c2720faac7b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/sqrtf.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mfrecipe -fno-unsafe-math-optimizations" } */ +/* { dg-final { scan-assembler-times "fsqrt.s" 3 } } */ +/* { dg-final { scan-assembler-not "frsqrte.s" } 
} */ + +extern float sqrtf (float); + +float +foo1 (float a, float b) +{ + return a/sqrtf(b); +} + +float +foo2 (float a, float b) +{ + return sqrtf(a/b); +} + +float +foo3 (float a) +{ + return sqrtf(a); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c new file mode 100644 index 000000000000..748a82200d95 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-divf.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mrecip -mlasx -mfrecipe -fno-unsafe-math-optimizations" } */ +/* { dg-final { scan-assembler "xvfdiv.s" } } */ +/* { dg-final { scan-assembler-not "xvfrecipe.s" } } */ + +float a[8],b[8],c[8]; + +void +foo () +{ + for (int i = 0; i < 8; i++) + c[i] = a[i] / b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c new file mode 100644 index 000000000000..6532756f07dc --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-divf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */ +/* { dg-final { scan-assembler "xvfrecipe.s" } } */ + +float a[8],b[8],c[8]; + +void +foo () +{ + for (int i = 0; i < 8; i++) + c[i] = a[i] / b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c new file mode 100644 index 000000000000..a623dff8f279 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlasx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "xvfrsqrte.s" 3 } } */ + +float a[8], b[8], c[8]; + +extern float sqrtf (float); + +void +foo1 (void) +{ + for (int i = 0; i < 8; i++) + c[i] = a[i] / sqrtf (b[i]); +} + +void +foo2 (void) +{ + for (int i = 0; i < 8; 
i++) + c[i] = sqrtf (a[i] / b[i]); +} + +void +foo3 (void) +{ + for (int i = 0; i < 8; i++) + c[i] = sqrtf (a[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c new file mode 100644 index 000000000000..083c868406b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-recip.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */ +/* { dg-final { scan-assembler "xvfrecip.s" } } */ +/* { dg-final { scan-assembler "xvfrecip.d" } } */ +/* { dg-final { scan-assembler-not "xvfdiv.s" } } */ +/* { dg-final { scan-assembler-not "xvfdiv.d" } } */ + +float a[8], b[8]; + +void +foo1(void) +{ + for (int i = 0; i < 8; i++) + a[i] = 1 / (b[i]); +} + +double da[4], db[4]; + +void +foo2(void) +{ + for (int i = 0; i < 4; i++) + da[i] = 1 / (db[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c new file mode 100644 index 000000000000..a005a38865df --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-sqrtf.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fno-unsafe-math-optimizations -mrecip -mlasx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "xvfsqrt.s" 3 } } */ +/* { dg-final { scan-assembler-not "xvfrsqrte.s" } } */ + +float a[8], b[8], c[8]; + +extern float sqrtf (float); + +void +foo1 (void) +{ + for (int i = 0; i < 8; i++) + c[i] = a[i] / sqrtf (b[i]); +} + +void +foo2 (void) +{ + for (int i = 0; i < 8; i++) + c[i] = sqrtf (a[i] / b[i]); +} + +void +foo3 (void) +{ + for (int i = 0; i < 8; i++) + c[i] = sqrtf (a[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c new file mode 100644 index 000000000000..1219b1ef842d --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-divf.c @@ 
-0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */ +/* { dg-final { scan-assembler "vfdiv.s" } } */ +/* { dg-final { scan-assembler-not "vfrecipe.s" } } */ + +float a[4],b[4],c[4]; + +void +foo () +{ + for (int i = 0; i < 4; i++) + c[i] = a[i] / b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c new file mode 100644 index 000000000000..edbe8d9098f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-divf.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */ +/* { dg-final { scan-assembler "vfrecipe.s" } } */ + +float a[4],b[4],c[4]; + +void +foo () +{ + for (int i = 0; i < 4; i++) + c[i] = a[i] / b[i]; +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c new file mode 100644 index 000000000000..d356f915eb58 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe" } */ +/* { dg-final { scan-assembler-times "vfrsqrte.s" 3 } } */ + +float a[4], b[4], c[4]; + +extern float sqrtf (float); + +void +foo1 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = a[i] / sqrtf (b[i]); +} + +void +foo2 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = sqrtf (a[i] / b[i]); +} + +void +foo3 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = sqrtf (a[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c new file mode 100644 index 000000000000..c4d6af4db936 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-recip.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */ +/* { 
dg-final { scan-assembler "vfrecip.s" } } */ +/* { dg-final { scan-assembler "vfrecip.d" } } */ +/* { dg-final { scan-assembler-not "vfdiv.s" } } */ +/* { dg-final { scan-assembler-not "vfdiv.d" } } */ + +float a[4], b[4]; + +void +foo1(void) +{ + for (int i = 0; i < 4; i++) + a[i] = 1 / (b[i]); +} + +double da[2], db[2]; + +void +foo2(void) +{ + for (int i = 0; i < 2; i++) + da[i] = 1 / (db[i]); +} diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c new file mode 100644 index 000000000000..3ff6570a67a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-sqrtf.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -mrecip -mlsx -mfrecipe -fno-unsafe-math-optimizations" } */ +/* { dg-final { scan-assembler-times "vfsqrt.s" 3 } } */ +/* { dg-final { scan-assembler-not "vfrsqrte.s" } } */ + +float a[4], b[4], c[4]; + +extern float sqrtf (float); + +void +foo1 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = a[i] / sqrtf (b[i]); +} + +void +foo2 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = sqrtf (a[i] / b[i]); +} + +void +foo3 (void) +{ + for (int i = 0; i < 4; i++) + c[i] = sqrtf (a[i]); +} From 22362d0f77574e7c06c47181a581bacefff9d030 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 6 Dec 2023 15:04:53 +0800 Subject: [PATCH 088/311] LoongArch: Vectorized loop unrolling is disable for divf/sqrtf/rsqrtf when -mrecip is enabled. Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number of generated instructions is close to or exceeds the maximum issue instructions per cycle of the LoongArch, so vectorized loop unrolling is not performed on them. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): If m_has_recip is true, uf return 1. (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. 
--- gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 838fc1019c67..17508073eda1 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -3974,7 +3974,9 @@ protected: /* Reduction factor for suggesting unroll factor. */ unsigned m_reduc_factor = 0; /* True if the loop contains an average operation. */ - bool m_has_avg =false; + bool m_has_avg = false; + /* True if the loop uses approximation instruction sequence. */ + bool m_has_recip = false; }; /* Implement TARGET_VECTORIZE_CREATE_COSTS. */ @@ -4021,7 +4023,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - if (m_has_avg) + if (m_has_avg || m_has_recip) return 1; /* Don't unroll if it's specified explicitly not to be unrolled. */ @@ -4081,6 +4083,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, } } + combined_fn cfn; + if (kind == vector_stmt + && stmt_info + && stmt_info->stmt) + { + /* Detect the use of approximate instruction sequence. 
*/ + if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT) + && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST) + switch (cfn) + { + case CFN_BUILT_IN_SQRTF: + m_has_recip = true; + default: + break; + } + else if (TARGET_RECIP_VEC_DIV + && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) + { + machine_mode mode = TYPE_MODE (vectype); + switch (gimple_assign_rhs_code (stmt_info->stmt)) + { + case RDIV_EXPR: + if (GET_MODE_INNER (mode) == SFmode) + m_has_recip = true; + default: + break; + } + } + } + return retval; } From 40366b89e9c8e727af70ecf7007cba6c51e4b7d2 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 29 Nov 2023 11:16:59 +0800 Subject: [PATCH 089/311] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests fail on LA664 [PR112611] For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers undefined behavior on LA464, but not on LA664. To ensure compatibility of these two tests on both LA464 and LA664, we have modified both tests to ensure that the index value in the selector does not exceed 63. gcc/testsuite/ChangeLog: PR target/112611 * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. 
--- .../loongarch/vector/lasx/lasx-xvshuf_b.c | 345 ++++++------------ .../loongarch/vector/lsx/lsx-vshuf.c | 162 +++----- 2 files changed, 165 insertions(+), 342 deletions(-) diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c index d8a29dbd225f..b8ab387118a8 100644 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c @@ -43,9 +43,9 @@ main () *((unsigned long *)&__m256i_op1[1]) = 0xfffffefefffffefe; *((unsigned long *)&__m256i_op1[0]) = 0xfffffefefffffefe; *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0xfffffff8fffffff8; + *((unsigned long *)&__m256i_op2[2]) = 0x3f3f3f383f3f3f38; *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0xfffffff8fc000000; + *((unsigned long *)&__m256i_op2[0]) = 0x3f3f3f383c000000; *((unsigned long *)&__m256i_result[3]) = 0xfafafafafafafafa; *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_result[1]) = 0xfefefefefefefefe; @@ -137,33 +137,14 @@ main () *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[0]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op2[3]) = 0x0000ffffffffffff; - *((unsigned long *)&__m256i_op2[2]) = 0x0000ffff0000ffff; - *((unsigned long *)&__m256i_op2[1]) = 0x0000ffffffffffff; - *((unsigned long *)&__m256i_op2[0]) = 0x0000ffff0000ffff; + *((unsigned long *)&__m256i_op2[3]) = 0x0000111111111111; + *((unsigned long *)&__m256i_op2[2]) = 0x0000222200002222; + *((unsigned long *)&__m256i_op2[1]) = 0x0000111111111111; + *((unsigned long *)&__m256i_op2[0]) = 0x0000222200002222; *((unsigned long *)&__m256i_result[3]) = 0xffff000000000000; - *((unsigned long *)&__m256i_result[2]) = 0xffff0000ffff0000; + *((unsigned 
long *)&__m256i_result[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_result[1]) = 0xffff000000000000; - *((unsigned long *)&__m256i_result[0]) = 0xffff0000ffff0000; - __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x000000000000ffff; - *((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff; - *((unsigned long *)&__m256i_op2[1]) = 0x000000000000ffff; - *((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; __m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); @@ -176,7 +157,7 @@ main () *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000077fff; + *((unsigned long *)&__m256i_op2[2]) = 0x0000000000032f1f; *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; @@ -186,9 +167,9 @@ main () 
__m256i_out = __lasx_xvshuf_b (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0xfffffffffffffefe; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000101; - *((unsigned long *)&__m256i_op0[1]) = 0xfffffffffffffefe; + *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001; + *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011; *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000101; *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[2]) = 0x67eee33567eee435; @@ -198,35 +179,16 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op2[1]) = 0x00000000ffffffff; *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[3]) = 0x0022002200000000; *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 
0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0xffffffff80000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0xffffffff80000000; + *((unsigned long *)&__m256i_op0[1]) = 0x001f001f00000000; *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; @@ -243,10 +205,10 @@ main () __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op0[3]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op0[2]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op0[1]) = 0x0011001100110011; + *((unsigned long *)&__m256i_op0[0]) = 0x0011001100110011; *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[1]) = 0xffffffffffffffff; @@ -255,17 +217,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_result[3]) = 
0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[2]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[1]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_result[0]) = 0xffffffffffffffff; __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op0[3]) = 0x003f003f003f003f; + *((unsigned long *)&__m256i_op0[2]) = 0x003f003f003f003f; + *((unsigned long *)&__m256i_op0[1]) = 0x003f003f003f003f; + *((unsigned long *)&__m256i_op0[0]) = 0x003f003f003f003f; *((unsigned long *)&__m256i_op1[3]) = 0xefdfefdf00000000; *((unsigned long *)&__m256i_op1[2]) = 0xefdfefdfefdfefdf; *((unsigned long *)&__m256i_op1[1]) = 0xefdfefdf00000000; @@ -274,36 +236,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op2[1]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op2[0]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0xefdfefdfefdfefdf; + *((unsigned long *)&__m256i_result[2]) = 0xefdfefdfefdfefdf; + *((unsigned long *)&__m256i_result[1]) = 0xefdfefdfefdfefdf; + *((unsigned long *)&__m256i_result[0]) = 0xefdfefdfefdfefdf; __m256i_out = __lasx_xvshuf_h (__m256i_op0, 
__m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0x7575ffff75757595; - *((unsigned long *)&__m256i_op0[2]) = 0x7575ffff7575f575; - *((unsigned long *)&__m256i_op0[1]) = 0x7575ffff75757595; - *((unsigned long *)&__m256i_op0[0]) = 0x7575ffff7575f575; + *((unsigned long *)&__m256i_op0[3]) = 0x0035000000350005; + *((unsigned long *)&__m256i_op0[2]) = 0x0035000000350015; + *((unsigned long *)&__m256i_op0[1]) = 0x0035000000350025; + *((unsigned long *)&__m256i_op0[0]) = 0x0035000000350035; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000003; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000003; @@ -312,10 +255,10 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0x7575757575757575; *((unsigned long *)&__m256i_op2[1]) = 0x7575757575757575; *((unsigned long 
*)&__m256i_op2[0]) = 0x7575757575757575; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x7575757575757575; + *((unsigned long *)&__m256i_result[2]) = 0x7575757575757575; + *((unsigned long *)&__m256i_result[1]) = 0x7575757575757575; + *((unsigned long *)&__m256i_result[0]) = 0x7575757575757575; __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); @@ -357,29 +300,10 @@ main () __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0x000000000000fffe; - *((unsigned 
long *)&__m256i_op0[2]) = 0x00000000000000f0; - *((unsigned long *)&__m256i_op0[1]) = 0x000000000000fffe; - *((unsigned long *)&__m256i_op0[0]) = 0x00000000000000f0; + *((unsigned long *)&__m256i_op0[3]) = 0x000000000000003e; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010; + *((unsigned long *)&__m256i_op0[1]) = 0x000000000000003e; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000010; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -389,16 +313,16 @@ main () *((unsigned long *)&__m256i_op2[1]) = 0x8000000000000000; *((unsigned long *)&__m256i_op2[0]) = 0x000000ffff88ff88; *((unsigned long *)&__m256i_result[3]) = 0xff88ff88ff880000; - *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff880000; + *((unsigned long *)&__m256i_result[2]) = 0xff88ff88ff88ff88; *((unsigned long *)&__m256i_result[1]) = 0xff88ff88ff880000; - *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff880000; + *((unsigned long *)&__m256i_result[0]) = 0xff88ff88ff88ff88; __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x000000010000ffe1; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000101001e18; - *((unsigned long *)&__m256i_op0[1]) = 0x000000010000ffe1; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000101001e18; + *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000011; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000018; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000001; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000008; *((unsigned long *)&__m256i_op1[3]) = 0x98111cca98111cca; *((unsigned long *)&__m256i_op1[2]) = 0x98111cca98111cca; *((unsigned long *)&__m256i_op1[1]) = 0x98111cca98111cca; @@ -407,17 +331,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0x0000000101001e18; *((unsigned long 
*)&__m256i_op2[1]) = 0x000000010000ffe1; *((unsigned long *)&__m256i_op2[0]) = 0x0000000101001e18; - *((unsigned long *)&__m256i_result[3]) = 0x0000000100000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000100000001; *((unsigned long *)&__m256i_result[2]) = 0x0000000101001e18; - *((unsigned long *)&__m256i_result[1]) = 0x0000000100000000; + *((unsigned long *)&__m256i_result[1]) = 0x0000000100000001; *((unsigned long *)&__m256i_result[0]) = 0x0000000101001e18; __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001; - *((unsigned long *)&__m256i_op0[2]) = 0x80008000b3e8fef1; - *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001; - *((unsigned long *)&__m256i_op0[0]) = 0x80008000802ea100; + *((unsigned long *)&__m256i_op0[3]) = 0x000000010000001a; + *((unsigned long *)&__m256i_op0[2]) = 0x0000001100000001; + *((unsigned long *)&__m256i_op0[1]) = 0x0000002100000010; + *((unsigned long *)&__m256i_op0[0]) = 0x000000310000001f; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -426,17 +350,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000002; *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001; *((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000000001; *((unsigned long *)&__m256i_result[2]) = 0x0000000200000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x012e2110012e2110; + *((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110; + *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, 
__m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000082a54290; - *((unsigned long *)&__m256i_op0[2]) = 0x00000000028aa700; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000082a54290; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000002a54287; + *((unsigned long *)&__m256i_op0[3]) = 0x0000002f00000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000001a00000000; + *((unsigned long *)&__m256i_op0[1]) = 0x000000010000001c; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000e0000000c; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 0x00000000002a542a; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -447,8 +371,8 @@ main () *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[1]) = 0x00000000002a542a; + *((unsigned long *)&__m256i_result[0]) = 0x00000000002a542a; __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); @@ -471,10 +395,10 @@ main () __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[2]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[1]) = 0xffffffffffffffff; - *((unsigned long *)&__m256i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m256i_op0[3]) = 0x0000000100000031; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000100000031; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000100000031; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000100000031; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 
0x0000000000000000; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -490,10 +414,10 @@ main () __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0001000100010001; - *((unsigned long *)&__m256i_op0[2]) = 0x0001000100010001; - *((unsigned long *)&__m256i_op0[1]) = 0x0001000100010001; - *((unsigned long *)&__m256i_op0[0]) = 0x0001000100010001; + *((unsigned long *)&__m256i_op0[3]) = 0x0000000200000001; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000400000003; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000600000005; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000800000007; *((unsigned long *)&__m256i_op1[3]) = 0x000000007fc00000; *((unsigned long *)&__m256i_op1[2]) = 0x000000007fc00000; *((unsigned long *)&__m256i_op1[1]) = 0x000000007fc00000; @@ -503,7 +427,7 @@ main () *((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff; *((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000; *((unsigned long *)&__m256i_result[3]) = 0x8000000080000000; - *((unsigned long *)&__m256i_result[2]) = 0x8000000080000000; + *((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff; *((unsigned long *)&__m256i_result[1]) = 0x8000000080000000; *((unsigned long *)&__m256i_result[0]) = 0x8000000080000000; __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2); @@ -529,9 +453,9 @@ main () ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0001000104000200; + *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000030; *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0001000104000200; + *((unsigned long *)&__m256i_op0[0]) = 0x0000001000000000; *((unsigned long *)&__m256i_op1[3]) = 0xffffffffffffffff; *((unsigned long *)&__m256i_op1[2]) = 0xffff0000ffff0000; *((unsigned long *)&__m256i_op1[1]) 
= 0xffffffffffffffff; @@ -585,10 +509,10 @@ main () __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000fffffe01fe52; - *((unsigned long *)&__m256i_op0[2]) = 0x00000000ff01ff02; - *((unsigned long *)&__m256i_op0[1]) = 0x0000fffffe01fe52; - *((unsigned long *)&__m256i_op0[0]) = 0x00000000ff01ff02; + *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000001; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000003; *((unsigned long *)&__m256i_op1[3]) = 0x0000800000000000; *((unsigned long *)&__m256i_op1[2]) = 0x0000000080008001; *((unsigned long *)&__m256i_op1[1]) = 0x0000800000000000; @@ -597,17 +521,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0x000000000000ffff; *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; *((unsigned long *)&__m256i_op2[0]) = 0x000000000000ffff; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000080008001; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000080008001; + *((unsigned long *)&__m256i_result[3]) = 0x000000000000ffff; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[1]) = 0x0000000080008001; + *((unsigned long *)&__m256i_result[0]) = 0x0000800000000000; __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000011; + *((unsigned long *)&__m256i_op0[1]) = 
0x0000000000000022; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000033; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -623,63 +547,6 @@ main () __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0000000000000000; - 
*((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[2]) = 0x0008000000000000; - *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op1[0]) = 0x0008000000000000; - *((unsigned long *)&__m256i_op2[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; - __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); - ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; @@ -700,9 +567,9 @@ main () ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); *((unsigned long 
*)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000002000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000002000000000; + *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000010; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000020; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000030; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000000000; @@ -718,10 +585,10 @@ main () __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0xfffeb6839ffffd80; - *((unsigned long *)&__m256i_op0[2]) = 0xfffeb8649d0d6250; - *((unsigned long *)&__m256i_op0[1]) = 0xfffeb6839ffffd80; - *((unsigned long *)&__m256i_op0[0]) = 0xfffeb8649d0d6250; + *((unsigned long *)&__m256i_op0[3]) = 0x000000000000000a; + *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b; + *((unsigned long *)&__m256i_op0[1]) = 0x000000000000002c; + *((unsigned long *)&__m256i_op0[0]) = 0x000000000000003d; *((unsigned long *)&__m256i_op1[3]) = 0xfffeb6839ffffd80; *((unsigned long *)&__m256i_op1[2]) = 0xfffe97c020010001; *((unsigned long *)&__m256i_op1[1]) = 0xfffeb6839ffffd80; @@ -730,17 +597,17 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0xfffe97c020010001; *((unsigned long *)&__m256i_op2[1]) = 0xfffeb6839ffffd80; *((unsigned long *)&__m256i_op2[0]) = 0xfffe97c020010001; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0xfffe97c020010001; + *((unsigned long *)&__m256i_result[2]) = 0xfffeb6839ffffd80; + *((unsigned long 
*)&__m256i_result[1]) = 0xfffe97c020010001; + *((unsigned long *)&__m256i_result[0]) = 0xfffeb6839ffffd80; __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); - *((unsigned long *)&__m256i_op0[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_op0[3]) = 0x000000000000001a; + *((unsigned long *)&__m256i_op0[2]) = 0x000000000000001b; + *((unsigned long *)&__m256i_op0[1]) = 0x0000000000000002; + *((unsigned long *)&__m256i_op0[0]) = 0x0000000000000007; *((unsigned long *)&__m256i_op1[3]) = 0x0000000000010001; *((unsigned long *)&__m256i_op1[2]) = 0x0000000000010001; *((unsigned long *)&__m256i_op1[1]) = 0x0000000000010001; @@ -749,10 +616,10 @@ main () *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000; *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000000; *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m256i_result[3]) = 0x0000000000010001; + *((unsigned long *)&__m256i_result[2]) = 0x0000000000010001; + *((unsigned long *)&__m256i_result[1]) = 0x0000000000010001; + *((unsigned long *)&__m256i_result[0]) = 0x0000000000010001; __m256i_out = __lasx_xvshuf_d (__m256i_op0, __m256i_op1, __m256i_op2); ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out); diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c index 8153964cf1d1..f3b800f88042 100644 --- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c +++ 
b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c @@ -20,7 +20,7 @@ main () *((unsigned long *)&__m128i_op1[1]) = 0x0000000401000001; *((unsigned long *)&__m128i_op1[0]) = 0x0001000100000004; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x00000000007f0000; + *((unsigned long *)&__m128i_op2[0]) = 0x00000000003f0000; *((unsigned long *)&__m128i_result[1]) = 0x0404040404040404; *((unsigned long *)&__m128i_result[0]) = 0x0404040404000404; __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); @@ -31,7 +31,7 @@ main () *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0xffffffff00000000; + *((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000; *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); @@ -63,10 +63,10 @@ main () *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; *((unsigned long *)&__m128i_op1[1]) = 0x52525252adadadad; *((unsigned long *)&__m128i_op1[0]) = 0x52525252adadadad; - *((unsigned long *)&__m128i_op2[1]) = 0x800000007fffffff; - *((unsigned long *)&__m128i_op2[0]) = 0x800000007fffffff; - *((unsigned long *)&__m128i_result[1]) = 0x00adadad00000000; - *((unsigned long *)&__m128i_result[0]) = 0x00adadad00000000; + *((unsigned long *)&__m128i_op2[1]) = 0x2000000004030201; + *((unsigned long *)&__m128i_op2[0]) = 0x2000000014131211; + *((unsigned long *)&__m128i_result[1]) = 0xadadadad52adadad; + *((unsigned long *)&__m128i_result[0]) = 0xadadadadffffffff; __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); @@ -96,10 +96,10 @@ main () *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000100; *((unsigned 
long *)&__m128i_op1[1]) = 0x04040403fafafafc; *((unsigned long *)&__m128i_op1[0]) = 0x000000000000ff80; - *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x8080808080808080; - *((unsigned long *)&__m128i_result[0]) = 0x8080808080808080; + *((unsigned long *)&__m128i_op2[1]) = 0x00101a1b1c1d1e1f; + *((unsigned long *)&__m128i_op2[0]) = 0x0807060504030201; + *((unsigned long *)&__m128i_result[1]) = 0x8000020202000000; + *((unsigned long *)&__m128i_result[0]) = 0xfc000000000000ff; __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); @@ -118,10 +118,10 @@ main () *((unsigned long *)&__m128i_op0[0]) = 0xffd7ff8dffa4ff7a; *((unsigned long *)&__m128i_op1[1]) = 0x34947b4b11684f92; *((unsigned long *)&__m128i_op1[0]) = 0xee297a731e5c5f86; - *((unsigned long *)&__m128i_op2[1]) = 0x7fffffffffffffff; - *((unsigned long *)&__m128i_op2[0]) = 0xffc0000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000868686868686; + *((unsigned long *)&__m128i_op2[1]) = 0x1f0710301a2b332d; + *((unsigned long *)&__m128i_op2[0]) = 0x1f20000000000000; + *((unsigned long *)&__m128i_result[1]) = 0xffee7a7a9811ff7b; + *((unsigned long *)&__m128i_result[0]) = 0xff86868686868686; __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); @@ -136,19 +136,19 @@ main () __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff; - *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op0[1]) = 0x001f002f003f000f; + *((unsigned long *)&__m128i_op0[0]) = 0x001f002f003f000f; *((unsigned long *)&__m128i_op1[1]) = 0x7fffffffffffffff; *((unsigned 
long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[1]) = 0x7fff7fff7fff7fff; + *((unsigned long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff; __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op0[1]) = 0x000100040010001f; + *((unsigned long *)&__m128i_op0[0]) = 0x0002000300110012; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461; *((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff; @@ -169,74 +169,41 @@ main () __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; - *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; - __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); - ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - - *((unsigned long *)&__m128i_op0[1]) = 0x000300037ff000ff; - *((unsigned long *)&__m128i_op0[0]) = 0x0003000300a10003; + *((unsigned long *)&__m128i_op0[1]) = 0x000300030000001f; + *((unsigned long *)&__m128i_op0[0]) = 0x0003000300000003; *((unsigned long *)&__m128i_op1[1]) = 
0x000300037ff000ff; *((unsigned long *)&__m128i_op1[0]) = 0x0003000300a10003; *((unsigned long *)&__m128i_op2[1]) = 0x000000007ff000ff; *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[1]) = 0x0000000000000003; *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0909000009090000; - *((unsigned long *)&__m128i_op0[0]) = 0x0909000009090000; + *((unsigned long *)&__m128i_op0[1]) = 0x0019000000090000; + *((unsigned long *)&__m128i_op0[0]) = 0x0019000000090000; *((unsigned long *)&__m128i_op1[1]) = 0x0909000009090000; *((unsigned long *)&__m128i_op1[0]) = 0x0909000009090000; *((unsigned long *)&__m128i_op2[1]) = 0x002a05a2f059094a; *((unsigned long *)&__m128i_op2[0]) = 0x05ad3ba576eae048; - *((unsigned long *)&__m128i_result[1]) = 0x0909e0480909e048; - *((unsigned long *)&__m128i_result[0]) = 0x0909e0480909e048; + *((unsigned long *)&__m128i_result[1]) = 0x909e0480909e048; + *((unsigned long *)&__m128i_result[0]) = 0x909e0480909e048; __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; - __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2); - ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - - *((unsigned long *)&__m128i_op0[1]) 
= 0x00000000000000c0; - *((unsigned long *)&__m128i_op0[0]) = 0x00000001ffffff29; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000030; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0; *((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29; - *((unsigned long *)&__m128i_result[1]) = 0xffffff2900000000; + *((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29; *((unsigned long *)&__m128i_result[0]) = 0x0000000100000001; __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; - __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); - ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x00000000000000ff; + *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001f; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x1f54e0ab00000000; *((unsigned long *)&__m128i_op2[1]) = 0x0101010101010101; @@ -246,19 +213,8 @@ main () __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; - *((unsigned long 
*)&__m128i_op1[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; - __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); - ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000007fff; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op0[1]) = 0x0000002f0000002f; + *((unsigned long *)&__m128i_op0[0]) = 0x0000001000000000; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020; @@ -279,30 +235,30 @@ main () __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000004870ba0; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000900000010; + *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000003; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010; *((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000; *((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103; *((unsigned long *)&__m128i_result[1]) = 0x0000010300000103; - *((unsigned long *)&__m128i_result[0]) = 0x0000010300000000; + *((unsigned long *)&__m128i_result[0]) = 0x0000010380000001; __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x000000ff0000857a; - *((unsigned long *)&__m128i_op0[0]) = 0x05fafe0101fe000e; + *((unsigned long *)&__m128i_op0[1]) = 
0x0000001000000007; + *((unsigned long *)&__m128i_op0[0]) = 0x0000002000000001; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0xffffffffffffffff; *((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff; *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0xffffffff00000000; + *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff; __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0xada4808924882588; - *((unsigned long *)&__m128i_op0[0]) = 0xacad25090caca5a4; + *((unsigned long *)&__m128i_op0[1]) = 0x0000001a0000001b; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000a0000000b; *((unsigned long *)&__m128i_op1[1]) = 0x021b7d24c9678a35; *((unsigned long *)&__m128i_op1[0]) = 0x030298a6a1030a49; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; @@ -312,8 +268,8 @@ main () __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffffffff; - *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000003; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000013; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; @@ -323,14 +279,14 @@ main () __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0xdfa6e0c6d46cdc13; - *((unsigned long *)&__m128i_op0[0]) = 0x21fc7081ec69b5f2; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000011; 
*((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x000000002c002400; *((unsigned long *)&__m128i_op2[1]) = 0xffffb96bffff57c9; *((unsigned long *)&__m128i_op2[0]) = 0xffff6080ffff4417; - *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[1]) = 0xffffb96bffff57c9; + *((unsigned long *)&__m128i_result[0]) = 0xffffb96bffff57c9; __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); @@ -345,8 +301,8 @@ main () __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000; - *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000020; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000002000; *((unsigned long *)&__m128i_op1[0]) = 0xf0003000f0003000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; @@ -356,30 +312,30 @@ main () __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x021b7d2449678a35; - *((unsigned long *)&__m128i_op0[0]) = 0x030298a621030a49; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000007; + *((unsigned long *)&__m128i_op0[0]) = 0x000000000000001a; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff; *((unsigned long *)&__m128i_op2[1]) = 0x021b7d24c9678a35; *((unsigned long *)&__m128i_op2[0]) = 0x030298a6a1030a49; - *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000; + *((unsigned 
long *)&__m128i_result[0]) = 0x7fff7fff7fff7fff; __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0x7f7f00007f7f0000; - *((unsigned long *)&__m128i_op0[0]) = 0x7f7f80807f7f8080; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000002; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000fffe0000fffe; *((unsigned long *)&__m128i_op2[1]) = 0x7f8000007f800000; *((unsigned long *)&__m128i_op2[0]) = 0x7f8000007f800000; - *((unsigned long *)&__m128i_result[1]) = 0x7f8000007f800000; - *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000; + *((unsigned long *)&__m128i_result[1]) = 0x0000fffe0000fffe; + *((unsigned long *)&__m128i_result[0]) = 0x7f8000007f800000; __m128i_out = __lsx_vshuf_d (__m128i_op0, __m128i_op1, __m128i_op2); ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out); - *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff; - *((unsigned long *)&__m128i_op0[0]) = 0xfffffffffff10000; + *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000010; + *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000020; *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000; *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000; *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000; From 75f9c2ea29989b1663e4fc2648096ab2836d3877 Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Wed, 29 Nov 2023 11:18:00 +0800 Subject: [PATCH 090/311] LoongArch: Fix ICE and use simplify_gen_subreg instead of gen_rtx_SUBREG directly. 
loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported in GCC and cause an ICE: ice.c:55:1: error: unrecognizable insn: 55 | } | ^ (insn 63 62 64 8 (set (reg:V4DI 278) (subreg:V4DI (subreg:V4DF (reg:V4DI 273 [ vect__53.26 ]) 0) 0)) -1 (nil)) during RTL pass: vregs ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804 Ruoyao previously fixed a similar ICE: https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html This patch fixes the ICE and uses simplify_gen_subreg instead of gen_rtx_SUBREG wherever possible, to avoid the same ICE happening again. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use simplify_gen_subreg instead of gen_rtx_SUBREG. (loongarch_expand_vec_perm_const_2): Ditto. (loongarch_expand_vec_cond_expr): Ditto. gcc/testsuite/ChangeLog: * gcc.target/loongarch/pr112476-3.c: New test. * gcc.target/loongarch/pr112476-4.c: New test. --- gcc/config/loongarch/loongarch.cc | 79 +++++++++++-------- .../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++ .../gcc.target/loongarch/pr112476-4.c | 4 + 3 files changed, 108 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 17508073eda1..8630db44e897 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -8826,13 +8826,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) if (d->vmode == E_V2DFmode) { sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); - tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0); + tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); emit_move_insn (tmp, sel); } else if (d->vmode == E_V4SFmode) { sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); - tmp = gen_rtx_SUBREG (E_V4SImode, 
d->target, 0); + tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); emit_move_insn (tmp, sel); } else @@ -9616,8 +9616,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) /* Adjust op1 for selecting correct value in high 128bit of target register. op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */ - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, conv_op0, GEN_INT (0x21))); @@ -9646,8 +9646,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op0_alt, d->op0); /* Generate subreg for fitting into insn gen function. */ - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); /* Adjust op value in temp register. 
op0 = {0,1,2,3}, op1 = {4,5,0,1} */ @@ -9693,9 +9693,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op1_alt, d->op1); emit_move_insn (op0_alt, d->op0); - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); - rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, + d->vmode, 0); emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, conv_op0, GEN_INT (0x02))); @@ -9727,9 +9728,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */ if (!d->testing_p) { - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); - rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, + d->vmode, 0); /* We can achieve the expectation by using sinple xvpermi.q insn. */ emit_move_insn (conv_target, conv_op1); @@ -9754,8 +9756,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op1_alt, d->op1); emit_move_insn (op0_alt, d->op0); - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); /* Adjust op value in temp regiter. 
op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */ emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, @@ -9799,9 +9801,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op1_alt, d->op1); emit_move_insn (op0_alt, d->op0); - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); - rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0); + rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target, + d->vmode, 0); emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1, conv_op0, GEN_INT (0x13))); @@ -9833,10 +9836,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */ if (!d->testing_p) { - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); rtx temp_reg = gen_reg_rtx (d->vmode); - rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0); + rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg, + d->vmode, 0); emit_move_insn (temp_reg, d->op0); @@ -9945,9 +9949,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op0_alt, d->op0); emit_move_insn (op1_alt, d->op1); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); - rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); - rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, + d->vmode, 0); + rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, + d->vmode, 0); /* Duplicate op0's low 128bit in op0, then duplicate high 
128bit in op1. After this, xvshuf.* insn's selector argument can @@ -9980,10 +9986,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) emit_move_insn (op0_alt, d->op0); emit_move_insn (op1_alt, d->op1); - rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0); - rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0); - rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0); - rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0); + rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt, + d->vmode, 0); + rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt, + d->vmode, 0); + rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0); + rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0); /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure that selector's low 128bit can access all op0's elements, and @@ -10103,12 +10111,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) { case E_V4DFmode: sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm)); - tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0); + tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); emit_move_insn (tmp, sel); break; case E_V8SFmode: sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm)); - tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0); + tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); emit_move_insn (tmp, sel); break; default: @@ -10194,7 +10202,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d) 64bit in target vector register. 
*/ else if (extract_ev_od) { - rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0); + rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted, GEN_INT (0xD8))); } @@ -11284,7 +11292,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, if (mode != vimode) { xop1 = gen_reg_rtx (vimode); - emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0)); + emit_move_insn (xop1, + simplify_gen_subreg (vimode, operands[1], + mode, 0)); } emit_move_insn (src1, xop1); } @@ -11301,7 +11311,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, if (mode != vimode) { xop2 = gen_reg_rtx (vimode); - emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0)); + emit_move_insn (xop2, + simplify_gen_subreg (vimode, operands[2], + mode, 0)); } emit_move_insn (src2, xop2); } @@ -11320,7 +11332,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode, gen_rtx_AND (vimode, mask, src1)); /* The result is placed back to a register with the mask. 
*/ emit_insn (gen_rtx_SET (mask, bsel)); - emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0)); + emit_move_insn (operands[0], + simplify_gen_subreg (mode, mask, vimode, 0)); } } diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-3.c b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c new file mode 100644 index 000000000000..d696d4182bbb --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlsx" } */ + +#include <stdint.h> + +typedef int8_t orc_int8; +typedef int16_t orc_int16; +typedef int32_t orc_int32; +typedef int64_t orc_int64; + +typedef union +{ + orc_int32 i; + float f; + orc_int16 x2[2]; + orc_int8 x4[4]; +} orc_union32; +typedef union +{ + orc_int64 i; + double f; + orc_int32 x2[2]; + float x2f[2]; + orc_int16 x4[4]; +} orc_union64; + +void +audio_orc_s32_to_double (double * restrict d1, + const signed int * restrict s1, int n) +{ + int i; + orc_union64 *restrict ptr0; + const orc_union32 *restrict ptr4; + orc_union32 var33; + orc_union64 var34; + orc_union64 var35; + orc_union64 var36; + + ptr0 = (orc_union64 *) d1; + ptr4 = (orc_union32 *) s1; + + var34.i = 0x41e0000000000000UL; + + for (i = 0; i < n; i++) { + var33 = ptr4[i]; + var36.f = var33.i; + { + orc_union64 _src1; + orc_union64 _src2; + orc_union64 _dest1; + _src1.i = ((var36.i) & ((((var36.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL)); + _src2.i = ((var34.i) & ((((var34.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL)); + _dest1.f = _src1.f / _src2.f; + var35.i = ((_dest1.i) & ((((_dest1.i)&0x7ff0000000000000UL) == 0) ? 
0xfff0000000000000UL : 0xffffffffffffffffUL)); + } + ptr0[i] = var35; + } +} diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-4.c b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c new file mode 100644 index 000000000000..955d98552ebb --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlasx" } */ + +#include "pr112476-3.c" From 3b93ce50af3d02455c17ee5723137b8865ca4a39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Poulhi=C3=A8s?= Date: Fri, 8 Dec 2023 10:14:22 +0100 Subject: [PATCH 091/311] Revert "testsuite: require avx_runtime for some tests" This reverts commit 249404649d26f544d1ad6808625807532c2b6a42. --- gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c | 3 +-- gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c | 4 ++-- gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c index 607194496e90..c1d3c27d8193 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c +++ b/gcc/testsuite/gcc.dg/vect/vect-ifcvt-18.c @@ -1,7 +1,6 @@ /* { dg-require-effective-target vect_condition } */ /* { dg-require-effective-target vect_float } */ -/* { dg-require-effective-target avx_runtime } */ -/* { dg-additional-options "-Ofast -mavx" } */ +/* { dg-additional-options "-Ofast -mavx" { target avx_runtime } } */ int A0[4] = {36,39,42,45}; diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c index c6615dc626dd..7cd29e894d05 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target vect_simd_clones } */ -/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 -mavx" } */ -/* { dg-require-effective-target avx_runtime } */ +/* { dg-additional-options "-fopenmp-simd --param 
vect-epilogues-nomask=0" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } && { ! lp64 } } } } */ #define TYPE __INT64_TYPE__ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c index 787b918d0c46..4dd51381d73c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target vect_simd_clones } */ -/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0 -mavx" } */ -/* { dg-require-effective-target avx_runtime } */ +/* { dg-additional-options "-fopenmp-simd --param vect-epilogues-nomask=0" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ /* { dg-additional-options "-mno-avx512f" { target { { i?86*-*-* x86_64-*-* } && { ! lp64 } } } } */ #define TYPE __INT64_TYPE__ From 5e25baa7e577f9b73f746005efb5ccd4e000e51e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 8 Dec 2023 09:14:43 +0100 Subject: [PATCH 092/311] tree-optimization/112909 - uninit diagnostic with abnormal copy The following avoids spurious uninit diagnostics for SSA name copies which mostly appear when the source is marked as abnormal which prevents copy propagation. To prevent regressions I remove the bail out for anonymous SSA names in the PHI arg place from warn_uninitialized_phi leaving that to warn_uninit where I handle SSA copies from a SSA name which isn't anonymous. In theory this might cause more valid and false positive diagnostics to pop up. PR tree-optimization/112909 * tree-ssa-uninit.cc (find_uninit_use): Look through a single level of SSA name copies with single use. * gcc.dg/uninit-pr112909.c: New testcase. 
--- gcc/testsuite/gcc.dg/uninit-pr112909.c | 28 +++++++++++++++ gcc/tree-ssa-uninit.cc | 47 ++++++++++++++++++++------ 2 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/uninit-pr112909.c diff --git a/gcc/testsuite/gcc.dg/uninit-pr112909.c b/gcc/testsuite/gcc.dg/uninit-pr112909.c new file mode 100644 index 000000000000..d2998f715aaa --- /dev/null +++ b/gcc/testsuite/gcc.dg/uninit-pr112909.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wuninitialized" } */ + +struct machine_thread_all_state { + int set; +} _hurd_setup_sighandler_state; +int _hurd_setup_sighandler_ss_0; +struct { + int ctx; +} *_hurd_setup_sighandler_stackframe; +void _setjmp(); +void __thread_get_state(); +int machine_get_basic_state(struct machine_thread_all_state *state) { + if (state->set) + __thread_get_state(); + return 1; +} +int *_hurd_setup_sighandler() { + int *scp; /* { dg-bogus "used uninitialized" } */ + if (_hurd_setup_sighandler_ss_0) { + _setjmp(); + _hurd_setup_sighandler_state.set |= 5; + } + machine_get_basic_state(&_hurd_setup_sighandler_state); + scp = &_hurd_setup_sighandler_stackframe->ctx; + _setjmp(); + return scp; +} diff --git a/gcc/tree-ssa-uninit.cc b/gcc/tree-ssa-uninit.cc index f42f76cd5c62..9a7c7d12dd84 100644 --- a/gcc/tree-ssa-uninit.cc +++ b/gcc/tree-ssa-uninit.cc @@ -204,14 +204,29 @@ warn_uninit (opt_code opt, tree t, tree var, gimple *context, { var_def_stmt = SSA_NAME_DEF_STMT (t); - if (is_gimple_assign (var_def_stmt) - && gimple_assign_rhs_code (var_def_stmt) == COMPLEX_EXPR) + if (gassign *ass = dyn_cast <gassign *> (var_def_stmt)) { - tree v = gimple_assign_rhs1 (var_def_stmt); - if (TREE_CODE (v) == SSA_NAME - && has_undefined_value_p (v) - && zerop (gimple_assign_rhs2 (var_def_stmt))) - var = SSA_NAME_VAR (v); + switch (gimple_assign_rhs_code (var_def_stmt)) + { + case COMPLEX_EXPR: + { + tree v = gimple_assign_rhs1 (ass); + if (TREE_CODE (v) == SSA_NAME + && has_undefined_value_p (v) + && zerop 
(gimple_assign_rhs2 (ass))) + var = SSA_NAME_VAR (v); + break; + } + case SSA_NAME: + { + tree v = gimple_assign_rhs1 (ass); + if (TREE_CODE (v) == SSA_NAME + && SSA_NAME_VAR (v)) + var = SSA_NAME_VAR (v); + break; + } + default:; + } } if (gimple_call_internal_p (var_def_stmt, IFN_DEFERRED_INIT)) @@ -1229,6 +1244,18 @@ find_uninit_use (gphi *phi, unsigned uninit_opnds, int *bb_to_rpo) if (is_gimple_debug (use_stmt)) continue; + /* Look through a single level of SSA name copies. This is + important for copies involving abnormals which we can't always + propagate out but which result in spurious unguarded uses. */ + use_operand_p use2_p; + gimple *use2_stmt; + if (gimple_assign_ssa_name_copy_p (use_stmt) + && single_imm_use (gimple_assign_lhs (use_stmt), &use2_p, &use2_stmt)) + { + use_p = use2_p; + use_stmt = use2_stmt; + } + if (gphi *use_phi = dyn_cast <gphi *> (use_stmt)) { unsigned idx = PHI_ARG_INDEX_FROM_USE (use_p); @@ -1262,9 +1289,9 @@ find_uninit_use (gphi *phi, unsigned uninit_opnds, int *bb_to_rpo) e->src->index, e->dest->index); print_gimple_stmt (dump_file, use_stmt, 0); } - /* Found a phi use that is not guarded, mark the phi_result as + /* Found a phi use that is not guarded, mark the use as possibly undefined. 
*/ - possibly_undefined_names->add (phi_result); + possibly_undefined_names->add (USE_FROM_PTR (use_p)); } else cands.safe_push (use_stmt); @@ -1318,8 +1345,6 @@ warn_uninitialized_phi (gphi *phi, unsigned uninit_opnds, int *bb_to_rpo) unsigned phiarg_index = MASK_FIRST_SET_BIT (uninit_opnds); tree uninit_op = gimple_phi_arg_def (phi, phiarg_index); - if (SSA_NAME_VAR (uninit_op) == NULL_TREE) - return; location_t loc = UNKNOWN_LOCATION; if (gimple_phi_arg_has_location (phi, phiarg_index)) From 3ebb591c65b4fbe0cddd23ebc0ca2d9f7aef2cec Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 14 Nov 2022 17:14:18 +0000 Subject: [PATCH 093/311] libgcc: aarch64: Configure check for .variant_pcs support Ideally SME support routines in libgcc are marked as variant PCS symbols so check if as supports the directive. libgcc/ChangeLog: * config.in: Undef HAVE_AS_VARIANT_PCS. * configure: Regenerate. * configure.ac: Check for .variant_pcs. --- libgcc/config.in | 3 +++ libgcc/configure | 39 +++++++++++++++++++++++++++++++++++++++ libgcc/configure.ac | 17 +++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/libgcc/config.in b/libgcc/config.in index f93c64a00c36..5dd96cdf648c 100644 --- a/libgcc/config.in +++ b/libgcc/config.in @@ -13,6 +13,9 @@ /* Define to 1 if the assembler supports LSE. */ #undef HAVE_AS_LSE +/* Define to 1 if the assembler supports .variant_pcs. */ +#undef HAVE_AS_VARIANT_PCS + /* Define to 1 if the target assembler supports thread-local storage. */ #undef HAVE_CC_TLS diff --git a/libgcc/configure b/libgcc/configure index 567158955a32..82c8558a13dc 100755 --- a/libgcc/configure +++ b/libgcc/configure @@ -5619,6 +5619,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h ;; esac + + +case "${target}" in +aarch64*-*-*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports .variant_pcs" >&5 +$as_echo_n "checking if as supports .variant_pcs... 
" >&6; } +if ${libgcc_cv_as_variant_pcs+:} false; then : + $as_echo_n "(cached) " >&6 +else + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +asm (".variant_pcs foobar"); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + libgcc_cv_as_variant_pcs=yes +else + libgcc_cv_as_variant_pcs=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5 +$as_echo "$libgcc_cv_as_variant_pcs" >&6; } + if test x$libgcc_cv_as_variant_pcs = xyes; then + +$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h + + fi + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 $as_echo_n "checking for init priority support... " >&6; } if ${libgcc_cv_init_priority+:} false; then : diff --git a/libgcc/configure.ac b/libgcc/configure.ac index 9c0e415501a8..fd0934c9ad0d 100644 --- a/libgcc/configure.ac +++ b/libgcc/configure.ac @@ -648,6 +648,23 @@ changequote([,])dnl esac]) LIBGCC_CHECK_AS_LSE +dnl Check if as supports .variant_pcs. +AC_DEFUN([LIBGCC_CHECK_AS_VARIANT_PCS], [ +case "${target}" in +aarch64*-*-*) + AC_CACHE_CHECK([if as supports .variant_pcs], libgcc_cv_as_variant_pcs, [ + AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, + [[asm (".variant_pcs foobar");]])], + [libgcc_cv_as_variant_pcs=yes], [libgcc_cv_as_variant_pcs=no]) + ]) + if test x$libgcc_cv_as_variant_pcs = xyes; then + AC_DEFINE(HAVE_AS_VARIANT_PCS, 1, + [Define to 1 if the assembler supports .variant_pcs.]) + fi + ;; +esac]) +LIBGCC_CHECK_AS_VARIANT_PCS + dnl Check if as supports RTM instructions. 
AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, From dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 4 Dec 2023 10:52:52 +0000 Subject: [PATCH 094/311] libgcc: aarch64: Configure check for __getauxval Add configure check for the __getauxval ABI symbol, which is always available on aarch64 glibc, and may be available on other linux C runtimes. For now only enabled on glibc, others have to override it target_configargs=libgcc_cv_have___getauxval=yes This is deliberately obscure as it should be auto detected, ideally via a feature test macro in unistd.h (link time detection is not possible since the libc may not be installed at libgcc build time), but currently there is no such feature test mechanism. Without __getauxval, libgcc cannot do runtime CPU feature detection and has to assume only the build time known features are available. libgcc/ChangeLog: * config.in: Undef HAVE___GETAUXVAL. * configure: Regenerate. * configure.ac: Check for __getauxval. --- libgcc/config.in | 3 +++ libgcc/configure | 26 ++++++++++++++++++++++++++ libgcc/configure.ac | 19 +++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/libgcc/config.in b/libgcc/config.in index 5dd96cdf648c..441d4d39b95b 100644 --- a/libgcc/config.in +++ b/libgcc/config.in @@ -16,6 +16,9 @@ /* Define to 1 if the assembler supports .variant_pcs. */ #undef HAVE_AS_VARIANT_PCS +/* Define to 1 if __getauxval is available. */ +#undef HAVE___GETAUXVAL + /* Define to 1 if the target assembler supports thread-local storage. */ #undef HAVE_CC_TLS diff --git a/libgcc/configure b/libgcc/configure index 82c8558a13dc..3671d9b1a1aa 100755 --- a/libgcc/configure +++ b/libgcc/configure @@ -5658,6 +5658,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h ;; esac +# Check __getauxval ABI symbol for CPU feature detection. 
+case ${target} in +aarch64*-linux-*) + # No link check because the libc may not be present. + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5 +$as_echo_n "checking for __getauxval... " >&6; } +if ${libgcc_cv_have___getauxval+:} false; then : + $as_echo_n "(cached) " >&6 +else + case ${target} in + *-linux-gnu*) + libgcc_cv_have___getauxval=yes + ;; + *) + libgcc_cv_have___getauxval=no + esac +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" >&5 +$as_echo "$libgcc_cv_have___getauxval" >&6; } + if test x$libgcc_cv_have___getauxval = xyes; then + +$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h + + fi +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 $as_echo_n "checking for init priority support... " >&6; } if ${libgcc_cv_init_priority+:} false; then : diff --git a/libgcc/configure.ac b/libgcc/configure.ac index fd0934c9ad0d..467f5e63ef26 100644 --- a/libgcc/configure.ac +++ b/libgcc/configure.ac @@ -665,6 +665,25 @@ aarch64*-*-*) esac]) LIBGCC_CHECK_AS_VARIANT_PCS +# Check __getauxval ABI symbol for CPU feature detection. +case ${target} in +aarch64*-linux-*) + # No link check because the libc may not be present. + AC_CACHE_CHECK([for __getauxval], + [libgcc_cv_have___getauxval], + [case ${target} in + *-linux-gnu*) + libgcc_cv_have___getauxval=yes + ;; + *) + libgcc_cv_have___getauxval=no + esac]) + if test x$libgcc_cv_have___getauxval = xyes; then + AC_DEFINE(HAVE___GETAUXVAL, 1, + [Define to 1 if __getauxval is available.]) + fi +esac + dnl Check if as supports RTM instructions. 
AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, From 328c17af772207cb03740809c05ba2c3abfb86be Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Tue, 15 Nov 2022 14:08:55 +0000 Subject: [PATCH 095/311] libgcc: aarch64: Add SME runtime support The call ABI for SME (Scalable Matrix Extension) requires a number of helper routines which are added to libgcc so they are tied to the compiler version instead of the libc version. See https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines The routines are in shared libgcc and static libgcc eh, even though they are not related to exception handling. This is to avoid linking a copy of the routines into dynamic linked binaries, because TPIDR2_EL0 block can be extended in the future which is better to handle in a single place per process. The support routines have to decide if SME is accessible or not. Linux tells userspace if SME is accessible via AT_HWCAP2, otherwise a new __aarch64_sme_accessible symbol was introduced that a libc can define. Due to libgcc and libc build order, the symbol availability cannot be checked so for __aarch64_sme_accessible an unistd.h feature test macro is used while such detection mechanism is not available for __getauxval so we rely on configure checks based on the target triplet. Asm helper code is added to make writing the routines easier. libgcc/ChangeLog: * config/aarch64/t-aarch64: Add sources to the build. * config/aarch64/__aarch64_have_sme.c: New file. * config/aarch64/__arm_sme_state.S: New file. * config/aarch64/__arm_tpidr2_restore.S: New file. * config/aarch64/__arm_tpidr2_save.S: New file. * config/aarch64/__arm_za_disable.S: New file. * config/aarch64/aarch64-asm.h: New file. * config/aarch64/libgcc-sme.ver: New file. 
--- libgcc/config/aarch64/__aarch64_have_sme.c | 75 ++++++++++++++ libgcc/config/aarch64/__arm_sme_state.S | 55 ++++++++++ libgcc/config/aarch64/__arm_tpidr2_restore.S | 89 ++++++++++++++++ libgcc/config/aarch64/__arm_tpidr2_save.S | 101 +++++++++++++++++++ libgcc/config/aarch64/__arm_za_disable.S | 65 ++++++++++++ libgcc/config/aarch64/aarch64-asm.h | 98 ++++++++++++++++++ libgcc/config/aarch64/libgcc-sme.ver | 24 +++++ libgcc/config/aarch64/t-aarch64 | 10 ++ 8 files changed, 517 insertions(+) create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c create mode 100644 libgcc/config/aarch64/__arm_sme_state.S create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S create mode 100644 libgcc/config/aarch64/__arm_za_disable.S create mode 100644 libgcc/config/aarch64/aarch64-asm.h create mode 100644 libgcc/config/aarch64/libgcc-sme.ver diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c b/libgcc/config/aarch64/__aarch64_have_sme.c new file mode 100644 index 000000000000..5e649246270b --- /dev/null +++ b/libgcc/config/aarch64/__aarch64_have_sme.c @@ -0,0 +1,75 @@ +/* Initializer for SME support. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "auto-target.h" + +#ifndef inhibit_libc +/* For libc feature test macros. */ +# include <unistd.h> +#endif + +#if __ARM_FEATURE_SME +/* Avoid runtime SME detection if libgcc is built with SME. */ +# define HAVE_SME_CONST const +# define HAVE_SME_VALUE 1 +#elif HAVE___GETAUXVAL +/* SME access detection on Linux. */ +# define HAVE_SME_CONST +# define HAVE_SME_VALUE 0 +# define HAVE_SME_CTOR sme_accessible () + +# define AT_HWCAP2 26 +# define HWCAP2_SME (1 << 23) +unsigned long int __getauxval (unsigned long int); + +static _Bool +sme_accessible (void) +{ + unsigned long hwcap2 = __getauxval (AT_HWCAP2); + return (hwcap2 & HWCAP2_SME) != 0; +} +#elif __LIBC___AARCH64_SME_ACCESSIBLE +/* Alternative SME access detection. */ +# define HAVE_SME_CONST +# define HAVE_SME_VALUE 0 +# define HAVE_SME_CTOR __aarch64_sme_accessible () +_Bool __aarch64_sme_accessible (void); +#else +# define HAVE_SME_CONST const +# define HAVE_SME_VALUE 0 +#endif + +/* Define the symbol gating SME support in libgcc. */ +HAVE_SME_CONST _Bool __aarch64_have_sme + __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE; + +#ifdef HAVE_SME_CTOR +/* Use a higher priority to ensure it runs before user constructors + with priority 100. */ +static void __attribute__((constructor (90))) +init_have_sme (void) +{ + __aarch64_have_sme = HAVE_SME_CTOR; +} +#endif diff --git a/libgcc/config/aarch64/__arm_sme_state.S b/libgcc/config/aarch64/__arm_sme_state.S new file mode 100644 index 000000000000..c4e16cac00d1 --- /dev/null +++ b/libgcc/config/aarch64/__arm_sme_state.S @@ -0,0 +1,55 @@ +/* Support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "aarch64-asm.h" + +/* Query SME state. Call ABI: + - Private ZA, streaming-compatible. + - x2-x15, x19-x29, sp and fp regs are call preserved. + - Takes no argument. + - Returns SME state in x0 and TPIDR2_EL0 in x1. */ + +.hidden __aarch64_have_sme + +variant_pcs (__arm_sme_state) + +ENTRY (__arm_sme_state) + /* Check if SME is available. */ + adrp x1, __aarch64_have_sme + ldrb w1, [x1, :lo12:__aarch64_have_sme] + cbz w1, L(nosme) + + /* Expose the bottom 2 bits of svcr (SM, ZA) in x0 and set the + top 2 bits indicating that SME and TPIDR2_EL0 are available. */ + .inst 0xd53b4240 /* mrs x0, svcr */ + .inst 0xd53bd0a1 /* mrs x1, tpidr2_el0 */ + and x0, x0, 3 + orr x0, x0, 0xc000000000000000 + ret + +L(nosme): + mov x0, 0 + mov x1, 0 + ret +END (__arm_sme_state) diff --git a/libgcc/config/aarch64/__arm_tpidr2_restore.S b/libgcc/config/aarch64/__arm_tpidr2_restore.S new file mode 100644 index 000000000000..4569d04a2d06 --- /dev/null +++ b/libgcc/config/aarch64/__arm_tpidr2_restore.S @@ -0,0 +1,89 @@ +/* Support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. 
+ + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "aarch64-asm.h" + +/* Used for lazy ZA restore. Call ABI: + - Shared ZA, streaming-compatible. + - x0 is a pointer to a TPIDR2 block. + - x0-x13, x19-x29, sp and fp regs are call preserved. + - Does not return a value. + - Can abort on failure (then registers are not preserved). */ + +variant_pcs (__arm_tpidr2_restore) + +ENTRY (__arm_tpidr2_restore) + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ + cbnz x14, L(fail) + + /* check reserved bytes. */ + ldrh w15, [x0, 10] + ldr w16, [x0, 12] + orr w15, w15, w16 + cbnz w15, L(fail) + + ldr x16, [x0] + cbz x16, L(end) + ldrh w17, [x0, 8] + cbz w17, L(end) + + /* x0: blk, x14: 0, x15: 0, + x16: za_save_buffer, x17: num_za_save_slices. 
*/ + +L(restore_loop): + .inst 0xe1006200 /* ldr za[w15, 0], [x16] */ + .inst 0xe1006201 /* ldr za[w15, 1], [x16, 1, mul vl] */ + .inst 0xe1006202 /* ldr za[w15, 2], [x16, 2, mul vl] */ + .inst 0xe1006203 /* ldr za[w15, 3], [x16, 3, mul vl] */ + .inst 0xe1006204 /* ldr za[w15, 4], [x16, 4, mul vl] */ + .inst 0xe1006205 /* ldr za[w15, 5], [x16, 5, mul vl] */ + .inst 0xe1006206 /* ldr za[w15, 6], [x16, 6, mul vl] */ + .inst 0xe1006207 /* ldr za[w15, 7], [x16, 7, mul vl] */ + .inst 0xe1006208 /* ldr za[w15, 8], [x16, 8, mul vl] */ + .inst 0xe1006209 /* ldr za[w15, 9], [x16, 9, mul vl] */ + .inst 0xe100620a /* ldr za[w15, 10], [x16, 10, mul vl] */ + .inst 0xe100620b /* ldr za[w15, 11], [x16, 11, mul vl] */ + .inst 0xe100620c /* ldr za[w15, 12], [x16, 12, mul vl] */ + .inst 0xe100620d /* ldr za[w15, 13], [x16, 13, mul vl] */ + .inst 0xe100620e /* ldr za[w15, 14], [x16, 14, mul vl] */ + .inst 0xe100620f /* ldr za[w15, 15], [x16, 15, mul vl] */ + add w15, w15, 16 + .inst 0x04305a10 /* addsvl x16, x16, 16 */ + cmp w17, w15 + bhi L(restore_loop) +L(end): + ret +L(fail): + PACIASP + stp x29, x30, [sp, -32]! + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + mov x29, sp + .inst 0x04e0e3f0 /* cntd x16 */ + str x16, [sp, 16] + .cfi_rel_offset 46, 16 + .inst 0xd503467f /* smstop */ + bl abort +END (__arm_tpidr2_restore) diff --git a/libgcc/config/aarch64/__arm_tpidr2_save.S b/libgcc/config/aarch64/__arm_tpidr2_save.S new file mode 100644 index 000000000000..879cf7980798 --- /dev/null +++ b/libgcc/config/aarch64/__arm_tpidr2_save.S @@ -0,0 +1,101 @@ +/* Support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. 
+ + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "aarch64-asm.h" + +/* Used for lazy ZA save. Call ABI: + - Private ZA, streaming-compatible. + - x0-x13, x19-x29, sp and fp regs are call preserved. + - Takes no argument. + - Does not return a value. + - Can abort on failure (then registers are not preserved). */ + +.hidden __aarch64_have_sme + +variant_pcs (__arm_tpidr2_save) + +ENTRY (__arm_tpidr2_save) + /* Check if SME is available. */ + adrp x14, __aarch64_have_sme + ldrb w14, [x14, :lo12:__aarch64_have_sme] + cbz w14, L(end) + + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ + cbz x14, L(end) + + /* check reserved bytes. */ + ldrh w15, [x14, 10] + ldr w16, [x14, 12] + orr w15, w15, w16 + cbnz w15, L(fail) + + ldr x16, [x14] + cbz x16, L(end) + ldrh w17, [x14, 8] + cbz w17, L(end) + + /* x14: tpidr2, x15: 0, + x16: za_save_buffer, x17: num_za_save_slices. 
*/ + +L(save_loop): + .inst 0xe1206200 /* str za[w15, 0], [x16] */ + .inst 0xe1206201 /* str za[w15, 1], [x16, 1, mul vl] */ + .inst 0xe1206202 /* str za[w15, 2], [x16, 2, mul vl] */ + .inst 0xe1206203 /* str za[w15, 3], [x16, 3, mul vl] */ + .inst 0xe1206204 /* str za[w15, 4], [x16, 4, mul vl] */ + .inst 0xe1206205 /* str za[w15, 5], [x16, 5, mul vl] */ + .inst 0xe1206206 /* str za[w15, 6], [x16, 6, mul vl] */ + .inst 0xe1206207 /* str za[w15, 7], [x16, 7, mul vl] */ + .inst 0xe1206208 /* str za[w15, 8], [x16, 8, mul vl] */ + .inst 0xe1206209 /* str za[w15, 9], [x16, 9, mul vl] */ + .inst 0xe120620a /* str za[w15, 10], [x16, 10, mul vl] */ + .inst 0xe120620b /* str za[w15, 11], [x16, 11, mul vl] */ + .inst 0xe120620c /* str za[w15, 12], [x16, 12, mul vl] */ + .inst 0xe120620d /* str za[w15, 13], [x16, 13, mul vl] */ + .inst 0xe120620e /* str za[w15, 14], [x16, 14, mul vl] */ + .inst 0xe120620f /* str za[w15, 15], [x16, 15, mul vl] */ + add w15, w15, 16 + .inst 0x04305a10 /* addsvl x16, x16, 16 */ + cmp w17, w15 + bhi L(save_loop) +L(end): + ret +L(fail): + PACIASP + stp x29, x30, [sp, -32]! + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + mov x29, sp + .inst 0x04e0e3f0 /* cntd x16 */ + str x16, [sp, 16] + .cfi_rel_offset 46, 16 + .inst 0xd503467f /* smstop */ + bl abort +END (__arm_tpidr2_save) + +/* Hidden alias used by __arm_za_disable. */ +.global __libgcc_arm_tpidr2_save +.hidden __libgcc_arm_tpidr2_save +.set __libgcc_arm_tpidr2_save, __arm_tpidr2_save diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S new file mode 100644 index 000000000000..cff5b9cec47e --- /dev/null +++ b/libgcc/config/aarch64/__arm_za_disable.S @@ -0,0 +1,65 @@ +/* Support routine for SME. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "aarch64-asm.h" + +/* Disable ZA. Call ABI: + - Private ZA, streaming-compatible. + - x0-x13, x19-x29, sp and fp regs are call preserved. + - Takes no argument. + - Does not return a value. + - Can abort on failure (then registers are not preserved). */ + +.hidden __aarch64_have_sme + +.hidden __libgcc_arm_tpidr2_save + +variant_pcs (__arm_za_disable) + +ENTRY (__arm_za_disable) + /* Check if SME is available. */ + adrp x14, __aarch64_have_sme + ldrb w14, [x14, :lo12:__aarch64_have_sme] + cbz w14, L(end) + + .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ + cbz x14, L(end) + + PACIASP + stp x29, x30, [sp, -16]! 
+ .cfi_adjust_cfa_offset 16 + .cfi_rel_offset x29, 0 + .cfi_rel_offset x30, 8 + mov x29, sp + bl __libgcc_arm_tpidr2_save + .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ + .inst 0xd503447f /* smstop za */ + ldp x29, x30, [sp], 16 + .cfi_adjust_cfa_offset -16 + .cfi_restore x29 + .cfi_restore x30 + AUTIASP +L(end): + ret +END (__arm_za_disable) diff --git a/libgcc/config/aarch64/aarch64-asm.h b/libgcc/config/aarch64/aarch64-asm.h new file mode 100644 index 000000000000..8969b06b09c6 --- /dev/null +++ b/libgcc/config/aarch64/aarch64-asm.h @@ -0,0 +1,98 @@ +/* AArch64 asm definitions. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#include "auto-target.h" + +#define L(label) .L ## label + +/* Marking variant PCS symbol references is important for PLT calls + otherwise it is for documenting the PCS in the symbol table. */ +#ifdef HAVE_AS_VARIANT_PCS +# define variant_pcs(name) .variant_pcs name +#else +# define variant_pcs(name) +#endif + +/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. 
*/ +#define FEATURE_1_AND 0xc0000000 +#define FEATURE_1_BTI 1 +#define FEATURE_1_PAC 2 + +/* Supported features based on the code generation options. */ +#if defined(__ARM_FEATURE_BTI_DEFAULT) +# define BTI_FLAG FEATURE_1_BTI +# define BTI_C hint 34 +#else +# define BTI_FLAG 0 +# define BTI_C +#endif + +#if __ARM_FEATURE_PAC_DEFAULT & 3 +# define PAC_FLAG FEATURE_1_PAC +# define PACIASP hint 25; .cfi_window_save +# define AUTIASP hint 29; .cfi_window_save +#else +# define PAC_FLAG 0 +# define PACIASP +# define AUTIASP +#endif + +/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ +#define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word 5; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; \ + .previous + +#if defined(__linux__) || defined(__FreeBSD__) +/* Do not require executable stack. */ +.section .note.GNU-stack, "", %progbits +.previous + +/* Add GNU property note if built with branch protection. */ +# if (BTI_FLAG|PAC_FLAG) != 0 +GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) +# endif +#endif + +#define ENTRY_ALIGN(name, align) \ + .global name; \ + .type name,%function; \ + .balign align; \ + name: \ + .cfi_startproc; \ + BTI_C + +#define ENTRY(name) ENTRY_ALIGN(name, 16) + +#define END(name) \ + .cfi_endproc; \ + .size name, .-name diff --git a/libgcc/config/aarch64/libgcc-sme.ver b/libgcc/config/aarch64/libgcc-sme.ver new file mode 100644 index 000000000000..da889c6c09ef --- /dev/null +++ b/libgcc/config/aarch64/libgcc-sme.ver @@ -0,0 +1,24 @@ +# Copyright (C) 2023 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +GCC_14.0 { + __arm_sme_state + __arm_tpidr2_restore + __arm_tpidr2_save + __arm_za_disable +} diff --git a/libgcc/config/aarch64/t-aarch64 b/libgcc/config/aarch64/t-aarch64 index a40b6241c86e..7b852022a4d9 100644 --- a/libgcc/config/aarch64/t-aarch64 +++ b/libgcc/config/aarch64/t-aarch64 @@ -19,3 +19,13 @@ # . LIB2ADD += $(srcdir)/config/aarch64/sync-cache.c + +# Add sme runtime to shared libgcc +LIB2ADDEH += \ + $(srcdir)/config/aarch64/__aarch64_have_sme.c \ + $(srcdir)/config/aarch64/__arm_sme_state.S \ + $(srcdir)/config/aarch64/__arm_tpidr2_restore.S \ + $(srcdir)/config/aarch64/__arm_tpidr2_save.S \ + $(srcdir)/config/aarch64/__arm_za_disable.S + +SHLIB_MAPFILES += $(srcdir)/config/aarch64/libgcc-sme.ver From 91d68665b8b7a5dffd0bbf8cd1f74c3c41d4c2d8 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Fri, 29 Sep 2023 13:55:51 +0100 Subject: [PATCH 096/311] libgcc: aarch64: Add SME unwinder support To support the ZA lazy save scheme, the PCS requires the unwinder to reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry to an exception handler. We use the __arm_za_disable SME runtime call unconditionally to achieve this. https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions The hidden alias is used to avoid a PLT and avoid inconsistent VPCS marking (we don't rely on special PCS at the call site). In case of static linking the SME runtime init code is linked in code that raises exceptions. libgcc/ChangeLog: * config/aarch64/__arm_za_disable.S: Add hidden alias. 
* config/aarch64/aarch64-unwind.h: Reset the SME state before EH return via the _Unwind_Frames_Extra hook. --- libgcc/config/aarch64/__arm_za_disable.S | 5 +++++ libgcc/config/aarch64/aarch64-unwind.h | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S index cff5b9cec47e..03fc28a39317 100644 --- a/libgcc/config/aarch64/__arm_za_disable.S +++ b/libgcc/config/aarch64/__arm_za_disable.S @@ -63,3 +63,8 @@ ENTRY (__arm_za_disable) L(end): ret END (__arm_za_disable) + +/* Hidden alias used by the unwinder. */ +.global __libgcc_arm_za_disable +.hidden __libgcc_arm_za_disable +.set __libgcc_arm_za_disable, __arm_za_disable diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h index d669edd671b4..9fe6c8f61c35 100644 --- a/libgcc/config/aarch64/aarch64-unwind.h +++ b/libgcc/config/aarch64/aarch64-unwind.h @@ -78,4 +78,20 @@ aarch64_demangle_return_addr (struct _Unwind_Context *context, return addr; } +/* SME runtime function local to libgcc, streaming compatible + and preserves more registers than the base PCS requires, but + we don't rely on that here. */ +__attribute__ ((visibility ("hidden"))) +void __libgcc_arm_za_disable (void); + +/* Disable the SME ZA state in case an unwound frame used the ZA + lazy saving scheme. */ +#undef _Unwind_Frames_Extra +#define _Unwind_Frames_Extra(x) \ + do \ + { \ + __libgcc_arm_za_disable (); \ + } \ + while (0) + #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */ From 47575ec9edcd3078f066aa54ba428420be796bef Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Fri, 8 Dec 2023 12:22:54 +0000 Subject: [PATCH 097/311] libgcc: Fix config.in It was updated incorrectly in commit dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 Author: Szabolcs Nagy CommitDate: 2023-12-08 11:29:06 +0000 libgcc: aarch64: Configure check for __getauxval so regenerate it. libgcc/ChangeLog: * config.in: Regenerate. 
--- libgcc/config.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libgcc/config.in b/libgcc/config.in index 441d4d39b95b..8f7dd437b0ed 100644 --- a/libgcc/config.in +++ b/libgcc/config.in @@ -16,9 +16,6 @@ /* Define to 1 if the assembler supports .variant_pcs. */ #undef HAVE_AS_VARIANT_PCS -/* Define to 1 if __getauxval is available. */ -#undef HAVE___GETAUXVAL - /* Define to 1 if the target assembler supports thread-local storage. */ #undef HAVE_CC_TLS @@ -67,6 +64,9 @@ /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H +/* Define to 1 if __getauxval is available. */ +#undef HAVE___GETAUXVAL + /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT From d4b6d147920b93297e621124a99ed01e7e310d92 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Fri, 8 Dec 2023 15:18:25 +0100 Subject: [PATCH 098/311] OpenMP/Fortran: Implement omp allocators/allocate for ptr/allocatables This commit adds -fopenmp-allocators which enables support for 'omp allocators' and 'omp allocate' that are associated with a Fortran allocate-stmt. If such a construct is encountered, an error is shown, unless the -fopenmp-allocators flag is present. With -fopenmp -fopenmp-allocators, those constructs get turned into GOMP_alloc allocations, while -fopenmp-allocators (also without -fopenmp) ensures deallocation and reallocation (via intrinsic assignments) are properly directed to GOMP_free/omp_realloc - while normal Fortran allocations are processed by free/realloc. In order to distinguish 'malloc'ed from 'GOMP_alloc'ed memory, the version field of the Fortran array descriptor is (mis)used: 0 indicates the normal Fortran allocation while 1 denotes GOMP_alloc. For scalars, there is record keeping in libgomp: GOMP_add_alloc(ptr) will add the pointer address to a splay_tree while GOMP_is_alloc(ptr) will return true if it was previously added but also removes it from the list.
Besides Fortran FE work, BUILT_IN_GOMP_REALLOC is now part of omp-builtins.def and libgomp gains the two new functions mentioned above. gcc/ChangeLog: * builtin-types.def (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE): New. * omp-builtins.def (BUILT_IN_GOMP_REALLOC): New. * builtins.cc (builtin_fnspec): Handle it. * gimple-ssa-warn-access.cc (fndecl_alloc_p, matching_alloc_calls_p): Likewise. * gimple.cc (nonfreeing_call_p): Likewise. * predict.cc (expr_expected_value_1): Likewise. * tree-ssa-ccp.cc (evaluate_stmt): Likewise. * tree.cc (fndecl_dealloc_argno): Likewise. gcc/fortran/ChangeLog: * dump-parse-tree.cc (show_omp_node): Handle EXEC_OMP_ALLOCATE and EXEC_OMP_ALLOCATORS. * f95-lang.cc (ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST): Add 'ECF_LEAF | ECF_MALLOC' to existing 'ECF_NOTHROW'. (ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST): Define. * gfortran.h (gfc_omp_clauses): Add contained_in_target_construct. * invoke.texi (-fopenacc, -fopenmp): Update based on C version. (-fopenmp-simd): New, based on C version. (-fopenmp-allocators): New. * lang.opt (fopenmp-allocators): Add. * openmp.cc (resolve_omp_clauses): For allocators/allocate directive, add target and no dynamic_allocators diagnostic and more invalid diagnostic. * parse.cc (decode_omp_directive): Set contains_teams_construct. * trans-array.h (gfc_array_allocate): Update prototype. (gfc_conv_descriptor_version): New prototype. * trans-decl.cc (gfc_init_default_dt): Fix comment. * trans-array.cc (gfc_conv_descriptor_version): New. (gfc_array_allocate): Support GOMP_alloc allocation. (gfc_alloc_allocatable_for_assignment, structure_alloc_comps): Handle GOMP_free/omp_realloc as needed. * trans-expr.cc (gfc_conv_procedure_call): Likewise. (alloc_scalar_allocatable_for_assignment): Likewise. * trans-intrinsic.cc (conv_intrinsic_move_alloc): Likewise. * trans-openmp.cc (gfc_trans_omp_allocators, gfc_trans_omp_directive): Handle allocators/allocate directive. (gfc_omp_call_add_alloc, gfc_omp_call_is_alloc): New.
* trans-stmt.h (gfc_trans_allocate): Update prototype. * trans-stmt.cc (gfc_trans_allocate): Support GOMP_alloc. * trans-types.cc (gfc_get_dtype_rank_type): Set version field. * trans.cc (gfc_allocate_using_malloc, gfc_allocate_allocatable): Update to handle GOMP_alloc. (gfc_deallocate_with_status, gfc_deallocate_scalar_with_status): Handle GOMP_free. (trans_code): Update call. * trans.h (gfc_allocate_allocatable, gfc_allocate_using_malloc): Update prototype. (gfc_omp_call_add_alloc, gfc_omp_call_is_alloc): New prototype. * types.def (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE): New. libgomp/ChangeLog: * allocator.c (struct fort_alloc_splay_tree_key_s, fort_alloc_splay_compare, GOMP_add_alloc, GOMP_is_alloc): New. * libgomp.h: Define splay_tree_static for 'reverse' splay tree. * libgomp.map (GOMP_5.1.2): New; add GOMP_add_alloc and GOMP_is_alloc; move GOMP_target_map_indirect_ptr from ... (GOMP_5.1.1): ... here. * libgomp.texi (Impl. Status, Memory management): Update for allocators/allocate directives. * splay-tree.c: Handle splay_tree_static define to declare all functions as static. (splay_tree_lookup_node): New. * splay-tree.h: Handle splay_tree_decl_only define. (splay_tree_lookup_node): New prototype. * target.c: Define splay_tree_static for 'reverse'. * testsuite/libgomp.fortran/allocators-1.f90: New test. * testsuite/libgomp.fortran/allocators-2.f90: New test. * testsuite/libgomp.fortran/allocators-3.f90: New test. * testsuite/libgomp.fortran/allocators-4.f90: New test. * testsuite/libgomp.fortran/allocators-5.f90: New test. gcc/testsuite/ChangeLog: * gfortran.dg/gomp/allocate-14.f90: Add coarray and not-listed tests. * gfortran.dg/gomp/allocate-5.f90: Remove sorry dg-message. * gfortran.dg/bind_c_array_params_2.f90: Update expected dump for dtype '.version=0'. * gfortran.dg/gomp/allocate-16.f90: New test. * gfortran.dg/gomp/allocators-3.f90: New test. * gfortran.dg/gomp/allocators-4.f90: New test. 
--- gcc/builtin-types.def | 2 + gcc/builtins.cc | 1 + gcc/fortran/dump-parse-tree.cc | 2 + gcc/fortran/f95-lang.cc | 4 +- gcc/fortran/gfortran.h | 1 + gcc/fortran/invoke.texi | 79 ++++++--- gcc/fortran/lang.opt | 4 + gcc/fortran/openmp.cc | 120 ++++++++++++-- gcc/fortran/parse.cc | 7 +- gcc/fortran/trans-array.cc | 152 ++++++++++++++---- gcc/fortran/trans-array.h | 4 +- gcc/fortran/trans-decl.cc | 2 +- gcc/fortran/trans-expr.cc | 24 ++- gcc/fortran/trans-intrinsic.cc | 5 +- gcc/fortran/trans-openmp.cc | 61 ++++++- gcc/fortran/trans-stmt.cc | 92 ++++++++++- gcc/fortran/trans-stmt.h | 2 +- gcc/fortran/trans-types.cc | 4 + gcc/fortran/trans.cc | 85 +++++++--- gcc/fortran/trans.h | 10 +- gcc/fortran/types.def | 2 + gcc/gimple-ssa-warn-access.cc | 18 ++- gcc/gimple.cc | 2 + gcc/omp-builtins.def | 3 + gcc/predict.cc | 1 + .../gfortran.dg/bind_c_array_params_2.f90 | 2 +- .../gfortran.dg/gomp/allocate-14.f90 | 41 +++++ .../gfortran.dg/gomp/allocate-16.f90 | 10 ++ gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 | 17 +- .../gfortran.dg/gomp/allocators-3.f90 | 36 +++++ .../gfortran.dg/gomp/allocators-4.f90 | 9 ++ gcc/tree-ssa-ccp.cc | 1 + gcc/tree.cc | 2 + libgomp/allocator.c | 63 ++++++++ libgomp/libgomp.h | 1 + libgomp/libgomp.map | 8 +- libgomp/libgomp.texi | 16 +- libgomp/splay-tree.c | 40 ++++- libgomp/splay-tree.h | 17 ++ libgomp/target.c | 1 + .../libgomp.fortran/allocators-1.f90 | 68 ++++++++ .../libgomp.fortran/allocators-2.f90 | 101 ++++++++++++ .../libgomp.fortran/allocators-3.f90 | 25 +++ .../libgomp.fortran/allocators-4.f90 | 57 +++++++ .../libgomp.fortran/allocators-5.f90 | 27 ++++ 45 files changed, 1113 insertions(+), 116 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocate-16.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocators-3.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/allocators-4.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/allocators-1.f90 create mode 100644 
libgomp/testsuite/libgomp.fortran/allocators-2.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/allocators-3.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/allocators-4.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/allocators-5.f90 diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 43381bc89493..183ef62bad2a 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -840,6 +840,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_PTR_PTR_CONST_PTR_SIZE_SIZE, BT_PTR, BT_PTR, BT_CONST_PTR, BT_SIZE, BT_SIZE) DEF_FUNCTION_TYPE_4 (BT_FN_PTR_PTR_INT_SIZE_SIZE, BT_PTR, BT_PTR, BT_INT, BT_SIZE, BT_SIZE) +DEF_FUNCTION_TYPE_4 (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE, + BT_PTR, BT_PTR, BT_SIZE, BT_PTRMODE, BT_PTRMODE) DEF_FUNCTION_TYPE_4 (BT_FN_UINT_UINT_UINT_UINT_UINT, BT_UINT, BT_UINT, BT_UINT, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_UINT_UINT_UINT_UINT_UINTPTR, diff --git a/gcc/builtins.cc b/gcc/builtins.cc index afa9be514437..38b0acff1312 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -12410,6 +12410,7 @@ builtin_fnspec (tree callee) return ".cO "; /* Realloc serves both as allocation point and deallocation point. 
*/ case BUILT_IN_REALLOC: + case BUILT_IN_GOMP_REALLOC: return ".Cw "; case BUILT_IN_GAMMA_R: case BUILT_IN_GAMMAF_R: diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc index cc4846e5d745..ecf71036444c 100644 --- a/gcc/fortran/dump-parse-tree.cc +++ b/gcc/fortran/dump-parse-tree.cc @@ -2241,6 +2241,8 @@ show_omp_node (int level, gfc_code *c) case EXEC_OACC_CACHE: case EXEC_OACC_ENTER_DATA: case EXEC_OACC_EXIT_DATA: + case EXEC_OMP_ALLOCATE: + case EXEC_OMP_ALLOCATORS: case EXEC_OMP_ASSUME: case EXEC_OMP_CANCEL: case EXEC_OMP_CANCELLATION_POINT: diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc index 32fddcde9571..539bc271e78f 100644 --- a/gcc/fortran/f95-lang.cc +++ b/gcc/fortran/f95-lang.cc @@ -566,7 +566,9 @@ gfc_builtin_function (tree decl) #define ATTR_NOTHROW_LIST (ECF_NOTHROW) #define ATTR_CONST_NOTHROW_LIST (ECF_NOTHROW | ECF_CONST) #define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST \ - (ECF_NOTHROW) + (ECF_NOTHROW | ECF_LEAF | ECF_MALLOC) +#define ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST \ + (ECF_NOTHROW | ECF_LEAF) #define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ (ECF_COLD | ECF_NORETURN | \ ECF_NOTHROW | ECF_LEAF) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index a77441f38e7c..28569d07e716 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1579,6 +1579,7 @@ typedef struct gfc_omp_clauses unsigned grainsize_strict:1, num_tasks_strict:1, compare:1, weak:1; unsigned non_rectangular:1, order_concurrent:1; unsigned contains_teams_construct:1, target_first_st_is_teams:1; + unsigned contained_in_target_construct:1; ENUM_BITFIELD (gfc_omp_sched_kind) sched_kind:3; ENUM_BITFIELD (gfc_omp_device_type) device_type:2; ENUM_BITFIELD (gfc_omp_memorder) memorder:3; diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi index 2f1d1f284292..7523d7595328 100644 --- a/gcc/fortran/invoke.texi +++ b/gcc/fortran/invoke.texi @@ -126,8 +126,9 @@ by type. 
Explanations are in the following sections. -ffree-form -ffree-line-length-@var{n} -ffree-line-length-none -fimplicit-none -finteger-4-integer-8 -fmax-identifier-length -fmodule-private -ffixed-form -fno-range-check -fopenacc -fopenmp --freal-4-real-10 -freal-4-real-16 -freal-4-real-8 -freal-8-real-10 --freal-8-real-16 -freal-8-real-4 -std=@var{std} -ftest-forall-temp +-fopenmp-allocators -fopenmp-simd -freal-4-real-10 -freal-4-real-16 +-freal-4-real-8 -freal-8-real-10 -freal-8-real-16 -freal-8-real-4 +-std=@var{std} -ftest-forall-temp } @item Preprocessing Options @@ -410,26 +411,64 @@ Specify that no implicit typing is allowed, unless overridden by explicit Enable the Cray pointer extension, which provides C-like pointer functionality. -@opindex @code{fopenacc} -@cindex OpenACC -@item -fopenacc -Enable the OpenACC extensions. This includes OpenACC @code{!$acc} -directives in free form and @code{c$acc}, @code{*$acc} and -@code{!$acc} directives in fixed form, @code{!$} conditional -compilation sentinels in free form and @code{c$}, @code{*$} and -@code{!$} sentinels in fixed form, and when linking arranges for the -OpenACC runtime library to be linked in. -@opindex @code{fopenmp} -@cindex OpenMP +@opindex fopenacc +@cindex OpenACC accelerator programming +@item -fopenacc +Enable handling of OpenACC directives @samp{!$acc} in free-form Fortran and +@samp{!$acc}, @samp{c$acc} and @samp{*$acc} in fixed-form Fortran. When +@option{-fopenacc} is specified, the compiler generates accelerated code +according to the OpenACC Application Programming Interface v2.6 +@w{@uref{https://www.openacc.org}}. This option implies @option{-pthread}, +and thus is only supported on targets that have support for @option{-pthread}. +The option @option{-fopenacc} implies @option{-frecursive}. + +@opindex fopenmp +@cindex OpenMP parallel @item -fopenmp -Enable the OpenMP extensions. 
This includes OpenMP @code{!$omp} directives -in free form -and @code{c$omp}, @code{*$omp} and @code{!$omp} directives in fixed form, -@code{!$} conditional compilation sentinels in free form -and @code{c$}, @code{*$} and @code{!$} sentinels in fixed form, -and when linking arranges for the OpenMP runtime library to be linked -in. The option @option{-fopenmp} implies @option{-frecursive}. +Enable handling of OpenMP directives @samp{!$omp} in Fortran. It +additionally enables the conditional compilation sentinel @samp{!$} in +Fortran. In fixed source form Fortran, the sentinels can also start with +@samp{c} or @samp{*}. When @option{-fopenmp} is specified, the +compiler generates parallel code according to the OpenMP Application +Program Interface v4.5 @w{@uref{https://www.openmp.org}}. This option +implies @option{-pthread}, and thus is only supported on targets that +have support for @option{-pthread}. @option{-fopenmp} implies +@option{-fopenmp-simd} and @option{-frecursive}. + +@opindex fopenmp-allocators +@cindex OpenMP Allocators +@item -fopenmp-allocators +Enables handling of allocation, reallocation and deallocation of Fortran +allocatable and pointer variables that are allocated using the +@samp{!$omp allocators} and @samp{!$omp allocate} constructs. Files +containing either directive have to be compiled with this option in addition +to @option{-fopenmp}. Additionally, all files that might deallocate or +reallocate a variable that has been allocated with an OpenMP allocator +have to be compiled with this option. This includes intrinsic assignment +to allocatable variables when reallocation may occur and deallocation +due to either of the following: end of scope, explicit deallocation, +@samp{intent(out)}, deallocation of allocatable components etc. +Files not changing the allocation status or only for components of +a derived type that have not been allocated using those two directives +do not need to be compiled with this option. 
Nor do files that handle +such variables after they have been deallocated or allocated by the +normal Fortran allocator. + +@opindex fopenmp-simd +@cindex OpenMP SIMD +@cindex SIMD +@item -fopenmp-simd +Enable handling of OpenMP's @code{simd}, @code{declare simd}, +@code{declare reduction}, @code{assume}, @code{ordered}, @code{scan} +and @code{loop} directive, and of combined or composite directives with +@code{simd} as constituent with @code{!$omp} in Fortran. It additionally +enables the conditional compilation sentinel @samp{!$} in Fortran. In +fixed source form Fortran, the sentinels can also start with @samp{c} or +@samp{*}. Other OpenMP directives are ignored. Unless @option{-fopenmp} +is additionally specified, the @code{loop} region binds to the current task +region, independent of the specified @code{bind} clause. + @opindex @code{frange-check} @item -fno-range-check diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index adcfc280b5ae..7c301431cbcd 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -716,6 +716,10 @@ fopenmp-simd Fortran ; Documented in C +fopenmp-allocators +Fortran Var(flag_openmp_allocators) +Handle OpenMP allocators for allocatables and pointers. + fpack-derived Fortran Var(flag_pack_derived) Try to lay out derived types as compactly as possible. 
diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index 794df19a4d1a..251da667236d 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -7424,6 +7424,9 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, if (omp_clauses == NULL) return; + if (ns == NULL) + ns = gfc_current_ns; + if (omp_clauses->orderedc && omp_clauses->orderedc < omp_clauses->collapse) gfc_error ("ORDERED clause parameter is less than COLLAPSE at %L", &code->loc); @@ -7657,23 +7660,22 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && n->sym->result == n->sym && n->sym->attr.function) { - if (gfc_current_ns->proc_name == n->sym - || (gfc_current_ns->parent - && gfc_current_ns->parent->proc_name == n->sym)) + if (ns->proc_name == n->sym + || (ns->parent && ns->parent->proc_name == n->sym)) continue; - if (gfc_current_ns->proc_name->attr.entry_master) + if (ns->proc_name->attr.entry_master) { - gfc_entry_list *el = gfc_current_ns->entries; + gfc_entry_list *el = ns->entries; for (; el; el = el->next) if (el->sym == n->sym) break; if (el) continue; } - if (gfc_current_ns->parent - && gfc_current_ns->parent->proc_name->attr.entry_master) + if (ns->parent + && ns->parent->proc_name->attr.entry_master) { - gfc_entry_list *el = gfc_current_ns->parent->entries; + gfc_entry_list *el = ns->parent->entries; for (; el; el = el->next) if (el->sym == n->sym) break; @@ -7973,24 +7975,120 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && code->block->next->op == EXEC_ALLOCATE) { gfc_alloc *a; + gfc_omp_namelist *n_null = NULL; + bool missing_allocator = false; + gfc_symbol *missing_allocator_sym = NULL; for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next) { + if (n->u2.allocator == NULL) + { + if (!missing_allocator_sym) + missing_allocator_sym = n->sym; + missing_allocator = true; + } if (n->sym == NULL) - continue; + { + n_null = n; + continue; + } if (n->sym->attr.codimension) gfc_error ("Unexpected coarray %qs in % 
at %L", n->sym->name, &n->where); for (a = code->block->next->ext.alloc.list; a; a = a->next) if (a->expr->expr_type == EXPR_VARIABLE && a->expr->symtree->n.sym == n->sym) - break; + { + gfc_ref *ref; + for (ref = a->expr->ref; ref; ref = ref->next) + if (ref->type == REF_COMPONENT) + break; + if (ref == NULL) + break; + } if (a == NULL) gfc_error ("%qs specified in % at %L but not " "in the associated ALLOCATE statement", n->sym->name, &n->where); } - } + /* If there is an ALLOCATE directive without list argument, a + namelist with its allocator/align clauses and n->sym = NULL is + created during parsing; here, we add all not otherwise specified + items from the Fortran allocate to that list. + For an ALLOCATORS directive, not listed items use the normal + Fortran way. + The behavior of an ALLOCATE directive that does not list all + arguments but there is no directive without list argument is not + well specified. Thus, we reject such code below. In OpenMP 5.2 + the executable ALLOCATE directive is deprecated and in 6.0 + deleted such that no spec clarification is to be expected. */ + for (a = code->block->next->ext.alloc.list; a; a = a->next) + if (a->expr->expr_type == EXPR_VARIABLE) + { + for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next) + if (a->expr->symtree->n.sym == n->sym) + { + gfc_ref *ref; + for (ref = a->expr->ref; ref; ref = ref->next) + if (ref->type == REF_COMPONENT) + break; + if (ref == NULL) + break; + } + if (n == NULL && n_null == NULL) + { + /* OK for ALLOCATORS but for ALLOCATE: Unspecified whether + that should use the default allocator of OpenMP or the + Fortran allocator. Thus, just reject it. 
*/ + if (code->op == EXEC_OMP_ALLOCATE) + gfc_error ("%qs listed in % statement at %L " + "but it is neither explicitly in listed in " + "the % directive nor exists" + " a directive without argument list", + a->expr->symtree->n.sym->name, + &a->expr->where); + break; + } + if (n == NULL) + { + if (a->expr->symtree->n.sym->attr.codimension) + gfc_error ("Unexpected coarray %qs in % at " + "%L, implicitly listed in %" + " at %L", a->expr->symtree->n.sym->name, + &a->expr->where, &n_null->where); + break; + } + } + gfc_namespace *prog_unit = ns; + while (prog_unit->parent) + prog_unit = prog_unit->parent; + gfc_namespace *fn_ns = ns; + while (fn_ns) + { + if (ns->proc_name + && (ns->proc_name->attr.subroutine + || ns->proc_name->attr.function)) + break; + fn_ns = fn_ns->parent; + } + if (missing_allocator + && !(prog_unit->omp_requires & OMP_REQ_DYNAMIC_ALLOCATORS) + && ((fn_ns && fn_ns->proc_name->attr.omp_declare_target) + || omp_clauses->contained_in_target_construct)) + { + if (code->op == EXEC_OMP_ALLOCATORS) + gfc_error ("ALLOCATORS directive at %L inside a target region " + "must specify an ALLOCATOR modifier for %qs", + &code->loc, missing_allocator_sym->name); + else if (missing_allocator_sym) + gfc_error ("ALLOCATE directive at %L inside a target region " + "must specify an ALLOCATOR clause for %qs", + &code->loc, missing_allocator_sym->name); + else + gfc_error ("ALLOCATE directive at %L inside a target region " + "must specify an ALLOCATOR clause", &code->loc); + } + } } /* OpenACC reductions. 
*/ diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc index abd3a424f385..c0eb0575a90f 100644 --- a/gcc/fortran/parse.cc +++ b/gcc/fortran/parse.cc @@ -1364,6 +1364,8 @@ decode_omp_directive (void) prog_unit->omp_target_seen = true; break; } + case ST_OMP_ALLOCATE_EXEC: + case ST_OMP_ALLOCATORS: case ST_OMP_TEAMS: case ST_OMP_TEAMS_DISTRIBUTE: case ST_OMP_TEAMS_DISTRIBUTE_SIMD: @@ -1386,7 +1388,10 @@ decode_omp_directive (void) case EXEC_OMP_TARGET_PARALLEL_DO_SIMD: case EXEC_OMP_TARGET_PARALLEL_LOOP: case EXEC_OMP_TARGET_SIMD: - stk->tail->ext.omp_clauses->contains_teams_construct = 1; + if (ret == ST_OMP_ALLOCATE_EXEC || ret == ST_OMP_ALLOCATORS) + new_st.ext.omp_clauses->contained_in_target_construct = 1; + else + stk->tail->ext.omp_clauses->contains_teams_construct = 1; break; default: break; diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 82f60a656f3e..2930406a8e42 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -363,6 +363,21 @@ gfc_conv_descriptor_rank (tree desc) } +tree +gfc_conv_descriptor_version (tree desc) +{ + tree tmp; + tree dtype; + + dtype = gfc_conv_descriptor_dtype (desc); + tmp = gfc_advance_chain (TYPE_FIELDS (TREE_TYPE (dtype)), GFC_DTYPE_VERSION); + gcc_assert (tmp != NULL_TREE + && TREE_TYPE (tmp) == integer_type_node); + return fold_build3_loc (input_location, COMPONENT_REF, TREE_TYPE (tmp), + dtype, tmp, NULL_TREE); +} + + /* Return the element length from the descriptor dtype field. 
*/ tree @@ -6196,7 +6211,7 @@ bool gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, tree errlen, tree label_finish, tree expr3_elem_size, tree *nelems, gfc_expr *expr3, tree e3_arr_desc, - bool e3_has_nodescriptor) + bool e3_has_nodescriptor, gfc_omp_namelist *omp_alloc) { tree tmp; tree pointer; @@ -6218,6 +6233,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_ref *ref, *prev_ref = NULL, *coref; bool allocatable, coarray, dimension, alloc_w_e3_arr_spec = false, non_ulimate_coarray_ptr_comp; + tree omp_cond = NULL_TREE, omp_alt_alloc = NULL_TREE; ref = expr->ref; @@ -6368,7 +6384,11 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, token = gfc_build_addr_expr (NULL_TREE, token); } else - pointer = gfc_conv_descriptor_data_get (se->expr); + { + pointer = gfc_conv_descriptor_data_get (se->expr); + if (omp_alloc) + omp_cond = boolean_true_node; + } STRIP_NOPS (pointer); if (allocatable) @@ -6384,18 +6404,66 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_start_block (&elseblock); + tree succ_add_expr = NULL_TREE; + if (omp_cond) + { + tree align, alloc, sz; + gfc_se se2; + if (omp_alloc->u2.allocator) + { + gfc_init_se (&se2, NULL); + gfc_conv_expr (&se2, omp_alloc->u2.allocator); + gfc_add_block_to_block (&elseblock, &se2.pre); + alloc = gfc_evaluate_now (se2.expr, &elseblock); + gfc_add_block_to_block (&elseblock, &se2.post); + } + else + alloc = build_zero_cst (ptr_type_node); + tmp = TREE_TYPE (TREE_TYPE (pointer)); + if (tmp == void_type_node) + tmp = gfc_typenode_for_spec (&expr->ts, 0); + if (omp_alloc->u.align) + { + gfc_init_se (&se2, NULL); + gfc_conv_expr (&se2, omp_alloc->u.align); + gcc_assert (CONSTANT_CLASS_P (se2.expr) + && se2.pre.head == NULL + && se2.post.head == NULL); + align = build_int_cst (size_type_node, + MAX (tree_to_uhwi (se2.expr), + TYPE_ALIGN_UNIT (tmp))); + } + else + align = build_int_cst (size_type_node, 
TYPE_ALIGN_UNIT (tmp)); + sz = fold_build2_loc (input_location, MAX_EXPR, size_type_node, + fold_convert (size_type_node, size), + build_int_cst (size_type_node, 1)); + omp_alt_alloc = builtin_decl_explicit (BUILT_IN_GOMP_ALLOC); + DECL_ATTRIBUTES (omp_alt_alloc) + = tree_cons (get_identifier ("omp allocator"), + build_tree_list (NULL_TREE, alloc), + DECL_ATTRIBUTES (omp_alt_alloc)); + omp_alt_alloc = build_call_expr (omp_alt_alloc, 3, align, sz, alloc); + succ_add_expr = fold_build2_loc (input_location, MODIFY_EXPR, + void_type_node, + gfc_conv_descriptor_version (se->expr), + build_int_cst (integer_type_node, 1)); + } + /* The allocatable variant takes the old pointer as first argument. */ if (allocatable) gfc_allocate_allocatable (&elseblock, pointer, size, token, status, errmsg, errlen, label_finish, expr, - coref != NULL ? coref->u.ar.as->corank : 0); + coref != NULL ? coref->u.ar.as->corank : 0, + omp_cond, omp_alt_alloc, succ_add_expr); else if (non_ulimate_coarray_ptr_comp && token) /* The token is set only for GFC_FCOARRAY_LIB mode. */ gfc_allocate_using_caf_lib (&elseblock, pointer, size, token, status, errmsg, errlen, GFC_CAF_COARRAY_ALLOC_ALLOCATE_ONLY); else - gfc_allocate_using_malloc (&elseblock, pointer, size, status); + gfc_allocate_using_malloc (&elseblock, pointer, size, status, + omp_cond, omp_alt_alloc, succ_add_expr); if (dimension) { @@ -9603,11 +9671,6 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest, else if (attr->dimension && !attr->proc_pointer) caf_token = gfc_conv_descriptor_token (comp); } - if (attr->dimension && !attr->codimension && !attr->proc_pointer) - /* When this is an array but not in conjunction with a coarray - then add the data-ref. For coarray'ed arrays the data-ref - is added by deallocate_with_status. 
*/ - comp = gfc_conv_descriptor_data_get (comp); tmp = gfc_deallocate_with_status (comp, NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE, true, @@ -10292,29 +10355,50 @@ structure_alloc_comps (gfc_symbol * der_type, tree decl, tree dest, gfc_add_expr_to_block (&fnblock, tmp); } - if (c->attr.pdt_array) + if (c->attr.pdt_array || c->attr.pdt_string) { - tmp = gfc_conv_descriptor_data_get (comp); + tmp = comp; + if (c->attr.pdt_array) + tmp = gfc_conv_descriptor_data_get (comp); null_cond = fold_build2_loc (input_location, NE_EXPR, logical_type_node, tmp, build_int_cst (TREE_TYPE (tmp), 0)); - tmp = gfc_call_free (tmp); + if (flag_openmp_allocators) + { + tree cd, t; + if (c->attr.pdt_array) + cd = fold_build2_loc (input_location, EQ_EXPR, + boolean_type_node, + gfc_conv_descriptor_version (comp), + build_int_cst (integer_type_node, 1)); + else + cd = gfc_omp_call_is_alloc (tmp); + t = builtin_decl_explicit (BUILT_IN_GOMP_FREE); + t = build_call_expr_loc (input_location, t, 1, tmp); + + stmtblock_t tblock; + gfc_init_block (&tblock); + gfc_add_expr_to_block (&tblock, t); + if (c->attr.pdt_array) + gfc_add_modify (&tblock, gfc_conv_descriptor_version (comp), + build_zero_cst (integer_type_node)); + tmp = build3_loc (input_location, COND_EXPR, void_type_node, + cd, gfc_finish_block (&tblock), + gfc_call_free (tmp)); + } + else + tmp = gfc_call_free (tmp); tmp = build3_v (COND_EXPR, null_cond, tmp, build_empty_stmt (input_location)); gfc_add_expr_to_block (&fnblock, tmp); - gfc_conv_descriptor_data_set (&fnblock, comp, null_pointer_node); - } - else if (c->attr.pdt_string) - { - null_cond = fold_build2_loc (input_location, NE_EXPR, - logical_type_node, comp, - build_int_cst (TREE_TYPE (comp), 0)); - tmp = gfc_call_free (comp); - tmp = build3_v (COND_EXPR, null_cond, tmp, - build_empty_stmt (input_location)); - gfc_add_expr_to_block (&fnblock, tmp); - tmp = fold_convert (TREE_TYPE (comp), null_pointer_node); - gfc_add_modify (&fnblock, comp, tmp); + + if (c->attr.pdt_array) + 
gfc_conv_descriptor_data_set (&fnblock, comp, null_pointer_node); + else + { + tmp = fold_convert (TREE_TYPE (comp), null_pointer_node); + gfc_add_modify (&fnblock, comp, tmp); + } } break; @@ -11248,8 +11332,22 @@ gfc_alloc_allocatable_for_assignment (gfc_loopinfo *loop, builtin_decl_explicit (BUILT_IN_REALLOC), 2, fold_convert (pvoid_type_node, array1), size2); - gfc_conv_descriptor_data_set (&realloc_block, - desc, tmp); + if (flag_openmp_allocators) + { + tree cond, omp_tmp; + cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, + gfc_conv_descriptor_version (desc), + build_int_cst (integer_type_node, 1)); + omp_tmp = builtin_decl_explicit (BUILT_IN_GOMP_REALLOC); + omp_tmp = build_call_expr_loc (input_location, omp_tmp, 4, + fold_convert (pvoid_type_node, array1), size2, + build_zero_cst (ptr_type_node), + build_zero_cst (ptr_type_node)); + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), cond, + omp_tmp, tmp); + } + + gfc_conv_descriptor_data_set (&realloc_block, desc, tmp); } else { diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 5408755138ea..6cdcc9a3e750 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -21,7 +21,8 @@ along with GCC; see the file COPYING3. If not see /* Generate code to initialize and allocate an array. Statements are added to se, which should contain an expression for the array descriptor. */ bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, - tree, tree *, gfc_expr *, tree, bool); + tree, tree *, gfc_expr *, tree, bool, + gfc_omp_namelist *); /* Allow the bounds of a loop to be set from a callee's array spec. 
*/ void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *, @@ -177,6 +178,7 @@ tree gfc_conv_descriptor_span_get (tree); tree gfc_conv_descriptor_dtype (tree); tree gfc_conv_descriptor_rank (tree); tree gfc_conv_descriptor_elem_len (tree); +tree gfc_conv_descriptor_version (tree); tree gfc_conv_descriptor_attribute (tree); tree gfc_conv_descriptor_type (tree); tree gfc_get_descriptor_dimension (tree); diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index b86cfec7d499..cf848406a05a 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -4350,7 +4350,7 @@ gfc_init_default_dt (gfc_symbol * sym, stmtblock_t * block, bool dealloc) /* Initialize INTENT(OUT) derived type dummies. As well as giving - them their default initializer, if they do not have allocatable + them their default initializer, if they have allocatable components, they have their allocatable components deallocated. */ static void diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index ea0872942499..b2463a28748f 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -7173,8 +7173,6 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, if (TREE_TYPE(tmp) != pvoid_type_node) tmp = build_fold_indirect_ref_loc (input_location, parmse.expr); - if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (tmp))) - tmp = gfc_conv_descriptor_data_get (tmp); tmp = gfc_deallocate_with_status (tmp, NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE, true, e, @@ -11731,8 +11729,30 @@ alloc_scalar_allocatable_for_assignment (stmtblock_t *block, builtin_decl_explicit (BUILT_IN_REALLOC), 2, fold_convert (pvoid_type_node, lse.expr), size_in_bytes); + tree omp_cond = NULL_TREE; + if (flag_openmp_allocators) + { + tree omp_tmp; + omp_cond = gfc_omp_call_is_alloc (lse.expr); + omp_cond = gfc_evaluate_now (omp_cond, block); + + omp_tmp = builtin_decl_explicit (BUILT_IN_GOMP_REALLOC); + omp_tmp = build_call_expr_loc (input_location, omp_tmp, 4, + fold_convert 
(pvoid_type_node, + lse.expr), size_in_bytes, + build_zero_cst (ptr_type_node), + build_zero_cst (ptr_type_node)); + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), + omp_cond, omp_tmp, tmp); + } tmp = fold_convert (TREE_TYPE (lse.expr), tmp); gfc_add_modify (block, lse.expr, tmp); + if (omp_cond) + gfc_add_expr_to_block (block, + build3_loc (input_location, COND_EXPR, + void_type_node, omp_cond, + gfc_omp_call_add_alloc (lse.expr), + build_empty_stmt (input_location))); tmp = build1_v (LABEL_EXPR, jump_label2); gfc_add_expr_to_block (block, tmp); diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index 289309190a5e..05e111c0fcc3 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -12819,9 +12819,8 @@ conv_intrinsic_move_alloc (gfc_code *code) gfc_add_expr_to_block (&block, tmp); } - tmp = gfc_conv_descriptor_data_get (to_se.expr); - tmp = gfc_deallocate_with_status (tmp, NULL_TREE, NULL_TREE, NULL_TREE, - NULL_TREE, true, to_expr, + tmp = gfc_deallocate_with_status (to_se.expr, NULL_TREE, NULL_TREE, + NULL_TREE, NULL_TREE, true, to_expr, GFC_CAF_COARRAY_NOCOARRAY); gfc_add_expr_to_block (&block, tmp); } diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 82bbc41b3886..9e166c94f8e6 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -4841,6 +4841,30 @@ gfc_trans_oacc_wait_directive (gfc_code *code) static tree gfc_trans_omp_sections (gfc_code *, gfc_omp_clauses *); static tree gfc_trans_omp_workshare (gfc_code *, gfc_omp_clauses *); +static tree +gfc_trans_omp_allocators (gfc_code *code) +{ + static bool warned = false; + gfc_omp_namelist *omp_allocate + = code->ext.omp_clauses->lists[OMP_LIST_ALLOCATE]; + if (!flag_openmp_allocators && !warned) + { + omp_allocate = NULL; + gfc_error ("%<!$OMP %s%> at %L requires %<-fopenmp-allocators%>", + code->op == EXEC_OMP_ALLOCATE ? 
"ALLOCATE" : "ALLOCATORS", + &code->loc); + warning (0, "All files that might deallocate such a variable must be " + "compiled with %<-fopenmp-allocators%>"); + inform (UNKNOWN_LOCATION, + "This includes explicit DEALLOCATE, reallocation on intrinsic " + "assignment, INTENT(OUT) for allocatable dummy arguments, and " + "reallocation of allocatable components allocated with an " + "OpenMP allocator"); + warned = true; + } + return gfc_trans_allocate (code->block->next, omp_allocate); +} + static tree gfc_trans_omp_assume (gfc_code *code) { @@ -7992,9 +8016,7 @@ gfc_trans_omp_directive (gfc_code *code) { case EXEC_OMP_ALLOCATE: case EXEC_OMP_ALLOCATORS: - sorry ("%<!$OMP %s%> not yet supported", - code->op == EXEC_OMP_ALLOCATE ? "ALLOCATE" : "ALLOCATORS"); - return NULL_TREE; + return gfc_trans_omp_allocators (code); case EXEC_OMP_ASSUME: return gfc_trans_omp_assume (code); case EXEC_OMP_ATOMIC: @@ -8329,3 +8351,36 @@ gfc_trans_omp_declare_variant (gfc_namespace *ns) } } } + +/* Add ptr for tracking as being allocated by GOMP_alloc. */ + +tree +gfc_omp_call_add_alloc (tree ptr) +{ + static tree fn = NULL_TREE; + if (fn == NULL_TREE) + { + fn = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + fn = build_fn_decl ("GOMP_add_alloc", fn); +/* FIXME: attributes. */ + } + return build_call_expr_loc (input_location, fn, 1, ptr); +} + +/* Generated function returns true when it was tracked via GOMP_add_alloc and + removes it from the tracking. As called just before GOMP_free or omp_realloc + the pointer is or might become invalid, thus, it is always removed. */ + +tree +gfc_omp_call_is_alloc (tree ptr) +{ + static tree fn = NULL_TREE; + if (fn == NULL_TREE) + { + fn = build_function_type_list (boolean_type_node, ptr_type_node, + NULL_TREE); + fn = build_fn_decl ("GOMP_is_alloc", fn); +/* FIXME: attributes. 
*/ + } + return build_call_expr_loc (input_location, fn, 1, ptr); +} diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index 50b71e67234c..5530e893a620 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -6228,7 +6228,7 @@ allocate_get_initializer (gfc_code * code, gfc_expr * expr) /* Translate the ALLOCATE statement. */ tree -gfc_trans_allocate (gfc_code * code) +gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) { gfc_alloc *al; gfc_expr *expr, *e3rhs = NULL, *init_expr; @@ -6790,11 +6790,38 @@ gfc_trans_allocate (gfc_code * code) else tmp = expr3_esize; + gfc_omp_namelist *omp_alloc_item = NULL; + if (omp_allocate) + { + gfc_omp_namelist *n = NULL; + gfc_omp_namelist *n_null = NULL; + for (n = omp_allocate; n; n = n->next) + { + if (n->sym == NULL) + { + n_null = n; + continue; + } + if (expr->expr_type == EXPR_VARIABLE + && expr->symtree->n.sym == n->sym) + { + gfc_ref *ref; + for (ref = expr->ref; ref; ref = ref->next) + if (ref->type == REF_COMPONENT) + break; + if (ref == NULL) + break; + } + } + omp_alloc_item = n ? n : n_null; + + } + if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, label_finish, tmp, &nelems, e3rhs ? e3rhs : code->expr3, e3_is == E3_DESC ? expr3 : NULL_TREE, - e3_has_nodescriptor)) + e3_has_nodescriptor, omp_alloc_item)) { /* A scalar or derived type. First compute the size to allocate. @@ -6874,10 +6901,59 @@ gfc_trans_allocate (gfc_code * code) /* Handle size computation of the type declared to alloc. 
*/ memsz = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (se.expr))); + bool use_coarray_alloc + = (flag_coarray == GFC_FCOARRAY_LIB + && (caf_attr = gfc_caf_attr (expr, true, &caf_refs_comp)) + .codimension); + tree omp_cond = NULL_TREE; + tree omp_alt_alloc = NULL_TREE; + tree succ_add_expr = NULL_TREE; + if (!use_coarray_alloc && omp_alloc_item) + { + tree align, alloc, sz; + gfc_se se2; + + omp_cond = boolean_true_node; + if (omp_alloc_item->u2.allocator) + { + gfc_init_se (&se2, NULL); + gfc_conv_expr (&se2, omp_alloc_item->u2.allocator); + gfc_add_block_to_block (&se.pre, &se2.pre); + alloc = gfc_evaluate_now (se2.expr, &se.pre); + gfc_add_block_to_block (&se.pre, &se2.post); + } + else + alloc = build_zero_cst (ptr_type_node); + tmp = TREE_TYPE (TREE_TYPE (se.expr)); + if (tmp == void_type_node) + tmp = gfc_typenode_for_spec (&expr->ts, 0); + if (omp_alloc_item->u.align) + { + gfc_init_se (&se2, NULL); + gfc_conv_expr (&se2, omp_alloc_item->u.align); + gcc_assert (CONSTANT_CLASS_P (se2.expr) + && se2.pre.head == NULL + && se2.post.head == NULL); + align = build_int_cst (size_type_node, + MAX (tree_to_uhwi (se2.expr), + TYPE_ALIGN_UNIT (tmp))); + } + else + align = build_int_cst (size_type_node, TYPE_ALIGN_UNIT (tmp)); + sz = fold_build2_loc (input_location, MAX_EXPR, size_type_node, + fold_convert (size_type_node, memsz), + build_int_cst (size_type_node, 1)); + omp_alt_alloc = builtin_decl_explicit (BUILT_IN_GOMP_ALLOC); + DECL_ATTRIBUTES (omp_alt_alloc) + = tree_cons (get_identifier ("omp allocator"), + build_tree_list (NULL_TREE, alloc), + DECL_ATTRIBUTES (omp_alt_alloc)); + omp_alt_alloc = build_call_expr (omp_alt_alloc, 3, align, sz, alloc); + succ_add_expr = gfc_omp_call_add_alloc (se.expr); + } + /* Store the caf-attributes for latter use. 
*/ - if (flag_coarray == GFC_FCOARRAY_LIB - && (caf_attr = gfc_caf_attr (expr, true, &caf_refs_comp)) - .codimension) + if (use_coarray_alloc) { /* Scalar allocatable components in coarray'ed derived types make it here and are treated now. */ @@ -6904,9 +6980,11 @@ gfc_trans_allocate (gfc_code * code) else if (gfc_expr_attr (expr).allocatable) gfc_allocate_allocatable (&se.pre, se.expr, memsz, NULL_TREE, stat, errmsg, errlen, - label_finish, expr, 0); + label_finish, expr, 0, + omp_cond, omp_alt_alloc, succ_add_expr); else - gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat); + gfc_allocate_using_malloc (&se.pre, se.expr, memsz, stat, + omp_cond, omp_alt_alloc, succ_add_expr); } else { diff --git a/gcc/fortran/trans-stmt.h b/gcc/fortran/trans-stmt.h index 101a0540ef49..270ebcf9915b 100644 --- a/gcc/fortran/trans-stmt.h +++ b/gcc/fortran/trans-stmt.h @@ -64,7 +64,7 @@ tree gfc_trans_change_team (gfc_code *); tree gfc_trans_end_team (gfc_code *); tree gfc_trans_sync_team (gfc_code *); tree gfc_trans_where (gfc_code *); -tree gfc_trans_allocate (gfc_code *); +tree gfc_trans_allocate (gfc_code *, gfc_omp_namelist *); tree gfc_trans_deallocate (gfc_code *); /* trans-openmp.cc */ diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc index 5b11ffc3cc94..11a583ca92cf 100644 --- a/gcc/fortran/trans-types.cc +++ b/gcc/fortran/trans-types.cc @@ -1601,6 +1601,10 @@ gfc_get_dtype_rank_type (int rank, tree etype) GFC_DTYPE_ELEM_LEN); CONSTRUCTOR_APPEND_ELT (v, field, fold_convert (TREE_TYPE (field), size)); + field = gfc_advance_chain (TYPE_FIELDS (dtype_type_node), + GFC_DTYPE_VERSION); + CONSTRUCTOR_APPEND_ELT (v, field, + build_zero_cst (TREE_TYPE (field))); field = gfc_advance_chain (TYPE_FIELDS (dtype_type_node), GFC_DTYPE_RANK); diff --git a/gcc/fortran/trans.cc b/gcc/fortran/trans.cc index e2e1b6940123..961b0b5a573f 100644 --- a/gcc/fortran/trans.cc +++ b/gcc/fortran/trans.cc @@ -796,7 +796,10 @@ gfc_call_malloc (stmtblock_t * block, tree type, tree 
size) if (stat requested) stat = 0; + // if cond == NULL_TREE: newmem = malloc (MAX (size, 1)); + // otherwise: + newmem = <cond> ? <alt_alloc> : malloc (MAX (size, 1)) if (newmem == NULL) { if (stat) @@ -808,7 +811,8 @@ gfc_call_malloc (stmtblock_t * block, tree type, tree size) } */ void gfc_allocate_using_malloc (stmtblock_t * block, tree pointer, - tree size, tree status) + tree size, tree status, tree cond, tree alt_alloc, + tree extra_success_expr) { tree tmp, error_cond; stmtblock_t on_error; @@ -822,13 +826,18 @@ gfc_allocate_using_malloc (stmtblock_t * block, tree pointer, /* The allocation itself. */ size = fold_convert (size_type_node, size); - gfc_add_modify (block, pointer, - fold_convert (TREE_TYPE (pointer), - build_call_expr_loc (input_location, - builtin_decl_explicit (BUILT_IN_MALLOC), 1, - fold_build2_loc (input_location, - MAX_EXPR, size_type_node, size, - build_int_cst (size_type_node, 1))))); + tmp = fold_build2_loc (input_location, MAX_EXPR, size_type_node, + size, build_int_cst (size_type_node, 1)); + + tmp = build_call_expr_loc (input_location, + builtin_decl_explicit (BUILT_IN_MALLOC), 1, tmp); + if (cond == boolean_true_node) + tmp = alt_alloc; + else if (cond) + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), cond, + alt_alloc, tmp); + + gfc_add_modify (block, pointer, fold_convert (TREE_TYPE (pointer), tmp)); /* What to do in case of error. */ gfc_start_block (&on_error); @@ -852,7 +861,9 @@ gfc_allocate_using_malloc (stmtblock_t * block, tree pointer, tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, gfc_unlikely (error_cond, PRED_FORTRAN_FAIL_ALLOC), gfc_finish_block (&on_error), - build_empty_stmt (input_location)); + extra_success_expr + ? 
extra_success_expr + : build_empty_stmt (input_location)); gfc_add_expr_to_block (block, tmp); } @@ -938,7 +949,8 @@ gfc_allocate_using_caf_lib (stmtblock_t * block, tree pointer, tree size, void gfc_allocate_allocatable (stmtblock_t * block, tree mem, tree size, tree token, tree status, tree errmsg, tree errlen, - tree label_finish, gfc_expr* expr, int corank) + tree label_finish, gfc_expr* expr, int corank, + tree cond, tree alt_alloc, tree extra_success_expr) { stmtblock_t alloc_block; tree tmp, null_mem, alloc, error; @@ -963,7 +975,7 @@ gfc_allocate_allocatable (stmtblock_t * block, tree mem, tree size, if (flag_coarray == GFC_FCOARRAY_LIB && (corank > 0 || caf_attr.codimension)) { - tree cond, sub_caf_tree; + tree cond2, sub_caf_tree; gfc_se se; bool compute_special_caf_types_size = false; @@ -1027,16 +1039,17 @@ gfc_allocate_allocatable (stmtblock_t * block, tree mem, tree size, { TREE_USED (label_finish) = 1; tmp = build1_v (GOTO_EXPR, label_finish); - cond = fold_build2_loc (input_location, NE_EXPR, logical_type_node, - status, build_zero_cst (TREE_TYPE (status))); + cond2 = fold_build2_loc (input_location, NE_EXPR, logical_type_node, + status, build_zero_cst (TREE_TYPE (status))); tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, - gfc_unlikely (cond, PRED_FORTRAN_FAIL_ALLOC), + gfc_unlikely (cond2, PRED_FORTRAN_FAIL_ALLOC), tmp, build_empty_stmt (input_location)); gfc_add_expr_to_block (&alloc_block, tmp); } } else - gfc_allocate_using_malloc (&alloc_block, mem, size, status); + gfc_allocate_using_malloc (&alloc_block, mem, size, status, + cond, alt_alloc, extra_success_expr); alloc = gfc_finish_block (&alloc_block); @@ -1781,6 +1794,7 @@ gfc_deallocate_with_status (tree pointer, tree status, tree errmsg, tree cond, tmp, error; tree status_type = NULL_TREE; tree token = NULL_TREE; + tree descr = NULL_TREE; gfc_coarray_deregtype caf_dereg_type = GFC_CAF_COARRAY_DEREGISTER; if (coarray_dealloc_mode >= GFC_CAF_COARRAY_ANALYZE) @@ -1788,7 
+1802,11 @@ gfc_deallocate_with_status (tree pointer, tree status, tree errmsg, if (flag_coarray == GFC_FCOARRAY_LIB) { if (caf_token) - token = caf_token; + { + token = caf_token; + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (pointer))) + pointer = gfc_conv_descriptor_data_get (pointer); + } else { tree caf_type, caf_decl = pointer; @@ -1824,7 +1842,10 @@ gfc_deallocate_with_status (tree pointer, tree status, tree errmsg, pointer = gfc_conv_descriptor_data_get (pointer); } else if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (pointer))) - pointer = gfc_conv_descriptor_data_get (pointer); + { + descr = pointer; + pointer = gfc_conv_descriptor_data_get (pointer); + } cond = fold_build2_loc (input_location, EQ_EXPR, logical_type_node, pointer, build_int_cst (TREE_TYPE (pointer), 0)); @@ -1876,9 +1897,27 @@ gfc_deallocate_with_status (tree pointer, tree status, tree errmsg, tmp = build_call_expr_loc (input_location, builtin_decl_explicit (BUILT_IN_FREE), 1, fold_convert (pvoid_type_node, pointer)); + if (flag_openmp_allocators && coarray_dealloc_mode < GFC_CAF_COARRAY_ANALYZE) + { + tree cond, omp_tmp; + if (descr) + cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, + gfc_conv_descriptor_version (descr), + build_int_cst (integer_type_node, 1)); + else + cond = gfc_omp_call_is_alloc (pointer); + omp_tmp = builtin_decl_explicit (BUILT_IN_GOMP_FREE); + omp_tmp = build_call_expr_loc (input_location, omp_tmp, 2, pointer, + build_zero_cst (ptr_type_node)); + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), cond, + omp_tmp, tmp); + } gfc_add_expr_to_block (&non_null, tmp); gfc_add_modify (&non_null, pointer, build_int_cst (TREE_TYPE (pointer), 0)); + if (flag_openmp_allocators && descr) + gfc_add_modify (&non_null, gfc_conv_descriptor_version (descr), + build_zero_cst (integer_type_node)); if (status != NULL_TREE && !integer_zerop (status)) { @@ -2050,6 +2089,16 @@ gfc_deallocate_scalar_with_status (tree pointer, tree status, tree label_finish, tmp = 
build_call_expr_loc (input_location, builtin_decl_explicit (BUILT_IN_FREE), 1, fold_convert (pvoid_type_node, pointer)); + if (flag_openmp_allocators) + { + tree cond, omp_tmp; + cond = gfc_omp_call_is_alloc (pointer); + omp_tmp = builtin_decl_explicit (BUILT_IN_GOMP_FREE); + omp_tmp = build_call_expr_loc (input_location, omp_tmp, 2, pointer, + build_zero_cst (ptr_type_node)); + tmp = build3_loc (input_location, COND_EXPR, TREE_TYPE (tmp), cond, + omp_tmp, tmp); + } gfc_add_expr_to_block (&non_null, tmp); if (status != NULL_TREE && !integer_zerop (status)) @@ -2483,7 +2532,7 @@ trans_code (gfc_code * code, tree cond) break; case EXEC_ALLOCATE: - res = gfc_trans_allocate (code); + res = gfc_trans_allocate (code, NULL); break; case EXEC_DEALLOCATE: diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 109d76472354..728d4f8f43f9 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -764,10 +764,14 @@ void gfc_allocate_using_caf_lib (stmtblock_t *, tree, tree, tree, tree, tree, /* Allocate memory for allocatable variables, with optional status variable. */ void gfc_allocate_allocatable (stmtblock_t*, tree, tree, tree, tree, - tree, tree, tree, gfc_expr*, int); + tree, tree, tree, gfc_expr*, int, + tree = NULL_TREE, tree = NULL_TREE, + tree = NULL_TREE); /* Allocate memory, with optional status variable. */ -void gfc_allocate_using_malloc (stmtblock_t *, tree, tree, tree); +void gfc_allocate_using_malloc (stmtblock_t *, tree, tree, tree, + tree = NULL_TREE, tree = NULL_TREE, + tree = NULL_TREE); /* Generate code to deallocate an array. 
*/ tree gfc_deallocate_with_status (tree, tree, tree, tree, tree, bool, @@ -817,6 +821,8 @@ struct array_descr_info; bool gfc_get_array_descr_info (const_tree, struct array_descr_info *); /* In trans-openmp.cc */ +tree gfc_omp_call_add_alloc (tree); +tree gfc_omp_call_is_alloc (tree); bool gfc_omp_is_allocatable_or_ptr (const_tree); tree gfc_omp_check_optional_argument (tree, bool); tree gfc_omp_array_data (tree, bool); diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 7a465c89c5fb..5462381cdd40 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -155,6 +155,8 @@ DEF_FUNCTION_TYPE_3 (BT_FN_UINT_UINT_PTR_PTR, BT_UINT, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_3 (BT_FN_PTR_SIZE_SIZE_PTRMODE, BT_PTR, BT_SIZE, BT_SIZE, BT_PTRMODE) +DEF_FUNCTION_TYPE_4 (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE, + BT_PTR, BT_PTR, BT_SIZE, BT_PTRMODE, BT_PTRMODE) DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT) DEF_FUNCTION_TYPE_4 (BT_FN_UINT_OMPFN_PTR_UINT_UINT, diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc index da2e3fe3a0dc..1646bd1be14c 100644 --- a/gcc/gimple-ssa-warn-access.cc +++ b/gcc/gimple-ssa-warn-access.cc @@ -1574,6 +1574,7 @@ fndecl_alloc_p (tree fndecl, bool all_alloc) case BUILT_IN_ALIGNED_ALLOC: case BUILT_IN_CALLOC: case BUILT_IN_GOMP_ALLOC: + case BUILT_IN_GOMP_REALLOC: case BUILT_IN_MALLOC: case BUILT_IN_REALLOC: case BUILT_IN_STRDUP: @@ -1801,9 +1802,20 @@ matching_alloc_calls_p (tree alloc_decl, tree dealloc_decl) case BUILT_IN_ALLOCA_WITH_ALIGN: return false; + case BUILT_IN_GOMP_ALLOC: + case BUILT_IN_GOMP_REALLOC: + if (DECL_IS_OPERATOR_DELETE_P (dealloc_decl)) + return false; + + if (fndecl_built_in_p (dealloc_decl, BUILT_IN_GOMP_FREE, + BUILT_IN_GOMP_REALLOC)) + return true; + + alloc_dealloc_kind = alloc_kind_t::builtin; + break; + case BUILT_IN_ALIGNED_ALLOC: case BUILT_IN_CALLOC: - case BUILT_IN_GOMP_ALLOC: case BUILT_IN_MALLOC: case 
BUILT_IN_REALLOC: case BUILT_IN_STRDUP: @@ -1829,7 +1841,8 @@ matching_alloc_calls_p (tree alloc_decl, tree dealloc_decl) if (fndecl_built_in_p (dealloc_decl, BUILT_IN_NORMAL)) { built_in_function dealloc_code = DECL_FUNCTION_CODE (dealloc_decl); - if (dealloc_code == BUILT_IN_REALLOC) + if (dealloc_code == BUILT_IN_REALLOC + || dealloc_code == BUILT_IN_GOMP_REALLOC) realloc_kind = alloc_kind_t::builtin; for (tree amats = DECL_ATTRIBUTES (alloc_decl); @@ -1882,6 +1895,7 @@ matching_alloc_calls_p (tree alloc_decl, tree dealloc_decl) case BUILT_IN_ALIGNED_ALLOC: case BUILT_IN_CALLOC: case BUILT_IN_GOMP_ALLOC: + case BUILT_IN_GOMP_REALLOC: case BUILT_IN_MALLOC: case BUILT_IN_REALLOC: case BUILT_IN_STRDUP: diff --git a/gcc/gimple.cc b/gcc/gimple.cc index 7924d900b358..67f3fb2dabf0 100644 --- a/gcc/gimple.cc +++ b/gcc/gimple.cc @@ -2988,6 +2988,8 @@ nonfreeing_call_p (gimple *call) case BUILT_IN_TM_FREE: case BUILT_IN_REALLOC: case BUILT_IN_STACK_RESTORE: + case BUILT_IN_GOMP_FREE: + case BUILT_IN_GOMP_REALLOC: return false; default: return true; diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index ed78d49d2053..7b6b1dca3e34 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -467,6 +467,9 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, DEF_GOMP_BUILTIN (BUILT_IN_GOMP_ALLOC, "GOMP_alloc", BT_FN_PTR_SIZE_SIZE_PTRMODE, ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_REALLOC, + "omp_realloc", BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE, + ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_FREE, "GOMP_free", BT_FN_VOID_PTR_PTRMODE, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_WARNING, "GOMP_warning", diff --git a/gcc/predict.cc b/gcc/predict.cc index 396746cbfd1a..2e9b7dd07a7c 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -2566,6 +2566,7 @@ expr_expected_value_1 (tree type, tree op0, enum tree_code code, *predictor = PRED_COMPARE_AND_SWAP; return 
boolean_true_node; case BUILT_IN_REALLOC: + case BUILT_IN_GOMP_REALLOC: if (predictor) *predictor = PRED_MALLOC_NONNULL; /* FIXME: This is wrong and we need to convert the logic diff --git a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 index 04faa433435e..0825efc7a2ff 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_array_params_2.f90 @@ -25,7 +25,7 @@ end ! { dg-final { scan-tree-dump "parm...span = 4;" "original" } } -! { dg-final { scan-tree-dump "parm...dtype = {.elem_len=4, .rank=2, .type=1};" "original" } } +! { dg-final { scan-tree-dump "parm...dtype = {.elem_len=4, .version=0, .rank=2, .type=1};" "original" } } ! { dg-final { scan-tree-dump "parm...dim\\\[0\\\].lbound = 1;" "original" } } ! { dg-final { scan-tree-dump "parm...dim\\\[0\\\].ubound = 4;" "original" } } ! { dg-final { scan-tree-dump "parm...dim\\\[0\\\].stride = 1;" "original" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/allocate-14.f90 b/gcc/testsuite/gfortran.dg/gomp/allocate-14.f90 index 8ff9c252e49b..4fed19249a3d 100644 --- a/gcc/testsuite/gfortran.dg/gomp/allocate-14.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/allocate-14.f90 @@ -93,3 +93,44 @@ subroutine c_and_func_ptrs !$omp allocate(cfunptr) ! OK? A normal derived-type var? !$omp allocate(p) ! { dg-error "Argument 'p' at .1. to declarative !.OMP ALLOCATE directive must be a variable" } end + + +subroutine coarray_2 + use m + implicit none + integer :: x + integer, allocatable :: a, b, c[:], d + x = 5 ! executable stmt + !$omp allocate(a,b) align(16) + !$omp allocate ! { dg-error "Unexpected coarray 'c' in 'allocate' at .1., implicitly listed in '!.OMP ALLOCATE' at .2." } + !$omp allocate(d) align(32) + allocate(a,b,c[*],d) ! { dg-error "Unexpected coarray 'c' in 'allocate' at .1., implicitly listed in '!.OMP ALLOCATE' at .2." 
} +end + + +subroutine coarray_3 + use m + implicit none + integer :: x + integer, allocatable :: a, b, c[:], d + x = 5 ! executable stmt + !$omp allocators allocate(align(16): a,b) allocate(align(32) : d) + allocate(a,b,c[*],d) ! OK - Fortran allocator used for 'C' +end + + +subroutine unclear + use m + implicit none + integer :: x + integer, allocatable :: a, b, c[:], d + + ! OpenMP is unclear which allocator is used for 'C' - the fortran one or the OpenMP one. + ! GCC therefore rejects it. + + x = 5 ! executable stmt + + !$omp allocate(a,b) align(16) + !$omp allocate(d) align(32) + allocate(a,b,c[*],d) ! { dg-error "'c' listed in 'allocate' statement at .1. but it is neither explicitly in listed in the '!.OMP ALLOCATE' directive nor exists a directive without argument list" } +end diff --git a/gcc/testsuite/gfortran.dg/gomp/allocate-16.f90 b/gcc/testsuite/gfortran.dg/gomp/allocate-16.f90 new file mode 100644 index 000000000000..6c203e02d57a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/allocate-16.f90 @@ -0,0 +1,10 @@ +integer, pointer :: ptr + +!$omp flush +!$omp allocate(ptr) +allocate(ptr) +end + +! { dg-error "'!.OMP ALLOCATE' at .1. requires '-fopenmp-allocators'" "" { target *-*-* } 4 } +! { dg-warning "All files that might deallocate such a variable must be compiled with '-fopenmp-allocators'" "" { target *-*-* } 4 } +! { dg-note "This includes explicit DEALLOCATE, reallocation on intrinsic assignment, INTENT\\(OUT\\) for allocatable dummy arguments, and reallocation of allocatable components allocated with an OpenMP allocator" "" { target *-*-* } 0 } diff --git a/gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 b/gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 index bf9c781dcc50..28369ae876bf 100644 --- a/gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/allocate-5.f90 @@ -1,3 +1,4 @@ +! 
{ dg-additional-options "-fopenmp-allocators" } module my_omp_lib use iso_c_binding, only: c_intptr_t !use omp_lib @@ -45,15 +46,15 @@ subroutine two(c,x2,y2) class(t), pointer :: y2(:) !$omp flush ! some executable statement - !$omp allocate(a) ! { dg-message "not yet supported" } - allocate(a,b(4),c(3,4)) - deallocate(a,b,c) + !$omp allocate(a) + allocate(a) + deallocate(a) - !$omp allocate(x1,y1,x2,y2) ! { dg-message "not yet supported" } + !$omp allocate(x1,y1,x2,y2) allocate(x1,y1,x2(5),y2(5)) deallocate(x1,y1,x2,y2) - !$omp allocate(b,a) align ( 128 ) ! { dg-message "not yet supported" } + !$omp allocate(b,a) align ( 128 ) !$omp allocate align ( 64 ) allocate(a,b(4),c(3,4)) deallocate(a,b,c) @@ -66,7 +67,7 @@ subroutine three(c) integer, allocatable :: a, b(:), c(:,:) call foo() ! executable stmt - !$omp allocate allocator( omp_large_cap_mem_alloc ) , align(64) ! { dg-message "not yet supported" } + !$omp allocate allocator( omp_large_cap_mem_alloc ) , align(64) !$omp allocate(b) allocator( omp_high_bw_mem_alloc ) !$omp allocate(c) allocator( omp_high_bw_mem_alloc ) allocate(a,b(4),c(3,4)) @@ -74,7 +75,7 @@ subroutine three(c) block q = 5 ! executable stmt - !$omp allocate(a) align(64) ! { dg-message "not yet supported" } + !$omp allocate(a) align(64) !$omp allocate(b) allocator( omp_high_bw_mem_alloc ), align(32) !$omp allocate(c) allocator( omp_thread_mem_alloc ) allocate(a,b(4),c(3,4)) @@ -84,7 +85,7 @@ subroutine three(c) contains subroutine inner call foo() ! executable stmt - !$omp allocate(a) align(64) ! 
{ dg-message "not yet supported" } + !$omp allocate(a) align(64) !$omp allocate(b) allocator( omp_high_bw_mem_alloc ), align(32) !$omp allocate(c) allocator( omp_thread_mem_alloc ) allocate(a,b(4),c(3,4)) diff --git a/gcc/testsuite/gfortran.dg/gomp/allocators-3.f90 b/gcc/testsuite/gfortran.dg/gomp/allocators-3.f90 new file mode 100644 index 000000000000..d0e31ee87272 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/allocators-3.f90 @@ -0,0 +1,36 @@ +subroutine f + integer, allocatable :: A1, A2, B(:), C + !$omp declare target + + !$omp allocators ! OK + allocate(A1) + + !$omp allocators allocate(align(8) : a2) ! { dg-error "ALLOCATORS directive at .1. inside a target region must specify an ALLOCATOR modifier for 'a2'" } + allocate(A2) + + !$omp allocate ! { dg-error "ALLOCATE directive at .1. inside a target region must specify an ALLOCATOR clause" } + allocate(B(5)) + + !$omp allocate(c) ! { dg-error "ALLOCATE directive at .1. inside a target region must specify an ALLOCATOR clause for 'c'" } + allocate(C) +end + +subroutine g + integer, allocatable :: A1, A2, B(:), C + + !$omp target + !$omp single + !$omp allocators ! OK + allocate(A1) + + !$omp allocators allocate(align(8) : a2) ! { dg-error "ALLOCATORS directive at .1. inside a target region must specify an ALLOCATOR modifier for 'a2'" } + allocate(A2) + + !$omp allocate ! { dg-error "ALLOCATE directive at .1. inside a target region must specify an ALLOCATOR clause" } + allocate(B(5)) + + !$omp allocate(c) ! { dg-error "ALLOCATE directive at .1. inside a target region must specify an ALLOCATOR clause for 'c'" } + allocate(C) + !$omp end single + !$omp end target +end diff --git a/gcc/testsuite/gfortran.dg/gomp/allocators-4.f90 b/gcc/testsuite/gfortran.dg/gomp/allocators-4.f90 new file mode 100644 index 000000000000..55ae48d61f2b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/allocators-4.f90 @@ -0,0 +1,9 @@ +integer, pointer :: ptr + +!$omp allocators allocate(ptr) +allocate(ptr) +end + +! 
{ dg-error "'!.OMP ALLOCATORS' at .1. requires '-fopenmp-allocators'" "" { target *-*-* } 3 } +! { dg-warning "All files that might deallocate such a variable must be compiled with '-fopenmp-allocators'" "" { target *-*-* } 3 } +! { dg-note "This includes explicit DEALLOCATE, reallocation on intrinsic assignment, INTENT\\(OUT\\) for allocatable dummy arguments, and reallocation of allocatable components allocated with an OpenMP allocator" "" { target *-*-* } 0 } diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 03ff88afaddd..ddcbaaaa417d 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -2346,6 +2346,7 @@ evaluate_stmt (gimple *stmt) { case BUILT_IN_MALLOC: case BUILT_IN_REALLOC: + case BUILT_IN_GOMP_REALLOC: case BUILT_IN_CALLOC: case BUILT_IN_STRDUP: case BUILT_IN_STRNDUP: diff --git a/gcc/tree.cc b/gcc/tree.cc index 10c6e1ecc588..b626553a1e13 100644 --- a/gcc/tree.cc +++ b/gcc/tree.cc @@ -15023,6 +15023,8 @@ fndecl_dealloc_argno (tree fndecl) { case BUILT_IN_FREE: case BUILT_IN_REALLOC: + case BUILT_IN_GOMP_FREE: + case BUILT_IN_GOMP_REALLOC: return 0; default: break; diff --git a/libgomp/allocator.c b/libgomp/allocator.c index a8a80f8028dd..58a4c57f8835 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -35,6 +35,69 @@ #include #endif +/* Keeping track whether a Fortran scalar allocatable/pointer has been + allocated via 'omp allocators'/'omp allocate'. 
*/ + +struct fort_alloc_splay_tree_key_s { + void *ptr; +}; + +typedef struct fort_alloc_splay_tree_node_s *fort_alloc_splay_tree_node; +typedef struct fort_alloc_splay_tree_s *fort_alloc_splay_tree; +typedef struct fort_alloc_splay_tree_key_s *fort_alloc_splay_tree_key; + +static inline int +fort_alloc_splay_compare (fort_alloc_splay_tree_key x, fort_alloc_splay_tree_key y) +{ + if (x->ptr < y->ptr) + return -1; + if (x->ptr > y->ptr) + return 1; + return 0; +} +#define splay_tree_prefix fort_alloc +#define splay_tree_static +#include "splay-tree.h" + +#define splay_tree_prefix fort_alloc +#define splay_tree_static +#define splay_tree_c +#include "splay-tree.h" + +static struct fort_alloc_splay_tree_s fort_alloc_scalars; + +/* Add pointer as being alloced by GOMP_alloc. */ +void +GOMP_add_alloc (void *ptr) +{ + if (ptr == NULL) + return; + fort_alloc_splay_tree_node item; + item = gomp_malloc (sizeof (struct splay_tree_node_s)); + item->key.ptr = ptr; + item->left = NULL; + item->right = NULL; + fort_alloc_splay_tree_insert (&fort_alloc_scalars, item); +} + +/* Return true and remove PTR if it was added by GOMP_add_alloc; called by + FREE or by REALLOC, either of which can change the allocation status. */ +bool +GOMP_is_alloc (void *ptr) +{ + struct fort_alloc_splay_tree_key_s needle; + fort_alloc_splay_tree_node n; + needle.ptr = ptr; + n = fort_alloc_splay_tree_lookup_node (&fort_alloc_scalars, &needle); + if (n) + { + fort_alloc_splay_tree_remove (&fort_alloc_scalars, &n->key); + free (n); + } + return n != NULL; +} + + +#define omp_max_predefined_alloc omp_thread_mem_alloc /* These macros may be overridden in config//allocator.c. diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index fa29f4289768..7831e7bffe38 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1269,6 +1269,7 @@ reverse_splay_compare (reverse_splay_tree_key x, reverse_splay_tree_key y) } #define splay_tree_prefix reverse +#define splay_tree_static #include "splay-tree.h" /* Indirect target function splay-tree handling. 
*/ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 90c401453b29..65901dff2359 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -419,9 +419,15 @@ GOMP_5.1 { GOMP_5.1.1 { global: GOMP_taskwait_depend_nowait; - GOMP_target_map_indirect_ptr; } GOMP_5.1; +GOMP_5.1.2 { + global: + GOMP_add_alloc; + GOMP_is_alloc; + GOMP_target_map_indirect_ptr; +} GOMP_5.1.1; + OACC_2.0 { global: acc_get_num_devices; diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 67a111265a01..cff2a2a00800 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -232,7 +232,9 @@ The OpenMP 4.5 specification is fully supported. @item Predefined memory spaces, memory allocators, allocator traits @tab Y @tab See also @ref{Memory allocation} @item Memory management routines @tab Y @tab -@item @code{allocate} directive @tab P @tab Only C and Fortran, only stack variables +@item @code{allocate} directive @tab P + @tab Only C for stack/automatic and Fortran for stack/automatic + and allocatable/pointer variables @item @code{allocate} clause @tab P @tab Initial support @item @code{use_device_addr} clause on @code{target data} @tab Y @tab @item @code{ancestor} modifier on @code{device} clause @tab Y @tab @@ -304,7 +306,7 @@ The OpenMP 4.5 specification is fully supported. @item @code{strict} modifier in the @code{grainsize} and @code{num_tasks} clauses of the @code{taskloop} construct @tab Y @tab @item @code{align} clause in @code{allocate} directive @tab P - @tab Only C and Fortran (and only stack variables) + @tab Only C and Fortran (and not for static variables) @item @code{align} modifier in @code{allocate} clause @tab Y @tab @item @code{thread_limit} clause to @code{target} construct @tab Y @tab @item @code{has_device_addr} clause to @code{target} construct @tab Y @tab @@ -402,7 +404,7 @@ to address of matching mapped list item per 5.1, Sect. 
2.21.7.2 @tab N @tab @item Deprecation of @code{to} clause on declare target directive @tab N @tab @item Extended list of directives permitted in Fortran pure procedures @tab Y @tab -@item New @code{allocators} directive for Fortran @tab N @tab +@item New @code{allocators} directive for Fortran @tab Y @tab @item Deprecation of @code{allocate} directive for Fortran allocatables/pointers @tab N @tab @item Optional paired @code{end} directive with @code{dispatch} @tab N @tab @@ -5697,8 +5699,12 @@ The description below applies to: @option{-fstack-arrays}].) @item Using the @code{allocate} directive for variable in static memory is currently not supported (compile time error). -@item Using the @code{allocators} directive for Fortran pointers and - allocatables is currently not supported (compile time error). +@item In Fortran, the @code{allocators} directive and the executable + @code{allocate} directive for Fortran pointers and allocatables are + supported, but files containing those directives have to be + compiled with @option{-fopenmp-allocators}. Additionally, all files that + might explicitly or implicitly deallocate memory allocated that way must + also be compiled with that option. @end itemize For the available predefined allocators and, as applicable, their associated diff --git a/libgomp/splay-tree.c b/libgomp/splay-tree.c index 02695d4b2bd7..9e076f551806 100644 --- a/libgomp/splay-tree.c +++ b/libgomp/splay-tree.c @@ -131,7 +131,11 @@ splay_tree_splay (splay_tree sp, splay_tree_key key) /* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ +#ifdef splay_tree_static +__attribute__((unused)) static void +#else attribute_hidden void +#endif splay_tree_insert (splay_tree sp, splay_tree_node node) { int comparison = 0; @@ -167,7 +171,11 @@ splay_tree_insert (splay_tree sp, splay_tree_node node) /* Remove node with KEY from SP. It is not an error if it did not exist. 
*/ +#ifdef splay_tree_static +__attribute__((unused)) static void +#else attribute_hidden void +#endif splay_tree_remove (splay_tree sp, splay_tree_key key) { splay_tree_splay (sp, key); @@ -202,7 +210,28 @@ splay_tree_remove (splay_tree sp, splay_tree_key key) /* Lookup KEY in SP, returning NODE if present, and NULL otherwise. */ +#ifdef splay_tree_static +__attribute__((unused)) static splay_tree_node +#else +attribute_hidden splay_tree_node +#endif +splay_tree_lookup_node (splay_tree sp, splay_tree_key key) +{ + splay_tree_splay (sp, key); + + if (sp->root && splay_compare (&sp->root->key, key) == 0) + return sp->root; + else + return NULL; +} + +/* Likewise but return the key. */ + +#ifdef splay_tree_static +__attribute__((unused)) static splay_tree_key +#else attribute_hidden splay_tree_key +#endif splay_tree_lookup (splay_tree sp, splay_tree_key key) { splay_tree_splay (sp, key); @@ -231,7 +260,11 @@ splay_tree_foreach_internal (splay_tree_node node, splay_tree_callback func, /* Run FUNC on each of the nodes in SP. 
*/ +#ifdef splay_tree_static +__attribute__((unused)) static void +#else attribute_hidden void +#endif splay_tree_foreach (splay_tree sp, splay_tree_callback func, void *data) { splay_tree_foreach_internal (sp->root, func, data); @@ -253,8 +286,13 @@ splay_tree_foreach_internal_lazy (splay_tree_node node, return splay_tree_foreach_internal_lazy (node->right, func, data); } +#ifdef splay_tree_static +__attribute__((unused)) static void +#else attribute_hidden void -splay_tree_foreach_lazy (splay_tree sp, splay_tree_callback_stop func, void *data) +#endif +splay_tree_foreach_lazy (splay_tree sp, splay_tree_callback_stop func, + void *data) { splay_tree_foreach_internal_lazy (sp->root, func, data); } diff --git a/libgomp/splay-tree.h b/libgomp/splay-tree.h index 978f1e49800d..04ff94739b09 100644 --- a/libgomp/splay-tree.h +++ b/libgomp/splay-tree.h @@ -35,6 +35,8 @@ typedef struct splay_tree_key_s *splay_tree_key; define splay_tree_key_s structure, and define splay_compare inline function. + Define splay_tree_static to mark all functions as static. 
+ Alternatively, they can define splay_tree_prefix macro before including this header and then all the above types, the splay_compare function and the splay_tree_{lookup,insert_remove} @@ -72,6 +74,8 @@ typedef struct splay_tree_key_s *splay_tree_key; splay_tree_name (splay_tree_prefix, splay_compare) # define splay_tree_lookup \ splay_tree_name (splay_tree_prefix, splay_tree_lookup) +# define splay_tree_lookup_node \ + splay_tree_name (splay_tree_prefix, splay_tree_lookup_node) # define splay_tree_insert \ splay_tree_name (splay_tree_prefix, splay_tree_insert) # define splay_tree_remove \ @@ -105,11 +109,19 @@ struct splay_tree_s { typedef void (*splay_tree_callback) (splay_tree_key, void *); typedef int (*splay_tree_callback_stop) (splay_tree_key, void *); +#ifndef splay_tree_static extern splay_tree_key splay_tree_lookup (splay_tree, splay_tree_key); +extern splay_tree_node splay_tree_lookup_node (splay_tree, splay_tree_key); extern void splay_tree_insert (splay_tree, splay_tree_node); extern void splay_tree_remove (splay_tree, splay_tree_key); extern void splay_tree_foreach (splay_tree, splay_tree_callback, void *); extern void splay_tree_foreach_lazy (splay_tree, splay_tree_callback_stop, void *); +#endif + +#ifdef splay_tree_static_unused_attr +# undef splay_tree_static_unused_attr +#endif + #else /* splay_tree_c */ # ifdef splay_tree_prefix # include "splay-tree.c" @@ -117,6 +129,10 @@ extern void splay_tree_foreach_lazy (splay_tree, splay_tree_callback_stop, void # undef splay_tree_c #endif /* #ifndef splay_tree_c */ +#ifdef splay_tree_static +# undef splay_tree_static +#endif + #ifdef splay_tree_prefix # undef splay_tree_name_1 # undef splay_tree_name @@ -128,6 +144,7 @@ extern void splay_tree_foreach_lazy (splay_tree, splay_tree_callback_stop, void # undef splay_tree_key # undef splay_compare # undef splay_tree_lookup +# undef splay_tree_lookup_node # undef splay_tree_insert # undef splay_tree_remove # undef splay_tree_foreach diff --git 
a/libgomp/target.c b/libgomp/target.c index f30c20255d3b..0637d34f1258 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -47,6 +47,7 @@ /* Define another splay tree instantiation - for reverse offload. */ #define splay_tree_prefix reverse +#define splay_tree_static #define splay_tree_c #include "splay-tree.h" diff --git a/libgomp/testsuite/libgomp.fortran/allocators-1.f90 b/libgomp/testsuite/libgomp.fortran/allocators-1.f90 new file mode 100644 index 000000000000..935a37cd9594 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocators-1.f90 @@ -0,0 +1,68 @@ +! { dg-additional-options "-fopenmp-allocators -fdump-tree-original" } +module m + use omp_lib + use iso_c_binding, only: c_intptr_t + implicit none (type,external) + integer(omp_allocator_handle_kind) :: handle + integer(c_intptr_t) :: iptr +end module m + +subroutine scalar + use m + implicit none (type,external) + integer :: i + integer, allocatable :: SSS + i = 5 ! required executive statement before 'omp allocators' + !$omp allocate allocator(handle) + allocate(SSS) + if (mod (loc (sss), 64) /= 0) stop 1 + deallocate(SSS) + allocate(SSS) +end +! { dg-final { scan-tree-dump-times "sss = \\(integer\\(kind=4\\) \\*\\) __builtin_GOMP_alloc \\(4, 4, D\\.\[0-9\]+\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "GOMP_add_alloc \\(sss\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "if \\(GOMP_is_alloc \\(sss\\)\\)" 2 "original" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(sss, 0B\\);" 2 "original" } } + +subroutine array + use m + implicit none (type,external) + integer :: i + integer, allocatable :: A(:) + i = 5 ! 
required executive statement before 'omp allocators' + !$omp allocate allocator(handle) align(512) + allocate(A(5)) + if (mod (loc (A), 512) /= 0) stop 2 + A=[1] + if (mod (loc (A), 64) /= 0) stop 3 + deallocate(A) + A=[1] + deallocate(A) + call omp_set_default_allocator (handle) + !$omp allocate + allocate(A(7)) + if (mod (loc (A), 64) /= 0) stop 4 +end +! { dg-final { scan-tree-dump-times "a.dtype = {.elem_len=4, .version=0, .rank=1, .type=1};" 5 "original" } } +! { dg-final { scan-tree-dump-times "\\.elem_len=4" 5 "original" } } +! { dg-final { scan-tree-dump-times "a.data = \\(void \\* restrict\\) __builtin_GOMP_alloc \\(512, 20, D\\.\[0-9\]+\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "a.data = \\(void \\* restrict\\) __builtin_GOMP_alloc \\(4, 28, 0B\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "a.dtype.version = 1;" 2 "original" } } +! { dg-final { scan-tree-dump-times "a.data = \\(void \\* restrict\\) \\(a.dtype.version == 1 \\? __builtin_omp_realloc \\(\\(void \\*\\) a.data, 4, 0B, 0B\\) : __builtin_realloc \\(\\(void \\*\\) a.data, 4\\)\\);" 2 "original" } } +! { dg-final { scan-tree-dump-times "if \\(a.dtype.version == 1\\)" 3 "original" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) a.data, 0B\\);" 3 "original" } } +! 
{ dg-final { scan-tree-dump-times "a.dtype.version = 0;" 3 "original" } } + +program main + use m + implicit none (type,external) + external :: scalar, array + type (omp_alloctrait), parameter :: traits(*) & + = [omp_alloctrait(omp_atk_sync_hint, omp_atv_contended), & + omp_alloctrait(omp_atk_alignment, 64)] + handle = omp_init_allocator (omp_high_bw_mem_alloc, size(traits), traits) + call scalar + call array + call omp_destroy_allocator (handle) +end + diff --git a/libgomp/testsuite/libgomp.fortran/allocators-2.f90 b/libgomp/testsuite/libgomp.fortran/allocators-2.f90 new file mode 100644 index 000000000000..c42fbd31e3e1 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocators-2.f90 @@ -0,0 +1,101 @@ +! { dg-additional-options "-fopenmp-allocators" } +module m + implicit none (type, external) + type t + integer, allocatable :: Acomp, Bcomp(:) + end type t + +contains + +subroutine intent_out(aa, bb, cc, dd, ee, ff) + integer, allocatable,intent(out) :: aa, bb(:) + type(t), intent(out) :: cc, dd(4) + type(t), allocatable, intent(out) :: ee, ff(:) +end + +subroutine q(qa, qb, qc, qd, qe, qf) + integer, allocatable :: qa, qb(:) + type(t) :: qc, qd(4) + type(t), allocatable :: qe, qf(:) + call intent_out (qa, qb, qc, qd, qe, qf) +end subroutine q + +subroutine r + integer, allocatable :: r1, r2(:) + type(t) :: r3, r4(4) + type(t), allocatable :: r5, r6(:) + + call q(r1,r2,r3,r4,r5,r6) + + allocate(r1,r2(3)) + allocate(r5,r6(4)) + allocate(r3%Acomp, r3%Bcomp(2)) + allocate(r4(2)%Acomp, r4(2)%Bcomp(2)) + allocate(r5%Acomp, r5%Bcomp(2)) + allocate(r6(3)%Acomp, r6(3)%Bcomp(2)) + !$omp allocate align(128) + allocate(r4(3)%Acomp, r4(3)%Bcomp(2), & + r6(1)%Acomp, r6(1)%Bcomp(2)) + if (mod (loc (r4(3)%Acomp), 128) /= 0) stop 1 + if (mod (loc (r4(3)%Bcomp), 128) /= 0) stop 2 + if (mod (loc (r6(1)%Acomp), 128) /= 0) stop 3 + if (mod (loc (r6(1)%Bcomp), 128) /= 0) stop 3 + call q(r1,r2,r3,r4,r5,r6) + + !$omp allocate align(64) + allocate(r1,r2(3)) + if (mod (loc (r1), 64) 
/= 0) stop 1 + if (mod (loc (r2), 64) /= 0) stop 1 + !$omp allocate align(64) + allocate(r5,r6(4)) + if (mod (loc (r5), 64) /= 0) stop 1 + if (mod (loc (r6), 64) /= 0) stop 1 + !$omp allocate align(64) + allocate(r3%Acomp, r3%Bcomp(2)) + if (mod (loc (r3%Acomp), 64) /= 0) stop 1 + if (mod (loc (r3%Bcomp), 64) /= 0) stop 1 + !$omp allocate align(64) + allocate(r4(2)%Acomp, r4(2)%Bcomp(2)) + if (mod (loc (r4(2)%Acomp), 64) /= 0) stop 1 + if (mod (loc (r4(2)%Bcomp), 64) /= 0) stop 1 + !$omp allocate align(64) + allocate(r5%Acomp, r5%Bcomp(2)) + if (mod (loc (r5%Acomp), 64) /= 0) stop 1 + if (mod (loc (r5%Bcomp), 64) /= 0) stop 1 + !$omp allocate align(64) + allocate(r6(3)%Acomp, r6(3)%Bcomp(2)) + if (mod (loc (r6(3)%Acomp), 64) /= 0) stop 1 + if (mod (loc (r6(3)%Bcomp), 64) /= 0) stop 1 + !$omp allocate align(128) + allocate(r4(3)%Acomp, r4(3)%Bcomp(2), & + r6(1)%Acomp, r6(1)%Bcomp(2)) + if (mod (loc (r4(3)%Acomp), 128) /= 0) stop 1 + if (mod (loc (r4(3)%Bcomp), 128) /= 0) stop 2 + if (mod (loc (r6(1)%Acomp), 128) /= 0) stop 3 + if (mod (loc (r6(1)%Bcomp), 128) /= 0) stop 3 + call q(r1,r2,r3,r4,r5,r6) +end subroutine r +end + +subroutine s + use m, only : t + implicit none (type, external) + type(t) :: xx + integer :: i, iiiiii + i = 4 + !$omp allocate + allocate(xx%Acomp, xx%Bcomp(4)) + deallocate(xx%Acomp, xx%Bcomp) + + !$omp allocate + allocate(xx%Acomp, xx%Bcomp(4)) + xx = t(1, [1,2]) +end + +program main + use m, only: r + implicit none (type, external) + external s + call s + call r +end diff --git a/libgomp/testsuite/libgomp.fortran/allocators-3.f90 b/libgomp/testsuite/libgomp.fortran/allocators-3.f90 new file mode 100644 index 000000000000..2e05939a8b6c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocators-3.f90 @@ -0,0 +1,25 @@ +! 
{ dg-additional-options "-fdump-tree-original -fopenmp-allocators" } + +subroutine s + character(:), allocatable :: s1,s2 + + !$omp allocators allocate(s1) + allocate(character(len=3) :: s1) + + !$omp allocators allocate(s2) + allocate(character(len=5) :: s2) + + s2(1:5) = "12" + s1 = trim(s2) +end +! { dg-final { scan-tree-dump-times "s1 = \\(character\\(kind=1\\)\\\[1:.s1\\\] \\*\\) __builtin_GOMP_alloc \\(1, 3, 0B\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "s2 = \\(character\\(kind=1\\)\\\[1:.s2\\\] \\*\\) __builtin_GOMP_alloc \\(1, 5, 0B\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "s1 = \\(character\\(kind=1\\)\\\[1:.s1\\\] \\*\\) \\(D\\.\[0-9\]+ \\? __builtin_omp_realloc \\(\\(void \\*\\) s1, MAX_EXPR <\\(sizetype\\) len.1, 1>, 0B, 0B\\) : __builtin_realloc \\(\\(void \\*\\) s1, MAX_EXPR <\\(sizetype\\) len.1, 1>\\)\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "GOMP_add_alloc \\(s1\\);" 2 "original" } } +! { dg-final { scan-tree-dump-times "OMP_add_alloc \\(s2\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "if \\(GOMP_is_alloc \\(s2\\)\\)" 1 "original" } } +! { dg-final { scan-tree-dump-times "__builtin_GOMP_free \\(s2, 0B\\);" 1 "original" } } + + +call s +end diff --git a/libgomp/testsuite/libgomp.fortran/allocators-4.f90 b/libgomp/testsuite/libgomp.fortran/allocators-4.f90 new file mode 100644 index 000000000000..12689ea41ac0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocators-4.f90 @@ -0,0 +1,57 @@ +! 
{ dg-additional-options "-fopenmp-allocators" } +module m +implicit none +type t + integer, allocatable :: Acomp, Bcomp(:) + class(*), allocatable :: Ccomp, Dcomp(:) +end type t +contains + +subroutine intout(c,d,e,f) +implicit none +class(t), intent(out) :: c,d(4) +class(t), allocatable, intent(out) :: e,f(:) +end + +subroutine q(c,d,e,f) +implicit none +class(t) :: c,d(4) +class(t), allocatable :: e,f(:) +call intout(c,d,e,f) +end subroutine q + +subroutine s +implicit none +type(t) :: xx +class(t), allocatable :: yy +integer :: i, iiiiii +i = 4 +!$omp allocate +allocate(xx%Acomp, xx%Bcomp(4)) +deallocate(xx%Acomp, xx%Bcomp) + +!$omp allocate +allocate(integer :: xx%Ccomp, xx%Dcomp(4)) +deallocate(xx%Ccomp, xx%Dcomp) + +!$omp allocators allocate(yy) +allocate(t :: yy) + +!$omp allocate +allocate(real :: xx%Ccomp, xx%Dcomp(4)) +deallocate(xx%Ccomp, xx%Dcomp) + +!$omp allocate +allocate(xx%Acomp, xx%Bcomp(4)) +!$omp allocate +allocate(logical :: xx%Ccomp, xx%Dcomp(4)) + +iiiiii = 555 +xx = t(1, [1,2]) +end + +end module + +use m +call s +end diff --git a/libgomp/testsuite/libgomp.fortran/allocators-5.f90 b/libgomp/testsuite/libgomp.fortran/allocators-5.f90 new file mode 100644 index 000000000000..87088630197b --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocators-5.f90 @@ -0,0 +1,27 @@ +! 
{ dg-additional-options "-fopenmp-allocators" } +module m +contains +subroutine s(a,b,c,d) +integer, allocatable :: A, B +integer, allocatable :: C(:), D(:) + +!$omp allocators allocate(A,B) +allocate(A,B) +call move_alloc(A,B) + +!$omp allocators allocate(C,D) +allocate(C(5),D(5)) +call move_alloc(C,D) +end + +subroutine q() +integer, allocatable :: A, B +integer, allocatable :: C(:), D(:) + +call s(a,b,c,d) +end +end + +use m +call q +end From 68d4138204b09b658506e8eefa231c985b8f5363 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Fri, 8 Dec 2023 16:27:55 +0100 Subject: [PATCH 099/311] libgcov: Call __builtin_fork instead of fork Some targets do not provide a prototype for fork, and compilation now fails with an implicit-function-declaration error. libgcc/ * libgcov-interface.c (__gcov_fork): Use __builtin_fork instead of fork. --- libgcc/libgcov-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgcc/libgcov-interface.c b/libgcc/libgcov-interface.c index b2ee93086418..d166e98510d4 100644 --- a/libgcc/libgcov-interface.c +++ b/libgcc/libgcov-interface.c @@ -182,7 +182,7 @@ pid_t __gcov_fork (void) { pid_t pid; - pid = fork (); + pid = __builtin_fork (); if (pid == 0) { __GTHREAD_MUTEX_INIT_FUNCTION (&__gcov_mx); From b176556e6913497c4ba2925666eca8e10de64186 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:17 +0000 Subject: [PATCH 100/311] Revert "arm: vld1_types_x4 ACLE intrinsics" This reverts commit 656f092cba951fddc1e40468ad71d241ffe98566. 
--- gcc/config/arm/arm_neon.h | 156 ++---------------- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vld1_base_xN_1.c | 63 +------ .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_p64_xN_1.c | 7 +- 7 files changed, 22 insertions(+), 231 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 8bcf1d6325e5..dbc37cafe286 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10325,15 +10325,6 @@ vld1_p64_x3 (const poly64_t * __a) return __rv.__i; } -__extension__ extern __inline poly64x1x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p64_x4 (const poly64_t * __a) -{ - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10435,42 +10426,6 @@ vld1_s64_x3 (const int64_t * __a) return __rv.__i; } -__extension__ extern __inline int8x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s8_x4 (const int8_t * __a) -{ - union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s16_x4 (const int16_t * __a) -{ - union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32_x4 (const int32_t * __a) -{ - union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = 
__builtin_neon_vld1_x4v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x1x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s64_x4 (const int64_t * __a) -{ - union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10527,26 +10482,6 @@ vld1_f32_x3 (const float32_t * __a) return __rv.__i; } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f16_x4 (const float16_t * __a) -{ - union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f32_x4 (const float32_t * __a) -{ - union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10647,42 +10582,6 @@ vld1_u64_x3 (const uint64_t * __a) return __rv.__i; } -__extension__ extern __inline uint8x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u8_x4 (const uint8_t * __a) -{ - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u16_x4 
(const uint16_t * __a) -{ - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u32_x4 (const uint32_t * __a) -{ - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x1x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u64_x4 (const uint64_t * __a) -{ - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10733,24 +10632,6 @@ vld1_p16_x3 (const poly16_t * __a) return __rv.__i; } -__extension__ extern __inline poly8x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p8_x4 (const poly8_t * __a) -{ - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p16_x4 (const poly16_t * __a) -{ - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10783,7 +10664,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x4 (const poly64_t * __a) { union { poly64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v2di ((const 
__builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10893,7 +10774,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x4 (const uint8_t * __a) { union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10902,7 +10783,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x4 (const uint16_t * __a) { union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10911,7 +10792,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x4 (const int32_t * __a) { union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10920,7 +10801,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x4 (const int64_t * __a) { union { int64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10986,7 +10867,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x4 (const float16_t * __a) { union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v8hf (__a); + __rv.__o = __builtin_neon_vld1_x4v8hf (__a); return __rv.__i; } #endif @@ -10996,7 +10877,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x4 (const float32_t * __a) { union { 
float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1_x4v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -11105,7 +10986,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x4 (const uint8_t * __a) { union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -11114,7 +10995,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x4 (const uint16_t * __a) { union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -11123,7 +11004,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x4 (const uint32_t * __a) { union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -11132,7 +11013,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x4 (const uint64_t * __a) { union { uint64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -11191,7 +11072,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x4 (const poly8_t * __a) { union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); return 
__rv.__i; } @@ -11200,7 +11081,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x4 (const poly16_t * __a) { union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -21191,15 +21072,6 @@ vld1_bf16_x3 (const bfloat16_t * __ptr) return __rv.__i; } -__extension__ extern __inline bfloat16x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_bf16_x4 (const bfloat16_t * __ptr) -{ - union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -21230,7 +21102,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x4 (const bfloat16_t * __ptr) { union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x4v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1_x4v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 20dfcae7de56..c74f0db645bd 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -305,8 +305,7 @@ VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (LOAD1, vld1q_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, 
vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 30f5bf8e40e8..e67cbc247d9a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5005,16 +5005,6 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_x4" - [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD1))] - "TARGET_NEON" - "vld1.\t%h0, %A1" - [(set_attr "type" "neon_load1_4reg")] -) - -(define_insn "neon_vld1q_x4" [(set (match_operand:XI 0 "s_register_operand" "=w") (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c index a5686ffac013..95314bbe0ded 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c @@ -115,62 +115,7 @@ poly16x4x3_t test_vld1_p16_x3 (poly16_t * a) return vld1_p16_x3 (a); } -uint8x8x4_t test_vld1_u8_x4 (uint8_t * a) -{ - return vld1_u8_x4 (a); -} - -uint16x4x4_t test_vld1_u16_x4 (uint16_t * a) -{ - return vld1_u16_x4 (a); -} - -uint32x2x4_t test_vld1_u32_x4 (uint32_t * a) -{ - return vld1_u32_x4 (a); -} - -uint64x1x4_t test_vld1_u64_x4 (uint64_t * a) -{ - return vld1_u64_x4 (a); -} - -int8x8x4_t test_vld1_s8_x4 (int8_t * a) -{ - return vld1_s8_x4 (a); -} - -int16x4x4_t test_vld1_s16_x4 (int16_t * a) -{ - return vld1_s16_x4 (a); -} - -int32x2x4_t test_vld1_s32_x4 (int32_t * a) -{ - return vld1_s32_x4 (a); -} - -int64x1x4_t test_vld1_s64_x4 (int64_t * a) -{ - return vld1_s64_x4 (a); -} - -float32x2x4_t test_vld1_f32_x4 (float32_t * a) -{ - return vld1_f32_x4 (a); -} - -poly8x8x4_t test_vld1_p8_x4 (poly8_t * a) -{ - return vld1_p8_x4 (a); -} - -poly16x4x4_t test_vld1_p16_x4 (poly16_t * a) -{ - 
return vld1_p16_x4 (a); -} - -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 6 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c index 7ed17834ccf4..c1935da0a4c7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c @@ -15,9 +15,4 @@ bfloat16x4x3_t test_vld1_bf16_x3 (bfloat16_t * a) return vld1_bf16_x3 (a); } -bfloat16x4x4_t test_vld1_bf16_x4 (bfloat16_t * a) -{ - return vld1_bf16_x4 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c index 82e7211ebbf2..20363239f5b4 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c @@ -15,9 +15,4 @@ float16x4x3_t test_vld1_f16_x3 (float16_t * a) return vld1_f16_x3 (a); } -float16x4x4_t test_vld1_f16_x4 (float16_t * a) -{ - return vld1_f16_x4 (a); -} - -/* { dg-final { 
scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c index 644371b89ea2..210de511c716 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c @@ -15,9 +15,4 @@ poly64x1x3_t test_vld1_p64_x3 (poly64_t * a) return vld1_p64_x3 (a); } -poly64x1x4_t test_vld1_p64_x4 (poly64_t * a) -{ - return vld1_p64_x4 (a); -} - -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 0a80a35df3446c936205a768ed0cd93bf8e0f43b Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:17 +0000 Subject: [PATCH 101/311] Revert "arm: vld1_types_x3 ACLE intrinsics" This reverts commit 8e3ae874b21bdd8da32afefa6f6f60913481564c. 
--- gcc/config/arm/arm_neon.h | 156 ++---------------- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vld1_base_xN_1.c | 63 +------ .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1_p64_xN_1.c | 7 +- 7 files changed, 22 insertions(+), 231 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index dbc37cafe286..669b8fffb405 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10316,15 +10316,6 @@ vld1_p64_x2 (const poly64_t * __a) return __rv.__i; } -__extension__ extern __inline poly64x1x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p64_x3 (const poly64_t * __a) -{ - union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10390,42 +10381,6 @@ vld1_s64_x2 (const int64_t * __a) return __rv.__i; } -__extension__ extern __inline int8x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s8_x3 (const int8_t * __a) -{ - union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s16_x3 (const int16_t * __a) -{ - union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32_x3 (const int32_t * __a) -{ - union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = 
__builtin_neon_vld1_x3v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x1x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s64_x3 (const int64_t * __a) -{ - union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10462,26 +10417,6 @@ vld1_f32_x2 (const float32_t * __a) return __rv.__i; } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f16_x3 (const float16_t * __a) -{ - union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f32_x3 (const float32_t * __a) -{ - union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10546,42 +10481,6 @@ vld1_u64_x2 (const uint64_t * __a) return __rv.__i; } -__extension__ extern __inline uint8x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u8_x3 (const uint8_t * __a) -{ - union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u16_x3 
(const uint16_t * __a) -{ - union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u32_x3 (const uint32_t * __a) -{ - union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x1x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u64_x3 (const uint64_t * __a) -{ - union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10614,24 +10513,6 @@ vld1_p16_x2 (const poly16_t * __a) return __rv.__i; } -__extension__ extern __inline poly8x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p8_x3 (const poly8_t * __a) -{ - union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p16_x3 (const poly16_t * __a) -{ - union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10655,7 +10536,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x3 (const poly64_t * __a) { union { poly64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v2di ((const 
__builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10738,7 +10619,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x3 (const uint8_t * __a) { union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10747,7 +10628,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x3 (const uint16_t * __a) { union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10756,7 +10637,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x3 (const int32_t * __a) { union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10765,7 +10646,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x3 (const int64_t * __a) { union { int64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10847,7 +10728,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x3 (const float16_t * __a) { union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v8hf (__a); + __rv.__o = __builtin_neon_vld1_x3v8hf (__a); return __rv.__i; } #endif @@ -10857,7 +10738,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x3 (const float32_t * __a) { union { 
float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1_x3v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10950,7 +10831,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x3 (const uint8_t * __a) { union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10959,7 +10840,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x3 (const uint16_t * __a) { union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10968,7 +10849,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x3 (const uint32_t * __a) { union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10977,7 +10858,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x3 (const uint64_t * __a) { union { uint64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -11054,7 +10935,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x3 (const poly8_t * __a) { union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); return 
__rv.__i; } @@ -11063,7 +10944,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x3 (const poly16_t * __a) { union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -21063,15 +20944,6 @@ vld1_bf16_x2 (const bfloat16_t * __ptr) return __rv.__i; } -__extension__ extern __inline bfloat16x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_bf16_x3 (const bfloat16_t * __ptr) -{ - union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -21093,7 +20965,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x3 (const bfloat16_t * __ptr) { union { bfloat16x8x3_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x3v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1_x3v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index c74f0db645bd..07750c03c087 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -303,8 +303,7 @@ VAR13 (LOAD1, vld1, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (LOAD1, vld1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, 
di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e67cbc247d9a..75add42777d8 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4968,16 +4968,6 @@ if (BYTES_BIG_ENDIAN) ) (define_insn "neon_vld1_x3" - [(set (match_operand:EI 0 "s_register_operand" "=w") - (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD1))] - "TARGET_NEON" - "vld1.\t%h0, %A1" - [(set_attr "type" "neon_load1_3reg")] -) - -(define_insn "neon_vld1q_x3" [(set (match_operand:CI 0 "s_register_operand" "=w") (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c index 95314bbe0ded..6b0e78d94d7c 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c @@ -60,62 +60,7 @@ poly16x4x2_t test_vld1_p16_x2 (poly16_t * a) return vld1_p16_x2 (a); } -uint8x8x3_t test_vld1_u8_x3 (uint8_t * a) -{ - return vld1_u8_x3 (a); -} - -uint16x4x3_t test_vld1_u16_x3 (uint16_t * a) -{ - return vld1_u16_x3 (a); -} - -uint32x2x3_t test_vld1_u32_x3 (uint32_t * a) -{ - return vld1_u32_x3 (a); -} - -uint64x1x3_t test_vld1_u64_x3 (uint64_t * a) -{ - return vld1_u64_x3 (a); -} - -int8x8x3_t test_vld1_s8_x3 (int8_t * a) -{ - return vld1_s8_x3 (a); -} - -int16x4x3_t test_vld1_s16_x3 (int16_t * a) -{ - return vld1_s16_x3 (a); -} - -int32x2x3_t test_vld1_s32_x3 (int32_t * a) -{ - return vld1_s32_x3 (a); -} - -int64x1x3_t test_vld1_s64_x3 (int64_t * a) -{ - return vld1_s64_x3 (a); -} - -float32x2x3_t test_vld1_f32_x3 (float32_t * a) -{ - return vld1_f32_x3 (a); -} - -poly8x8x3_t test_vld1_p8_x3 (poly8_t * a) -{ - return vld1_p8_x3 (a); -} - -poly16x4x3_t test_vld1_p16_x3 (poly16_t * a) -{ - return vld1_p16_x3 (a); -} - -/* { dg-final { 
scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c index c1935da0a4c7..3ec7a5e19864 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c @@ -10,9 +10,4 @@ bfloat16x4x2_t test_vld1_bf16_x2 (bfloat16_t * a) return vld1_bf16_x2 (a); } -bfloat16x4x3_t test_vld1_bf16_x3 (bfloat16_t * a) -{ - return vld1_bf16_x3 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c index 20363239f5b4..c0e5ea491424 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c @@ -10,9 +10,4 @@ float16x4x2_t test_vld1_f16_x2 (float16_t * a) return vld1_f16_x2 (a); } -float16x4x3_t test_vld1_f16_x3 (float16_t * a) -{ - return vld1_f16_x3 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, 
\[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c index 210de511c716..3ccea520ddc2 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c @@ -10,9 +10,4 @@ poly64x1x2_t test_vld1_p64_x2 (poly64_t * a) return vld1_p64_x2 (a); } -poly64x1x3_t test_vld1_p64_x3 (poly64_t * a) -{ - return vld1_p64_x3 (a); -} - -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ From f6d303dbb581af2d1d92a4df6ac9a4d57cb92942 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:17 +0000 Subject: [PATCH 102/311] Revert "arm: vld1_types_x2 ACLE intrinsics" This reverts commit 8fff3f065277f13176c320f22c4ed766a82c5d8e. 
--- gcc/config/arm/arm_neon.h | 156 ++---------------- gcc/config/arm/arm_neon_builtins.def | 3 +- gcc/config/arm/neon.md | 10 +- .../gcc.target/arm/simd/vld1_base_xN_1.c | 66 -------- .../gcc.target/arm/simd/vld1_bf16_xN_1.c | 13 -- .../gcc.target/arm/simd/vld1_fp16_xN_1.c | 13 -- .../gcc.target/arm/simd/vld1_p64_xN_1.c | 13 -- 7 files changed, 20 insertions(+), 254 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 669b8fffb405..af1f747f2622 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10307,15 +10307,6 @@ vld1_p64 (const poly64_t * __a) return (poly64x1_t) { *__a }; } -__extension__ extern __inline poly64x1x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p64_x2 (const poly64_t * __a) -{ - union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10345,42 +10336,6 @@ vld1_s64 (const int64_t * __a) return (int64x1_t) { *__a }; } -__extension__ extern __inline int8x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s8_x2 (const int8_t * __a) -{ - union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s16_x2 (const int16_t * __a) -{ - union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4hi 
((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s32_x2 (const int32_t * __a) -{ - union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x1x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_s64_x2 (const int64_t * __a) -{ - union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10397,26 +10352,6 @@ vld1_f32 (const float32_t * __a) return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f16_x2 (const float16_t * __a) -{ - union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_f32_x2 (const float32_t * __a) -{ - union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_u8 (const uint8_t * __a) @@ -10445,42 +10380,6 @@ vld1_u64 (const uint64_t * __a) return (uint64x1_t) { *__a }; } -__extension__ extern __inline uint8x8x2_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vld1_u8_x2 (const uint8_t * __a) -{ - union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u16_x2 (const uint16_t * __a) -{ - union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u32_x2 (const uint32_t * __a) -{ - union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x1x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_u64_x2 (const uint64_t * __a) -{ - union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_p8 (const poly8_t * __a) @@ -10495,24 +10394,6 @@ vld1_p16 (const poly16_t * __a) return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); } -__extension__ extern __inline poly8x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p8_x2 (const poly8_t * __a) -{ - union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_p16_x2 (const poly16_t * __a) -{ - union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4hi ((const 
__builtin_neon_hi *) __a); - return __rv.__i; -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline poly64x2_t @@ -10527,7 +10408,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p64_x2 (const poly64_t * __a) { union { poly64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10583,7 +10464,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s8_x2 (const int8_t * __a) { union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10592,7 +10473,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s16_x2 (const int16_t * __a) { union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10601,7 +10482,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s32_x2 (const int32_t * __a) { union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10610,7 +10491,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_s64_x2 (const int64_t * __a) { union { int64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10708,7 +10589,7 @@ __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f16_x2 (const float16_t * __a) { union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v8hf (__a); + __rv.__o = __builtin_neon_vld1_x2v8hf (__a); return __rv.__i; } #endif @@ -10718,7 +10599,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_f32_x2 (const float32_t * __a) { union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v4sf ((const __builtin_neon_sf *) __a); + __rv.__o = __builtin_neon_vld1_x2v4sf ((const __builtin_neon_sf *) __a); return __rv.__i; } @@ -10795,7 +10676,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8_x2 (const uint8_t * __a) { union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10804,7 +10685,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u16_x2 (const uint16_t * __a) { union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -10813,7 +10694,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u32_x2 (const uint32_t * __a) { union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v4si ((const __builtin_neon_si *) __a); + __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); return __rv.__i; } @@ -10822,7 +10703,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u64_x2 (const uint64_t * __a) { union { uint64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v2di ((const __builtin_neon_di *) __a); + __rv.__o = 
__builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); return __rv.__i; } @@ -10917,7 +10798,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8_x2 (const poly8_t * __a) { union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v16qi ((const __builtin_neon_qi *) __a); + __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); return __rv.__i; } @@ -10926,7 +10807,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p16_x2 (const poly16_t * __a) { union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v8hi ((const __builtin_neon_hi *) __a); + __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } @@ -20935,15 +20816,6 @@ vld1_bf16 (bfloat16_t const * __ptr) return __builtin_neon_vld1v4bf (__ptr); } -__extension__ extern __inline bfloat16x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1_bf16_x2 (const bfloat16_t * __ptr) -{ - union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16 (const bfloat16_t * __ptr) @@ -20956,7 +20828,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_bf16_x2 (const bfloat16_t * __ptr) { union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1q_x2v8bf ((const __builtin_neon_bf *) __ptr); + __rv.__o = __builtin_neon_vld1_x2v8bf ((const __builtin_neon_bf *) __ptr); return __rv.__i; } diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 07750c03c087..55e097227485 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -301,8 +301,7 @@ VAR1 (TERNOP, vtbx4, v8qi) 
VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) -VAR7 (LOAD1, vld1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (LOAD1, vld1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) +VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 75add42777d8..e069ceb651c9 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4957,11 +4957,11 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_1reg")] ) -(define_insn "neon_vld1_x2" - [(set (match_operand:VMEMX2 0 "s_register_operand" "=w") - (unspec:VMEMX2 [(match_operand:VMEMX2 1 "neon_struct_operand" "Um") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD1))] +(define_insn "neon_vld1_x2" + [(set (match_operand:OI 0 "s_register_operand" "=w") + (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") + (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_VLD1))] "TARGET_NEON" "vld1.\t%h0, %A1" [(set_attr "type" "neon_load1_2reg")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c deleted file mode 100644 index 6b0e78d94d7c..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_base_xN_1.c +++ /dev/null @@ -1,66 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon } */ - -#include "arm_neon.h" - -uint8x8x2_t test_vld1_u8_x2 (uint8_t * a) -{ - return vld1_u8_x2 (a); -} - -uint16x4x2_t test_vld1_u16_x2 (uint16_t * a) -{ - return vld1_u16_x2 (a); -} - -uint32x2x2_t test_vld1_u32_x2 (uint32_t * a) -{ - return vld1_u32_x2 (a); -} - -uint64x1x2_t test_vld1_u64_x2 (uint64_t * a) -{ - return vld1_u64_x2 (a); -} - -int8x8x2_t test_vld1_s8_x2 
(int8_t * a) -{ - return vld1_s8_x2 (a); -} - -int16x4x2_t test_vld1_s16_x2 (int16_t * a) -{ - return vld1_s16_x2 (a); -} - -int32x2x2_t test_vld1_s32_x2 (int32_t * a) -{ - return vld1_s32_x2 (a); -} - -int64x1x2_t test_vld1_s64_x2 (int64_t * a) -{ - return vld1_s64_x2 (a); -} - -float32x2x2_t test_vld1_f32_x2 (float32_t * a) -{ - return vld1_f32_x2 (a); -} - -poly8x8x2_t test_vld1_p8_x2 (poly8_t * a) -{ - return vld1_p8_x2 (a); -} - -poly16x4x2_t test_vld1_p16_x2 (poly16_t * a) -{ - return vld1_p16_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c deleted file mode 100644 index 3ec7a5e19864..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_bf16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_v8_2a_bf16_neon } */ - -#include "arm_neon.h" - -bfloat16x4x2_t test_vld1_bf16_x2 (bfloat16_t * a) -{ - return vld1_bf16_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c deleted file mode 100644 index c0e5ea491424..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_fp16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_fp16_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options 
arm_neon_fp16 } */ - -#include "arm_neon.h" - -float16x4x2_t test_vld1_f16_x2 (float16_t * a) -{ - return vld1_f16_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c deleted file mode 100644 index 3ccea520ddc2..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1_p64_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_crypto_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_crypto } */ - -#include "arm_neon.h" - -poly64x1x2_t test_vld1_p64_x2 (poly64_t * a) -{ - return vld1_p64_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ From 59f77a89712a927975475754e2e7a27d8c09cb11 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:17 +0000 Subject: [PATCH 103/311] Revert "arm: vst1q_types_x4 ACLE intrinsics" This reverts commit 4ad77f883c178679f1dbb3a5603f811e022080bb. 
--- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 26 ---- .../gcc.target/arm/simd/vst1q_base_xN_1.c | 59 --------- .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 8 +- .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 6 - .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 6 - 7 files changed, 1 insertion(+), 219 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index af1f747f2622..5cec7dd876f0 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11391,38 +11391,6 @@ vst1q_s64_x3 (int64_t * __a, int64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8_x4 (int8_t * __a, int8x16x4_t __b) -{ - union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16_x4 (int16_t * __a, int16x8x4_t __b) -{ - union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32_x4 (int32_t * __a, int32x4x4_t __b) -{ - union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64_x4 (int64_t * __a, int64x2x4_t __b) -{ - union { int64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 
(int8_t * __a, int8x8x3_t __b) @@ -11768,14 +11736,6 @@ vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t __b) -{ - union { poly64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11857,24 +11817,6 @@ vst1q_f32_x3 (float32_t * __a, float32x4x3_t __b) __builtin_neon_vst1q_x3v4sf (__a, __bu.__o); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f16_x4 (float16_t * __a, float16x8x4_t __b) -{ - union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v8hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32_x4 (float32_t * __a, float32x4x4_t __b) -{ - union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v4sf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11967,38 +11909,6 @@ vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __b) __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t __b) -{ - union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void 
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t __b) -{ - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t __b) -{ - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t __b) -{ - union { uint64x2x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -12045,22 +11955,6 @@ vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __b) __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t __b) -{ - union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t __b) -{ - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20753,14 +20647,6 @@ vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __b) 
__builtin_neon_vst1q_x3v8bf (__a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __b) -{ - union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst1q_x4v8bf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 55e097227485..cc014f9b89ed 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -316,7 +316,6 @@ VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (STORE1, vst1q_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e069ceb651c9..010cc579f23a 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5169,32 +5169,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) -(define_insn "neon_vst1q_x4" - [(set (match_operand:XI 0 "neon_struct_operand" "=Um") - (unspec:XI [(match_operand:XI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" -{ - int regno = REGNO (operands[1]); - rtx ops[5]; - ops[0] = operands[0]; - ops[1] = gen_rtx_REG (DImode, regno); - ops[2] = gen_rtx_REG (DImode, regno + 2); - ops[3] = gen_rtx_REG (DImode, regno + 4); - ops[4] = gen_rtx_REG (DImode, regno + 6); - output_asm_insn ("vst1.\t{%P1, %P2, %P3, %P4}, %A0", ops); - - ops[1] = gen_rtx_REG 
(DImode, regno + 8); - ops[2] = gen_rtx_REG (DImode, regno + 10); - ops[3] = gen_rtx_REG (DImode, regno + 12); - ops[4] = gen_rtx_REG (DImode, regno + 14); - output_asm_insn ("vst1.\t{%P1, %P2, %P3, %P4}, %A0", ops); - return ""; -} - [(set_attr "type" "neon_store1_4reg")] -) - (define_insn "neon_vst1_x4" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c index 5a639560de6e..838da09fee7a 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c @@ -115,74 +115,15 @@ void test_vst1q_p16_x3 (poly16_t * ptr, poly16x8x3_t val) vst1q_p16_x3 (ptr, val); } -void test_vst1q_u8_x4 (uint8_t * ptr, uint8x16x4_t val) -{ - vst1q_u8_x4 (ptr, val); -} - -void test_vst1q_u16_x4 (uint16_t * ptr, uint16x8x4_t val) -{ - vst1q_u16_x4 (ptr, val); -} - -void test_vst1q_u32_x4 (uint32_t * ptr, uint32x4x4_t val) -{ - vst1q_u32_x4 (ptr, val); -} - -void test_vst1q_u64_x4 (uint64_t * ptr, uint64x2x4_t val) -{ - vst1q_u64_x4 (ptr, val); -} - -void test_vst1q_s8_x4 (int8_t * ptr, int8x16x4_t val) -{ - vst1q_s8_x4 (ptr, val); -} - -void test_vst1q_s16_x4 (int16_t * ptr, int16x8x4_t val) -{ - vst1q_s16_x4 (ptr, val); -} - -void test_vst1q_s32_x4 (int32_t * ptr, int32x4x4_t val) -{ - vst1q_s32_x4 (ptr, val); -} - -void test_vst1q_s64_x4 (int64_t * ptr, int64x2x4_t val) -{ - vst1q_s64_x4 (ptr, val); -} - -void test_vst1q_f32_x4 (float32_t * ptr, float32x4x4_t val) -{ - vst1q_f32_x4 (ptr, val); -} - -void test_vst1q_p8_x4 (poly8_t * ptr, poly8x16x4_t val) -{ - vst1q_p8_x4 (ptr, val); -} - -void test_vst1q_p16_x4 (poly16_t * ptr, poly16x8x4_t val) -{ - vst1q_p16_x4 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, 
\[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c index 84fa8509db8e..2593c31c7561 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c @@ -15,11 +15,5 @@ void test_vst1q_bf16_x3 (bfloat16_t * ptr, bfloat16x8x3_t val) vst1q_bf16_x3 (ptr, val); } -void test_vst1q_bf16_x4 (bfloat16_t * ptr, bfloat16x8x4_t val) -{ - vst1q_bf16_x4 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline 
at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c index 5b13edf99987..28e949b557a9 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c @@ -15,11 +15,5 @@ void test_vst1q_f16_x3 (float16_t * ptr, float16x8x3_t val) vst1q_f16_x3 (ptr, val); } -void test_vst1q_f16_x4 (float16_t * ptr, float16x8x4_t val) -{ - vst1q_f16_x4 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c index f49917d5ec8b..7878d936b9f6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c @@ -15,11 +15,5 @@ void test_vst1q_p64_x3 (poly64_t * ptr, poly64x2x3_t val) vst1q_p64_x3 (ptr, val); } -void test_vst1q_p64_x4 (poly64_t * ptr, poly64x2x4_t val) -{ - vst1q_p64_x4 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 684bb3bdcd321b3ce7b099ffd222e03977c39bf6 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:17 +0000 Subject: [PATCH 104/311] Revert "arm: vst1q_types_x3 ACLE intrinsics" This reverts commit 2d58d53c9e0eed83faa9254f8d3ec0ddd54812d8. 
--- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 24 ---- .../gcc.target/arm/simd/vst1q_base_xN_1.c | 60 --------- .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 6 - .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 6 - .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 6 - 7 files changed, 217 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 5cec7dd876f0..1c447b6d42fa 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11359,38 +11359,6 @@ vst1q_s64_x2 (int64_t * __a, int64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8_x3 (int8_t * __a, int8x16x3_t __b) -{ - union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16_x3 (int16_t * __a, int16x8x3_t __b) -{ - union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32_x3 (int32_t * __a, int32x4x3_t __b) -{ - union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64_x3 (int64_t * __a, int64x2x3_t __b) -{ - union { int64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, 
int8x8x3_t __b) @@ -11728,14 +11696,6 @@ vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __b) -{ - union { poly64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11799,24 +11759,6 @@ vst1q_f32_x2 (float32_t * __a, float32x4x2_t __b) __builtin_neon_vst1q_x2v4sf (__a, __bu.__o); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f16_x3 (float16_t * __a, float16x8x3_t __b) -{ - union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v8hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32_x3 (float32_t * __a, float32x4x3_t __b) -{ - union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v4sf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11877,38 +11819,6 @@ vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __b) __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __b) -{ - union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __b) -{ - union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __b) -{ - union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __b) -{ - union { uint64x2x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -11939,22 +11849,6 @@ vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __b) __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __b) -{ - union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __b) -{ - union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20639,14 +20533,6 @@ vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __b) __builtin_neon_vst1q_x2v8bf 
(__a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __b) -{ - union { bfloat16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b }; - __builtin_neon_vst1q_x3v8bf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index cc014f9b89ed..696ed72678aa 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -314,7 +314,6 @@ VAR14 (STORE1, vst1, VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (STORE1, vst1q_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 010cc579f23a..ce525ccbc392 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5145,30 +5145,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) -(define_insn "neon_vst1q_x3" - [(set (match_operand:CI 0 "neon_struct_operand" "=Um") - (unspec:CI [(match_operand:CI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" -{ - int regno = REGNO (operands[1]); - rtx ops[4]; - ops[0] = operands[0]; - ops[1] = gen_rtx_REG (DImode, regno); - ops[2] = gen_rtx_REG (DImode, regno + 2); - ops[3] = gen_rtx_REG (DImode, regno + 4); - output_asm_insn ("vst1.\t{%P1, %P2, %P3}, %A0", ops); - - ops[1] = gen_rtx_REG (DImode, regno + 6); - ops[2] = gen_rtx_REG (DImode, regno + 8); - ops[3] = 
gen_rtx_REG (DImode, regno + 10); - output_asm_insn ("vst1.\t{%P1, %P2, %P3}, %A0", ops); - return ""; -} - [(set_attr "type" "neon_store1_3reg")] -) - (define_insn "neon_vst1_x4" [(set (match_operand:OI 0 "neon_struct_operand" "=Um") (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c index 838da09fee7a..4a17a80974b2 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c @@ -60,70 +60,10 @@ void test_vst1q_p16_x2 (poly16_t * ptr, poly16x8x2_t val) vst1q_p16_x2 (ptr, val); } -void test_vst1q_u8_x3 (uint8_t * ptr, uint8x16x3_t val) -{ - vst1q_u8_x3 (ptr, val); -} - -void test_vst1q_u16_x3 (uint16_t * ptr, uint16x8x3_t val) -{ - vst1q_u16_x3 (ptr, val); -} - -void test_vst1q_u32_x3 (uint32_t * ptr, uint32x4x3_t val) -{ - vst1q_u32_x3 (ptr, val); -} - -void test_vst1q_u64_x3 (uint64_t * ptr, uint64x2x3_t val) -{ - vst1q_u64_x3 (ptr, val); -} - -void test_vst1q_s8_x3 (int8_t * ptr, int8x16x3_t val) -{ - vst1q_s8_x3 (ptr, val); -} - -void test_vst1q_s16_x3 (int16_t * ptr, int16x8x3_t val) -{ - vst1q_s16_x3 (ptr, val); -} - -void test_vst1q_s32_x3 (int32_t * ptr, int32x4x3_t val) -{ - vst1q_s32_x3 (ptr, val); -} - -void test_vst1q_s64_x3 (int64_t * ptr, int64x2x3_t val) -{ - vst1q_s64_x3 (ptr, val); -} - -void test_vst1q_f32_x3 (float32_t * ptr, float32x4x3_t val) -{ - vst1q_f32_x3 (ptr, val); -} - -void test_vst1q_p8_x3 (poly8_t * ptr, poly8x16x3_t val) -{ - vst1q_p8_x3 (ptr, val); -} - -void test_vst1q_p16_x3 (poly16_t * ptr, poly16x8x3_t val) -{ - vst1q_p16_x3 (ptr, val); -} - - /* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { 
scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c index 2593c31c7561..2a4579f0aaef 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c @@ -10,10 +10,4 @@ void test_vst1q_bf16_x2 (bfloat16_t * ptr, bfloat16x8x2_t val) vst1q_bf16_x2 (ptr, val); } -void test_vst1q_bf16_x3 (bfloat16_t * ptr, bfloat16x8x3_t val) -{ - vst1q_bf16_x3 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c index 28e949b557a9..61a7e558c48d 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c @@ -10,10 +10,4 @@ void test_vst1q_f16_x2 (float16_t * ptr, float16x8x2_t val) vst1q_f16_x2 (ptr, val); } -void test_vst1q_f16_x3 (float16_t * ptr, float16x8x3_t val) -{ - vst1q_f16_x3 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c index 
7878d936b9f6..82f3dad293c6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c @@ -10,10 +10,4 @@ void test_vst1q_p64_x2 (poly64_t * ptr, poly64x2x2_t val) vst1q_p64_x2 (ptr, val); } -void test_vst1q_p64_x3 (poly64_t * ptr, poly64x2x3_t val) -{ - vst1q_p64_x3 (ptr, val); -} - /* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From 3783954776e9ca658f85da13d69eacde2f350659 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 105/311] Revert "arm: vst1q_types_x2 ACLE intrinsics" This reverts commit 2cd0d0261ef9d0e13e20407f131f32dcb67fcdd3. --- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/iterators.md | 6 - gcc/config/arm/neon.md | 6 +- .../gcc.target/arm/simd/vst1q_base_xN_1.c | 69 ----------- .../gcc.target/arm/simd/vst1q_bf16_xN_1.c | 13 -- .../gcc.target/arm/simd/vst1q_fp16_xN_1.c | 13 -- .../gcc.target/arm/simd/vst1q_p64_xN_1.c | 13 -- 8 files changed, 3 insertions(+), 232 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 1c447b6d42fa..c9bdda39663a 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11327,38 +11327,6 @@ vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s8_x2 (int8_t * __a, int8x16x2_t __b) -{ - union { int8x16x2_t 
__i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s16_x2 (int16_t * __a, int16x8x2_t __b) -{ - union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s32_x2 (int32_t * __a, int32x4x2_t __b) -{ - union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_s64_x2 (int64_t * __a, int64x2x2_t __b) -{ - union { int64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_s8_x3 (int8_t * __a, int8x8x3_t __b) @@ -11688,14 +11656,6 @@ vst1q_p64 (poly64_t * __a, poly64x2_t __b) __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __b) -{ - union { poly64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11741,24 +11701,6 @@ vst1q_f32 (float32_t * __a, float32x4_t __b) __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f16_x2 (float16_t * __a, float16x8x2_t __b) -{ - union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v8hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_f32_x2 (float32_t * __a, float32x4x2_t __b) -{ - union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v4sf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_u8 (uint8_t * __a, uint8x16_t __b) @@ -11787,38 +11729,6 @@ vst1q_u64 (uint64_t * __a, uint64x2_t __b) __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __b) -{ - union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __b) -{ - union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __b) -{ - union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v4si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __b) -{ - union { uint64x2x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v2di ((__builtin_neon_di *) __a, __bu.__o); -} 
- __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_p8 (poly8_t * __a, poly8x16_t __b) @@ -11833,22 +11743,6 @@ vst1q_p16 (poly16_t * __a, poly16x8_t __b) __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __b) -{ - union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v16qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __b) -{ - union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v8hi ((__builtin_neon_hi *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) @@ -20525,14 +20419,6 @@ vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) __builtin_neon_vst1v8bf (__a, __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __b) -{ - union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1q_x2v8bf (__a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 696ed72678aa..a4056ec24d96 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -312,7 +312,6 @@ VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) 
-VAR7 (STORE1, vst1q_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 6c5a80d93483..a98035381016 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -141,9 +141,6 @@ ;; Opaque structure types used in table lookups (except vtbl1/vtbx1). (define_mode_iterator VTAB [TI EI OI]) -;; Opaque structure types for x2 variants of VSTR1/VSTR1Q or VLD1/VLD1Q. -(define_mode_iterator VMEMX2 [TI OI]) - ;; Widenable modes. (define_mode_iterator VW [V8QI V4HI V2SI]) @@ -1536,9 +1533,6 @@ ;; vtbl suffix for NEON vector modes. (define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")]) -;; Suffix for x2 variants of vld1 and vst1. -(define_mode_attr VMEMX2_q [(TI "") (OI "q")]) - ;; fp16 or bf16 marker for 16-bit float modes. (define_mode_attr fporbf [(HF "fp16") (BF "bf16")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ce525ccbc392..dfbaf5a6dc68 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5125,9 +5125,9 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1))] "TARGET_NEON") -(define_insn "neon_vst1_x2" - [(set (match_operand:VMEMX2 0 "neon_struct_operand" "=Um") - (unspec:VMEMX2 [(match_operand:VMEMX2 1 "s_register_operand" "w") +(define_insn "neon_vst1_x2" + [(set (match_operand:TI 0 "neon_struct_operand" "=Um") + (unspec:TI [(match_operand:TI 1 "s_register_operand" "w") (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VST1))] "TARGET_NEON" diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c deleted file mode 100644 index 4a17a80974b2..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_base_xN_1.c +++ /dev/null @@ -1,69 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_ok } */ -/* { dg-options 
"-save-temps -O2" } */ -/* { dg-add-options arm_neon } */ - -#include "arm_neon.h" - -void test_vst1q_u8_x2 (uint8_t * ptr, uint8x16x2_t val) -{ - vst1q_u8_x2 (ptr, val); -} - -void test_vst1q_u16_x2 (uint16_t * ptr, uint16x8x2_t val) -{ - vst1q_u16_x2 (ptr, val); -} - -void test_vst1q_u32_x2 (uint32_t * ptr, uint32x4x2_t val) -{ - vst1q_u32_x2 (ptr, val); -} - -void test_vst1q_u64_x2 (uint64_t * ptr, uint64x2x2_t val) -{ - vst1q_u64_x2 (ptr, val); -} - -void test_vst1q_s8_x2 (int8_t * ptr, int8x16x2_t val) -{ - vst1q_s8_x2 (ptr, val); -} - -void test_vst1q_s16_x2 (int16_t * ptr, int16x8x2_t val) -{ - vst1q_s16_x2 (ptr, val); -} - -void test_vst1q_s32_x2 (int32_t * ptr, int32x4x2_t val) -{ - vst1q_s32_x2 (ptr, val); -} - -void test_vst1q_s64_x2 (int64_t * ptr, int64x2x2_t val) -{ - vst1q_s64_x2 (ptr, val); -} - -void test_vst1q_f32_x2 (float32_t * ptr, float32x4x2_t val) -{ - vst1q_f32_x2 (ptr, val); -} - -void test_vst1q_p8_x2 (poly8_t * ptr, poly8x16x2_t val) -{ - vst1q_p8_x2 (ptr, val); -} - -void test_vst1q_p16_x2 (poly16_t * ptr, poly16x8x2_t val) -{ - vst1q_p16_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ - -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ - -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c deleted file mode 100644 index 2a4579f0aaef..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_bf16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_v8_2a_bf16_neon } */ - -#include "arm_neon.h" - -void test_vst1q_bf16_x2 (bfloat16_t * ptr, 
bfloat16x8x2_t val) -{ - vst1q_bf16_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c deleted file mode 100644 index 61a7e558c48d..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_fp16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_fp16_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon_fp16 } */ - -#include "arm_neon.h" - -void test_vst1q_f16_x2 (float16_t * ptr, float16x8x2_t val) -{ - vst1q_f16_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c deleted file mode 100644 index 82f3dad293c6..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1q_p64_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_crypto_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_crypto } */ - -#include "arm_neon.h" - -void test_vst1q_p64_x2 (poly64_t * ptr, poly64x2x2_t val) -{ - vst1q_p64_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ From bdd0a50833e2a30ad5795f0c81925c9cea46f9d1 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 106/311] Revert "arm: vst1_types_x4 ACLE intrinsics" This reverts commit 2f48d846c794ba091b266133f73717361096d454. 
--- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vst1_base_xN_1.c | 62 +--------- .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 6 +- .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_p64_xN_1.c | 7 +- 7 files changed, 7 insertions(+), 200 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c9bdda39663a..e76be3516d95 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11258,14 +11258,6 @@ vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x4 (poly64_t * __a, poly64x1x4_t __b) -{ - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11359,38 +11351,6 @@ vst1_s64_x3 (int64_t * __a, int64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8_x4 (int8_t * __a, int8x8x4_t __b) -{ - union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x4 (int16_t * __a, int16x4x4_t __b) -{ - union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x4 (int32_t * __a, int32x2x4_t __b) -{ - union { 
int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x4 (int64_t * __a, int64x1x4_t __b) -{ - union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11443,24 +11403,6 @@ vst1_f32_x3 (float32_t * __a, float32x2x3_t __b) __builtin_neon_vst1_x3v2sf ((__builtin_neon_sf *) __a, __bu.__o); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x4 (float16_t * __a, float16x4x4_t __b) -{ - union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x4 (float32_t * __a, float32x2x4_t __b) -{ - union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2sf ((__builtin_neon_sf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11553,38 +11495,6 @@ vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __b) __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x4 (uint8_t * __a, uint8x8x4_t __b) -{ - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - 
-__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x4 (uint16_t * __a, uint16x4x4_t __b) -{ - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x4 (uint32_t * __a, uint32x2x4_t __b) -{ - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x4 (uint64_t * __a, uint64x1x4_t __b) -{ - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11631,22 +11541,6 @@ vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __b) __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x4 (poly8_t * __a, poly8x8x4_t __b) -{ - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x4 (poly16_t * __a, poly16x4x4_t __b) -{ - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20404,14 +20298,6 @@ vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __b) __builtin_neon_vst1_x3v4bf ((__builtin_neon_bf 
*) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __b) -{ - union { bfloat16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst1_x4v4bf ((__builtin_neon_bf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a4056ec24d96..8b104b1a700b 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -313,7 +313,6 @@ VAR14 (STORE1, vst1, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (STORE1, vst1_x4, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index dfbaf5a6dc68..5185434d6d93 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5145,16 +5145,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_3reg")] ) -(define_insn "neon_vst1_x4" - [(set (match_operand:OI 0 "neon_struct_operand" "=Um") - (unspec:OI [(match_operand:OI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" - "vst1.\t%h1, %A0" - [(set_attr "type" "neon_store1_4reg")] -) - (define_insn "neon_vst1" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c index 04ca6583552f..5f820a6a496e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ 
b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -115,62 +115,8 @@ void test_vst1_p16_x3 (poly16_t * ptr, poly16x4x3_t val) vst1_p16_x3 (ptr, val); } -void test_vst1_u8_x4 (uint8_t * ptr, uint8x8x4_t val) -{ - vst1_u8_x4 (ptr, val); -} -void test_vst1_u16_x4 (uint16_t * ptr, uint16x4x4_t val) -{ - vst1_u16_x4 (ptr, val); -} - -void test_vst1_u32_x4 (uint32_t * ptr, uint32x2x4_t val) -{ - vst1_u32_x4 (ptr, val); -} - -void test_vst1_u64_x4 (uint64_t * ptr, uint64x1x4_t val) -{ - vst1_u64_x4 (ptr, val); -} - -void test_vst1_s8_x4 (int8_t * ptr, int8x8x4_t val) -{ - vst1_s8_x4 (ptr, val); -} - -void test_vst1_s16_x4 (int16_t * ptr, int16x4x4_t val) -{ - vst1_s16_x4 (ptr, val); -} - -void test_vst1_s32_x4 (int32_t * ptr, int32x2x4_t val) -{ - vst1_s32_x4 (ptr, val); -} - -void test_vst1_s64_x4 (int64_t * ptr, int64x1x4_t val) -{ - vst1_s64_x4 (ptr, val); -} - -void test_vst1_f32_x4 (float32_t * ptr, float32x2x4_t val) -{ - vst1_f32_x4 (ptr, val); -} - -void test_vst1_p8_x4 (poly8_t * ptr, poly8x8x4_t val) -{ - vst1_p8_x4 (ptr, val); -} - -void test_vst1_p16_x4 (poly16_t * ptr, poly16x4x4_t val) -{ - vst1_p16_x4 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 9 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c index d919c7d060dc..a3a00ead4682 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -15,8 +15,4 @@ void test_vst1_bf16_x3 (bfloat16_t * ptr, bfloat16x4x3_t val) vst1_bf16_x3 (ptr, val); } -void test_vst1_bf16_x4 (bfloat16_t * ptr, bfloat16x4x4_t val) -{ - vst1_bf16_x4 (ptr, val); -} -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c index 3d1d1eb7ad14..0a6863e24c6c 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -15,9 +15,4 @@ void test_vst1_f16_x3 (float16_t * ptr, float16x4x3_t val) vst1_f16_x3 (ptr, val); } -void test_vst1_f16_x4 (float16_t * ptr, float16x4x4_t val) -{ - vst1_f16_x4 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c index 62912143481a..5dbd6049bc9f 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -15,9 +15,4 @@ void test_vst1_p64_x3 (poly64_t * ptr, poly64x1x3_t val) vst1_p64_x3 (ptr, val); } -void test_vst1_p64_x4 (poly64_t * ptr, poly64x1x4_t val) -{ - vst1_p64_x4 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 3 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file From 
c1f800ccda704a88838c04e6403ad09e0ae2ff77 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 107/311] Revert "arm: vst1_types_x3 ACLE intrinsics" This reverts commit ef07ae652c25ec04c2e3ef8cec14b0771a809861. --- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vst1_base_xN_1.c | 63 +--------- .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vst1_p64_xN_1.c | 7 +- 7 files changed, 7 insertions(+), 202 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index e76be3516d95..60f1077752c6 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11250,14 +11250,6 @@ vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __b) -{ - union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11319,38 +11311,6 @@ vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s8_x3 (int8_t * __a, int8x8x3_t __b) -{ - union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x3 (int16_t * __a, int16x4x3_t __b) -{ - union { int16x4x3_t __i; __builtin_neon_ei __o; 
} __bu = { __b }; - __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x3 (int32_t * __a, int32x2x3_t __b) -{ - union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x3 (int64_t * __a, int64x1x3_t __b) -{ - union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11385,24 +11345,6 @@ vst1_f32_x2 (float32_t * __a, float32x2x2_t __b) __builtin_neon_vst1_x2v2sf ((__builtin_neon_sf *) __a, __bu.__o); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x3 (float16_t * __a, float16x4x3_t __b) -{ - union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v4hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x3 (float32_t * __a, float32x2x3_t __b) -{ - union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v2sf ((__builtin_neon_sf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11463,38 +11405,6 @@ vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __b) __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); } -__extension__ extern __inline void 
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __b) -{ - union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __b) -{ - union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __b) -{ - union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __b) -{ - union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11525,22 +11435,6 @@ vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __b) __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __b) -{ - union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __b) -{ - union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v4hi ((__builtin_neon_hi 
*) __a, __bu.__o); -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20290,14 +20184,6 @@ vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __b) __builtin_neon_vst1_x2v4bf ((__builtin_neon_bf *) __a, __bu.__o); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __b) -{ - union { bfloat16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b }; - __builtin_neon_vst1_x3v4bf ((__builtin_neon_bf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 8b104b1a700b..6f16bf0863c8 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -312,7 +312,6 @@ VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) -VAR7 (STORE1, vst1_x3, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 5185434d6d93..2a7286dee52d 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5135,16 +5135,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_store1_2reg")] ) -(define_insn "neon_vst1_x3" - [(set (match_operand:EI 0 "neon_struct_operand" "=Um") - (unspec:EI [(match_operand:EI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" - "vst1.\t%h1, %A0" - [(set_attr "type" "neon_store1_3reg")] -) - (define_insn "neon_vst1" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 
"s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c index 5f820a6a496e..575897fa422e 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c @@ -60,63 +60,8 @@ void test_vst1_p16_x2 (poly16_t * ptr, poly16x4x2_t val) vst1_p16_x2 (ptr, val); } -void test_vst1_u8_x3 (uint8_t * ptr, uint8x8x3_t val) -{ - vst1_u8_x3 (ptr, val); -} -void test_vst1_u16_x3 (uint16_t * ptr, uint16x4x3_t val) -{ - vst1_u16_x3 (ptr, val); -} - -void test_vst1_u32_x3 (uint32_t * ptr, uint32x2x3_t val) -{ - vst1_u32_x3 (ptr, val); -} - -void test_vst1_u64_x3 (uint64_t * ptr, uint64x1x3_t val) -{ - vst1_u64_x3 (ptr, val); -} - -void test_vst1_s8_x3 (int8_t * ptr, int8x8x3_t val) -{ - vst1_s8_x3 (ptr, val); -} - -void test_vst1_s16_x3 (int16_t * ptr, int16x4x3_t val) -{ - vst1_s16_x3 (ptr, val); -} - -void test_vst1_s32_x3 (int32_t * ptr, int32x2x3_t val) -{ - vst1_s32_x3 (ptr, val); -} - -void test_vst1_s64_x3 (int64_t * ptr, int64x1x3_t val) -{ - vst1_s64_x3 (ptr, val); -} - -void test_vst1_f32_x3 (float32_t * ptr, float32x2x3_t val) -{ - vst1_f32_x3 (ptr, val); -} - -void test_vst1_p8_x3 (poly8_t * ptr, poly8x8x3_t val) -{ - vst1_p8_x3 (ptr, val); -} - -void test_vst1_p16_x3 (poly16_t * ptr, poly16x4x3_t val) -{ - vst1_p16_x3 (ptr, val); -} - - -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ +/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times 
{vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c index a3a00ead4682..213fd20ee65f 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c @@ -10,9 +10,4 @@ void test_vst1_bf16_x2 (bfloat16_t * ptr, bfloat16x4x2_t val) vst1_bf16_x2 (ptr, val); } -void test_vst1_bf16_x3 (bfloat16_t * ptr, bfloat16x4x3_t val) -{ - vst1_bf16_x3 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c index 0a6863e24c6c..523aec92db24 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c @@ -10,9 +10,4 @@ void test_vst1_f16_x2 (float16_t * ptr, float16x4x2_t val) vst1_f16_x2 (ptr, val); } -void test_vst1_f16_x3 (float16_t * ptr, float16x4x3_t val) -{ - vst1_f16_x3 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ +/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c index 5dbd6049bc9f..f590ebd7b943 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c @@ -10,9 +10,4 @@ void test_vst1_p64_x2 (poly64_t * ptr, poly64x1x2_t val) vst1_p64_x2 (ptr, val); } -void test_vst1_p64_x3 (poly64_t * ptr, poly64x1x3_t val) -{ - vst1_p64_x3 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, 
\[r[0-9]+:64\]\n} 2 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ \ No newline at end of file From 5449edc396c9199339bd023c8c83cf609d3b7d4e Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 108/311] Revert "arm: vst1_types_x2 ACLE intrinsics" This reverts commit a69a7c7b6782c5b6f213f1f34af8dbb6541f27bb. --- gcc/config/arm/arm_neon.h | 114 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vst1_base_xN_1.c | 67 ---------- .../gcc.target/arm/simd/vst1_bf16_xN_1.c | 13 -- .../gcc.target/arm/simd/vst1_fp16_xN_1.c | 13 -- .../gcc.target/arm/simd/vst1_p64_xN_1.c | 13 -- 7 files changed, 231 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 60f1077752c6..c03be9912f87 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -11242,14 +11242,6 @@ vst1_p64 (poly64_t * __a, poly64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __b) -{ - union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); -} - #pragma GCC pop_options __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11279,38 +11271,6 @@ vst1_s64 (int64_t * __a, int64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vst1_s8_x2 (int8_t * __a, int8x8x2_t __b) -{ - union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s16_x2 (int16_t * __a, int16x4x2_t __b) -{ - union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s32_x2 (int32_t * __a, int32x2x2_t __b) -{ - union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_s64_x2 (int64_t * __a, int64x1x2_t __b) -{ - union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -11327,24 +11287,6 @@ vst1_f32 (float32_t * __a, float32x2_t __b) __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f16_x2 (float16_t * __a, float16x4x2_t __b) -{ - union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v4hf (__a, __bu.__o); -} -#endif - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_f32_x2 (float32_t * __a, float32x2x2_t __b) -{ - union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu 
= { __b }; - __builtin_neon_vst1_x2v2sf ((__builtin_neon_sf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_u8 (uint8_t * __a, uint8x8_t __b) @@ -11373,38 +11315,6 @@ vst1_u64 (uint64_t * __a, uint64x1_t __b) __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __b) -{ - union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __b) -{ - union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __b) -{ - union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v2si ((__builtin_neon_si *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __b) -{ - union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2di ((__builtin_neon_di *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1_p8 (poly8_t * __a, poly8x8_t __b) @@ -11419,22 +11329,6 @@ vst1_p16 (poly16_t * __a, poly16x4_t __b) __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __b) -{ - union { 
poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v8qi ((__builtin_neon_qi *) __a, __bu.__o); -} - -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __b) -{ - union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v4hi ((__builtin_neon_hi *) __a, __bu.__o); -} - #pragma GCC push_options #pragma GCC target ("fpu=crypto-neon-fp-armv8") __extension__ extern __inline void @@ -20176,14 +20070,6 @@ vst1_bf16 (bfloat16_t * __a, bfloat16x4_t __b) __builtin_neon_vst1v4bf (__a, __b); } -__extension__ extern __inline void -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __b) -{ - union { bfloat16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b }; - __builtin_neon_vst1_x2v4bf ((__builtin_neon_bf *) __a, __bu.__o); -} - __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 6f16bf0863c8..90dad8cf6d18 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -311,7 +311,6 @@ VAR10 (LOAD1, vld1_dup, VAR14 (STORE1, vst1, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) -VAR7 (STORE1, vst1_x2, v8qi, v4hi, v2si, di, v4hf, v2sf, v4bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 2a7286dee52d..b8f8fd6b9280 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -5125,16 +5125,6 @@ if (BYTES_BIG_ENDIAN) UNSPEC_VST1))] "TARGET_NEON") -(define_insn "neon_vst1_x2" - [(set (match_operand:TI 0 "neon_struct_operand" "=Um") - (unspec:TI 
[(match_operand:TI 1 "s_register_operand" "w") - (unspec:VDQX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VST1))] - "TARGET_NEON" - "vst1.\t%h1, %A0" - [(set_attr "type" "neon_store1_2reg")] -) - (define_insn "neon_vst1" [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")] diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c deleted file mode 100644 index 575897fa422e..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_base_xN_1.c +++ /dev/null @@ -1,67 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon } */ - -#include "arm_neon.h" - -void test_vst1_u8_x2 (uint8_t * ptr, uint8x8x2_t val) -{ - vst1_u8_x2 (ptr, val); -} - -void test_vst1_u16_x2 (uint16_t * ptr, uint16x4x2_t val) -{ - vst1_u16_x2 (ptr, val); -} - -void test_vst1_u32_x2 (uint32_t * ptr, uint32x2x2_t val) -{ - vst1_u32_x2 (ptr, val); -} - -void test_vst1_u64_x2 (uint64_t * ptr, uint64x1x2_t val) -{ - vst1_u64_x2 (ptr, val); -} - -void test_vst1_s8_x2 (int8_t * ptr, int8x8x2_t val) -{ - vst1_s8_x2 (ptr, val); -} - -void test_vst1_s16_x2 (int16_t * ptr, int16x4x2_t val) -{ - vst1_s16_x2 (ptr, val); -} - -void test_vst1_s32_x2 (int32_t * ptr, int32x2x2_t val) -{ - vst1_s32_x2 (ptr, val); -} - -void test_vst1_s64_x2 (int64_t * ptr, int64x1x2_t val) -{ - vst1_s64_x2 (ptr, val); -} - -void test_vst1_f32_x2 (float32_t * ptr, float32x2x2_t val) -{ - vst1_f32_x2 (ptr, val); -} - -void test_vst1_p8_x2 (poly8_t * ptr, poly8x8x2_t val) -{ - vst1_p8_x2 (ptr, val); -} - -void test_vst1_p16_x2 (poly16_t * ptr, poly16x4x2_t val) -{ - vst1_p16_x2 (ptr, val); -} - - -/* { dg-final { scan-assembler-times {vst1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { 
scan-assembler-times {vst1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c deleted file mode 100644 index 213fd20ee65f..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_v8_2a_bf16_neon } */ - -#include "arm_neon.h" - -void test_vst1_bf16_x2 (bfloat16_t * ptr, bfloat16x4x2_t val) -{ - vst1_bf16_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c deleted file mode 100644 index 523aec92db24..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_fp16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_fp16_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon_fp16 } */ - -#include "arm_neon.h" - -void test_vst1_f16_x2 (float16_t * ptr, float16x4x2_t val) -{ - vst1_f16_x2 (ptr, val); -} - -/* { dg-final { scan-assembler-times {vst1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c deleted file mode 100644 index f590ebd7b943..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vst1_p64_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_crypto_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_crypto } */ - -#include "arm_neon.h" - -void test_vst1_p64_x2 (poly64_t * ptr, poly64x1x2_t val) -{ - vst1_p64_x2 (ptr, val); -} 
- -/* { dg-final { scan-assembler-times {vst1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ \ No newline at end of file From b8adb5396f4524f1946985a65eb9c27f34a87d43 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 109/311] Revert "arm: vld1q_types_x4 ACLE intrinsics" This reverts commit ac827ec3e600bcb636f564876b186ee19d384a1e. --- gcc/config/arm/arm_neon.h | 128 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 30 ---- .../gcc.target/arm/simd/vld1q_base_xN_1.c | 59 -------- .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 6 - .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 6 - .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 6 - 7 files changed, 236 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index c03be9912f87..557873ac0285 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10421,15 +10421,6 @@ vld1q_p64_x3 (const poly64_t * __a) return __rv.__i; } -__extension__ extern __inline poly64x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p64_x4 (const poly64_t * __a) -{ - union { poly64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10531,42 +10522,6 @@ vld1q_s64_x3 (const int64_t * __a) return __rv.__i; } -__extension__ extern __inline int8x16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s8_x4 (const uint8_t * __a) -{ - union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s16_x4 (const uint16_t * __a) -{ - union { 
int16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s32_x4 (const int32_t * __a) -{ - union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s64_x4 (const int64_t * __a) -{ - union { int64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10623,26 +10578,6 @@ vld1q_f32_x3 (const float32_t * __a) return __rv.__i; } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f16_x4 (const float16_t * __a) -{ - union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f32_x4 (const float32_t * __a) -{ - union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10743,42 +10678,6 @@ vld1q_u64_x3 (const uint64_t * __a) return __rv.__i; } -__extension__ extern __inline uint8x16x4_t 
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u8_x4 (const uint8_t * __a) -{ - union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u16_x4 (const uint16_t * __a) -{ - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x4x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u32_x4 (const uint32_t * __a) -{ - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x2x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u64_x4 (const uint64_t * __a) -{ - union { uint64x2x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10829,24 +10728,6 @@ vld1q_p16_x3 (const poly16_t * __a) return __rv.__i; } -__extension__ extern __inline poly8x16x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p8_x4 (const poly8_t * __a) -{ - union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p16_x4 (const poly16_t * __a) -{ - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8hi ((const 
__builtin_neon_hi *) __a); - return __rv.__i; -} - __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -20157,15 +20038,6 @@ vld1q_bf16_x3 (const bfloat16_t * __ptr) return __rv.__i; } -__extension__ extern __inline bfloat16x8x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_bf16_x4 (const bfloat16_t * __ptr) -{ - union { bfloat16x8x4_t __i; __builtin_neon_xi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x4v8bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 90dad8cf6d18..a363bf18ccb7 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -303,7 +303,6 @@ VAR13 (LOAD1, vld1, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x4, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index b8f8fd6b9280..b37d95f1fa07 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4994,36 +4994,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_3reg")] ) -(define_insn "neon_vld1_x4" - [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD4A))] - "TARGET_NEON" -{ - int regno = REGNO (operands[0]); - rtx ops[5]; - ops[0] = gen_rtx_REG (DImode, regno); - ops[1] = gen_rtx_REG (DImode, regno + 2); - ops[2] = 
gen_rtx_REG (DImode, regno + 4); - ops[3] = gen_rtx_REG (DImode, regno + 6); - ops[4] = operands[1]; - - output_asm_insn ("vld1.\t{%P0, %P1, %P2, %P3}, %A4", ops); - - ops[0] = gen_rtx_REG (DImode, regno + 8); - ops[1] = gen_rtx_REG (DImode, regno + 10); - ops[2] = gen_rtx_REG (DImode, regno + 12); - ops[3] = gen_rtx_REG (DImode, regno + 14); - ops[4] = operands[1]; - - output_asm_insn ("vld1.\t{%P0, %P1, %P2, %P3}, %A4", ops); - - return ""; -} - [(set_attr "type" "neon_load1_4reg")] -) - ;; The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c index 9aae34cc18e7..bfad282751b4 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c @@ -115,73 +115,14 @@ poly16x8x3_t test_vld1q_p16_x3 (poly16_t * a) return vld1q_p16_x3 (a); } -uint8x16x4_t test_vld1q_u8_x4 (uint8_t * a) -{ - return vld1q_u8_x4 (a); -} - -uint16x8x4_t test_vld1q_u16_x4 (uint16_t * a) -{ - return vld1q_u16_x4 (a); -} - -uint32x4x4_t test_vld1q_u32_x4 (uint32_t * a) -{ - return vld1q_u32_x4 (a); -} - -uint64x2x4_t test_vld1q_u64_x4 (uint64_t * a) -{ - return vld1q_u64_x4 (a); -} - -int8x16x4_t test_vld1q_s8_x4 (int8_t * a) -{ - return vld1q_s8_x4 (a); -} - -int16x8x4_t test_vld1q_s16_x4 (int16_t * a) -{ - return vld1q_s16_x4 (a); -} - -int32x4x4_t test_vld1q_s32_x4 (int32_t * a) -{ - return vld1q_s32_x4 (a); -} - -int64x2x4_t test_vld1q_s64_x4 (int64_t * a) -{ - return vld1q_s64_x4 (a); -} - -float32x4x4_t test_vld1q_f32_x4 (float32_t * a) -{ - return vld1q_f32_x4 (a); -} - -poly8x16x4_t test_vld1q_p8_x4 (poly8_t * a) -{ - return vld1q_p8_x4 (a); -} - -poly16x8x4_t test_vld1q_p16_x4 (poly16_t * a) -{ - return vld1q_p16_x4 (a); -} - /* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, 
\[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ /* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c index fd86723f1464..4138fe951ee7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c @@ -15,11 +15,5 @@ bfloat16x8x3_t test_vld1q_bf16_x3 (bfloat16_t * a) return vld1q_bf16_x3 (a); } -bfloat16x8x4_t test_vld1q_bf16_x4 (bfloat16_t * a) -{ - return vld1q_bf16_x4 (a); -} - /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c 
b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c index 2de3495f1dad..01640d7cc1f6 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c @@ -15,11 +15,5 @@ float16x8x3_t test_vld1q_f16_x3 (float16_t * a) return vld1q_f16_x3 (a); } -float16x8x4_t test_vld1q_f16_x4 (float16_t * a) -{ - return vld1q_f16_x4 (a); -} - /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c index 521b784e8e84..ae2ab36df57c 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c @@ -15,11 +15,5 @@ poly64x2x3_t test_vld1q_p64_x3 (poly64_t * a) return vld1q_p64_x3 (a); } -poly64x2x4_t test_vld1q_p64_x4 (poly64_t * a) -{ - return vld1q_p64_x4 (a); -} - /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ From ccc6226e57bae9727fae4b858b6dee8adfc02577 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 110/311] Revert "arm: vld1q_types_x3 ACLE intrinsics" This reverts commit 2514a331835e055a963fd059dc5770e5ae500af0. 
--- gcc/config/arm/arm_neon.h | 128 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 27 ---- .../gcc.target/arm/simd/vld1q_base_xN_1.c | 63 +-------- .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 6 - .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 7 +- .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 7 +- 7 files changed, 3 insertions(+), 236 deletions(-) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 557873ac0285..3eb41c6bdc83 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10412,15 +10412,6 @@ vld1q_p64_x2 (const poly64_t * __a) return __rv.__i; } -__extension__ extern __inline poly64x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p64_x3 (const poly64_t * __a) -{ - union { poly64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10486,42 +10477,6 @@ vld1q_s64_x2 (const int64_t * __a) return __rv.__i; } -__extension__ extern __inline int8x16x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s8_x3 (const uint8_t * __a) -{ - union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s16_x3 (const uint16_t * __a) -{ - union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s32_x3 (const int32_t * __a) -{ - union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = 
__builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s64_x3 (const int64_t * __a) -{ - union { int64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10558,26 +10513,6 @@ vld1q_f32_x2 (const float32_t * __a) return __rv.__i; } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f16_x3 (const float16_t * __a) -{ - union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f32_x3 (const float32_t * __a) -{ - union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10642,42 +10577,6 @@ vld1q_u64_x2 (const uint64_t * __a) return __rv.__i; } -__extension__ extern __inline uint8x16x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u8_x3 (const uint8_t * __a) -{ - union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-vld1q_u16_x3 (const uint16_t * __a) -{ - union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x4x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u32_x3 (const uint32_t * __a) -{ - union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x2x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u64_x3 (const uint64_t * __a) -{ - union { uint64x2x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10710,24 +10609,6 @@ vld1q_p16_x2 (const poly16_t * __a) return __rv.__i; } -__extension__ extern __inline poly8x16x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p8_x3 (const poly8_t * __a) -{ - union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x8x3_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p16_x3 (const poly16_t * __a) -{ - union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -20029,15 +19910,6 @@ vld1q_bf16_x2 (const bfloat16_t * __ptr) return __rv.__i; } -__extension__ extern __inline bfloat16x8x3_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vld1q_bf16_x3 (const bfloat16_t * __ptr) -{ - union { bfloat16x8x3_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x3v8bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index a363bf18ccb7..6a8f0cb2ce1f 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -302,7 +302,6 @@ VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) -VAR7 (LOAD1, vld1_x3, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index b37d95f1fa07..55049ea549f3 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4967,33 +4967,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_2reg")] ) -(define_insn "neon_vld1_x3" - [(set (match_operand:CI 0 "s_register_operand" "=w") - (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um") - (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD3A))] - "TARGET_NEON" -{ - int regno = REGNO (operands[0]); - rtx ops[4]; - ops[0] = gen_rtx_REG (DImode, regno); - ops[1] = gen_rtx_REG (DImode, regno + 2); - ops[2] = gen_rtx_REG (DImode, regno + 4); - ops[3] = operands[1]; - - output_asm_insn ("vld1.\t{%P0, %P1, %P2}, %A3", ops); - - ops[0] = gen_rtx_REG (DImode, regno + 6); - ops[1] = gen_rtx_REG (DImode, regno + 8); - ops[2] = gen_rtx_REG (DImode, regno + 10); - ops[3] = operands[1]; - - output_asm_insn ("vld1.\t{%P0, %P1, %P2}, %A3", ops); - return ""; -} - [(set_attr "type" "neon_load1_3reg")] -) - ;; 
The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c index bfad282751b4..1d31777afdf3 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c @@ -60,69 +60,8 @@ poly16x8x2_t test_vld1q_p16_x2 (poly16_t * a) return vld1q_p16_x2 (a); } -uint8x16x3_t test_vld1q_u8_x3 (uint8_t * a) -{ - return vld1q_u8_x3 (a); -} - -uint16x8x3_t test_vld1q_u16_x3 (uint16_t * a) -{ - return vld1q_u16_x3 (a); -} - -uint32x4x3_t test_vld1q_u32_x3 (uint32_t * a) -{ - return vld1q_u32_x3 (a); -} - -uint64x2x3_t test_vld1q_u64_x3 (uint64_t * a) -{ - return vld1q_u64_x3 (a); -} - -int8x16x3_t test_vld1q_s8_x3 (int8_t * a) -{ - return vld1q_s8_x3 (a); -} - -int16x8x3_t test_vld1q_s16_x3 (int16_t * a) -{ - return vld1q_s16_x3 (a); -} - -int32x4x3_t test_vld1q_s32_x3 (int32_t * a) -{ - return vld1q_s32_x3 (a); -} - -int64x2x3_t test_vld1q_s64_x3 (int64_t * a) -{ - return vld1q_s64_x3 (a); -} - -float32x4x3_t test_vld1q_f32_x3 (float32_t * a) -{ - return vld1q_f32_x3 (a); -} - -poly8x16x3_t test_vld1q_p8_x3 (poly8_t * a) -{ - return vld1q_p8_x3 (a); -} - -poly16x8x3_t test_vld1q_p16_x3 (poly16_t * a) -{ - return vld1q_p16_x3 (a); -} - /* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ - /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 } } */ - /* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 6 
} } */ - /* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 4 } } */ + diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c index 4138fe951ee7..5f6fc98640e7 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c @@ -10,10 +10,4 @@ bfloat16x8x2_t test_vld1q_bf16_x2 (bfloat16_t * a) return vld1q_bf16_x2 (a); } -bfloat16x8x3_t test_vld1q_bf16_x3 (bfloat16_t * a) -{ - return vld1q_bf16_x3 (a); -} - /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c index 01640d7cc1f6..aecf491a4de8 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c @@ -10,10 +10,5 @@ float16x8x2_t test_vld1q_f16_x2 (float16_t * a) return vld1q_f16_x2 (a); } -float16x8x3_t test_vld1q_f16_x3 (float16_t * a) -{ - return vld1q_f16_x3 (a); -} - /* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+\]\n} 2 } } */ + diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c index ae2ab36df57c..04ceb5e4a247 100644 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c +++ b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c @@ -10,10 +10,5 @@ poly64x2x2_t test_vld1q_p64_x2 (poly64_t * a) return vld1q_p64_x2 (a); } -poly64x2x3_t test_vld1q_p64_x3 (poly64_t * a) -{ - return vld1q_p64_x3 (a); -} - /* { dg-final { scan-assembler-times 
{vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+, d[0-9]+, d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ + From a2530e447d373d78444a80068972bdd157f7b518 Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Fri, 8 Dec 2023 16:04:18 +0000 Subject: [PATCH 111/311] Revert "arm: vld1q_types_x2 ACLE intrinsics" This reverts commit a1a0cdf21bb6a076e98658d815645d8ad1193840. --- gcc/config/arm/arm_neon.h | 128 ------------------ gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/neon.md | 10 -- .../gcc.target/arm/simd/vld1q_base_xN_1.c | 67 --------- .../gcc.target/arm/simd/vld1q_bf16_xN_1.c | 13 -- .../gcc.target/arm/simd/vld1q_fp16_xN_1.c | 14 -- .../gcc.target/arm/simd/vld1q_p64_xN_1.c | 14 -- 7 files changed, 247 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c delete mode 100644 gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 3eb41c6bdc83..cdfdb44259a1 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -10403,15 +10403,6 @@ vld1q_p64 (const poly64_t * __a) return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } -__extension__ extern __inline poly64x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p64_x2 (const poly64_t * __a) -{ - union { poly64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #pragma GCC pop_options __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10441,42 +10432,6 @@ vld1q_s64 (const int64_t * __a) return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } -__extension__ extern __inline 
int8x16x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s8_x2 (const int8_t * __a) -{ - union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int16x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s16_x2 (const int16_t * __a) -{ - union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline int32x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s32_x2 (const int32_t * __a) -{ - union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline int64x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_s64_x2 (const int64_t * __a) -{ - union { int64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) __extension__ extern __inline float16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -10493,26 +10448,6 @@ vld1q_f32 (const float32_t * __a) return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); } -#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -__extension__ extern __inline float16x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_f16_x2 (const float16_t * __a) -{ - union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hf (__a); - return __rv.__i; -} -#endif - -__extension__ extern __inline float32x4x2_t -__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -vld1q_f32_x2 (const float32_t * __a) -{ - union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4sf ((const __builtin_neon_sf *) __a); - return __rv.__i; -} - __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_u8 (const uint8_t * __a) @@ -10541,42 +10476,6 @@ vld1q_u64 (const uint64_t * __a) return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); } -__extension__ extern __inline uint8x16x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u8_x2 (const uint8_t * __a) -{ - union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint16x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u16_x2 (const uint16_t * __a) -{ - union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint32x4x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u32_x2 (const uint32_t * __a) -{ - union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v4si ((const __builtin_neon_si *) __a); - return __rv.__i; -} - -__extension__ extern __inline uint64x2x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_u64_x2 (const uint64_t * __a) -{ - union { uint64x2x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v2di ((const __builtin_neon_di *) __a); - return __rv.__i; -} - __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1q_p8 (const poly8_t * __a) @@ -10591,24 +10490,6 @@ vld1q_p16 (const poly16_t * __a) return (poly16x8_t)__builtin_neon_vld1v8hi 
((const __builtin_neon_hi *) __a); } -__extension__ extern __inline poly8x16x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p8_x2 (const poly8_t * __a) -{ - union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v16qi ((const __builtin_neon_qi *) __a); - return __rv.__i; -} - -__extension__ extern __inline poly16x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_p16_x2 (const poly16_t * __a) -{ - union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8hi ((const __builtin_neon_hi *) __a); - return __rv.__i; -} - __extension__ extern __inline int8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) @@ -19901,15 +19782,6 @@ vld1q_bf16 (const bfloat16_t * __ptr) return __builtin_neon_vld1v8bf (__ptr); } -__extension__ extern __inline bfloat16x8x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vld1q_bf16_x2 (const bfloat16_t * __ptr) -{ - union { bfloat16x8x2_t __i; __builtin_neon_oi __o; } __rv; - __rv.__o = __builtin_neon_vld1_x2v8bf ((const __builtin_neon_bf *) __ptr); - return __rv.__i; -} - __extension__ extern __inline bfloat16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vld2_bf16 (bfloat16_t const * __ptr) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 6a8f0cb2ce1f..94b152381236 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -301,7 +301,6 @@ VAR1 (TERNOP, vtbx4, v8qi) VAR13 (LOAD1, vld1, v8qi, v4hi, v4hf, v2si, v2sf, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) -VAR7 (LOAD1, vld1_x2, v16qi, v8hi, v4si, v2di, v8hf, v4sf, v8bf) VAR12 (LOAD1LANE, vld1_lane, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, diff --git a/gcc/config/arm/neon.md 
b/gcc/config/arm/neon.md index 55049ea549f3..d213369ffc38 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -4957,16 +4957,6 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_load1_1reg")] ) -(define_insn "neon_vld1_x2" - [(set (match_operand:OI 0 "s_register_operand" "=w") - (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um") - (unspec:VQXBF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_VLD1))] - "TARGET_NEON" - "vld1.\t%h0, %A1" - [(set_attr "type" "neon_load1_2reg")] -) - ;; The lane numbers in the RTL are in GCC lane order, having been flipped ;; in arm_expand_neon_args. The lane numbers are restored to architectural ;; lane order here. diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c deleted file mode 100644 index 1d31777afdf3..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_base_xN_1.c +++ /dev/null @@ -1,67 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon } */ - -#include "arm_neon.h" - -uint8x16x2_t test_vld1q_u8_x2 (uint8_t * a) -{ - return vld1q_u8_x2 (a); -} - -uint16x8x2_t test_vld1q_u16_x2 (uint16_t * a) -{ - return vld1q_u16_x2 (a); -} - -uint32x4x2_t test_vld1q_u32_x2 (uint32_t * a) -{ - return vld1q_u32_x2 (a); -} - -uint64x2x2_t test_vld1q_u64_x2 (uint64_t * a) -{ - return vld1q_u64_x2 (a); -} - -int8x16x2_t test_vld1q_s8_x2 (int8_t * a) -{ - return vld1q_s8_x2 (a); -} - -int16x8x2_t test_vld1q_s16_x2 (int16_t * a) -{ - return vld1q_s16_x2 (a); -} - -int32x4x2_t test_vld1q_s32_x2 (int32_t * a) -{ - return vld1q_s32_x2 (a); -} - -int64x2x2_t test_vld1q_s64_x2 (int64_t * a) -{ - return vld1q_s64_x2 (a); -} - -float32x4x2_t test_vld1q_f32_x2 (float32_t * a) -{ - return vld1q_f32_x2 (a); -} - -poly8x16x2_t test_vld1q_p8_x2 (poly8_t * a) -{ - return vld1q_p8_x2 (a); -} - -poly16x8x2_t test_vld1q_p16_x2 (poly16_t * a) -{ - return vld1q_p16_x2 
(a); -} - -/* { dg-final { scan-assembler-times {vld1.8\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.32\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 3 } } */ -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 2 } } */ - diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c deleted file mode 100644 index 5f6fc98640e7..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_bf16_xN_1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_v8_2a_bf16_neon } */ - -#include "arm_neon.h" - -bfloat16x8x2_t test_vld1q_bf16_x2 (bfloat16_t * a) -{ - return vld1q_bf16_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c deleted file mode 100644 index aecf491a4de8..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_fp16_xN_1.c +++ /dev/null @@ -1,14 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target arm_neon_fp16_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_neon_fp16 } */ - -#include "arm_neon.h" - -float16x8x2_t test_vld1q_f16_x2 (float16_t * a) -{ - return vld1q_f16_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.16\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+\]\n} 1 } } */ - diff --git a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c b/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c deleted file mode 100644 index 04ceb5e4a247..000000000000 --- a/gcc/testsuite/gcc.target/arm/simd/vld1q_p64_xN_1.c +++ /dev/null @@ -1,14 +0,0 @@ -/* { dg-do assemble } */ -/* { dg-require-effective-target 
arm_crypto_ok } */ -/* { dg-options "-save-temps -O2" } */ -/* { dg-add-options arm_crypto } */ - -#include "arm_neon.h" - -poly64x2x2_t test_vld1q_p64_x2 (poly64_t * a) -{ - return vld1q_p64_x2 (a); -} - -/* { dg-final { scan-assembler-times {vld1.64\t\{d[0-9]+-d[0-9]+\}, \[r[0-9]+:64\]\n} 1 } } */ - From 8b5cd6c4519cc120badd2b35a9e30d4deb82c012 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 8 Dec 2023 16:27:40 +0000 Subject: [PATCH 112/311] aarch64: Some tweaks to the early-ra pass early-ra's likely_operand_match_p didn't handle relaxed and special memory constraints, which meant that the pass wasn't able to match LD1RQ instructions to their constraints, and so backed out of trying to allocate. This patch fixes that by switching the sense of the match: does the rtx seem appropriate for the constraint?, rather than: does the constraint seem appropriate for the rtx? Also, I came across a case that needed more general equivalence detection. Previously we would only record equivalences after the last definition of the source register, but it's worth trying to handle cases where the destination register's live range is restricted to a block, and the next definition of the source occurs only after the end of the destination register's live range. The patch also fixes a cut-&-pasto that Alex noticed (thanks). gcc/ * config/aarch64/aarch64-early-ra.cc (allocno_info::chain_next): Put into an enum with... (allocno_info::last_def_point): ...new member variable. (allocno_info::m_current_bb_point): New member variable. (likely_operand_match_p): Switch based on get_constraint_type, rather than based on rtx code. Handle relaxed and special memory constraints. (early_ra::record_copy): Allow the source of an equivalence to be assigned to more than once. (early_ra::record_allocno_use): Invalidate any previous equivalence. Initialize last_def_point. (early_ra::record_allocno_def): Set last_def_point. (early_ra::valid_equivalence_p): New function, split out from... 
(early_ra::record_copy): ...here. Use last_def_point to handle source registers that have a later definition. (make_pass_aarch64_early_ra): Fix comment. gcc/testsuite/ * gcc.target/aarch64/sme/strided_2.c: New test. --- gcc/config/aarch64/aarch64-early-ra.cc | 89 +++++++++++--- .../gcc.target/aarch64/sme/strided_2.c | 115 ++++++++++++++++++ 2 files changed, 184 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/strided_2.c diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc index c065416c5b94..f05869b5cf2b 100644 --- a/gcc/config/aarch64/aarch64-early-ra.cc +++ b/gcc/config/aarch64/aarch64-early-ra.cc @@ -306,9 +306,18 @@ private: // equivalent to EQUIV_ALLOCNO for the whole of this allocno's lifetime. unsigned int equiv_allocno; - // The next chained allocno in program order (i.e. at lower program - // points), or INVALID_ALLOCNO if none. - unsigned int chain_next; + union + { + // The program point at which the allocno was last defined, + // or START_OF_REGION if none. This is only used temporarily + // while recording allocnos; after that, chain_next below is + // used instead. + unsigned int last_def_point; + + // The next chained allocno in program order (i.e. at lower program + // points), or INVALID_ALLOCNO if none. + unsigned int chain_next; + }; // The previous chained allocno in program order (i.e. at higher // program points), or INVALID_ALLOCNO if none. @@ -406,6 +415,7 @@ private: void record_fpr_def (unsigned int); void record_allocno_use (allocno_info *); void record_allocno_def (allocno_info *); + bool valid_equivalence_p (allocno_info *, allocno_info *); void record_copy (rtx, rtx, bool = false); void record_constraints (rtx_insn *); void record_artificial_refs (unsigned int); @@ -479,6 +489,9 @@ private: // The basic block that we're currently processing. basic_block m_current_bb; + // The lowest-numbered program point in the current basic block. 
+ unsigned int m_current_bb_point; + // The program point that we're currently processing (described above). unsigned int m_current_point; @@ -576,21 +589,26 @@ likely_operand_match_p (const operand_alternative &op_alt, rtx op) return true; auto cn = lookup_constraint (constraint); - if (REG_P (op) || SUBREG_P (op)) + switch (get_constraint_type (cn)) { - if (insn_extra_register_constraint (cn)) + case CT_REGISTER: + if (REG_P (op) || SUBREG_P (op)) return true; - } - else if (MEM_P (op)) - { - if (insn_extra_memory_constraint (cn)) + break; + + case CT_MEMORY: + case CT_SPECIAL_MEMORY: + case CT_RELAXED_MEMORY: + if (MEM_P (op)) return true; - } - else - { - if (!insn_extra_memory_constraint (cn) - && constraint_satisfied_p (op, cn)) + break; + + case CT_CONST_INT: + case CT_ADDRESS: + case CT_FIXED_FORM: + if (constraint_satisfied_p (op, cn)) return true; + break; } constraint += len; @@ -1407,10 +1425,14 @@ early_ra::record_allocno_use (allocno_info *allocno) { bitmap_set_bit (m_live_allocnos, allocno->id); if (allocno->end_point > m_current_point) - allocno->end_point = m_current_point; + { + allocno->end_point = m_current_point; + allocno->last_def_point = START_OF_REGION; + } allocno->start_point = m_current_point; allocno->is_copy_dest = false; allocno->is_strong_copy_dest = false; + allocno->equiv_allocno = INVALID_ALLOCNO; } // Record a definition of the allocno with index AI at the current program @@ -1419,6 +1441,7 @@ early_ra::record_allocno_use (allocno_info *allocno) void early_ra::record_allocno_def (allocno_info *allocno) { + allocno->last_def_point = m_current_point; allocno->start_point = m_current_point; allocno->num_defs = MIN (allocno->num_defs + 1, 2); gcc_checking_assert (!allocno->is_copy_dest @@ -1427,6 +1450,30 @@ early_ra::record_allocno_def (allocno_info *allocno) gcc_unreachable (); } +// Return true if a move from SRC_ALLOCNO to DEST_ALLOCNO could be treated +// as an equivalence. 
+bool +early_ra::valid_equivalence_p (allocno_info *dest_allocno, + allocno_info *src_allocno) +{ + if (src_allocno->end_point > dest_allocno->end_point) + // The src allocno dies first. + return false; + + if (src_allocno->num_defs != 0) + { + if (dest_allocno->end_point < m_current_bb_point) + // We don't currently track enough information to handle multiple + // definitions across basic block boundaries. + return false; + + if (src_allocno->last_def_point >= dest_allocno->end_point) + // There is another definition during the destination's live range. + return false; + } + return dest_allocno->num_defs == 1; +} + // Record any relevant allocno-related information for an actual or imagined // copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit // move instruction, false if it represents one way of satisfying the previous @@ -1512,9 +1559,7 @@ early_ra::record_copy (rtx dest, rtx src, bool from_move_p) dest_allocno->is_copy_dest = 1; } else if (from_move_p - && src_allocno->end_point <= dest_allocno->end_point - && src_allocno->num_defs == 0 - && dest_allocno->num_defs == 1) + && valid_equivalence_p (dest_allocno, src_allocno)) dest_allocno->equiv_allocno = src_allocno->id; } } @@ -3048,6 +3093,9 @@ early_ra::apply_allocation () void early_ra::process_region () { + for (auto *allocno : m_allocnos) + allocno->chain_next = INVALID_ALLOCNO; + if (dump_file && (dump_flags & TDF_DETAILS)) { dump_fpr_ranges (); @@ -3117,6 +3165,8 @@ void early_ra::process_block (basic_block bb, bool is_isolated) { m_current_bb = bb; + m_current_point += 1; + m_current_bb_point = m_current_point; // Process live-out FPRs. bitmap live_out = df_get_live_out (bb); @@ -3414,8 +3464,7 @@ pass_early_ra::execute (function *fn) } // end namespace -// Create a new CC fusion pass instance. - +// Create a new instance of the pass. 
rtl_opt_pass * make_pass_aarch64_early_ra (gcc::context *ctxt) { diff --git a/gcc/testsuite/gcc.target/aarch64/sme/strided_2.c b/gcc/testsuite/gcc.target/aarch64/sme/strided_2.c new file mode 100644 index 000000000000..2e58ae643ec2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/strided_2.c @@ -0,0 +1,115 @@ +// { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } +// { dg-final { check-function-bodies "**" "" } } + +#include <arm_sme.h> + +#pragma GCC target "+sme2" + +// This file deliberately contains nonsense code. + +/* +** test1: +** ptrue (pn[0-9]+)\.s +** ld1w {z16\.s - z19\.s}, \1/z, \[x1\] +** ld1w {z20\.s - z23\.s}, \1/z, \[x1, #4, mul vl\] +** ld1w {z24\.s - z27\.s}, \1/z, \[x1, #8, mul vl\] +** ld1w {z28\.s - z31\.s}, \1/z, \[x1, #12, mul vl\] +** ptrue [^\n]+ +** ld1rqw [^\n]+ +** ld1rqw [^\n]+ +** sclamp {z16.s - z19.s}, [^\n]+ +** sclamp {z20.s - z23.s}, [^\n]+ +** sclamp {z24.s - z27.s}, [^\n]+ +** sclamp {z28.s - z31.s}, [^\n]+ +** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0\] +** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #4, mul vl\] +** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #8, mul vl\] +** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #12, mul vl\] +** st1w {z16\.s, z20\.s, z24\.s, z28\.s}, \1, \[x0, #16, mul vl\] +** st1w {z17\.s, z21\.s, z25\.s, z29\.s}, \1, \[x0, #20, mul vl\] +** st1w {z18\.s, z22\.s, z26\.s, z30\.s}, \1, \[x0, #24, mul vl\] +** st1w {z19\.s, z23\.s, z27\.s, z31\.s}, \1, \[x0, #28, mul vl\] +** ld1w {z16\.s - z19\.s}, \1/z, \[x3\] +** ld1w {z20\.s - z23\.s}, \1/z, \[x3, #4, mul vl\] +** ld1w {z24\.s - z27\.s}, \1/z, \[x3, #8, mul vl\] +** ld1w {z28\.s - z31\.s}, \1/z, \[x3, #12, mul vl\] +** sclamp {z16.s - z19.s}, [^\n]+ +** sclamp {z20.s - z23.s}, [^\n]+ +** sclamp {z24.s - z27.s}, [^\n]+ +** sclamp {z28.s - z31.s}, [^\n]+ +** ...
+** ret +*/ +void test1(int32_t *dest, int32_t *src1, int32_t *src2, + int32_t *src3) __arm_streaming +{ + svcount_t pg = svptrue_c32(); + svint32x4_t l0 = svld1_vnum_x4(pg, src1, 0); + svint32x4_t l1 = svld1_vnum_x4(pg, src1, 4); + svint32x4_t l2 = svld1_vnum_x4(pg, src1, 8); + svint32x4_t l3 = svld1_vnum_x4(pg, src1, 12); + svint32_t l4 = svld1rq(svptrue_b32(), src2); + svint32_t l5 = svld1rq(svptrue_b32(), src2 + 4); + l0 = svclamp(l0, l4, l5); + l1 = svclamp(l1, l4, l5); + l2 = svclamp(l2, l4, l5); + l3 = svclamp(l3, l4, l5); + svst1_vnum(pg, dest, 0, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 4, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 8, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 12, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); + svst1_vnum(pg, dest, 16, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 20, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 24, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 28, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); + l0 = svld1_vnum_x4(pg, src3, 0); + l1 = svld1_vnum_x4(pg, src3, 4); + l2 = svld1_vnum_x4(pg, src3, 8); + l3 = svld1_vnum_x4(pg, src3, 12); + l0 = svclamp(l0, l4, l5); + l1 = svclamp(l1, l4, l5); + l2 = svclamp(l2, l4, l5); + l3 = svclamp(l3, l4, l5); + svst1_vnum(pg, dest, 32, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 36, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 40, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 44, + svcreate4(svget4(l0, 3), svget4(l1, 3), + 
svget4(l2, 3), svget4(l3, 3))); + svst1_vnum(pg, dest, 48, + svcreate4(svget4(l0, 0), svget4(l1, 0), + svget4(l2, 0), svget4(l3, 0))); + svst1_vnum(pg, dest, 52, + svcreate4(svget4(l0, 1), svget4(l1, 1), + svget4(l2, 1), svget4(l3, 1))); + svst1_vnum(pg, dest, 56, + svcreate4(svget4(l0, 2), svget4(l1, 2), + svget4(l2, 2), svget4(l3, 2))); + svst1_vnum(pg, dest, 60, + svcreate4(svget4(l0, 3), svget4(l1, 3), + svget4(l2, 3), svget4(l3, 3))); +} + +/* { dg-final { scan-assembler-not {\tmov\tz} } } */ From 2664964b2f695e947faea4c29dbddd3615cc4b0b Mon Sep 17 00:00:00 2001 From: Robin Dapp Date: Fri, 1 Dec 2023 09:57:15 +0100 Subject: [PATCH 113/311] RISC-V: Add vectorized strlen. This patch implements a vectorized strlen by re-using and slightly adjusting the rawmemchr implementation. Rawmemchr returns the address of the needle while strlen returns the difference between needle address and start address. As before, strlen expansion is guarded by -minline-strlen. While testing with -minline-strlen I encountered a vsetvl problem in memcpy-chk.c where we didn't insert a vsetvl at the proper spot (after a setjmp). This needs to be fixed separately and I figured I'd post this patch as-is. gcc/ChangeLog: PR target/112109 * config/riscv/riscv-protos.h (expand_rawmemchr): Add strlen parameter. * config/riscv/riscv-string.cc (riscv_expand_strlen): Call rawmemchr. (expand_rawmemchr): Add strlen handling. * config/riscv/riscv.md: Add TARGET_VECTOR to strlen expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strlen.c: New test. 
--- gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-string.cc | 41 ++++++++++++++----- gcc/config/riscv/riscv.md | 3 +- .../riscv/rvv/autovec/builtin/strlen-run.c | 37 +++++++++++++++++ .../riscv/rvv/autovec/builtin/strlen.c | 12 ++++++ 5 files changed, 83 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index a6f204f3066f..c7b5789a4b3c 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -557,7 +557,7 @@ void expand_cond_unop (unsigned, rtx *); void expand_cond_binop (unsigned, rtx *); void expand_cond_ternop (unsigned, rtx *); void expand_popcount (rtx *); -void expand_rawmemchr (machine_mode, rtx, rtx, rtx); +void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false); void emit_vec_extract (rtx, rtx, poly_int64); /* Rounding mode bitfield for fixed point VXRM. */ diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 594ff49fc5ac..6cde1bf89a08 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -588,9 +588,16 @@ riscv_expand_strlen_scalar (rtx result, rtx src, rtx align) bool riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align) { + if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR) + { + riscv_vector::expand_rawmemchr (E_QImode, result, src, search_char, + /* strlen */ true); + return true; + } + gcc_assert (search_char == const0_rtx); - if (TARGET_ZBB || TARGET_XTHEADBB) + if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR) return riscv_expand_strlen_scalar (result, src, align); return false; @@ -979,12 +986,13 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in) } -/* Implement rawmemchr using vector instructions. 
+/* Implement rawmemchr and strlen using vector instructions. It can be assumed that the needle is in the haystack, otherwise the behavior is undefined. */ void -expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat) +expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle, + bool strlen) { /* rawmemchr: @@ -1005,6 +1013,9 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat) */ gcc_assert (TARGET_VECTOR); + if (strlen) + gcc_assert (mode == E_QImode); + unsigned int isize = GET_MODE_SIZE (mode).to_constant (); int lmul = TARGET_MAX_LMUL; poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); @@ -1028,12 +1039,13 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat) return a pointer to the matching byte. */ unsigned int shift = exact_log2 (GET_MODE_SIZE (mode).to_constant ()); - rtx src_addr = copy_addr_to_reg (XEXP (src, 0)); + rtx src_addr = copy_addr_to_reg (XEXP (haystack, 0)); + rtx start_addr = copy_addr_to_reg (XEXP (haystack, 0)); rtx loop = gen_label_rtx (); emit_label (loop); - rtx vsrc = change_address (src, vmode, src_addr); + rtx vsrc = change_address (haystack, vmode, src_addr); /* Bump the pointer. */ rtx step = gen_reg_rtx (Pmode); @@ -1052,8 +1064,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat) emit_insn (gen_read_vldi_zero_extend (cnt)); /* Compare needle with haystack and store in a mask. 
*/ - rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, pat), vec); - rtx vmsops[] = {mask, eq, vec, pat}; + rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, needle), vec); + rtx vmsops[] = {mask, eq, vec, needle}; emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode), riscv_vector::COMPARE_OP, vmsops, cnt); @@ -1066,9 +1078,18 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat) rtx test = gen_rtx_LT (VOIDmode, end, const0_rtx); emit_jump_insn (gen_cbranch4 (Pmode, test, end, const0_rtx, loop)); - /* We found something at SRC + END * [1,2,4,8]. */ - emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift)))); - emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end))); + if (strlen) + { + /* For strlen, return the length. */ + emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end))); + emit_insn (gen_rtx_SET (dst, gen_rtx_MINUS (Pmode, dst, start_addr))); + } + else + { + /* For rawmemchr, return the position at SRC + END * [1,2,4,8]. 
*/ + emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift)))); + emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end))); + } } } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 0db659acfbe7..6f9dec8c1526 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3743,7 +3743,8 @@ (match_operand:SI 2 "const_int_operand") (match_operand:SI 3 "const_int_operand")] UNSPEC_STRLEN))] - "riscv_inline_strlen && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strlen && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { rtx search_char = operands[2]; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c new file mode 100644 index 000000000000..d29297a5f86c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strlen" } */ + +int +__attribute__ ((noipa)) +foo (const char *s) +{ + return __builtin_strlen (s); +} + +int +__attribute__ ((noipa)) +foo2 (const char *s) +{ + int n = 0; + while (*s++ != '\0') + { + asm volatile (""); + n++; + } + return n; +} + +#define SZ 10 + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", "12345678901234567889012345678901234567890"}; + + for (int i = 0; i < SZ; i++) + { + if (foo (s[i]) != foo2 (s[i])) + __builtin_abort (); + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c new file mode 100644 index 000000000000..0c6cca63ebf8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { riscv_v } } } */ +/* { dg-additional-options "-O3 -minline-strlen" } */ + +int +__attribute__ ((noipa)) +foo (const char *s) +{ + return 
__builtin_strlen (s); +} + +/* { dg-final { scan-assembler-times "vle8ff" 1 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ From d468718c9a097aeb8794fb1a2df6db2c1064d7f7 Mon Sep 17 00:00:00 2001 From: Robin Dapp Date: Fri, 1 Dec 2023 10:07:23 +0100 Subject: [PATCH 114/311] RISC-V: Add vectorized strcmp and strncmp. This patch adds vectorized strcmp and strncmp implementations and tests. Similar to strlen, expansion is still guarded by -minline-str(n)cmp. gcc/ChangeLog: PR target/112109 * config/riscv/riscv-protos.h (expand_strcmp): Declare. * config/riscv/riscv-string.cc (riscv_expand_strcmp): Add strategy handling and delegation to scalar and vector expanders. (expand_strcmp): Vectorized implementation. * config/riscv/riscv.md: Add TARGET_VECTOR to strcmp and strncmp expander. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test. * gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test. 
--- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-string.cc | 161 +++++++++++++++++- gcc/config/riscv/riscv.md | 6 +- .../riscv/rvv/autovec/builtin/strcmp-run.c | 32 ++++ .../riscv/rvv/autovec/builtin/strcmp.c | 13 ++ .../riscv/rvv/autovec/builtin/strncmp-run.c | 136 +++++++++++++++ .../riscv/rvv/autovec/builtin/strncmp.c | 13 ++ 7 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index c7b5789a4b3c..20bbb5b859ce 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -558,6 +558,7 @@ void expand_cond_binop (unsigned, rtx *); void expand_cond_ternop (unsigned, rtx *); void expand_popcount (rtx *); void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false); +bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool); void emit_vec_extract (rtx, rtx, poly_int64); /* Rounding mode bitfield for fixed point VXRM. 
*/ diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 6cde1bf89a08..11c1f74d0b34 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -511,12 +511,19 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2, return false; alignment = UINTVAL (align_rtx); - if (TARGET_ZBB || TARGET_XTHEADBB) + if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR) { - return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, - ncompare); + bool ok = riscv_vector::expand_strcmp (result, src1, src2, + bytes_rtx, alignment, + ncompare); + if (ok) + return true; } + if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR) + return riscv_expand_strcmp_scalar (result, src1, src2, nbytes, alignment, + ncompare); + return false; } @@ -1092,4 +1099,152 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle, } } +/* Implement cmpstr using vector instructions. The ALIGNMENT and + NCOMPARE parameters are unused for now. */ + +bool +expand_strcmp (rtx result, rtx src1, rtx src2, rtx nbytes, + unsigned HOST_WIDE_INT, bool) +{ + gcc_assert (TARGET_VECTOR); + + /* We don't support big endian. */ + if (BYTES_BIG_ENDIAN) + return false; + + bool with_length = nbytes != NULL_RTX; + + if (with_length + && (!REG_P (nbytes) && !SUBREG_P (nbytes) && !CONST_INT_P (nbytes))) + return false; + + if (with_length && CONST_INT_P (nbytes)) + nbytes = force_reg (Pmode, nbytes); + + machine_mode mode = E_QImode; + unsigned int isize = GET_MODE_SIZE (mode).to_constant (); + int lmul = TARGET_MAX_LMUL; + poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize); + + machine_mode vmode; + if (!riscv_vector::get_vector_mode (GET_MODE_INNER (mode), nunits) + .exists (&vmode)) + gcc_unreachable (); + + machine_mode mask_mode = riscv_vector::get_mask_mode (vmode); + + /* Prepare addresses. 
*/ + rtx src_addr1 = copy_addr_to_reg (XEXP (src1, 0)); + rtx vsrc1 = change_address (src1, vmode, src_addr1); + + rtx src_addr2 = copy_addr_to_reg (XEXP (src2, 0)); + rtx vsrc2 = change_address (src2, vmode, src_addr2); + + /* Set initial pointer bump to 0. */ + rtx cnt = gen_reg_rtx (Pmode); + emit_move_insn (cnt, CONST0_RTX (Pmode)); + + rtx sub = gen_reg_rtx (Pmode); + emit_move_insn (sub, CONST0_RTX (Pmode)); + + /* Create source vectors. */ + rtx vec1 = gen_reg_rtx (vmode); + rtx vec2 = gen_reg_rtx (vmode); + + rtx done = gen_label_rtx (); + rtx loop = gen_label_rtx (); + emit_label (loop); + + /* Bump the pointers. */ + emit_insn (gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, cnt))); + emit_insn (gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, cnt))); + + rtx vlops1[] = {vec1, vsrc1}; + rtx vlops2[] = {vec2, vsrc2}; + + if (!with_length) + { + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1); + + emit_vlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2); + } + else + { + nbytes = gen_lowpart (Pmode, nbytes); + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops1, nbytes); + + emit_nonvlmax_insn (code_for_pred_fault_load (vmode), + riscv_vector::UNARY_OP, vlops2, nbytes); + } + + /* Read the vl for the next pointer bump. */ + if (Pmode == SImode) + emit_insn (gen_read_vlsi (cnt)); + else + emit_insn (gen_read_vldi_zero_extend (cnt)); + + if (with_length) + { + rtx test_done = gen_rtx_EQ (VOIDmode, cnt, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test_done, cnt, const0_rtx, done)); + emit_insn (gen_rtx_SET (nbytes, gen_rtx_MINUS (Pmode, nbytes, cnt))); + } + + /* Look for a \0 in the first string. 
*/ + rtx mask0 = gen_reg_rtx (mask_mode); + rtx eq0 + = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, CONST0_RTX (mode)), + vec1); + rtx vmsops1[] = {mask0, eq0, vec1, CONST0_RTX (mode)}; + emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode), + riscv_vector::COMPARE_OP, vmsops1, cnt); + + /* Look for vec1 != vec2 (includes vec2[i] == 0). */ + rtx maskne = gen_reg_rtx (mask_mode); + rtx ne = gen_rtx_NE (mask_mode, vec1, vec2); + rtx vmsops[] = {maskne, ne, vec1, vec2}; + emit_nonvlmax_insn (code_for_pred_cmp (vmode), riscv_vector::COMPARE_OP, + vmsops, cnt); + + /* Combine both masks into one. */ + rtx mask = gen_reg_rtx (mask_mode); + rtx vmorops[] = {mask, mask0, maskne}; + emit_nonvlmax_insn (code_for_pred (IOR, mask_mode), + riscv_vector::BINARY_MASK_OP, vmorops, cnt); + + /* Find the first bit in the mask (the first unequal element). */ + rtx found_at = gen_reg_rtx (Pmode); + rtx vfops[] = {found_at, mask}; + emit_nonvlmax_insn (code_for_pred_ffs (mask_mode, Pmode), + riscv_vector::CPOP_OP, vfops, cnt); + + /* Emit the loop condition. */ + rtx test = gen_rtx_LT (VOIDmode, found_at, const0_rtx); + emit_jump_insn (gen_cbranch4 (Pmode, test, found_at, const0_rtx, loop)); + + /* Walk up to the difference point. */ + emit_insn ( + gen_rtx_SET (src_addr1, gen_rtx_PLUS (Pmode, src_addr1, found_at))); + emit_insn ( + gen_rtx_SET (src_addr2, gen_rtx_PLUS (Pmode, src_addr2, found_at))); + + /* Load the respective byte and compute the difference. 
*/ + rtx c1 = gen_reg_rtx (Pmode); + rtx c2 = gen_reg_rtx (Pmode); + + do_load_from_addr (mode, c1, src_addr1, src1); + do_load_from_addr (mode, c2, src_addr2, src2); + + do_sub3 (sub, c1, c2); + + if (with_length) + emit_label (done); + + emit_insn (gen_movsi (result, gen_lowpart (SImode, sub))); + return true; +} + } diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 6f9dec8c1526..eed997116b01 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3702,7 +3702,8 @@ (match_operand:BLK 2))) (use (match_operand:SI 3)) (use (match_operand:SI 4))])] - "riscv_inline_strncmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strncmp && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { if (riscv_expand_strcmp (operands[0], operands[1], operands[2], operands[3], operands[4])) @@ -3722,7 +3723,8 @@ (compare:SI (match_operand:BLK 1) (match_operand:BLK 2))) (use (match_operand:SI 3))])] - "riscv_inline_strcmp && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)" + "riscv_inline_strcmp && !optimize_size + && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)" { if (riscv_expand_strcmp (operands[0], operands[1], operands[2], NULL_RTX, operands[3])) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c new file mode 100644 index 000000000000..6dec7da91c1b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +#include + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +int +__attribute__ ((noipa, optimize ("0"))) +foo2 (const char *s, const char *t) +{ + return strcmp (s, t); +} + +#define SZ 10 + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", 
"12345678901234567889012345678901234567890"}; + + for (int i = 0; i < SZ; i++) + for (int j = 0; j < SZ; j++) + if (foo (s[i], s[j]) != foo2 (s[i], s[j])) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c new file mode 100644 index 000000000000..f9d33a74fc56 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { riscv_v } } } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strcmp (s, t); +} + +/* { dg-final { scan-assembler-times "vle8ff" 2 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ +/* { dg-final { scan-assembler-times "vmor.m" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c new file mode 100644 index 000000000000..8d1471a3a135 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c @@ -0,0 +1,136 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O3 -minline-strcmp" } */ + +#include + +int +__attribute__ ((noipa, optimize ("0"))) +foo2 (const char *s, const char *t, int n) +{ + return strncmp (s, t, n); +} + +#define SZ 11 + +#define TEST(I, J, N) \ + int res_##I_##J_##N = __builtin_strncmp (s[I], s[J], N); \ + int ref_##I_##J_##N = foo2 (s[I], s[J], N); \ + if (res_##I_##J_##N != ref_##I_##J_##N) \ + __builtin_abort (); + +int main () +{ + const char *s[SZ] + = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", + "a", "z", "1", "9", "12345678901234567889012345678901234567890", + "ds0fi0349r0sdmfvi0sjf0c9fj034mrx903cw0efmc9jfsicn2390crrm0i90msdfi0sdf0"}; + + for (int i = 0; i < SZ; i++) + for (int j = 0; j < SZ; j++) + { + TEST(i, j, 0) + TEST(i, j, 1) + TEST(i, j, 2) + TEST(i, j, 3) + TEST(i, j, 4) + TEST(i, j, 5) + TEST(i, j, 6) 
+ TEST(i, j, 7) + TEST(i, j, 8) + TEST(i, j, 9) + TEST(i, j, 10) + TEST(i, j, 11) + TEST(i, j, 12) + TEST(i, j, 13) + TEST(i, j, 14) + TEST(i, j, 15) + TEST(i, j, 16) + TEST(i, j, 17) + TEST(i, j, 18) + TEST(i, j, 19) + TEST(i, j, 20) + TEST(i, j, 21) + TEST(i, j, 22) + TEST(i, j, 23) + TEST(i, j, 24) + TEST(i, j, 25) + TEST(i, j, 26) + TEST(i, j, 27) + TEST(i, j, 28) + TEST(i, j, 29) + TEST(i, j, 30) + TEST(i, j, 31) + TEST(i, j, 32) + TEST(i, j, 33) + TEST(i, j, 34) + TEST(i, j, 35) + TEST(i, j, 36) + TEST(i, j, 37) + TEST(i, j, 38) + TEST(i, j, 39) + TEST(i, j, 40) + TEST(i, j, 41) + TEST(i, j, 42) + TEST(i, j, 43) + TEST(i, j, 44) + TEST(i, j, 45) + TEST(i, j, 46) + TEST(i, j, 47) + TEST(i, j, 48) + TEST(i, j, 49) + TEST(i, j, 50) + TEST(i, j, 51) + TEST(i, j, 52) + TEST(i, j, 53) + TEST(i, j, 54) + TEST(i, j, 55) + TEST(i, j, 56) + TEST(i, j, 57) + TEST(i, j, 58) + TEST(i, j, 59) + TEST(i, j, 60) + TEST(i, j, 61) + TEST(i, j, 62) + TEST(i, j, 63) + TEST(i, j, 64) + TEST(i, j, 65) + TEST(i, j, 66) + TEST(i, j, 67) + TEST(i, j, 68) + TEST(i, j, 69) + TEST(i, j, 70) + TEST(i, j, 71) + TEST(i, j, 72) + TEST(i, j, 73) + TEST(i, j, 74) + TEST(i, j, 75) + TEST(i, j, 76) + TEST(i, j, 77) + TEST(i, j, 78) + TEST(i, j, 79) + TEST(i, j, 80) + TEST(i, j, 81) + TEST(i, j, 82) + TEST(i, j, 83) + TEST(i, j, 84) + TEST(i, j, 85) + TEST(i, j, 86) + TEST(i, j, 87) + TEST(i, j, 88) + TEST(i, j, 89) + TEST(i, j, 90) + TEST(i, j, 91) + TEST(i, j, 92) + TEST(i, j, 93) + TEST(i, j, 94) + TEST(i, j, 95) + TEST(i, j, 96) + TEST(i, j, 97) + TEST(i, j, 98) + TEST(i, j, 99) + TEST(i, j, 100) + TEST(i, j, 101) + TEST(i, j, 102) + TEST(i, j, 103) + } +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c new file mode 100644 index 000000000000..a89633ea9d37 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { riscv_v } } } */ +/* { 
dg-additional-options "-O3 -minline-strncmp" } */ + +int +__attribute__ ((noipa)) +foo (const char *s, const char *t) +{ + return __builtin_strncmp (s, t, 7); +} + +/* { dg-final { scan-assembler-times "vle8ff" 2 } } */ +/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */ +/* { dg-final { scan-assembler-times "vmor.m" 1 } } */ From 5764825aed613f201a8bc47e5b239027a39691f0 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 8 Dec 2023 13:33:55 -0500 Subject: [PATCH 115/311] c++: undiagnosed error_mark_node from cp_build_c_cast [PR112658] When cp_build_c_cast commits to an erroneous const_cast, we neglect to replay errors from build_const_cast_1 which can lead to us incorrectly accepting (and "miscompiling") the cast, or triggering the assert in finish_expr_stmt. This patch fixes this oversight. This was the original fix for the ICE in PR112658 before r14-5941-g305a2686c99bf9 made us accept the testcase there after all. I wasn't able to come up with an alternate testcase for which this fix has an effect anymore, but below is a reduced version of the PR112658 testcase (accepted ever since r14-5941) for good measure. PR c++/112658 PR c++/94264 gcc/cp/ChangeLog: * typeck.cc (cp_build_c_cast): If we're committed to a const_cast and the result is erroneous, call build_const_cast_1 a second time to issue errors. Use complain=tf_none instead of =false. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/initlist-array20.C: New test. 
--- gcc/cp/typeck.cc | 4 +++- gcc/testsuite/g++.dg/cpp0x/initlist-array20.C | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/initlist-array20.C diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 8e4cfae08aa6..258cfd43114d 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -9213,6 +9213,8 @@ cp_build_c_cast (location_t loc, tree type, tree expr, maybe_warn_about_useless_cast (loc, type, value, complain); maybe_warn_about_cast_ignoring_quals (loc, type, complain); } + else if (complain & tf_error) + build_const_cast_1 (loc, type, value, tf_error, &valid_p); return result; } @@ -9248,7 +9250,7 @@ cp_build_c_cast (location_t loc, tree type, tree expr, to succeed. */ if (!same_type_p (non_reference (type), non_reference (result_type))) { - result = build_const_cast_1 (loc, type, result, false, &valid_p); + result = build_const_cast_1 (loc, type, result, tf_none, &valid_p); gcc_assert (valid_p); } return result; diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-array20.C b/gcc/testsuite/g++.dg/cpp0x/initlist-array20.C new file mode 100644 index 000000000000..048c5b45652c --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/initlist-array20.C @@ -0,0 +1,11 @@ +// PR c++/112658 +// PR c++/94264 +// { dg-do compile { target c++11 } } + +void f(int*); + +int main() { + using array = int[]; + f(array{42}); + f((int*)array{42}); +} From 0c018a74eb1affe2a1fa385cdddaa93979683420 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 8 Dec 2023 13:34:04 -0500 Subject: [PATCH 116/311] c++: guard more against undiagnosed error_mark_node [PR112658] This adds a sanity check to cp_parser_expression_statement similar to the one in finish_expr_stmt added by r6-6795-g0fd9d4921f7ba2, which effectively downgrades accepts-invalid/wrong-code bugs like this one into ice-on-invalid/ice-on-valid ones. 
PR c++/112658 gcc/cp/ChangeLog: * parser.cc (cp_parser_expression_statement): If the statement is error_mark_node, make sure we've seen_error(). --- gcc/cp/parser.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 732d2a919ebd..6ec342c08eda 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -12964,6 +12964,9 @@ cp_parser_expression_statement (cp_parser* parser, tree in_statement_expr) if (statement == error_mark_node && !cp_parser_uncommitted_to_tentative_parse_p (parser)) { + /* If we ran into a problem, make sure we complained. */ + gcc_assert (seen_error ()); + cp_parser_skip_to_end_of_block_or_statement (parser); return error_mark_node; } From 2a5a5d5e7d32b21205562a35b307ff69e389b996 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Fri, 8 Dec 2023 13:44:10 -0500 Subject: [PATCH 117/311] c++: Add fixed test [PR88848] This one was fixed by r12-7714-g47da5198766256. PR c++/88848 gcc/testsuite/ChangeLog: * g++.dg/inherit/multiple2.C: New test. --- gcc/testsuite/g++.dg/inherit/multiple2.C | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 gcc/testsuite/g++.dg/inherit/multiple2.C diff --git a/gcc/testsuite/g++.dg/inherit/multiple2.C b/gcc/testsuite/g++.dg/inherit/multiple2.C new file mode 100644 index 000000000000..dd3d0daf248c --- /dev/null +++ b/gcc/testsuite/g++.dg/inherit/multiple2.C @@ -0,0 +1,35 @@ +// PR c++/88848 +// { dg-do compile { target c++17 } } + +template +struct True { static constexpr bool value{ true }; }; + +template +struct Integer { static constexpr int value{ VALUE }; }; + +template +struct Foo +{ + using Integer_t = Integer; + + static TYPE get_type(Integer_t); +}; + +template +struct Bar : ARGS... 
+{ + using ARGS::get_type...; + + template + using Type_t = decltype(get_type(Integer{})); + + Bar() { static_assert((True< Type_t >::value && ...)); } + + static_assert((True< Type_t >::value && ...)); +}; + +int main() +{ + Bar, Foo<8, double>> obj; + return int{ sizeof(obj) }; +} From 6ddaf06e375e1c15dcda338697ab6ea457e6f497 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 20:56:48 +0100 Subject: [PATCH 118/311] c++: Unshare folded SAVE_EXPR arguments during cp_fold [PR112727] The following testcase is miscompiled because two ubsan instrumentations run into each other. The first one is the shift instrumentation. Before the C++ FE calls it, it wraps the 2 shift arguments with cp_save_expr, so that side-effects in them aren't evaluated multiple times. And, ubsan_instrument_shift itself uses unshare_expr on any uses of the operands to make sure further modifications in them don't affect other copies of them (the only not unshared ones are the one the caller then uses for the actual operation after the instrumentation, which means there is no tree sharing). Now, if there are side-effects in the first operand like say function call, cp_save_expr wraps it into a SAVE_EXPR, and ubsan_instrument_shift in this mode emits something like if (..., SAVE_EXPR , SAVE_EXPR > const) __ubsan_handle_shift_out_of_bounds (..., SAVE_EXPR , ...); and caller adds SAVE_EXPR << SAVE_EXPR after it in a COMPOUND_EXPR. So far so good. If there are no side-effects and cp_save_expr doesn't create SAVE_EXPR, everything is ok as well because of the unshare_expr. We have if (..., SAVE_EXPR > const) __ubsan_handle_shift_out_of_bounds (..., ptr->something[i], ...); and ptr->something[i] << SAVE_EXPR where ptr->something[i] is unshared. In the testcase below, the !x->s[j] ? 1 : 0 expression is wrapped initially into a SAVE_EXPR though, and unshare_expr doesn't unshare SAVE_EXPRs nor anything used in them for obvious reasons, so we end up with: if (..., SAVE_EXPR (x)->s[j] ? 
1 : 0>, SAVE_EXPR > const) __ubsan_handle_shift_out_of_bounds (..., SAVE_EXPR (x)->s[j] ? 1 : 0>, ...); and SAVE_EXPR (x)->s[j] ? 1 : 0> << SAVE_EXPR So far so good as well. But later during cp_fold of the SAVE_EXPR we find out that VIEW_CONVERT_EXPR(x)->s[j] ? 0 : 1 is actually invariant (has TREE_READONLY set) and so cp_fold simplifies the above to if (..., SAVE_EXPR > const) __ubsan_handle_shift_out_of_bounds (..., (bool) VIEW_CONVERT_EXPR(x)->s[j] ? 0 : 1, ...); and ((bool) VIEW_CONVERT_EXPR(x)->s[j] ? 0 : 1) << SAVE_EXPR with the s[j] ARRAY_REFs and other expressions shared in between the two uses (and obviously the expression optimized away from the COMPOUND_EXPR in the if condition). Then comes another ubsan instrumentation at genericization time, this time to instrument the ARRAY_REFs with strict bounds checking, and replaces the s[j] in there with s[.UBSAN_BOUNDS (0B, SAVE_EXPR, 8), SAVE_EXPR] As the trees are shared, it does that just once though. And as the if body is gimplified first, the SAVE_EXPR is evaluated inside of the if body and when it is used again after the if, it uses a potentially uninitialized value of j.1 (always uninitialized if the shift count isn't out of bounds). The following patch fixes that by unshare_expr unsharing the folded argument of a SAVE_EXPR if we've folded the SAVE_EXPR into an invariant and it is used more than once. 2023-12-08 Jakub Jelinek PR sanitizer/112727 * cp-gimplify.cc (cp_fold): If SAVE_EXPR has been previously folded, unshare_expr what is returned. * c-c++-common/ubsan/pr112727.c: New test.
--- gcc/cp/cp-gimplify.cc | 9 ++++++++- gcc/testsuite/c-c++-common/ubsan/pr112727.c | 17 +++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/c-c++-common/ubsan/pr112727.c diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index 5abb91bbdd3b..c307e1b62db4 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -2906,7 +2906,14 @@ cp_fold (tree x, fold_flags_t flags) fold_cache = hash_map::create_ggc (101); if (tree *cached = fold_cache->get (x)) - return *cached; + { + /* unshare_expr doesn't recurse into SAVE_EXPRs. If SAVE_EXPR's + argument has been folded into a tree invariant, make sure it is + unshared. See PR112727. */ + if (TREE_CODE (x) == SAVE_EXPR && *cached != x) + return unshare_expr (*cached); + return *cached; + } uid_sensitive_constexpr_evaluation_checker c; diff --git a/gcc/testsuite/c-c++-common/ubsan/pr112727.c b/gcc/testsuite/c-c++-common/ubsan/pr112727.c new file mode 100644 index 000000000000..cc8b3e2a565e --- /dev/null +++ b/gcc/testsuite/c-c++-common/ubsan/pr112727.c @@ -0,0 +1,17 @@ +/* PR sanitizer/112727 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fsanitize=shift-exponent,bounds-strict -Wuninitialized" } */ + +#ifndef __cplusplus +#define bool _Bool +#endif + +struct S { bool s[8]; }; + +void +foo (const struct S *x) +{ + unsigned n = 0; + for (unsigned j = 0; j < 8; j++) + n |= ((!x->s[j]) ? 1 : 0) << (16 + j); +} From 662a613dd381e327b90b42bb850cb393a01e7f7e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 8 Dec 2023 20:58:38 +0100 Subject: [PATCH 119/311] c++: Fix parsing [[]][[]]; When working on the previous patch I put [[]] [[]] asm (""); into a testcase, but was surprised it wasn't parsed. 
The problem is that when cp_parser_std_attribute_spec returns NULL, it can mean 2 different things: one is that the next token(s) are neither [[ nor alignas (in that case the caller should break from the loop), the other is that we parsed something like [[]] - it was a valid attribute specifier, but didn't specify any attributes in it. The following patch fixes that by using a magic value of void_list_node for the case where the first tokens are neither [[ nor alignas and so where cp_parser_std_attribute_spec_seq should stop iterating, to differentiate it from NULL_TREE, which means that an attribute specifier has been parsed but didn't contain any (or any valid) attributes. 2023-12-08 Jakub Jelinek * parser.cc (cp_parser_std_attribute_spec): Return void_list_node rather than NULL_TREE if token is neither CPP_OPEN_SQUARE nor RID_ALIGNAS CPP_KEYWORD. (cp_parser_std_attribute_spec_seq): For attr_spec == void_list_node break, for attr_spec == NULL_TREE continue. * g++.dg/cpp0x/gen-attrs-79.C: New test. --- gcc/cp/parser.cc | 14 +++++++++++--- gcc/testsuite/g++.dg/cpp0x/gen-attrs-79.C | 9 +++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/gen-attrs-79.C diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 6ec342c08eda..ca91a50f059e 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -30265,7 +30265,11 @@ void cp_parser_late_contract_condition (cp_parser *parser, [ [ assert : contract-mode [opt] : conditional-expression ] ] [ [ pre : contract-mode [opt] : conditional-expression ] ] [ [ post : contract-mode [opt] identifier [opt] : - conditional-expression ] ] */ + conditional-expression ] ] + + Return void_list_node if the current token doesn't start an + attribute-specifier to differentiate from NULL_TREE returned e.g. + for [ [ ] ].
*/ static tree cp_parser_std_attribute_spec (cp_parser *parser) @@ -30345,7 +30349,7 @@ cp_parser_std_attribute_spec (cp_parser *parser) if (token->type != CPP_KEYWORD || token->keyword != RID_ALIGNAS) - return NULL_TREE; + return void_list_node; cp_lexer_consume_token (parser->lexer); maybe_warn_cpp0x (CPP0X_ATTRIBUTES); @@ -30418,8 +30422,12 @@ cp_parser_std_attribute_spec_seq (cp_parser *parser) while (true) { tree attr_spec = cp_parser_std_attribute_spec (parser); - if (attr_spec == NULL_TREE) + if (attr_spec == void_list_node) break; + /* Accept [[]][[]]; for which cp_parser_std_attribute_spec + returns NULL_TREE as there are no attributes. */ + if (attr_spec == NULL_TREE) + continue; if (attr_spec == error_mark_node) return error_mark_node; diff --git a/gcc/testsuite/g++.dg/cpp0x/gen-attrs-79.C b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-79.C new file mode 100644 index 000000000000..6cdd65c44174 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/gen-attrs-79.C @@ -0,0 +1,9 @@ +// { dg-do compile { target c++11 } } + +[[]] [[]]; + +[[]] [[]] void +foo () +{ + [[]] [[]]; +} From 48cb51827c9eb991b92014a3f59d31eb237ce03f Mon Sep 17 00:00:00 2001 From: "Vladimir N. Makarov" Date: Fri, 8 Dec 2023 15:37:42 -0500 Subject: [PATCH 120/311] [PR112875][LRA]: Fix an assert in lra elimination code The PR112875 test ran into a wrong assert (gcc_unreachable) in elimination in a debug insn. The insn seems ok, so I changed the assertion. To be more accurate, I made it the same as the analogous reload pass code. gcc/ChangeLog: PR rtl-optimization/112875 * lra-eliminations.cc (lra_eliminate_regs_1): Change an assert. Add ASM_OPERANDS case. gcc/testsuite/ChangeLog: PR rtl-optimization/112875 * gcc.target/i386/pr112875.c: New test.
--- gcc/lra-eliminations.cc | 4 +++ gcc/testsuite/gcc.target/i386/pr112875.c | 33 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr112875.c diff --git a/gcc/lra-eliminations.cc b/gcc/lra-eliminations.cc index f3b75e083901..cf229b402daa 100644 --- a/gcc/lra-eliminations.cc +++ b/gcc/lra-eliminations.cc @@ -666,6 +666,10 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, return x; case CLOBBER: + case ASM_OPERANDS: + gcc_assert (insn && DEBUG_INSN_P (insn)); + break; + case SET: gcc_unreachable (); diff --git a/gcc/testsuite/gcc.target/i386/pr112875.c b/gcc/testsuite/gcc.target/i386/pr112875.c new file mode 100644 index 000000000000..b704404b2480 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112875.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-Oz -frounding-math -fno-dce -fno-trapping-math -fno-tree-dce -fno-tree-dse -g" } */ +long a, f; +int b, c, d, g, h, i, j; +char e; +void k(long, int l, char t) { + char m = b, n = g, o = 0; + int p, q, r = h; + long s = g; + if (f) { + q = t + (float)16777217; + o = ~0; + } + if (e) { + d = g + a; + if (d % (a % l)) { + p = d; + n = b; + } + if (l) { + i = b; + r = a; + p = h; + } + if (s) + s = q; + c = f; + e += t; + a = p; + } + j = r % n; + s += g / 0xc000000000000000 + !o; +} From 08262e78209ed4a69f309b6fdf79e7c0be0c6793 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 8 Dec 2023 15:59:43 -0500 Subject: [PATCH 121/311] analyzer: fix ICE on infoleak with poisoned size gcc/analyzer/ChangeLog: * region-model.cc (contains_uninit_p): Only check for svalues that the infoleak warning can handle. gcc/testsuite/ChangeLog: * gcc.dg/plugin/infoleak-uninit-size-1.c: New test. * gcc.dg/plugin/infoleak-uninit-size-2.c: New test. * gcc.dg/plugin/plugin.exp: Add the new tests. 
Signed-off-by: David Malcolm --- gcc/analyzer/region-model.cc | 39 ++++++++++++------- .../gcc.dg/plugin/infoleak-uninit-size-1.c | 20 ++++++++++ .../gcc.dg/plugin/infoleak-uninit-size-2.c | 20 ++++++++++ gcc/testsuite/gcc.dg/plugin/plugin.exp | 2 + 4 files changed, 67 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-1.c create mode 100644 gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-2.c diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 6a7a8bc9f488..2315751870d3 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -6576,22 +6576,33 @@ private: static bool contains_uninit_p (const svalue *sval) { - struct uninit_finder : public visitor - { - public: - uninit_finder () : m_found_uninit (false) {} - void visit_poisoned_svalue (const poisoned_svalue *sval) + switch (sval->get_kind ()) { - if (sval->get_poison_kind () == POISON_KIND_UNINIT) - m_found_uninit = true; + default: + return false; + case SK_POISONED: + { + const poisoned_svalue *psval + = as_a (sval); + return psval->get_poison_kind () == POISON_KIND_UNINIT; + } + case SK_COMPOUND: + { + const compound_svalue *compound_sval + = as_a (sval); + + for (auto iter : *compound_sval) + { + const svalue *sval = iter.second; + if (const poisoned_svalue *psval + = sval->dyn_cast_poisoned_svalue ()) + if (psval->get_poison_kind () == POISON_KIND_UNINIT) + return true; + } + + return false; + } } - bool m_found_uninit; - }; - - uninit_finder v; - sval->accept (&v); - - return v.m_found_uninit; } /* Function for use by plugins when simulating writing data through a diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-1.c b/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-1.c new file mode 100644 index 000000000000..7466112fe149 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-1.c @@ -0,0 +1,20 @@ +/* Reduced from infoleak ICE seen on Linux kernel with + 
-Wno-analyzer-use-of-uninitialized-value. + + Verify that we don't ICE when complaining about an infoleak + when the size is uninitialized. */ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer -Wno-analyzer-use-of-uninitialized-value" } */ +/* { dg-require-effective-target analyzer } */ + +extern unsigned long +copy_to_user(void* to, const void* from, unsigned long n); + +unsigned long +test_uninit_size (void *to, void *from) +{ + unsigned long n; + char buf[16]; + return copy_to_user(to, from, n); +} diff --git a/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-2.c b/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-2.c new file mode 100644 index 000000000000..a8a383f4b2d3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/infoleak-uninit-size-2.c @@ -0,0 +1,20 @@ +/* Reduced from infoleak ICE seen on Linux kernel with + -Wno-analyzer-use-of-uninitialized-value. + + Verify that we complain about the uninit value when + -Wno-analyzer-use-of-uninitialized-value isn't supplied. */ + +/* { dg-do compile } */ +/* { dg-options "-fanalyzer" } */ +/* { dg-require-effective-target analyzer } */ + +extern unsigned long +copy_to_user(void* to, const void* from, unsigned long n); + +unsigned long +test_uninit_size (void *to, void *from) +{ + unsigned long n; + char buf[16]; + return copy_to_user(to, from, n); /* { dg-warning "use of uninitialized value 'n'" } */ +} diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index f0b4bb7a051f..d6cccb269df2 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -150,6 +150,8 @@ set plugin_test_list [list \ infoleak-CVE-2017-18550-1.c \ infoleak-antipatterns-1.c \ infoleak-fixit-1.c \ + infoleak-uninit-size-1.c \ + infoleak-uninit-size-2.c \ infoleak-net-ethtool-ioctl.c \ infoleak-vfio_iommu_type1.c \ taint-CVE-2011-0521-1-fixed.c \ From 0bef72539e585d13941987369cf34726a7ac5b2e Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 8 Dec 2023 
15:59:48 -0500 Subject: [PATCH 122/311] analyzer: avoid taint for (TAINTED % NON_TAINTED) gcc/analyzer/ChangeLog: * sm-taint.cc (taint_state_machine::alt_get_inherited_state): Fix handling of TRUNC_MOD_EXPR. gcc/testsuite/ChangeLog: * c-c++-common/analyzer/taint-modulus-1.c: New test. Signed-off-by: David Malcolm --- gcc/analyzer/sm-taint.cc | 9 ++++++++- gcc/testsuite/c-c++-common/analyzer/taint-modulus-1.c | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/c-c++-common/analyzer/taint-modulus-1.c diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 6b5d51c62af9..597e8e55609a 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -891,7 +891,6 @@ taint_state_machine::alt_get_inherited_state (const sm_state_map &map, case MULT_EXPR: case POINTER_PLUS_EXPR: case TRUNC_DIV_EXPR: - case TRUNC_MOD_EXPR: { state_t arg0_state = map.get_state (arg0, ext_state); state_t arg1_state = map.get_state (arg1, ext_state); @@ -899,6 +898,14 @@ taint_state_machine::alt_get_inherited_state (const sm_state_map &map, } break; + case TRUNC_MOD_EXPR: + { + /* The left-hand side of X % Y can be sanitized by + the operation. 
*/ + return map.get_state (arg1, ext_state); + } + break; + case BIT_AND_EXPR: case RSHIFT_EXPR: return NULL; diff --git a/gcc/testsuite/c-c++-common/analyzer/taint-modulus-1.c b/gcc/testsuite/c-c++-common/analyzer/taint-modulus-1.c new file mode 100644 index 000000000000..ed286fa341cd --- /dev/null +++ b/gcc/testsuite/c-c++-common/analyzer/taint-modulus-1.c @@ -0,0 +1,8 @@ +#define SIZE 16 +char buf[SIZE]; + +__attribute__ ((tainted_args)) +char test_sanitized_by_modulus (int val) +{ + return buf[val % SIZE]; /* { dg-bogus "use of attacker-controlled value" } */ +} From d9965fef40794d548021d2e34844e5fafeca4ce5 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Fri, 8 Dec 2023 16:57:13 -0500 Subject: [PATCH 123/311] c++: decltype of (non-captured variable) [PR83167] For decltype((x)) within a lambda where x is not captured, we dubiously require that the lambda has a capture default, unlike for decltype(x). But according to [expr.prim.id.unqual]/3 we should just ignore the lambda in this case. This patch narrowly fixes this issue by disabling the capture_decltype handling and falling back to the ordinary handling when the innermost lambda has no capture-default. In fact, we can restrict the special handling to only by-copy lambdas since that's what [expr.prim.id.unqual]/3 is concerned with; for by-ref implicit captures both code paths should give the same result anyway. During review some other issues were discovered which are documented in a new FIXME. PR c++/83167 gcc/cp/ChangeLog: * semantics.cc (capture_decltype): Inline into its only caller ... (finish_decltype_type): ... here. Update nearby comment to refer to recent standard. Add FIXME. Restrict uncaptured variable type transformation to happen only for lambdas with a by-copy capture-default. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/lambda/lambda-decltype4.C: New test. 
--- gcc/cp/semantics.cc | 111 +++++++----------- .../g++.dg/cpp0x/lambda/lambda-decltype4.C | 15 +++ 2 files changed, 59 insertions(+), 67 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-decltype4.C diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 6634acfda3fb..efd959d95b9f 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -53,7 +53,6 @@ along with GCC; see the file COPYING3. If not see static tree maybe_convert_cond (tree); static tree finalize_nrv_r (tree *, int *, void *); -static tree capture_decltype (tree); /* Used for OpenMP non-static data member privatization. */ @@ -11855,21 +11854,52 @@ finish_decltype_type (tree expr, bool id_expression_or_member_access_p, } else { - /* Within a lambda-expression: - - Every occurrence of decltype((x)) where x is a possibly - parenthesized id-expression that names an entity of - automatic storage duration is treated as if x were - transformed into an access to a corresponding data member - of the closure type that would have been declared if x - were a use of the denoted entity. */ if (outer_automatic_var_p (STRIP_REFERENCE_REF (expr)) && current_function_decl && LAMBDA_FUNCTION_P (current_function_decl)) { - type = capture_decltype (STRIP_REFERENCE_REF (expr)); - if (!type) - goto dependent; + /* [expr.prim.id.unqual]/3: If naming the entity from outside of an + unevaluated operand within S would refer to an entity captured by + copy in some intervening lambda-expression, then let E be the + innermost such lambda-expression. + + If there is such a lambda-expression and if P is in E's function + parameter scope but not its parameter-declaration-clause, then the + type of the expression is the type of a class member access + expression naming the non-static data member that would be declared + for such a capture in the object parameter of the function call + operator of E." 
*/ + /* FIXME: This transformation needs to happen for all uses of an outer + local variable inside decltype, not just decltype((x)) (PR83167). + And we don't handle nested lambdas properly, where we need to + consider the outer lambdas as well (PR112926). */ + tree decl = STRIP_REFERENCE_REF (expr); + tree lam = CLASSTYPE_LAMBDA_EXPR (DECL_CONTEXT (current_function_decl)); + tree cap = lookup_name (DECL_NAME (decl), LOOK_where::BLOCK, + LOOK_want::HIDDEN_LAMBDA); + + if (cap && is_capture_proxy (cap)) + type = TREE_TYPE (cap); + else if (LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (lam) == CPLD_COPY) + { + type = TREE_TYPE (decl); + if (TYPE_REF_P (type) + && TREE_CODE (TREE_TYPE (type)) != FUNCTION_TYPE) + type = TREE_TYPE (type); + } + + if (type && !TYPE_REF_P (type)) + { + tree obtype = TREE_TYPE (DECL_ARGUMENTS (current_function_decl)); + if (WILDCARD_TYPE_P (non_reference (obtype))) + /* We don't know what the eventual obtype quals will be. */ + goto dependent; + int quals = cp_type_quals (type); + if (INDIRECT_TYPE_P (obtype)) + quals |= cp_type_quals (TREE_TYPE (obtype)); + type = cp_build_qualified_type (type, quals); + type = build_reference_type (type); + } } else if (error_operand_p (expr)) type = error_mark_node; @@ -11877,7 +11907,8 @@ finish_decltype_type (tree expr, bool id_expression_or_member_access_p, /* If the expression is just "this", we want the cv-unqualified pointer for the "this" type. */ type = TYPE_MAIN_VARIANT (TREE_TYPE (expr)); - else + + if (!type) { /* Otherwise, where T is the type of e, if e is an lvalue, decltype(e) is defined as T&; if an xvalue, T&&; otherwise, T. */ @@ -12766,60 +12797,6 @@ apply_deduced_return_type (tree fco, tree return_type) } } -/* DECL is a local variable or parameter from the surrounding scope of a - lambda-expression. Returns the decltype for a use of the capture field - for DECL even if it hasn't been captured yet. 
Or NULL_TREE if we can't give - a correct answer at this point and we should build a DECLTYPE_TYPE. */ - -static tree -capture_decltype (tree decl) -{ - tree lam = CLASSTYPE_LAMBDA_EXPR (DECL_CONTEXT (current_function_decl)); - tree cap = lookup_name (DECL_NAME (decl), LOOK_where::BLOCK, - LOOK_want::HIDDEN_LAMBDA); - tree type; - - if (cap && is_capture_proxy (cap)) - type = TREE_TYPE (cap); - else - switch (LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (lam)) - { - case CPLD_NONE: - error ("%qD is not captured", decl); - return error_mark_node; - - case CPLD_COPY: - type = TREE_TYPE (decl); - if (TYPE_REF_P (type) - && TREE_CODE (TREE_TYPE (type)) != FUNCTION_TYPE) - type = TREE_TYPE (type); - break; - - case CPLD_REFERENCE: - type = TREE_TYPE (decl); - if (!TYPE_REF_P (type)) - type = build_reference_type (TREE_TYPE (decl)); - break; - - default: - gcc_unreachable (); - } - - if (!TYPE_REF_P (type)) - { - tree obtype = TREE_TYPE (DECL_ARGUMENTS (current_function_decl)); - if (WILDCARD_TYPE_P (non_reference (obtype))) - /* We don't know what the eventual obtype quals will be. */ - return NULL_TREE; - int quals = cp_type_quals (type); - if (INDIRECT_TYPE_P (obtype)) - quals |= cp_type_quals (TREE_TYPE (obtype)); - type = cp_build_qualified_type (type, quals); - type = build_reference_type (type); - } - return type; -} - /* Build a unary fold expression of EXPR over OP. If IS_RIGHT is true, this is a right unary fold. Otherwise it is a left unary fold. 
*/ diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-decltype4.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-decltype4.C new file mode 100644 index 000000000000..0062d7b86720 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-decltype4.C @@ -0,0 +1,15 @@ +// PR c++/83167 +// { dg-do compile { target c++11 } } + +int main() { + int x; + const int y = 42; + + [] { + using ty1 = decltype((x)); + using ty1 = int&; + + using ty2 = decltype((y)); + using ty2 = const int&; + }; +} From dd3b75d8f99df9197bb58193037201ccdf994d8b Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 9 Dec 2023 00:17:30 +0000 Subject: [PATCH 124/311] Daily bump. --- gcc/ChangeLog | 519 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 22 ++ gcc/cp/ChangeLog | 37 +++ gcc/fortran/ChangeLog | 42 ++++ gcc/testsuite/ChangeLog | 378 +++++++++++++++++++++++++++++ libgcc/ChangeLog | 38 +++ libgomp/ChangeLog | 22 ++ 8 files changed, 1059 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6a402c8859b0..2ff57b89c8d7 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,522 @@ +2023-12-08 Vladimir N. Makarov + + PR rtl-optimization/112875 + * lra-eliminations.cc (lra_eliminate_regs_1): Change an assert. + Add ASM_OPERANDS case. + +2023-12-08 Robin Dapp + + PR target/112109 + * config/riscv/riscv-protos.h (expand_strcmp): Declare. + * config/riscv/riscv-string.cc (riscv_expand_strcmp): Add + strategy handling and delegation to scalar and vector expanders. + (expand_strcmp): Vectorized implementation. + * config/riscv/riscv.md: Add TARGET_VECTOR to strcmp and strncmp + expander. + +2023-12-08 Robin Dapp + + PR target/112109 + * config/riscv/riscv-protos.h (expand_rawmemchr): Add strlen + parameter. + * config/riscv/riscv-string.cc (riscv_expand_strlen): Call + rawmemchr. + (expand_rawmemchr): Add strlen handling. + * config/riscv/riscv.md: Add TARGET_VECTOR to strlen expander. 
+ +2023-12-08 Richard Sandiford + + * config/aarch64/aarch64-early-ra.cc (allocno_info::chain_next): + Put into an enum with... + (allocno_info::last_def_point): ...new member variable. + (allocno_info::m_current_bb_point): New member variable. + (likely_operand_match_p): Switch based on get_constraint_type, + rather than based on rtx code. Handle relaxed and special memory + constraints. + (early_ra::record_copy): Allow the source of an equivalence to be + assigned to more than once. + (early_ra::record_allocno_use): Invalidate any previous equivalence. + Initialize last_def_point. + (early_ra::record_allocno_def): Set last_def_point. + (early_ra::valid_equivalence_p): New function, split out from... + (early_ra::record_copy): ...here. Use last_def_point to handle + source registers that have a later definition. + (make_pass_aarch64_early_ra): Fix comment. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x2, vld1q_u16_x2, vld1q_u32_x2, vld1q_u64_x2): New. + (vld1q_s8_x2, vld1q_s16_x2, vld1q_s32_x2, vld1q_s64_x2): New. + (vld1q_f16_x2, vld1q_f32_x2): New. + (vld1q_p8_x2, vld1q_p16_x2, vld1q_p64_x2): New. + (vld1q_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vld1_x2): New entries. + * config/arm/neon.md (vld1_x2): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x3, vld1q_u16_x3, vld1q_u32_x3, vld1q_u64_x3): New. + (vld1q_s8_x3, vld1q_s16_x3, vld1q_s32_x3, vld1q_s64_x3): New. + (vld1q_f16_x3, vld1q_f32_x3): New. + (vld1q_p8_x3, vld1q_p16_x3, vld1q_p64_x3): New. + (vld1q_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vld1_x3): New entries. + * config/arm/neon.md (vld1_x3): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1q_u8_x4, vld1q_u16_x4, vld1q_u32_x4, vld1q_u64_x4): New. + (vld1q_s8_x4, vld1q_s16_x4, vld1q_s32_x4, vld1q_s64_x4): New. 
+ (vld1q_f16_x4, vld1q_f32_x4): New. + (vld1q_p8_x4, vld1q_p16_x4, vld1q_p64_x4): New. + (vld1q_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vld1_x4): New entries. + * config/arm/neon.md (vld1_x4): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x2, vst1_u16_x2, vst1_u32_x2, vst1_u64_x2): New. + (vst1_s8_x2, vst1_s16_x2, vst1_s32_x2, vst1_s64_x2): New. + (vst1_f16_x2, vst1_f32_x2): New. + (vst1_p8_x2, vst1_p16_x2, vst1_p64_x2): New. + (vst1_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vst1_x2): New entries. + * config/arm/neon.md (vst1_x2): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x3, vst1_u16_x3, vst1_u32_x3, vst1_u64_x3): New. + (vst1_s8_x3, vst1_s16_x3, vst1_s32_x3, vst1_s64_x3): New. + (vst1_f16_x3, vst1_f32_x3): New. + (vst1_p8_x3, vst1_p16_x3, vst1_p64_x3): New. + (vst1_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vst1_x3): New entries. + * config/arm/neon.md (vst1_x3): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1_u8_x4, vst1_u16_x4, vst1_u32_x4, vst1_u64_x4): New. + (vst1_s8_x4, vst1_s16_x4, vst1_s32_x4, vst1_s64_x4): New. + (vst1_f16_x4, vst1_f32_x4): New. + (vst1_p8_x4, vst1_p16_x4, vst1_p64_x4): New. + (vst1_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vst1_x4): New entries. + * config/arm/neon.md (vst1_x4): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x2, vst1q_u16_x2, vst1q_u32_x2, vst1q_u64_x2): New. + (vst1q_s8_x2, vst1q_s16_x2, vst1q_s32_x2, vst1q_s64_x2): New. + (vst1q_f16_x2, vst1q_f32_x2): New. + (vst1q_p8_x2, vst1q_p16_x2, vst1q_p64_x2): New. + (vst1q_bf16_x2): New. + * config/arm/arm_neon_builtins.def (vst1q_x2): New entries. + * config/arm/neon.md + (neon_vst1_x2): Updated from + neon_vst1_x2. 
+ * config/arm/iterators.md (VMEMX2): New mode iterator. + (VMEMX2_q): New mode attribute. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x3, vst1q_u16_x3, vst1q_u32_x3, vst1q_u64_x3): New. + (vst1q_s8_x3, vst1q_s16_x3, vst1q_s32_x3, vst1q_s64_x3): New. + (vst1q_f16_x3, vst1q_f32_x3): New. + (vst1q_p8_x3, vst1q_p16_x3, vst1q_p64_x3): New. + (vst1q_bf16_x3): New. + * config/arm/arm_neon_builtins.def (vst1q_x3): New entries. + * config/arm/neon.md (neon_vst1q_x3): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vst1q_u8_x4, vst1q_u16_x4, vst1q_u32_x4, vst1q_u64_x4): New. + (vst1q_s8_x4, vst1q_s16_x4, vst1q_s32_x4, vst1q_s64_x4): New. + (vst1q_f16_x4, vst1q_f32_x4): New. + (vst1q_p8_x4, vst1q_p16_x4, vst1q_p64_x4): New. + (vst1q_bf16_x4): New. + * config/arm/arm_neon_builtins.def (vst1q_x4): New entries. + * config/arm/neon.md (neon_vst1q_x4): New. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x2, vld1_u16_x2, vld1_u32_x2, vld1_u64_x2): New + (vld1_s8_x2, vld1_s16_x2, vld1_s32_x2, vld1_s64_x2): New. + (vld1_f16_x2, vld1_f32_x2): New. + (vld1_p8_x2, vld1_p16_x2, vld1_p64_x2): New. + (vld1_bf16_x2): New. + (vld1q_types_x2): Updated to use vld1q_x2 from + arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x2): Updated entries. + (vld1q_x2): New entries, but comes from the old vld1_x2 + * config/arm/neon.md + (neon_vld1_x2): Updated + from neon_vld1_x2. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x3, vld1_u16_x3, vld1_u32_x3, vld1_u64_x3): New + (vld1_s8_x3, vld1_s16_x3, vld1_s32_x3, vld1_s64_x3): New. + (vld1_f16_x3, vld1_f32_x3): New. + (vld1_p8_x3, vld1_p16_x3, vld1_p64_x3): New. + (vld1_bf16_x3): New. 
+ (vld1q_types_x3): Updated to use vld1q_x3 from + arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x3): Updated entries. + (vld1q_x3): New entries, but comes from the old vld1_x2 + * config/arm/neon.md (neon_vld1q_x3): Updated from + neon_vld1_x3. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-07 Ezra Sitorus + + * config/arm/arm_neon.h + (vld1_u8_x4, vld1_u16_x4, vld1_u32_x4, vld1_u64_x4): New + (vld1_s8_x4, vld1_s16_x4, vld1_s32_x4, vld1_s64_x4): New. + (vld1_f16_x4, vld1_f32_x4): New. + (vld1_p8_x4, vld1_p16_x4, vld1_p64_x4): New. + (vld1_bf16_x4): New. + (vld1q_types_x4): Updated to use vld1q_x4 + from arm_neon_builtins.def + * config/arm/arm_neon_builtins.def + (vld1_x4): Updated entries. + (vld1q_x4): New entries, but comes from the old vld1_x2 + * config/arm/neon.md (neon_vld1q_x4): + Updated from neon_vld1_x4. + +2023-12-08 Tobias Burnus + + * builtin-types.def (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE): New. + * omp-builtins.def (BUILT_IN_GOMP_REALLOC): New. + * builtins.cc (builtin_fnspec): Handle it. + * gimple-ssa-warn-access.cc (fndecl_alloc_p, + matching_alloc_calls_p): Likewise. + * gimple.cc (nonfreeing_call_p): Likewise. + * predict.cc (expr_expected_value_1): Likewise. + * tree-ssa-ccp.cc (evaluate_stmt): Likewise. + * tree.cc (fndecl_dealloc_argno): Likewise. + +2023-12-08 Richard Biener + + PR tree-optimization/112909 + * tree-ssa-uninit.cc (find_uninit_use): Look through a + single level of SSA name copies with single use. + +2023-12-08 Jiahao Xu + + * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use + simplify_gen_subreg instead of gen_rtx_SUBREG. + (loongarch_expand_vec_perm_const_2): Ditto. + (loongarch_expand_vec_cond_expr): Ditto. + +2023-12-08 Jiahao Xu + + * config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor): + If m_has_recip is true, uf return 1. + (loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence. 
+ +2023-12-08 Jiahao Xu + + * config/loongarch/genopts/loongarch.opt.in (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lasx.md (div3): New expander. + (*div3): Rename. + (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/loongarch-protos.h (loongarch_emit_swrsqrtsf): New prototype. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.cc (loongarch_option_override_internal): Set + recip_mask for -mrecip and -mrecip= options. + (loongarch_emit_swrsqrtsf): New function. + (loongarch_emit_swdivsf): Ditto. + * config/loongarch/loongarch.h (RECIP_MASK_NONE, RECIP_MASK_DIV, RECIP_MASK_SQRT + RECIP_MASK_RSQRT, RECIP_MASK_VEC_DIV, RECIP_MASK_VEC_SQRT, RECIP_MASK_VEC_RSQRT + RECIP_MASK_ALL): New bitmasks. + (TARGET_RECIP_DIV, TARGET_RECIP_SQRT, TARGET_RECIP_RSQRT, TARGET_RECIP_VEC_DIV + TARGET_RECIP_VEC_SQRT, TARGET_RECIP_VEC_RSQRT): New tests. + * config/loongarch/loongarch.md (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/loongarch.opt (recip_mask): New variable. + (-mrecip, -mrecip): New options. + * config/loongarch/lsx.md (div3): New expander. + (*div3): Rename. + (sqrt2): New expander. + (*sqrt2): Rename. + (rsqrt2): New expander. + * config/loongarch/predicates.md (reg_or_vecotr_1_operand): New predicate. + * doc/invoke.texi (LoongArch Options): Document new options. + +2023-12-08 Jiahao Xu + + * config/loongarch/lasx.md (lasx_xvfrecip_): Renamed to .. + (recip3): .. this. + * config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine + to new pattern name. + (CODE_FOR_lsx_vfrecip_s): Ditto. + (CODE_FOR_lasx_xvfrecip_d): Ditto. + (CODE_FOR_lasx_xvfrecip_s): Ditto. + (loongarch_expand_builtin_direct): For the vector recip instructions, construct a + temporary parameter const1_vector. + * config/loongarch/lsx.md (lsx_vfrecip_): Renamed to .. + (recip3): .. this. 
+ * config/loongarch/predicates.md (const_vector_1_operand): New predicate. + +2023-12-08 Jiahao Xu + + * config/loongarch/lasx.md (lasx_xvfrsqrt_): Renamed to .. + (rsqrt2): .. this. + * config/loongarch/loongarch-builtins.cc + (CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name. + (CODE_FOR_lsx_vfrsqrt_s): Ditto. + (CODE_FOR_lasx_xvfrsqrt_d): Ditto. + (CODE_FOR_lasx_xvfrsqrt_s): Ditto. + * config/loongarch/loongarch.cc (use_rsqrt_p): New function. + (loongarch_optab_supported_p): Ditto. + (TARGET_OPTAB_SUPPORTED_P): New hook. + * config/loongarch/loongarch.md (*rsqrta): Remove. + (*rsqrt2): New insn pattern. + (*rsqrtb): Remove. + * config/loongarch/lsx.md (lsx_vfrsqrt_): Renamed to .. + (rsqrt2): .. this. + +2023-12-08 Jiahao Xu + + * config/loongarch/genopts/isa-evolution.in (fecipe): Add. + * config/loongarch/larchintrin.h (__frecipe_s): New intrinsic. + (__frecipe_d): Ditto. + (__frsqrte_s): Ditto. + (__frsqrte_d): Ditto. + * config/loongarch/lasx.md (lasx_xvfrecipe_): New insn pattern. + (lasx_xvfrsqrte_): Ditto. + * config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic. + (__lasx_xvfrecipe_d): Ditto. + (__lasx_xvfrsqrte_s): Ditto. + (__lasx_xvfrsqrte_d): Ditto. + * config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates. + (LSX_EXT_BUILTIN): New macro. + (LASX_EXT_BUILTIN): Ditto. + * config/loongarch/loongarch-cpucfg-map.h: Regenerate. + * config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe". + * config/loongarch/loongarch-def.cc: Regenerate. + * config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate. + * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE. + * config/loongarch/loongarch.md (loongarch_frecipe_): New insn pattern. + (loongarch_frsqrte_): Ditto. + * config/loongarch/loongarch.opt: Regenerate. + * config/loongarch/lsx.md (lsx_vfrecipe_): New insn pattern. + (lsx_vfrsqrte_): Ditto. + * config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic. 
+ (__lsx_vfrecipe_d): Ditto. + (__lsx_vfrsqrte_s): Ditto. + (__lsx_vfrsqrte_d): Ditto. + * doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics. + +2023-12-08 Richard Biener + + * tree-outof-ssa.cc (rewrite_out_of_ssa): Dump GIMPLE once only, + after final IL adjustments. + +2023-12-08 Pan Li + + * config/riscv/vector-iterators.md: Replace RVVM2SI to RVVM2SF + for mode attr V_F2DI_CONVERT_BRIDGE. + +2023-12-08 Jiahao Xu + + * config/loongarch/lasx.md (xorsign3): New expander. + * config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow + conversion between LSX vector mode and scalar fp mode. + * config/loongarch/loongarch.md (@xorsign3): New expander. + * config/loongarch/lsx.md (@xorsign3): Ditto. + +2023-12-08 Jakub Jelinek + + PR tree-optimization/112902 + * gimple-lower-bitint.cc (gimple_lower_bitint): For a narrowing + or same precision cast don't set SSA_NAME_VERSION in m_names only + if use_stmt is mergeable_op or fall through into the check that + use is a store or rhs1 is not mergeable or other reasons prevent + merging. + +2023-12-08 Jakub Jelinek + + PR tree-optimization/112901 + * vr-values.cc + (simplify_using_ranges::simplify_float_conversion_using_ranges): + Return false if rhs1 has BITINT_TYPE type with BLKmode TYPE_MODE. + +2023-12-08 Jakub Jelinek + + PR middle-end/112411 + * haifa-sched.cc (extend_h_i_d): Use 3U instead of 3 in + 3 * get_max_uid () / 2 calculation. + +2023-12-08 Lulu Cheng + + * config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110. + * config/loongarch/genopts/loongarch.opt.in: Likewise. + * config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro. + (fill_native_cpu_config): Define a new variable hw_isa_evolution record the + extended instruction set support read from cpucfg. + * config/loongarch/loongarch-def.cc: Set evolution at initialization. + * config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete. + (ISA_BASE_LA64V110): Likewise. 
+ (N_ISA_BASE_TYPES): Likewise. + (defined): Likewise. + * config/loongarch/loongarch-opts.cc: Likewise. + * config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise. + (ISA_BASE_IS_LA64V110): Likewise. + * config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise. + * config/loongarch/loongarch.opt: Regenerate. + +2023-12-08 Xi Ruoyao + + * config/loongarch/loongarch-def.h: Remove extern "C". + (loongarch_isa_base_strings): Declare as loongarch_def_array + instead of plain array. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (loongarch_cpu_strings): Likewise. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_issue_rate): Likewise. + (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_isa): Add a constructor and field setter functions. + * config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not + include for target libraries. + * config/loongarch/loongarch-opts.cc: Comment code that doesn't + run and causes compilation errors. + * config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise. + (struct loongarch_rtx_cost_data): Likewise. + (struct loongarch_cache): Likewise. + (struct loongarch_align): Likewise. + * config/loongarch/t-loongarch: Compile loongarch-def.cc with the + C++ compiler. + * config/loongarch/loongarch-def-array.h: New file for a + std:array like data structure with position setter function. + * config/loongarch/loongarch-def.c: Rename to ... + * config/loongarch/loongarch-def.cc: ... here. + (loongarch_cpu_strings): Define as loongarch_def_array instead + of plain array. + (loongarch_cpu_default_isa): Likewise. + (loongarch_cpu_cache): Likewise. + (loongarch_cpu_align): Likewise. + (loongarch_cpu_rtx_cost_data): Likewise. + (loongarch_cpu_issue_rate): Likewise. 
+ (loongarch_cpu_multipass_dfa_lookahead): Likewise. + (loongarch_isa_base_strings): Likewise. + (loongarch_isa_ext_strings): Likewise. + (loongarch_abi_base_strings): Likewise. + (loongarch_abi_ext_strings): Likewise. + (loongarch_cmodel_strings): Likewise. + (abi_minimal_isa): Likewise. + (loongarch_rtx_cost_optimize_size): Use field setter functions + instead of designated initializers. + (loongarch_rtx_cost_data): Implement default constructor. + +2023-12-08 Jakub Jelinek + + PR middle-end/112411 + * params.opt (-param=min-nondebug-insn-uid=): Add + IntegerRange(0, 1073741824). + * lra.cc (check_and_expand_insn_recog_data): Use 3U rather than 3 + in * 3 / 2 computation and if the result is smaller or equal to + index, use index + 1. + +2023-12-08 Haochen Jiang + + * config/i386/driver-i386.cc (host_detect_local_cpu): + Do not append "-mno-" for Xeon Phi ISAs. + * config/i386/i386-options.cc (ix86_option_override_internal): + Emit a warning for KNL/KNM targets. + * config/i386/i386.opt: Emit a warning for Xeon Phi ISAs. + +2023-12-08 Juzhe-Zhong + + * config/riscv/riscv-vector-costs.cc (costs::better_main_loop_than_p): + Remove redundant check. + +2023-12-08 Hao Liu + + PR tree-optimization/112774 + * tree-pretty-print.cc: if nonwrapping flag is set, chrec will be + printed with additional info. + * tree-scalar-evolution.cc: add record_nonwrapping_chrec and + nonwrapping_chrec_p to set and check the new flag respectively. + * tree-scalar-evolution.h: Likewise. + * tree-ssa-loop-niter.cc (idx_infer_loop_bounds, + infer_loop_bounds_from_pointer_arith, infer_loop_bounds_from_signedness, + scev_probably_wraps_p): call record_nonwrapping_chrec before + record_nonwrapping_iv, call nonwrapping_chrec_p to check the flag is + set and return false from scev_probably_wraps_p. + * tree-vect-loop.cc (vect_analyze_loop): call + free_numbers_of_iterations_estimates explicitly. 
+ * tree-core.h: document the nothrow_flag usage in CHREC_NOWRAP + * tree.h: add CHREC_NOWRAP(NODE), base.nothrow_flag is used to + represent the nonwrapping info. + +2023-12-08 Fei Gao + + * ifcvt.cc (noce_try_cond_zero_arith): New function. + (noce_emit_czero, get_base_reg): Likewise. + (noce_cond_zero_binary_op_supported): Likewise. + (noce_bbs_ok_for_cond_zero_arith): Likewise. + (noce_process_if_block): Use noce_try_cond_zero_arith. + Co-authored-by: Xiao Zeng + 2023-12-07 Juzhe-Zhong * config/riscv/riscv-protos.h (expand_vec_series): Adapt function. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9e296399fe4b..98957cde41b6 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231208 +20231209 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 296acce9d25d..cf056decf722 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,25 @@ +2023-12-08 David Malcolm + + * sm-taint.cc (taint_state_machine::alt_get_inherited_state): Fix + handling of TRUNC_MOD_EXPR. + +2023-12-08 David Malcolm + + * region-model.cc (contains_uninit_p): Only check for + svalues that the infoleak warning can handle. + +2023-12-08 David Malcolm + + PR analyzer/112889 + * store.h (concrete_binding::concrete_binding): Strengthen + assertion to require size to be positive, rather than just + non-zero. + (concrete_binding::mark_deleted): Use size rather than start bit + offset. + (concrete_binding::mark_empty): Likewise. + (concrete_binding::is_deleted): Likewise. + (concrete_binding::is_empty): Likewise. + 2023-12-07 Alexandre Oliva * region-model.cc (has_nondefault_case_for_value_p): Take diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 53c8dce26ffd..3939154623db 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,40 @@ +2023-12-08 Patrick Palka + + PR c++/83167 + * semantics.cc (capture_decltype): Inline into its only caller ... + (finish_decltype_type): ... here. Update nearby comment to refer + to recent standard. Add FIXME.
Restrict uncaptured variable type + transformation to happen only for lambdas with a by-copy + capture-default. + +2023-12-08 Jakub Jelinek + + * parser.cc (cp_parser_std_attribute_spec): Return void_list_node + rather than NULL_TREE if token is neither CPP_OPEN_SQUARE nor + RID_ALIGNAS CPP_KEYWORD. + (cp_parser_std_attribute_spec_seq): For attr_spec == void_list_node + break, for attr_spec == NULL_TREE continue. + +2023-12-08 Jakub Jelinek + + PR sanitizer/112727 + * cp-gimplify.cc (cp_fold): If SAVE_EXPR has been previously + folded, unshare_expr what is returned. + +2023-12-08 Patrick Palka + + PR c++/112658 + * parser.cc (cp_parser_expression_statement): If the statement + is error_mark_node, make sure we've seen_error(). + +2023-12-08 Patrick Palka + + PR c++/112658 + PR c++/94264 + * typeck.cc (cp_build_c_cast): If we're committed to a const_cast + and the result is erroneous, call build_const_cast_1 a second + time to issue errors. Use complain=tf_none instead of =false. + 2023-12-06 David Malcolm * cp-tree.h (cxx_print_error_function): Make diagnostic_info param diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 5a823eefcd87..04b711199ff6 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,45 @@ +2023-12-08 Tobias Burnus + + * dump-parse-tree.cc (show_omp_node): Handle EXEC_OMP_ALLOCATE + and EXEC_OMP_ALLOCATORS. + * f95-lang.cc (ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LIST): + Add 'ECF_LEAF | ECF_MALLOC' to existing 'ECF_NOTHROW'. + (ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST): Define. + * gfortran.h (gfc_omp_clauses): Add contained_in_target_construct. + * invoke.texi (-fopenacc, -fopenmp): Update based on C version. + (-fopenmp-simd): New, based on C version. + (-fopenmp-allocators): New. + * lang.opt (fopenmp-allocators): Add. + * openmp.cc (resolve_omp_clauses): For allocators/allocate directive, + add target and no dynamic_allocators diagnostic and more invalid + diagnostic. 
+ * parse.cc (decode_omp_directive): Set contains_teams_construct. + * trans-array.h (gfc_array_allocate): Update prototype. + (gfc_conv_descriptor_version): New prototype. + * trans-decl.cc (gfc_init_default_dt): Fix comment. + * trans-array.cc (gfc_conv_descriptor_version): New. + (gfc_array_allocate): Support GOMP_alloc allocation. + (gfc_alloc_allocatable_for_assignment, structure_alloc_comps): + Handle GOMP_free/omp_realloc as needed. + * trans-expr.cc (gfc_conv_procedure_call): Likewise. + (alloc_scalar_allocatable_for_assignment): Likewise. + * trans-intrinsic.cc (conv_intrinsic_move_alloc): Likewise. + * trans-openmp.cc (gfc_trans_omp_allocators, + gfc_trans_omp_directive): Handle allocators/allocate directive. + (gfc_omp_call_add_alloc, gfc_omp_call_is_alloc): New. + * trans-stmt.h (gfc_trans_allocate): Update prototype. + * trans-stmt.cc (gfc_trans_allocate): Support GOMP_alloc. + * trans-types.cc (gfc_get_dtype_rank_type): Set version field. + * trans.cc (gfc_allocate_using_malloc, gfc_allocate_allocatable): + Update to handle GOMP_alloc. + (gfc_deallocate_with_status, gfc_deallocate_scalar_with_status): + Handle GOMP_free. + (trans_code): Update call. + * trans.h (gfc_allocate_allocatable, gfc_allocate_using_malloc): + Update prototype. + (gfc_omp_call_add_alloc, gfc_omp_call_is_alloc): New prototype. + * types.def (BT_FN_PTR_PTR_SIZE_PTRMODE_PTRMODE): New. + 2023-12-06 David Malcolm * error.cc (gfc_diagnostic_starter): Make diagnostic_info param diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ea17860858b7..61cb3fba5d65 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,381 @@ +2023-12-08 Patrick Palka + + PR c++/83167 + * g++.dg/cpp0x/lambda/lambda-decltype4.C: New test. + +2023-12-08 David Malcolm + + * c-c++-common/analyzer/taint-modulus-1.c: New test. + +2023-12-08 David Malcolm + + * gcc.dg/plugin/infoleak-uninit-size-1.c: New test. + * gcc.dg/plugin/infoleak-uninit-size-2.c: New test. 
+ * gcc.dg/plugin/plugin.exp: Add the new tests. + +2023-12-08 Vladimir N. Makarov + + PR rtl-optimization/112875 + * gcc.target/i386/pr112875.c: New test. + +2023-12-08 Jakub Jelinek + + * g++.dg/cpp0x/gen-attrs-79.C: New test. + +2023-12-08 Jakub Jelinek + + PR sanitizer/112727 + * c-c++-common/ubsan/pr112727.c: New test. + +2023-12-08 Marek Polacek + + PR c++/88848 + * g++.dg/inherit/multiple2.C: New test. + +2023-12-08 Patrick Palka + + PR c++/112658 + PR c++/94264 + * g++.dg/cpp0x/initlist-array20.C: New test. + +2023-12-08 Robin Dapp + + * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: New test. + * gcc.target/riscv/rvv/autovec/builtin/strcmp.c: New test. + * gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: New test. + * gcc.target/riscv/rvv/autovec/builtin/strncmp.c: New test. + +2023-12-08 Robin Dapp + + * gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: New test. + * gcc.target/riscv/rvv/autovec/builtin/strlen.c: New test. + +2023-12-08 Richard Sandiford + + * gcc.target/aarch64/sme/strided_2.c: New test. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new test. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1q_p64_xN_1.c: Add new tests. 
+ +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1_base_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_bf16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_fp16_xN_1.c: Add new test. + * gcc.target/arm/simd/vst1_p64_xN_1.c: Add new test. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vst1q_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vst1q_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. 
+ * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-08 Richard Earnshaw + + Revert: + 2023-12-08 Ezra Sitorus + + * gcc.target/arm/simd/vld1_base_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_bf16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_fp16_xN_1.c: Add new tests. + * gcc.target/arm/simd/vld1_p64_xN_1.c: Add new tests. + +2023-12-08 Tobias Burnus + + * gfortran.dg/gomp/allocate-14.f90: Add coarray and + not-listed tests. + * gfortran.dg/gomp/allocate-5.f90: Remove sorry dg-message. + * gfortran.dg/bind_c_array_params_2.f90: Update expected + dump for dtype '.version=0'. + * gfortran.dg/gomp/allocate-16.f90: New test. + * gfortran.dg/gomp/allocators-3.f90: New test. + * gfortran.dg/gomp/allocators-4.f90: New test. + +2023-12-08 Richard Biener + + PR tree-optimization/112909 + * gcc.dg/uninit-pr112909.c: New testcase. + +2023-12-08 Marc Poulhiès + + Revert: + 2023-12-07 Marc Poulhiès + + * gcc.dg/vect/vect-ifcvt-18.c: Add dep on avx_runtime. + * gcc.dg/vect/vect-simd-clone-16f.c: Likewise. + * gcc.dg/vect/vect-simd-clone-18f.c: Likewise. + +2023-12-08 Jiahao Xu + + * gcc.target/loongarch/pr112476-3.c: New test. + * gcc.target/loongarch/pr112476-4.c: New test. + +2023-12-08 Jiahao Xu + + PR target/112611 + * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Sure index less than 64. + * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto. + +2023-12-08 Jiahao Xu + + * gcc.target/loongarch/divf.c: New test. + * gcc.target/loongarch/recip-divf.c: New test. + * gcc.target/loongarch/recip-sqrtf.c: New test. + * gcc.target/loongarch/sqrtf.c: New test. 
+ * gcc.target/loongarch/vector/lasx/lasx-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-recip.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-divf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip-sqrtf.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-recip.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-sqrtf.c: New test. + +2023-12-08 Jiahao Xu + + * gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test. + +2023-12-08 Jiahao Xu + + * gcc.target/loongarch/larch-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test. + +2023-12-08 Pan Li + + * gcc.target/riscv/rvv/autovec/unop/math-lroundf16-rv64-ice-1.c: New test. + +2023-12-08 Jiahao Xu + + * gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test. + * gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test. + * gcc.target/loongarch/xorsign-run.c: New test. + * gcc.target/loongarch/xorsign.c: New test. + +2023-12-08 Jakub Jelinek + + PR tree-optimization/112902 + * gcc.dg/bitint-52.c: New test. + +2023-12-08 Jakub Jelinek + + PR tree-optimization/112901 + * gcc.dg/bitint-51.c: New test. + +2023-12-08 Jakub Jelinek + + PR middle-end/112411 + * gcc.dg/params/blocksort-part.c: Add dg-skip-if for + --param min-nondebug-insn-uid=1073741824. + +2023-12-08 Haochen Jiang + + * g++.dg/other/i386-2.C: Adjust testcases. + * g++.dg/other/i386-3.C: Ditto. + * g++.dg/pr80481.C: Ditto. + * gcc.dg/pr71279.c: Ditto. 
+ * gcc.target/i386/avx5124fmadd-v4fmaddps-1.c: Ditto. + * gcc.target/i386/avx5124fmadd-v4fmaddps-2.c: Ditto. + * gcc.target/i386/avx5124fmadd-v4fmaddss-1.c: Ditto. + * gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c: Ditto. + * gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c: Ditto. + * gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c: Ditto. + * gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c: Ditto. + * gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c: Ditto. + * gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c: Ditto. + * gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c: Ditto. + * gcc.target/i386/avx512er-vexp2pd-1.c: Ditto. + * gcc.target/i386/avx512er-vexp2pd-2.c: Ditto. + * gcc.target/i386/avx512er-vexp2ps-1.c: Ditto. + * gcc.target/i386/avx512er-vexp2ps-2.c: Ditto. + * gcc.target/i386/avx512er-vrcp28pd-1.c: Ditto. + * gcc.target/i386/avx512er-vrcp28pd-2.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ps-1.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ps-2.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ps-3.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ps-4.c: Ditto. + * gcc.target/i386/avx512er-vrcp28sd-1.c: Ditto. + * gcc.target/i386/avx512er-vrcp28sd-2.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ss-1.c: Ditto. + * gcc.target/i386/avx512er-vrcp28ss-2.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28pd-1.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28pd-2.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-1.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-2.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-3.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-4.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-5.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ps-6.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28sd-1.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ss-1.c: Ditto. + * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto. + * gcc.target/i386/avx512f-gather-1.c: Ditto. + * gcc.target/i386/avx512f-gather-2.c: Ditto. 
+ * gcc.target/i386/avx512f-gather-3.c: Ditto. + * gcc.target/i386/avx512f-gather-4.c: Ditto. + * gcc.target/i386/avx512f-gather-5.c: Ditto. + * gcc.target/i386/avx512f-i32gatherd512-1.c: Ditto. + * gcc.target/i386/avx512f-i32gatherd512-2.c: Ditto. + * gcc.target/i386/avx512f-i32gatherpd512-1.c: Ditto. + * gcc.target/i386/avx512f-i32gatherpd512-2.c: Ditto. + * gcc.target/i386/avx512f-i32gatherps512-1.c: Ditto. + * gcc.target/i386/avx512f-vect-perm-1.c: Ditto. + * gcc.target/i386/avx512f-vect-perm-2.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf0dpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf0dps-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf0qpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf0qps-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf1dpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf1dps-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf1qpd-1.c: Ditto. + * gcc.target/i386/avx512pf-vscatterpf1qps-1.c: Ditto. + * gcc.target/i386/funcspec-56.inc: Ditto. + * gcc.target/i386/pr103404.c: Ditto. + * gcc.target/i386/pr104448.c: Ditto. + * gcc.target/i386/pr107934.c: Ditto. + * gcc.target/i386/pr64387.c: Ditto. + * gcc.target/i386/pr70728.c: Ditto. + * gcc.target/i386/pr71346.c: Ditto. + * gcc.target/i386/pr82941-2.c: Ditto. + * gcc.target/i386/pr82942-1.c: Ditto. + * gcc.target/i386/pr82942-2.c: Ditto. + * gcc.target/i386/pr82990-1.c: Ditto. + * gcc.target/i386/pr82990-3.c: Ditto. + * gcc.target/i386/pr82990-4.c: Ditto. + * gcc.target/i386/pr82990-6.c: Ditto. + * gcc.target/i386/pr88713-3.c: Ditto. 
+ * gcc.target/i386/pr89523-5.c: Ditto. + * gcc.target/i386/pr89523-6.c: Ditto. + * gcc.target/i386/pr91033.c: Ditto. + * gcc.target/i386/pr94561.c: Ditto. + * gcc.target/i386/prefetchwt1-1.c: Ditto. + * gcc.target/i386/sse-12.c: Ditto. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-26.c: Ditto. + * gcc.target/i386/pr69471-3.c: Removed. + +2023-12-08 Hao Liu + + * gcc.dg/tree-ssa/scev-16.c: New test. + +2023-12-08 Fei Gao + + * gcc.target/riscv/zicond_ifcvt_opt.c: New file. + +2023-12-08 David Malcolm + + PR analyzer/112889 + * c-c++-common/analyzer/ice-pr112889.c: New test. + 2023-12-07 Juzhe-Zhong * gcc.target/riscv/rvv/autovec/slp-interleave-1.c: New test. diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 8dc9785ccb11..279d71802645 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,41 @@ +2023-12-08 Florian Weimer + + * libgcov-interface.c (__gcov_fork): Use __builtin_fork instead + of fork. + +2023-12-08 Szabolcs Nagy + + * config.in: Regenerate. + +2023-12-08 Szabolcs Nagy + + * config/aarch64/__arm_za_disable.S: Add hidden alias. + * config/aarch64/aarch64-unwind.h: Reset the SME state before + EH return via the _Unwind_Frames_Extra hook. + +2023-12-08 Szabolcs Nagy + + * config/aarch64/t-aarch64: Add sources to the build. + * config/aarch64/__aarch64_have_sme.c: New file. + * config/aarch64/__arm_sme_state.S: New file. + * config/aarch64/__arm_tpidr2_restore.S: New file. + * config/aarch64/__arm_tpidr2_save.S: New file. + * config/aarch64/__arm_za_disable.S: New file. + * config/aarch64/aarch64-asm.h: New file. + * config/aarch64/libgcc-sme.ver: New file. + +2023-12-08 Szabolcs Nagy + + * config.in: Undef HAVE___GETAUXVAL. + * configure: Regenerate. + * configure.ac: Check for __getauxval. + +2023-12-08 Szabolcs Nagy + + * config.in: Undef HAVE_AS_VARIANT_PCS. + * configure: Regenerate. + * configure.ac: Check for .variant_pcs. 
+ 2023-12-07 Alexandre Oliva * configure.ac: Check for strub support. diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 9e7453787560..77280e09e073 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,25 @@ +2023-12-08 Tobias Burnus + + * allocator.c (struct fort_alloc_splay_tree_key_s, + fort_alloc_splay_compare, GOMP_add_alloc, GOMP_is_alloc): New. + * libgomp.h: Define splay_tree_static for 'reverse' splay tree. + * libgomp.map (GOMP_5.1.2): New; add GOMP_add_alloc and + GOMP_is_alloc; move GOMP_target_map_indirect_ptr from ... + (GOMP_5.1.1): ... here. + * libgomp.texi (Impl. Status, Memory management): Update for + allocators/allocate directives. + * splay-tree.c: Handle splay_tree_static define to declare all + functions as static. + (splay_tree_lookup_node): New. + * splay-tree.h: Handle splay_tree_decl_only define. + (splay_tree_lookup_node): New prototype. + * target.c: Define splay_tree_static for 'reverse'. + * testsuite/libgomp.fortran/allocators-1.f90: New test. + * testsuite/libgomp.fortran/allocators-2.f90: New test. + * testsuite/libgomp.fortran/allocators-3.f90: New test. + * testsuite/libgomp.fortran/allocators-4.f90: New test. + * testsuite/libgomp.fortran/allocators-5.f90: New test. + 2023-12-06 Andrew Stubbs * config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here. From 5a9e8b0cbbc1a10d73f1809e76bfec73a4386be3 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Fri, 8 Dec 2023 21:41:33 -0300 Subject: [PATCH 125/311] strub: skip emutls after strubm errors The emutls pass requires PROP_ssa, but if the strubm pass (or any other pre-SSA pass) issues errors, all of the build_ssa_passes are skipped, so the property is not set, but emutls still attempts to run, on targets that use it, despite earlier errors, so it hits the unsatisfied requirement. Adjust emutls to be skipped in case of earlier errors. for gcc/ChangeLog * tree-emutls.cc: Include diagnostic-core.h. (pass_ipa_lower_emutls::gate): Skip if errors were seen. 
--- gcc/tree-emutls.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/tree-emutls.cc b/gcc/tree-emutls.cc index 5dca5a829135..38de202717a1 100644 --- a/gcc/tree-emutls.cc +++ b/gcc/tree-emutls.cc @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "tree-iterator.h" #include "gimplify.h" +#include "diagnostic-core.h" /* for seen_error */ /* Whenever a target does not support thread-local storage (TLS) natively, we can emulate it with some run-time support in libgcc. This will in @@ -841,7 +842,7 @@ public: bool gate (function *) final override { /* If the target supports TLS natively, we need do nothing here. */ - return !targetm.have_tls; + return !targetm.have_tls && !seen_error (); } unsigned int execute (function *) final override From 0f3bac474e8f6563a59f814ccf7609ced48b1157 Mon Sep 17 00:00:00 2001 From: Hans-Peter Nilsson Date: Thu, 7 Dec 2023 17:23:30 +0100 Subject: [PATCH 126/311] testsuite: Remove gcc.dg/tree-ssa/scev-3.c -4.c and 5.c These tests were recently xfailed on ilp32 targets though passing on almost all ilp32 targets (known exceptions: ia32 and some arm subtargets). They've been changed around too much to remain useful. PR testsuite/112786 * gcc.dg/tree-ssa/scev-3.c, gcc.dg/tree-ssa/scev-4.c, gcc.dg/tree-ssa/scev-5.c: Remove. 
--- gcc/testsuite/gcc.dg/tree-ssa/scev-3.c | 44 ----------------------- gcc/testsuite/gcc.dg/tree-ssa/scev-4.c | 49 -------------------------- gcc/testsuite/gcc.dg/tree-ssa/scev-5.c | 44 ----------------------- 3 files changed, 137 deletions(-) delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-3.c delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-4.c delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-5.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c deleted file mode 100644 index beea9aed9fe9..000000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-3.c +++ /dev/null @@ -1,44 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */ - -int *a_p; -int a[1000]; - -void __GIMPLE (ssa,startwith ("loop")) -f (int k) -{ - int i; - int * _1; - -__BB(2): - i_5 = k_4(D); - if (i_5 <= 999) - goto __BB4; - else - goto __BB3; - -__BB(3): - return; - -__BB(4): - goto __BB5; - -__BB(5): - i_12 = __PHI (__BB6: i_9, __BB4: i_5); - _1 = &a[i_12]; - a_p = _1; - __MEM ((int *)&a)[i_12] = 100; - i_9 = i_5 + i_12; - if (i_9 <= 999) - goto __BB6; - else - goto __BB3; - -__BB(6): - ; - goto __BB5; - -} - -/* Not all 32-bit systems fail this, but several do. 
*/ -/* { dg-final { scan-tree-dump-times "&a" 1 "ivopts" { xfail ilp32 } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c deleted file mode 100644 index a97f75f81f65..000000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-4.c +++ /dev/null @@ -1,49 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */ - -typedef struct { - int x; - int y; -} S; - -int *a_p; -S a[1000]; - -void __GIMPLE (ssa, startwith ("loop")) -f (int k) -{ - int i; - int * _1; - -__BB(2): - i_5 = k_4(D); - if (i_5 <= 999) - goto __BB4; - else - goto __BB3; - -__BB(3): - return; - -__BB(4): - goto __BB5; - -__BB(5): - i_12 = __PHI (__BB6: i_9, __BB4: i_5); - _1 = &a[i_12].y; - a_p = _1; - __MEM ((int *)&a)[i_12].y = 100; - i_9 = i_5 + i_12; - if (i_9 <= 999) - goto __BB6; - else - goto __BB3; - -__BB(6): - ; - goto __BB5; - -} - -/* Not all 32-bit systems fail this, but several do. */ -/* { dg-final { scan-tree-dump-times "&a" 1 "ivopts" { xfail ilp32 } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c deleted file mode 100644 index 08f4260403c4..000000000000 --- a/gcc/testsuite/gcc.dg/tree-ssa/scev-5.c +++ /dev/null @@ -1,44 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fgimple -fdump-tree-ivopts" } */ - -int *a_p; -int a[1000]; - -void __GIMPLE (ssa,startwith ("loop")) -f (int k) -{ - long long int i; - int * _1; - -__BB(2): - i_5 = (long long int) k_4(D); - if (i_5 <= 999ll) - goto __BB4; - else - goto __BB3; - -__BB(3): - return; - -__BB(4): - goto __BB5; - -__BB(5): - i_12 = __PHI (__BB6: i_9, __BB4: i_5); - _1 = &a[i_12]; - a_p = _1; - __MEM ((int *)&a)[i_12] = 100; - i_9 = i_5 + i_12; - if (i_9 <= 999ll) - goto __BB6; - else - goto __BB3; - -__BB(6): - ; - goto __BB5; - -} - -/* Not all 32-bit systems fail this, but several do. 
*/ -/* { dg-final { scan-tree-dump-times "&a" 1 "ivopts" { xfail ilp32 } } } */ From 9363d91956931bb28735bed97035b9ec965c850f Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Sat, 9 Dec 2023 16:31:53 +0800 Subject: [PATCH 127/311] RISC-V: Fix VLS mode movmisalign bug PR112932 made me notice that there is a bug in the current VLS mode misalign pattern. Adapt it to be the same as VLA mode. Committed as it is an obvious fix. PR target/112932 gcc/ChangeLog: * config/riscv/vector.md (movmisalign): Fix VLSmode bugs. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto. * gcc.target/riscv/rvv/autovec/pr112932.c: New test. --- gcc/config/riscv/vector.md | 23 +------ .../gcc.target/riscv/rvv/autovec/pr112932.c | 66 +++++++++++++++++++ .../riscv/rvv/autovec/vls/misalign-1.c | 6 +- 3 files changed, 70 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112932.c diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index ba0714a99719..bace1a2852c6 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1334,31 +1334,12 @@ [(set_attr "type" "vmov") (set_attr "mode" "")]) -(define_expand "movmisalign" - [(set (match_operand:VLS 0 "nonimmediate_operand") - (match_operand:VLS 1 "general_operand"))] - "TARGET_VECTOR" - { - /* To support misalign data movement, we should use - minimum element alignment load/store.
*/ - unsigned int size = GET_MODE_SIZE (GET_MODE_INNER (mode)); - poly_int64 nunits = GET_MODE_NUNITS (mode) * size; - machine_mode mode = riscv_vector::get_vector_mode (QImode, nunits).require (); - operands[0] = gen_lowpart (mode, operands[0]); - operands[1] = gen_lowpart (mode, operands[1]); - if (MEM_P (operands[0]) && !register_operand (operands[1], mode)) - operands[1] = force_reg (mode, operands[1]); - riscv_vector::emit_vlmax_insn (code_for_pred_mov (mode), riscv_vector::UNARY_OP, operands); - DONE; - } -) - ;; According to RVV ISA: ;; If an element accessed by a vector memory instruction is not naturally aligned to the size of the element, ;; either the element is transferred successfully or an address misaligned exception is raised on that element. (define_expand "movmisalign" - [(set (match_operand:V 0 "nonimmediate_operand") - (match_operand:V 1 "general_operand"))] + [(set (match_operand:V_VLS 0 "nonimmediate_operand") + (match_operand:V_VLS 1 "general_operand"))] "TARGET_VECTOR && TARGET_VECTOR_MISALIGN_SUPPORTED" { emit_move_insn (operands[0], operands[1]); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112932.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112932.c new file mode 100644 index 000000000000..4ae6ec028173 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112932.c @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target riscv_v } */ + +#include +int a, j, n, b, c, o, d, g, h; +int e[8]; +long f[8][6]; +void l() { + o = -27; + for (; o; o++) { + *e = 1; + if (a >= n) { + d = 0; + for (; d <= 7; d++) + e[d] = c; + } + } + j = 0; + for (; j < 8; j++) { + g = 0; + for (; g < 2; g++) { + h = 1; + for (; h < 3; h++) + f[j][g * 2 + h] = 1; + } + } + unsigned long *m = &f[1][1]; + *m = 0; +} +int main() { + l(); + assert (f[0][1] == 1); + assert (f[0][2] == 1); + assert (f[0][3] == 1); + assert (f[0][4] == 1); + assert (f[1][1] == 0); + assert (f[1][2] == 1); + assert (f[1][3] 
== 1); + assert (f[1][4] == 1); + assert (f[2][1] == 1); + assert (f[2][2] == 1); + assert (f[2][3] == 1); + assert (f[2][4] == 1); + assert (f[3][1] == 1); + assert (f[3][2] == 1); + assert (f[3][3] == 1); + assert (f[3][4] == 1); + assert (f[4][1] == 1); + assert (f[4][2] == 1); + assert (f[4][3] == 1); + assert (f[4][4] == 1); + assert (f[5][1] == 1); + assert (f[5][2] == 1); + assert (f[5][3] == 1); + assert (f[5][4] == 1); + assert (f[6][1] == 1); + assert (f[6][2] == 1); + assert (f[6][3] == 1); + assert (f[6][4] == 1); + assert (f[7][1] == 1); + assert (f[7][2] == 1); + assert (f[7][3] == 1); + assert (f[7][4] == 1); +} + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c index b602ffd69bbc..6e08f77921af 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/misalign-1.c @@ -21,7 +21,5 @@ foo () abort (); } -/* { dg-final { scan-assembler-times {vle8\.v} 1 } } */ -/* { dg-final { scan-assembler-times {vle8\.v} 1 } } */ -/* { dg-final { scan-assembler-not {vle16\.v} } } */ -/* { dg-final { scan-assembler-not {vle16\.v} } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ + From 388ab03975c5c6b3c434ebb95c56c07ea8932486 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 9 Dec 2023 10:20:05 +0100 Subject: [PATCH 128/311] c++: Don't diagnose ignoring of attributes if all ignored attributes are attribute_ignored_p There is another thing I wonder about: with -Wno-attributes= we are supposed to ignore the attributes altogether, but we are actually still warning about them when we emit these generic warnings about ignoring all attributes which appertain to this and that (perhaps with some exceptions we first remove from the attribute chain), like: void foo () { [[foo::bar]]; } with -Wattributes -Wno-attributes=foo::bar Shouldn't we call some helper function in cases like this and warn not when 
std_attrs (or whatever the attribute chain variable is called) is non-NULL, but when it is non-NULL and contains at least one non-attribute_ignored_p attribute? I've kept the warnings for cases where the C++ standard explicitly says that attributes aren't OK - "If an attribute-specifier-seq appertains to a friend declaration, that declaration shall be a definition." or https://eel.is/c++draft/dcl.type.elab#3 or https://eel.is/c++draft/temp.spec#temp.explicit-3 For some of the changes I haven't figured out how I could cover them in the testsuite. Note, C uses a different strategy: it has a c_warn_unused_attributes function which warns about all the attributes one by one unless they are ignored (or allowed in a certain position). Though that is just a single diagnostic wording, while the C++ FE just warns that there are some ignored attributes and doesn't name them individually (except for namespace and using namespace) and uses different wordings in different spots. 2023-12-09 Jakub Jelinek gcc/ * attribs.h (any_nonignored_attribute_p): Declare. * attribs.cc (any_nonignored_attribute_p): New function. gcc/cp/ * parser.cc (cp_parser_statement, cp_parser_expression_statement, cp_parser_declaration, cp_parser_asm_definition): Don't diagnose ignored attributes if !any_nonignored_attribute_p. * decl.cc (grokdeclarator): Likewise. * name-lookup.cc (handle_namespace_attrs, finish_using_directive): Don't diagnose ignoring of attribute_ignored_p attributes. gcc/testsuite/ * g++.dg/warn/Wno-attributes-1.C: New test.
--- gcc/attribs.cc | 13 +++++ gcc/attribs.h | 1 + gcc/cp/decl.cc | 3 +- gcc/cp/name-lookup.cc | 4 +- gcc/cp/parser.cc | 11 ++--- gcc/testsuite/g++.dg/warn/Wno-attributes-1.C | 52 ++++++++++++++++++++ 6 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/g++.dg/warn/Wno-attributes-1.C diff --git a/gcc/attribs.cc b/gcc/attribs.cc index ff4b638c25c4..776655dde00e 100644 --- a/gcc/attribs.cc +++ b/gcc/attribs.cc @@ -584,6 +584,19 @@ attribute_ignored_p (const attribute_spec *const as) return as->max_length == -2; } +/* Return true if the ATTRS chain contains at least one attribute which + is not ignored. */ + +bool +any_nonignored_attribute_p (tree attrs) +{ + for (tree attr = attrs; attr; attr = TREE_CHAIN (attr)) + if (!attribute_ignored_p (attr)) + return true; + + return false; +} + /* See whether LIST contains at least one instance of attribute ATTR (possibly with different arguments). Return the first such attribute if so, otherwise return null. */ diff --git a/gcc/attribs.h b/gcc/attribs.h index fdeebff1cd98..2c615a456638 100644 --- a/gcc/attribs.h +++ b/gcc/attribs.h @@ -48,6 +48,7 @@ extern void apply_tm_attr (tree, tree); extern tree make_attribute (const char *, const char *, tree); extern bool attribute_ignored_p (tree); extern bool attribute_ignored_p (const attribute_spec *const); +extern bool any_nonignored_attribute_p (tree); extern struct scoped_attributes * register_scoped_attributes (const scoped_attribute_specs &, bool = false); diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 4b685270097f..b1ada1d52152 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -13058,7 +13058,8 @@ grokdeclarator (const cp_declarator *declarator, && !diagnose_misapplied_contracts (declspecs->std_attributes)) { location_t attr_loc = declspecs->locations[ds_std_attribute]; - if (warning_at (attr_loc, OPT_Wattributes, "attribute ignored")) + if (any_nonignored_attribute_p (declspecs->std_attributes) + && warning_at (attr_loc, OPT_Wattributes, 
"attribute ignored")) inform (attr_loc, "an attribute that appertains to a type-specifier " "is ignored"); } diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 76f1d44610aa..09dc6ef3e5ad 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -6356,7 +6356,7 @@ handle_namespace_attrs (tree ns, tree attributes) DECL_ATTRIBUTES (ns) = tree_cons (name, args, DECL_ATTRIBUTES (ns)); } - else + else if (!attribute_ignored_p (d)) { warning (OPT_Wattributes, "%qD attribute directive ignored", name); @@ -8703,7 +8703,7 @@ finish_using_directive (tree target, tree attribs) diagnosed = true; } } - else + else if (!attribute_ignored_p (a)) warning (OPT_Wattributes, "%qD attribute directive ignored", name); } } diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index ca91a50f059e..de7af150781b 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -12778,9 +12778,8 @@ cp_parser_statement (cp_parser* parser, tree in_statement_expr, SET_EXPR_LOCATION (statement, statement_location); /* Allow "[[fallthrough]];" or "[[assume(cond)]];", but warn otherwise. */ - if (std_attrs != NULL_TREE) - warning_at (attrs_loc, - OPT_Wattributes, + if (std_attrs != NULL_TREE && any_nonignored_attribute_p (std_attrs)) + warning_at (attrs_loc, OPT_Wattributes, "attributes at the beginning of statement are ignored"); } @@ -12989,7 +12988,7 @@ cp_parser_expression_statement (cp_parser* parser, tree in_statement_expr) } /* Allow "[[fallthrough]];", but warn otherwise. 
*/ - if (attr != NULL_TREE) + if (attr != NULL_TREE && any_nonignored_attribute_p (attr)) warning_at (loc, OPT_Wattributes, "attributes at the beginning of statement are ignored"); @@ -15194,7 +15193,7 @@ cp_parser_declaration (cp_parser* parser, tree prefix_attrs) } } - if (std_attrs != NULL_TREE && !attribute_ignored_p (std_attrs)) + if (std_attrs != NULL_TREE && any_nonignored_attribute_p (std_attrs)) warning_at (make_location (attrs_loc, attrs_loc, parser->lexer), OPT_Wattributes, "attribute ignored"); if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON)) @@ -22675,7 +22674,7 @@ cp_parser_asm_definition (cp_parser* parser) symtab->finalize_toplevel_asm (string); } - if (std_attrs) + if (std_attrs && any_nonignored_attribute_p (std_attrs)) warning_at (asm_loc, OPT_Wattributes, "attributes ignored on % declaration"); } diff --git a/gcc/testsuite/g++.dg/warn/Wno-attributes-1.C b/gcc/testsuite/g++.dg/warn/Wno-attributes-1.C new file mode 100644 index 000000000000..863ca5c4892c --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wno-attributes-1.C @@ -0,0 +1,52 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-Wno-attributes=bar:: -Wno-attributes=baz::qux" } + +[[foo::bar]]; // { dg-warning "attribute ignored" } +[[bar::foo, foo::bar, baz::qux]]; // { dg-warning "attribute ignored" } +[[bar::foo, bar::bar, baz::qux]]; // { dg-bogus "attribute ignored" } + +namespace [[foo::bar]] N { // { dg-warning "'bar' attribute directive ignored" } + int n; +} +namespace [[bar::foo, foo::bar, baz::qux]] O { // { dg-warning "'bar' attribute directive ignored" } + int o; +} +namespace [[bar::foo, bar::bar, baz::qux]] P { // { dg-bogus "attribute directive ignored" } + int p; +} + +void +foo () +{ + int i = 0; + [[foo::bar]]; // { dg-warning "attributes at the beginning of statement are ignored" } + [[bar::foo, foo::bar, baz::qux]]; // { dg-warning "attributes at the beginning of statement are ignored" } + [[bar::foo, bar::bar, baz::qux]]; // { dg-bogus "attributes at the 
beginning of statement are ignored" } + [[foo::bar]] ++i; // { dg-warning "attributes at the beginning of statement are ignored" } + [[bar::foo, foo::bar, baz::qux]] ++i; // { dg-warning "attributes at the beginning of statement are ignored" } + [[bar::foo, bar::bar, baz::qux]] ++i; // { dg-bogus "attributes at the beginning of statement are ignored" } + [[foo::bar]] asm (""); // { dg-warning "attributes ignored on 'asm' declaration" } + [[bar::foo, foo::bar, baz::qux]] asm (""); // { dg-warning "attributes ignored on 'asm' declaration" } + [[bar::foo, bar::bar, baz::qux]] asm (""); // { dg-bogus "attributes ignored on 'asm' declaration" } + [[foo::bar]] using namespace N; // { dg-warning "'bar' attribute directive ignored" } + [[bar::foo, foo::bar, baz::qux]] using namespace O; // { dg-warning "'bar' attribute directive ignored" } + [[bar::foo, bar::bar, baz::qux]] using namespace P; // { dg-bogus "attribute directive ignored" } +} + +class S +{ + [[foo::bar]] friend int bar (S &); // { dg-warning "attribute ignored" } + // { dsg-message "an attribute that appertains to a friend declaration that is not a definition is ignored" "" { target *-*-* } .-1 } + [[bar::foo, foo::bar, baz::qux]] friend int baz (S &); // { dg-warning "attribute ignored" } + // { dsg-message "an attribute that appertains to a friend declaration that is not a definition is ignored" "" { target *-*-* } .-1 } + [[bar::foo, bar::bar, baz::qux]] friend int qux (S &); // { dg-warning "attribute ignored" } + // { dsg-message "an attribute that appertains to a friend declaration that is not a definition is ignored" "" { target *-*-* } .-1 } +public: + int s; +}; + +int [[foo::bar]] i; // { dg-warning "attribute ignored" } + // { dg-message "an attribute that appertains to a type-specifier is ignored" "" { target *-*-* } .-1 } +int [[bar::foo, foo::bar, baz::qux]] j; // { dg-warning "attribute ignored" } + // { dg-message "an attribute that appertains to a type-specifier is ignored" "" { target *-*-* 
} .-1 } +int [[bar::foo, bar::bar, baz::qux]] k; // { dg-bogus "attribute ignored" } From 36be2a0e91c76da4afcd5ddc37e03f5800396387 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 9 Dec 2023 10:28:37 +0100 Subject: [PATCH 129/311] driver: Fix memory leak [PR93019] driver::finalize used by JIT clears the mdswitches pointer; if it was allocated before, that leaks the memory. 2023-12-09 Costas Argyris Jakub Jelinek PR driver/93019 * gcc.cc (driver::finalize): Call XDELETEVEC on mdswitches before clearing it. Signed-off-by: Costas Argyris --- gcc/gcc.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/gcc.cc b/gcc/gcc.cc index d73fb0414e98..701f5cdfb59c 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -11368,6 +11368,7 @@ driver::finalize () input_from_pipe = 0; suffix_subst = NULL; + XDELETEVEC (mdswitches); mdswitches = NULL; n_mdswitches = 0; From a314edee2490259d7f7caec8eef77846bcdb608b Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Fri, 8 Dec 2023 13:47:04 +0000 Subject: [PATCH 130/311] libstdc++: Fix resolution of LWG 4016 for std::ranges::to [PR112876] What I implemented in r14-6199-g45630fbcf7875b does not match what I proposed for LWG 4016, and it imposes additional, unwanted requirements on the emplace and insert member functions of the container being populated. libstdc++-v3/ChangeLog: PR libstdc++/112876 * include/std/ranges (ranges::to): Do not try to use an iterator returned by the container's emplace or insert member functions. * testsuite/std/ranges/conv/1.cc (Cont4::emplace, Cont4::insert): Use the iterator parameter. Do not return an iterator.
--- libstdc++-v3/include/std/ranges | 10 +++------- libstdc++-v3/testsuite/std/ranges/conv/1.cc | 12 ++++++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index fb9df3d3e79b..be8475c0cb1b 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -9300,14 +9300,10 @@ namespace __detail __c.emplace_back(*__it); else if constexpr (requires { __c.push_back(*__it); }) __c.push_back(*__it); + else if constexpr (requires { __c.emplace(__c.end(), *__it); }) + __c.emplace(__c.end(), *__it); else - { - auto __end = __c.end(); - if constexpr (requires { __c.emplace(__end, *__it); }) - __end = __c.emplace(__end, *__it); - else - __end = __c.insert(__end, *__it); - } + __c.insert(__c.end(), *__it); ++__it; } return __c; diff --git a/libstdc++-v3/testsuite/std/ranges/conv/1.cc b/libstdc++-v3/testsuite/std/ranges/conv/1.cc index b5f861dedb30..6d6a708ab64c 100644 --- a/libstdc++-v3/testsuite/std/ranges/conv/1.cc +++ b/libstdc++-v3/testsuite/std/ranges/conv/1.cc @@ -236,19 +236,19 @@ struct Cont4 template requires (Kind <= Emplace) && requires(C& c, T&& t) { c.emplace(c.end(), std::forward(t)); } - typename C::iterator - emplace(typename C::iterator, T&& t) + void + emplace(typename C::iterator pos, T&& t) { kind = Emplace; - return c.emplace(c.end(), std::forward(t)); + c.emplace(pos, std::forward(t)); } template - typename C::iterator - insert(typename C::iterator, T&& t) + void + insert(typename C::iterator pos, T&& t) { kind = Insert; - return c.insert(c.end(), std::forward(t)); + c.insert(pos, std::forward(t)); } // Required to satisfy reservable-container From cdf45e00a936a76a785c592c9730f24ef1ac25cd Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Fri, 8 Dec 2023 14:40:26 +0000 Subject: [PATCH 131/311] libstdc++: Fix value of __cpp_lib_format macro [PR111826] As noted in the PR, we support both features required for the 202110L value, so we should define it 
with that value. libstdc++-v3/ChangeLog: PR libstdc++/111826 * include/bits/version.def (format): Update value. * include/bits/version.h: Regenerate. * testsuite/std/format/functions/format.cc: Adjust expected macro value. --- libstdc++-v3/include/bits/version.def | 4 +- libstdc++-v3/include/bits/version.h | 128 +++++++++--------- .../testsuite/std/format/functions/format.cc | 4 +- 3 files changed, 67 insertions(+), 69 deletions(-) diff --git a/libstdc++-v3/include/bits/version.def b/libstdc++-v3/include/bits/version.def index 140777832ed8..38b73ec9b5d0 100644 --- a/libstdc++-v3/include/bits/version.def +++ b/libstdc++-v3/include/bits/version.def @@ -1160,14 +1160,12 @@ ftms = { // TODO: #define __cpp_lib_format_ranges 202207L name = format; values = { - v = 202106; + v = 202110; cxxmin = 20; hosted = yes; }; }; -// #undef __glibcxx_chrono -// #define __glibcxx_chrono 201907L // FIXME: #define __glibcxx_execution 201902L ftms = { diff --git a/libstdc++-v3/include/bits/version.h b/libstdc++-v3/include/bits/version.h index 1fb1d1484594..a201a444925b 100644 --- a/libstdc++-v3/include/bits/version.h +++ b/libstdc++-v3/include/bits/version.h @@ -1414,15 +1414,15 @@ // from version.def line 1161 #if !defined(__cpp_lib_format) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED -# define __glibcxx_format 202106L +# define __glibcxx_format 202110L # if defined(__glibcxx_want_all) || defined(__glibcxx_want_format) -# define __cpp_lib_format 202106L +# define __cpp_lib_format 202110L # endif # endif #endif /* !defined(__cpp_lib_format) && defined(__glibcxx_want_format) */ #undef __glibcxx_want_format -// from version.def line 1174 +// from version.def line 1172 #if !defined(__cpp_lib_constexpr_complex) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_constexpr_complex 201711L @@ -1433,7 +1433,7 @@ #endif /* !defined(__cpp_lib_constexpr_complex) && defined(__glibcxx_want_constexpr_complex) */ #undef __glibcxx_want_constexpr_complex -// from version.def line 1183 +// from version.def line
1181 #if !defined(__cpp_lib_constexpr_dynamic_alloc) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_constexpr_dynamic_alloc 201907L @@ -1444,7 +1444,7 @@ #endif /* !defined(__cpp_lib_constexpr_dynamic_alloc) && defined(__glibcxx_want_constexpr_dynamic_alloc) */ #undef __glibcxx_want_constexpr_dynamic_alloc -// from version.def line 1192 +// from version.def line 1190 #if !defined(__cpp_lib_constexpr_string) # if (__cplusplus >= 202002L) && _GLIBCXX_USE_CXX11_ABI && _GLIBCXX_HOSTED && (defined(__glibcxx_is_constant_evaluated)) # define __glibcxx_constexpr_string 201907L @@ -1465,7 +1465,7 @@ #endif /* !defined(__cpp_lib_constexpr_string) && defined(__glibcxx_want_constexpr_string) */ #undef __glibcxx_want_constexpr_string -// from version.def line 1216 +// from version.def line 1214 #if !defined(__cpp_lib_constexpr_vector) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_constexpr_vector 201907L @@ -1476,7 +1476,7 @@ #endif /* !defined(__cpp_lib_constexpr_vector) && defined(__glibcxx_want_constexpr_vector) */ #undef __glibcxx_want_constexpr_vector -// from version.def line 1225 +// from version.def line 1223 #if !defined(__cpp_lib_erase_if) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_erase_if 202002L @@ -1487,7 +1487,7 @@ #endif /* !defined(__cpp_lib_erase_if) && defined(__glibcxx_want_erase_if) */ #undef __glibcxx_want_erase_if -// from version.def line 1234 +// from version.def line 1232 #if !defined(__cpp_lib_generic_unordered_lookup) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_generic_unordered_lookup 201811L @@ -1498,7 +1498,7 @@ #endif /* !defined(__cpp_lib_generic_unordered_lookup) && defined(__glibcxx_want_generic_unordered_lookup) */ #undef __glibcxx_want_generic_unordered_lookup -// from version.def line 1243 +// from version.def line 1241 #if !defined(__cpp_lib_jthread) # if (__cplusplus >= 202002L) && defined(_GLIBCXX_HAS_GTHREADS) && _GLIBCXX_HOSTED # define 
__glibcxx_jthread 201911L @@ -1509,7 +1509,7 @@ #endif /* !defined(__cpp_lib_jthread) && defined(__glibcxx_want_jthread) */ #undef __glibcxx_want_jthread -// from version.def line 1253 +// from version.def line 1251 #if !defined(__cpp_lib_latch) # if (__cplusplus >= 202002L) && (__glibcxx_atomic_wait) # define __glibcxx_latch 201907L @@ -1520,7 +1520,7 @@ #endif /* !defined(__cpp_lib_latch) && defined(__glibcxx_want_latch) */ #undef __glibcxx_want_latch -// from version.def line 1262 +// from version.def line 1260 #if !defined(__cpp_lib_list_remove_return_type) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_list_remove_return_type 201806L @@ -1531,7 +1531,7 @@ #endif /* !defined(__cpp_lib_list_remove_return_type) && defined(__glibcxx_want_list_remove_return_type) */ #undef __glibcxx_want_list_remove_return_type -// from version.def line 1271 +// from version.def line 1269 #if !defined(__cpp_lib_polymorphic_allocator) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_polymorphic_allocator 201902L @@ -1542,7 +1542,7 @@ #endif /* !defined(__cpp_lib_polymorphic_allocator) && defined(__glibcxx_want_polymorphic_allocator) */ #undef __glibcxx_want_polymorphic_allocator -// from version.def line 1280 +// from version.def line 1278 #if !defined(__cpp_lib_move_iterator_concept) # if (__cplusplus >= 202002L) && (__glibcxx_concepts) # define __glibcxx_move_iterator_concept 202207L @@ -1553,7 +1553,7 @@ #endif /* !defined(__cpp_lib_move_iterator_concept) && defined(__glibcxx_want_move_iterator_concept) */ #undef __glibcxx_want_move_iterator_concept -// from version.def line 1290 +// from version.def line 1288 #if !defined(__cpp_lib_semaphore) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED && (__glibcxx_atomic_wait || _GLIBCXX_HAVE_POSIX_SEMAPHORE) # define __glibcxx_semaphore 201907L @@ -1564,7 +1564,7 @@ #endif /* !defined(__cpp_lib_semaphore) && defined(__glibcxx_want_semaphore) */ #undef __glibcxx_want_semaphore -// from version.def 
line 1300 +// from version.def line 1298 #if !defined(__cpp_lib_smart_ptr_for_overwrite) # if (__cplusplus >= 202002L) && _GLIBCXX_HOSTED # define __glibcxx_smart_ptr_for_overwrite 202002L @@ -1575,7 +1575,7 @@ #endif /* !defined(__cpp_lib_smart_ptr_for_overwrite) && defined(__glibcxx_want_smart_ptr_for_overwrite) */ #undef __glibcxx_want_smart_ptr_for_overwrite -// from version.def line 1309 +// from version.def line 1307 #if !defined(__cpp_lib_syncbuf) # if (__cplusplus >= 202002L) && _GLIBCXX_USE_CXX11_ABI && _GLIBCXX_HOSTED # define __glibcxx_syncbuf 201803L @@ -1586,7 +1586,7 @@ #endif /* !defined(__cpp_lib_syncbuf) && defined(__glibcxx_want_syncbuf) */ #undef __glibcxx_want_syncbuf -// from version.def line 1319 +// from version.def line 1317 #if !defined(__cpp_lib_byteswap) # if (__cplusplus >= 202100L) # define __glibcxx_byteswap 202110L @@ -1597,7 +1597,7 @@ #endif /* !defined(__cpp_lib_byteswap) && defined(__glibcxx_want_byteswap) */ #undef __glibcxx_want_byteswap -// from version.def line 1327 +// from version.def line 1325 #if !defined(__cpp_lib_constexpr_charconv) # if (__cplusplus >= 202100L) # define __glibcxx_constexpr_charconv 202207L @@ -1608,7 +1608,7 @@ #endif /* !defined(__cpp_lib_constexpr_charconv) && defined(__glibcxx_want_constexpr_charconv) */ #undef __glibcxx_want_constexpr_charconv -// from version.def line 1335 +// from version.def line 1333 #if !defined(__cpp_lib_constexpr_typeinfo) # if (__cplusplus >= 202100L) # define __glibcxx_constexpr_typeinfo 202106L @@ -1619,7 +1619,7 @@ #endif /* !defined(__cpp_lib_constexpr_typeinfo) && defined(__glibcxx_want_constexpr_typeinfo) */ #undef __glibcxx_want_constexpr_typeinfo -// from version.def line 1343 +// from version.def line 1341 #if !defined(__cpp_lib_expected) # if (__cplusplus >= 202100L) && (__cpp_concepts >= 202002L) # define __glibcxx_expected 202211L @@ -1630,7 +1630,7 @@ #endif /* !defined(__cpp_lib_expected) && defined(__glibcxx_want_expected) */ #undef __glibcxx_want_expected -// 
from version.def line 1352 +// from version.def line 1350 #if !defined(__cpp_lib_freestanding_algorithm) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_algorithm 202311L @@ -1641,7 +1641,7 @@ #endif /* !defined(__cpp_lib_freestanding_algorithm) && defined(__glibcxx_want_freestanding_algorithm) */ #undef __glibcxx_want_freestanding_algorithm -// from version.def line 1361 +// from version.def line 1359 #if !defined(__cpp_lib_freestanding_array) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_array 202311L @@ -1652,7 +1652,7 @@ #endif /* !defined(__cpp_lib_freestanding_array) && defined(__glibcxx_want_freestanding_array) */ #undef __glibcxx_want_freestanding_array -// from version.def line 1370 +// from version.def line 1368 #if !defined(__cpp_lib_freestanding_cstring) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_cstring 202311L @@ -1663,7 +1663,7 @@ #endif /* !defined(__cpp_lib_freestanding_cstring) && defined(__glibcxx_want_freestanding_cstring) */ #undef __glibcxx_want_freestanding_cstring -// from version.def line 1379 +// from version.def line 1377 #if !defined(__cpp_lib_freestanding_expected) # if (__cplusplus >= 202100L) && (__cpp_lib_expected) # define __glibcxx_freestanding_expected 202311L @@ -1674,7 +1674,7 @@ #endif /* !defined(__cpp_lib_freestanding_expected) && defined(__glibcxx_want_freestanding_expected) */ #undef __glibcxx_want_freestanding_expected -// from version.def line 1389 +// from version.def line 1387 #if !defined(__cpp_lib_freestanding_optional) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_optional 202311L @@ -1685,7 +1685,7 @@ #endif /* !defined(__cpp_lib_freestanding_optional) && defined(__glibcxx_want_freestanding_optional) */ #undef __glibcxx_want_freestanding_optional -// from version.def line 1398 +// from version.def line 1396 #if !defined(__cpp_lib_freestanding_string_view) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_string_view 202311L @@ -1696,7 
+1696,7 @@ #endif /* !defined(__cpp_lib_freestanding_string_view) && defined(__glibcxx_want_freestanding_string_view) */ #undef __glibcxx_want_freestanding_string_view -// from version.def line 1407 +// from version.def line 1405 #if !defined(__cpp_lib_freestanding_variant) # if (__cplusplus >= 202100L) # define __glibcxx_freestanding_variant 202311L @@ -1707,7 +1707,7 @@ #endif /* !defined(__cpp_lib_freestanding_variant) && defined(__glibcxx_want_freestanding_variant) */ #undef __glibcxx_want_freestanding_variant -// from version.def line 1416 +// from version.def line 1414 #if !defined(__cpp_lib_invoke_r) # if (__cplusplus >= 202100L) # define __glibcxx_invoke_r 202106L @@ -1718,7 +1718,7 @@ #endif /* !defined(__cpp_lib_invoke_r) && defined(__glibcxx_want_invoke_r) */ #undef __glibcxx_want_invoke_r -// from version.def line 1424 +// from version.def line 1422 #if !defined(__cpp_lib_is_scoped_enum) # if (__cplusplus >= 202100L) # define __glibcxx_is_scoped_enum 202011L @@ -1729,7 +1729,7 @@ #endif /* !defined(__cpp_lib_is_scoped_enum) && defined(__glibcxx_want_is_scoped_enum) */ #undef __glibcxx_want_is_scoped_enum -// from version.def line 1432 +// from version.def line 1430 #if !defined(__cpp_lib_reference_from_temporary) # if (__cplusplus >= 202100L) && (__has_builtin(__reference_constructs_from_temporary) && __has_builtin(__reference_converts_from_temporary)) # define __glibcxx_reference_from_temporary 202202L @@ -1740,7 +1740,7 @@ #endif /* !defined(__cpp_lib_reference_from_temporary) && defined(__glibcxx_want_reference_from_temporary) */ #undef __glibcxx_want_reference_from_temporary -// from version.def line 1452 +// from version.def line 1450 #if !defined(__cpp_lib_ranges_to_container) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_ranges_to_container 202202L @@ -1751,7 +1751,7 @@ #endif /* !defined(__cpp_lib_ranges_to_container) && defined(__glibcxx_want_ranges_to_container) */ #undef __glibcxx_want_ranges_to_container -// from 
version.def line 1461 +// from version.def line 1459 #if !defined(__cpp_lib_ranges_zip) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_zip 202110L @@ -1762,7 +1762,7 @@ #endif /* !defined(__cpp_lib_ranges_zip) && defined(__glibcxx_want_ranges_zip) */ #undef __glibcxx_want_ranges_zip -// from version.def line 1469 +// from version.def line 1467 #if !defined(__cpp_lib_ranges_chunk) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_chunk 202202L @@ -1773,7 +1773,7 @@ #endif /* !defined(__cpp_lib_ranges_chunk) && defined(__glibcxx_want_ranges_chunk) */ #undef __glibcxx_want_ranges_chunk -// from version.def line 1477 +// from version.def line 1475 #if !defined(__cpp_lib_ranges_slide) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_slide 202202L @@ -1784,7 +1784,7 @@ #endif /* !defined(__cpp_lib_ranges_slide) && defined(__glibcxx_want_ranges_slide) */ #undef __glibcxx_want_ranges_slide -// from version.def line 1485 +// from version.def line 1483 #if !defined(__cpp_lib_ranges_chunk_by) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_chunk_by 202202L @@ -1795,7 +1795,7 @@ #endif /* !defined(__cpp_lib_ranges_chunk_by) && defined(__glibcxx_want_ranges_chunk_by) */ #undef __glibcxx_want_ranges_chunk_by -// from version.def line 1493 +// from version.def line 1491 #if !defined(__cpp_lib_ranges_join_with) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_join_with 202202L @@ -1806,7 +1806,7 @@ #endif /* !defined(__cpp_lib_ranges_join_with) && defined(__glibcxx_want_ranges_join_with) */ #undef __glibcxx_want_ranges_join_with -// from version.def line 1501 +// from version.def line 1499 #if !defined(__cpp_lib_ranges_repeat) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_repeat 202207L @@ -1817,7 +1817,7 @@ #endif /* !defined(__cpp_lib_ranges_repeat) && defined(__glibcxx_want_ranges_repeat) */ #undef __glibcxx_want_ranges_repeat -// from version.def line 1509 +// from version.def line 1507 #if !defined(__cpp_lib_ranges_stride) # if 
(__cplusplus >= 202100L) # define __glibcxx_ranges_stride 202207L @@ -1828,7 +1828,7 @@ #endif /* !defined(__cpp_lib_ranges_stride) && defined(__glibcxx_want_ranges_stride) */ #undef __glibcxx_want_ranges_stride -// from version.def line 1517 +// from version.def line 1515 #if !defined(__cpp_lib_ranges_cartesian_product) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_cartesian_product 202207L @@ -1839,7 +1839,7 @@ #endif /* !defined(__cpp_lib_ranges_cartesian_product) && defined(__glibcxx_want_ranges_cartesian_product) */ #undef __glibcxx_want_ranges_cartesian_product -// from version.def line 1525 +// from version.def line 1523 #if !defined(__cpp_lib_ranges_as_rvalue) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_as_rvalue 202207L @@ -1850,7 +1850,7 @@ #endif /* !defined(__cpp_lib_ranges_as_rvalue) && defined(__glibcxx_want_ranges_as_rvalue) */ #undef __glibcxx_want_ranges_as_rvalue -// from version.def line 1533 +// from version.def line 1531 #if !defined(__cpp_lib_ranges_as_const) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_as_const 202207L @@ -1861,7 +1861,7 @@ #endif /* !defined(__cpp_lib_ranges_as_const) && defined(__glibcxx_want_ranges_as_const) */ #undef __glibcxx_want_ranges_as_const -// from version.def line 1541 +// from version.def line 1539 #if !defined(__cpp_lib_ranges_enumerate) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_enumerate 202302L @@ -1872,7 +1872,7 @@ #endif /* !defined(__cpp_lib_ranges_enumerate) && defined(__glibcxx_want_ranges_enumerate) */ #undef __glibcxx_want_ranges_enumerate -// from version.def line 1549 +// from version.def line 1547 #if !defined(__cpp_lib_ranges_fold) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_fold 202207L @@ -1883,7 +1883,7 @@ #endif /* !defined(__cpp_lib_ranges_fold) && defined(__glibcxx_want_ranges_fold) */ #undef __glibcxx_want_ranges_fold -// from version.def line 1557 +// from version.def line 1555 #if !defined(__cpp_lib_ranges_contains) # if 
(__cplusplus >= 202100L) # define __glibcxx_ranges_contains 202207L @@ -1894,7 +1894,7 @@ #endif /* !defined(__cpp_lib_ranges_contains) && defined(__glibcxx_want_ranges_contains) */ #undef __glibcxx_want_ranges_contains -// from version.def line 1565 +// from version.def line 1563 #if !defined(__cpp_lib_ranges_iota) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_iota 202202L @@ -1905,7 +1905,7 @@ #endif /* !defined(__cpp_lib_ranges_iota) && defined(__glibcxx_want_ranges_iota) */ #undef __glibcxx_want_ranges_iota -// from version.def line 1573 +// from version.def line 1571 #if !defined(__cpp_lib_ranges_find_last) # if (__cplusplus >= 202100L) # define __glibcxx_ranges_find_last 202207L @@ -1916,7 +1916,7 @@ #endif /* !defined(__cpp_lib_ranges_find_last) && defined(__glibcxx_want_ranges_find_last) */ #undef __glibcxx_want_ranges_find_last -// from version.def line 1581 +// from version.def line 1579 #if !defined(__cpp_lib_constexpr_bitset) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED && (__cpp_constexpr_dynamic_alloc) # define __glibcxx_constexpr_bitset 202202L @@ -1927,7 +1927,7 @@ #endif /* !defined(__cpp_lib_constexpr_bitset) && defined(__glibcxx_want_constexpr_bitset) */ #undef __glibcxx_want_constexpr_bitset -// from version.def line 1591 +// from version.def line 1589 #if !defined(__cpp_lib_stdatomic_h) # if (__cplusplus >= 202100L) # define __glibcxx_stdatomic_h 202011L @@ -1938,7 +1938,7 @@ #endif /* !defined(__cpp_lib_stdatomic_h) && defined(__glibcxx_want_stdatomic_h) */ #undef __glibcxx_want_stdatomic_h -// from version.def line 1599 +// from version.def line 1597 #if !defined(__cpp_lib_adaptor_iterator_pair_constructor) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_adaptor_iterator_pair_constructor 202106L @@ -1949,7 +1949,7 @@ #endif /* !defined(__cpp_lib_adaptor_iterator_pair_constructor) && defined(__glibcxx_want_adaptor_iterator_pair_constructor) */ #undef __glibcxx_want_adaptor_iterator_pair_constructor -// from 
version.def line 1608 +// from version.def line 1606 #if !defined(__cpp_lib_formatters) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_formatters 202302L @@ -1960,7 +1960,7 @@ #endif /* !defined(__cpp_lib_formatters) && defined(__glibcxx_want_formatters) */ #undef __glibcxx_want_formatters -// from version.def line 1617 +// from version.def line 1615 #if !defined(__cpp_lib_forward_like) # if (__cplusplus >= 202100L) # define __glibcxx_forward_like 202207L @@ -1971,7 +1971,7 @@ #endif /* !defined(__cpp_lib_forward_like) && defined(__glibcxx_want_forward_like) */ #undef __glibcxx_want_forward_like -// from version.def line 1625 +// from version.def line 1623 #if !defined(__cpp_lib_ios_noreplace) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_ios_noreplace 202207L @@ -1982,7 +1982,7 @@ #endif /* !defined(__cpp_lib_ios_noreplace) && defined(__glibcxx_want_ios_noreplace) */ #undef __glibcxx_want_ios_noreplace -// from version.def line 1634 +// from version.def line 1632 #if !defined(__cpp_lib_move_only_function) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_move_only_function 202110L @@ -1993,7 +1993,7 @@ #endif /* !defined(__cpp_lib_move_only_function) && defined(__glibcxx_want_move_only_function) */ #undef __glibcxx_want_move_only_function -// from version.def line 1643 +// from version.def line 1641 #if !defined(__cpp_lib_out_ptr) # if (__cplusplus >= 202100L) # define __glibcxx_out_ptr 202311L @@ -2004,7 +2004,7 @@ #endif /* !defined(__cpp_lib_out_ptr) && defined(__glibcxx_want_out_ptr) */ #undef __glibcxx_want_out_ptr -// from version.def line 1651 +// from version.def line 1649 #if !defined(__cpp_lib_spanstream) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED && (__glibcxx_span) # define __glibcxx_spanstream 202106L @@ -2015,7 +2015,7 @@ #endif /* !defined(__cpp_lib_spanstream) && defined(__glibcxx_want_spanstream) */ #undef __glibcxx_want_spanstream -// from version.def line 1661 +// from 
version.def line 1659 #if !defined(__cpp_lib_stacktrace) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED && (_GLIBCXX_HAVE_STACKTRACE) # define __glibcxx_stacktrace 202011L @@ -2026,7 +2026,7 @@ #endif /* !defined(__cpp_lib_stacktrace) && defined(__glibcxx_want_stacktrace) */ #undef __glibcxx_want_stacktrace -// from version.def line 1671 +// from version.def line 1669 #if !defined(__cpp_lib_string_contains) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_string_contains 202011L @@ -2037,7 +2037,7 @@ #endif /* !defined(__cpp_lib_string_contains) && defined(__glibcxx_want_string_contains) */ #undef __glibcxx_want_string_contains -// from version.def line 1680 +// from version.def line 1678 #if !defined(__cpp_lib_string_resize_and_overwrite) # if (__cplusplus >= 202100L) && _GLIBCXX_HOSTED # define __glibcxx_string_resize_and_overwrite 202110L @@ -2048,7 +2048,7 @@ #endif /* !defined(__cpp_lib_string_resize_and_overwrite) && defined(__glibcxx_want_string_resize_and_overwrite) */ #undef __glibcxx_want_string_resize_and_overwrite -// from version.def line 1689 +// from version.def line 1687 #if !defined(__cpp_lib_to_underlying) # if (__cplusplus >= 202100L) # define __glibcxx_to_underlying 202102L @@ -2059,7 +2059,7 @@ #endif /* !defined(__cpp_lib_to_underlying) && defined(__glibcxx_want_to_underlying) */ #undef __glibcxx_want_to_underlying -// from version.def line 1697 +// from version.def line 1695 #if !defined(__cpp_lib_unreachable) # if (__cplusplus >= 202100L) # define __glibcxx_unreachable 202202L @@ -2070,7 +2070,7 @@ #endif /* !defined(__cpp_lib_unreachable) && defined(__glibcxx_want_unreachable) */ #undef __glibcxx_want_unreachable -// from version.def line 1705 +// from version.def line 1703 #if !defined(__cpp_lib_fstream_native_handle) # if (__cplusplus > 202302L) && _GLIBCXX_HOSTED # define __glibcxx_fstream_native_handle 202306L @@ -2081,7 +2081,7 @@ #endif /* !defined(__cpp_lib_fstream_native_handle) && 
defined(__glibcxx_want_fstream_native_handle) */ #undef __glibcxx_want_fstream_native_handle -// from version.def line 1714 +// from version.def line 1712 #if !defined(__cpp_lib_ratio) # if (__cplusplus > 202302L) # define __glibcxx_ratio 202306L @@ -2092,7 +2092,7 @@ #endif /* !defined(__cpp_lib_ratio) && defined(__glibcxx_want_ratio) */ #undef __glibcxx_want_ratio -// from version.def line 1722 +// from version.def line 1720 #if !defined(__cpp_lib_saturation_arithmetic) # if (__cplusplus > 202302L) # define __glibcxx_saturation_arithmetic 202311L @@ -2103,7 +2103,7 @@ #endif /* !defined(__cpp_lib_saturation_arithmetic) && defined(__glibcxx_want_saturation_arithmetic) */ #undef __glibcxx_want_saturation_arithmetic -// from version.def line 1730 +// from version.def line 1728 #if !defined(__cpp_lib_to_string) # if (__cplusplus > 202302L) && _GLIBCXX_HOSTED && (__glibcxx_to_chars) # define __glibcxx_to_string 202306L diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc index dacc276e03c3..9328dec88758 100644 --- a/libstdc++-v3/testsuite/std/format/functions/format.cc +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc @@ -5,7 +5,7 @@ #ifndef __cpp_lib_format # error "Feature test macro for std::format is missing in " -#elif __cpp_lib_format < 202106L +#elif __cpp_lib_format < 202110L # error "Feature test macro for std::format has wrong value in " #endif @@ -13,7 +13,7 @@ #include #ifndef __cpp_lib_format # error "Feature test macro for std::format is missing in " -#elif __cpp_lib_format < 202106L +#elif __cpp_lib_format < 202110L # error "Feature test macro for std::format has wrong value in " #endif From af8bbd631f5425e9be084dfd1f2b9487a31a350e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 9 Dec 2023 15:29:51 +0100 Subject: [PATCH 132/311] testsuite: Add testcase for already fixed PR [PR112924] This testcase got fixed with r14-6132-g50f2a3370d177f8fe9bea0461feb710523e048a2 . 
I'm just adding a testcase so that it doesn't reappear. 2023-12-09 Jakub Jelinek PR tree-optimization/112924 * gcc.dg/pr112924.c: New test. --- gcc/testsuite/gcc.dg/pr112924.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr112924.c diff --git a/gcc/testsuite/gcc.dg/pr112924.c b/gcc/testsuite/gcc.dg/pr112924.c new file mode 100644 index 000000000000..c8a51d40b82b --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112924.c @@ -0,0 +1,26 @@ +/* PR tree-optimization/112924 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -w" } */ +/* { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */ + +struct S { long a; char b[64]; }; +void foo (struct S a); +char c; +int d[3541]; + +static void +bar (struct S *s, char *p) +{ + unsigned int a = sizeof (d) - sizeof (int) - s->a; + long c = __builtin_object_size (s, 0); + for (; a >= 64; a -= 64, p += 4); + __builtin___memcpy_chk (s, p, a, c); +} + +void +baz (void) +{ + struct S s = {}; + bar (&s, &c); + foo (s); +} From c250ff90989a71dff11e9256e99d2fa965ab1295 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 9 Dec 2023 21:41:00 +0100 Subject: [PATCH 133/311] phiopt: Fix ICE with large --param l1-cache-line-size= [PR112887] This function is never called when param_l1_cache_line_size is 0, but it uses int and unsigned int variables to hold alignment in bits, so for large param_l1_cache_line_size it is zero and e.g. DECL_ALIGN () % param_align_bits can divide by zero. Looking at the code, the function uses tree_fits_uhwi_p on the trees before converting them using tree_to_uhwi to int variables, which looks just wrong, either it would need to punt if it doesn't fit into those and also check for overflows during the computation, or use unsigned HOST_WIDE_INT for all of this. That also fixes the division by zero, as param_l1_cache_line_size maximum is INT_MAX, that multiplied by 8 will always fit. 
2023-12-09 Jakub Jelinek PR tree-optimization/112887 * tree-ssa-phiopt.cc (hoist_adjacent_loads): Change type of param_align, param_align_bits, offset1, offset2, size2 and align1 variables from int or unsigned int to unsigned HOST_WIDE_INT. * gcc.dg/pr112887.c: New test. --- gcc/testsuite/gcc.dg/pr112887.c | 13 +++++++++++++ gcc/tree-ssa-phiopt.cc | 7 +++---- 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr112887.c diff --git a/gcc/testsuite/gcc.dg/pr112887.c b/gcc/testsuite/gcc.dg/pr112887.c new file mode 100644 index 000000000000..41f5cc8cadf3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112887.c @@ -0,0 +1,13 @@ +/* PR tree-optimization/112887 */ +/* { dg-do compile } */ +/* { dg-options "-O2 --param=l1-cache-line-size=0x20000000" } */ + +void bar (long); +long c; +struct S { long a, b; } s; + +void +foo (void) +{ + bar (c ? s.a : s.b); +} diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index ac805173453d..a3b660bb18ac 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -3757,8 +3757,8 @@ static void hoist_adjacent_loads (basic_block bb0, basic_block bb1, basic_block bb2, basic_block bb3) { - int param_align = param_l1_cache_line_size; - unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT); + unsigned HOST_WIDE_INT param_align = param_l1_cache_line_size; + unsigned HOST_WIDE_INT param_align_bits = param_align * BITS_PER_UNIT; gphi_iterator gsi; /* Walk the phis in bb3 looking for an opportunity. 
We are looking @@ -3770,8 +3770,7 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, gimple *def1, *def2; tree arg1, arg2, ref1, ref2, field1, field2; tree tree_offset1, tree_offset2, tree_size2, next; - int offset1, offset2, size2; - unsigned align1; + unsigned HOST_WIDE_INT offset1, offset2, size2, align1; gimple_stmt_iterator gsi2; basic_block bb_for_def1, bb_for_def2; From fbfe43daec6443978df65530dc5f7f3f8a4e6f9e Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 10 Dec 2023 00:16:47 +0000 Subject: [PATCH 134/311] Daily bump. --- gcc/ChangeLog | 29 +++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 9 +++++++++ gcc/testsuite/ChangeLog | 26 ++++++++++++++++++++++++++ libstdc++-v3/ChangeLog | 15 +++++++++++++++ 5 files changed, 80 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2ff57b89c8d7..68754648cbbe 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2023-12-09 Jakub Jelinek + + PR tree-optimization/112887 + * tree-ssa-phiopt.cc (hoist_adjacent_loads): Change type of + param_align, param_align_bits, offset1, offset2, size2 and align1 + variables from int or unsigned int to unsigned HOST_WIDE_INT. + +2023-12-09 Costas Argyris + Jakub Jelinek + + PR driver/93019 + * gcc.cc (driver::finalize): Call XDELETEVEC on mdswitches before + clearing it. + +2023-12-09 Jakub Jelinek + + * attribs.h (any_nonignored_attribute_p): Declare. + * attribs.cc (any_nonignored_attribute_p): New function. + +2023-12-09 Juzhe-Zhong + + PR target/112932 + * config/riscv/vector.md (movmisalign): Fix VLSmode bugs. + +2023-12-09 Alexandre Oliva + + * tree-emutls.cc: Include diagnostic-core.h. + (pass_ipa_lower_emutls::gate): Skip if errors were seen. + 2023-12-08 Vladimir N. 
Makarov PR rtl-optimization/112875 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 98957cde41b6..19d30f166542 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231209 +20231210 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 3939154623db..58f84237d787 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,12 @@ +2023-12-09 Jakub Jelinek + + * parser.cc (cp_parser_statement, cp_parser_expression_statement, + cp_parser_declaration, cp_parser_asm_definition): Don't diagnose + ignored attributes if !any_nonignored_attribute_p. + * decl.cc (grokdeclarator): Likewise. + * name-lookup.cc (handle_namespace_attrs, finish_using_directive): + Don't diagnose ignoring of attr_ignored_p attributes. + 2023-12-08 Patrick Palka PR c++/83167 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 61cb3fba5d65..73aa606733f7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,29 @@ +2023-12-09 Jakub Jelinek + + PR tree-optimization/112887 + * gcc.dg/pr112887.c: New test. + +2023-12-09 Jakub Jelinek + + PR tree-optimization/112924 + * gcc.dg/pr112924.c: New test. + +2023-12-09 Jakub Jelinek + + * g++.dg/warn/Wno-attributes-1.C: New test. + +2023-12-09 Juzhe-Zhong + + PR target/112932 + * gcc.target/riscv/rvv/autovec/vls/misalign-1.c: Ditto. + * gcc.target/riscv/rvv/autovec/pr112932.c: New test. + +2023-12-09 Hans-Peter Nilsson + + PR testsuite/112786 + * gcc.dg/tree-ssa/scev-3.c, gcc.dg/tree-ssa/scev-4.c, + gcc.dg/tree-ssa/scev-5.c: Remove. + 2023-12-08 Patrick Palka PR c++/83167 diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 4ffaa9d558b2..cd9d121374e8 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,18 @@ +2023-12-09 Jonathan Wakely + + PR libstdc++/111826 + * include/bits/version.def (format): Update value. + * include/bits/version.h: Regenerate. 
+ * testsuite/std/format/functions/format.cc: + +2023-12-09 Jonathan Wakely + + PR libstdc++/112876 + * include/std/ranges (ranges::to): Do not try to use an iterator + returned by the container's emplace or insert member functions. + * testsuite/std/ranges/conv/1.cc (Cont4::emplace, Cont4::insert): + Use the iterator parameter. Do not return an iterator. + 2023-12-07 Patrick Palka * include/std/ranges (__detail::_ToClosure): Replace with ... From 4ac358c619e364ad767242409765c178da9d83e0 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Sun, 10 Dec 2023 09:32:55 -0700 Subject: [PATCH 135/311] [committed] Fix length computation for logical shifts on H8 This fixes the length computation for logical shifts on the H8/SX. The H8/SX has a richer set of logical shifts compared to early parts in the H8 family. It has special 2 byte instructions for shifts by power of two immediate values as well as a special 4 byte shift by other immediate values. These were never accounted for (AFAICT) in the length computation for shifts. Until now that's mostly just affected branch shortening. But an upcoming patch uses instruction lengths to select between two potential sequences and getting these lengths wrong will cause it to miss optimization opportunities on the H8/SX. gcc * config/h8300/h8300.cc (compute_a_shift_length): Fix computation of logical shifts on the H8/SX. --- gcc/config/h8300/h8300.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc index 5936cdca177c..5f9bbc9793b2 100644 --- a/gcc/config/h8300/h8300.cc +++ b/gcc/config/h8300/h8300.cc @@ -4299,6 +4299,11 @@ compute_a_shift_length (rtx operands[3], rtx_code code) /* Fall through. */ case SHIFT_INLINE: + /* H8/SX has a richer set of logical shifts. */ + if (TARGET_H8300SX + && (code == ASHIFT || code == LSHIFTRT)) + return (exact_log2 (n) >= 0) ?
2 : 4; + n = info.remainder; if (info.shift2 != NULL) From 1f55c5cc698519094f751257db62ff274c015fdc Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Sun, 10 Dec 2023 10:05:18 -0700 Subject: [PATCH 136/311] [committed] Fix length computation of single bit bitfield extraction on H8 Various approaches are used to optimize extracting a sign extended single bit bitfield. The length computation of 10 bytes was conservatively correct, but inaccurate. In particular when the bit we want is in the low half word we don't need the move high half to low half instruction. Account for that in the length computation. This was spotted when looking at regressions in the generalized signed bitfield extraction pattern. This has been regression tested on the H8 port. gcc/ * config/h8300/combiner.md (single bit signed bitfield extraction): Fix length computation when the bit we want is in the low half word. --- gcc/config/h8300/combiner.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/h8300/combiner.md b/gcc/config/h8300/combiner.md index e1179b5fea61..cce187805c72 100644 --- a/gcc/config/h8300/combiner.md +++ b/gcc/config/h8300/combiner.md @@ -1358,7 +1358,7 @@ to get that bit into the destination, properly extended. */ return "subx\t%s0,%s0\;exts.w %T0\;exts.l %0"; } - [(set_attr "length" "10")]) + [(set (attr "length") (symbol_ref "INTVAL (operands[2]) >= 16 ? 10 : 8"))]) ;; For shift counts >= 16 we can always do better than the ;; generic sequences. Other patterns handle smaller counts. From 73f6e1fe8835085ccc6de5c5f4428d47e853913b Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Sun, 10 Dec 2023 10:29:23 -0700 Subject: [PATCH 137/311] [committed] Provide patterns for signed bitfield extractions on H8 Inspired by Roger's work on the ARC port, this patch provides a define_insn_and_split pattern to optimize sign extended bitfields starting at position 0 using an approach that doesn't require shifting.
It then builds on that to provide another define_insn_and_split pattern to support arbitrary signed bitfield extractions -- it uses a right logical shift to move the bitfield into position 0, then the specialized pattern above to sign extend the MSB of the field through the rest of the register. This is often, but certainly not always, better than a two shift approach. The code uses the sizes of the sequences to select between the two shift approach and single shift with extension from an arbitrary location approach. There are certainly further improvements that could be made here, but I think we're getting the bulk of the improvements already. Regression tested on the H8 port without errors. Installing on the trunk. gcc/ * config/h8300/h8300-protos.h (use_extvsi): Prototype. * config/h8300/combiner.md: Two new define_insn_and_split patterns to implement signed bitfield extractions. * config/h8300/h8300.cc (use_extvsi): New function. --- gcc/config/h8300/combiner.md | 49 ++++++++++++++++++++++++- gcc/config/h8300/h8300-protos.h | 1 + gcc/config/h8300/h8300.cc | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 1 deletion(-) diff --git a/gcc/config/h8300/combiner.md b/gcc/config/h8300/combiner.md index cce187805c72..d5f26b50983a 100644 --- a/gcc/config/h8300/combiner.md +++ b/gcc/config/h8300/combiner.md @@ -1269,7 +1269,54 @@ ;; (pc)))] ;; "") -;; Various ways to extract a single bit bitfield and sign extend it +;; This is a signed bitfield extraction starting at bit 0 +;; It's usually faster than using shifts, but not always, +;; particularly on the H8/S and H8/SX variants. +(define_insn_and_split "*extvsi_n_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "0") + (match_operand 2 "const_int_operand") + (const_int 0)))] + "INTVAL (operands[2]) > 1 + && INTVAL (operands[2]) < (TARGET_H8300S ?
26 - TARGET_H8300SX : 29) + && (!TARGET_H8300SX || (INTVAL (operands[2]) != 24 && INTVAL (operands[2]) != 17))" + "#" + "&& reload_completed" +[(parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC CC_REG))]) + (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4))) + (clobber (reg:CC CC_REG))]) + (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4))) + (clobber (reg:CC CC_REG))])] +{ + int tmp = INTVAL (operands[2]); + operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp)); + operands[4] = GEN_INT (HOST_WIDE_INT_1U << (tmp - 1)); +}) + +(define_insn_and_split "*extvsi_n_n" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "0") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")))] + "(!h8300_shift_needs_scratch_p (INTVAL (operands[3]), SImode, LSHIFTRT) + && use_extvsi (INTVAL (operands[2]), INTVAL (operands[3])))" + "#" + "&& reload_completed" +[(parallel [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 3))) + (clobber (reg:CC CC_REG))]) + (parallel [(set (match_dup 0) (and:SI (match_dup 0) (match_dup 4))) + (clobber (reg:CC CC_REG))]) + (parallel [(set (match_dup 0) (xor:SI (match_dup 0) (match_dup 5))) + (clobber (reg:CC CC_REG))]) + (parallel [(set (match_dup 0) (minus:SI (match_dup 0) (match_dup 5))) + (clobber (reg:CC CC_REG))])] +{ + int tmp = INTVAL (operands[2]); + operands[4] = gen_int_mode (~(HOST_WIDE_INT_M1U << tmp), SImode); + operands[5] = gen_int_mode (HOST_WIDE_INT_1U << (tmp - 1), SImode); +}) + ;; ;; Testing showed this only triggering with SImode, probably because ;; of how insv/extv are defined. 
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h index 3376bd060321..96bd0c8daafb 100644 --- a/gcc/config/h8300/h8300-protos.h +++ b/gcc/config/h8300/h8300-protos.h @@ -111,5 +111,6 @@ extern const char * output_h8sx_shift (rtx *, int, int); extern bool h8300_operands_match_p (rtx *); extern bool h8sx_mergeable_memrefs_p (rtx, rtx); extern poly_int64 h8300_push_rounding (poly_int64); +extern bool use_extvsi (int, int); #endif /* ! GCC_H8300_PROTOS_H */ diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc index 5f9bbc9793b2..f906286d65d1 100644 --- a/gcc/config/h8300/h8300.cc +++ b/gcc/config/h8300/h8300.cc @@ -5503,6 +5503,70 @@ h8300_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) } } +/* Return true if a signed bitfield extraction with length COUNT + starting at position POS should be optimized by first shifting + right to put the field in the LSB, then using a 3 operand sequence + to sign extend from an arbitrary position. Return false + otherwise. + + The basic idea here is to compute the length of each sequence + and use that as a proxy for performance. It's not strictly + correct on the H8/SX which has variable timed shifts and some + lengths may be incorrect, but this is pretty close. + + There may be cases where the length computations are inaccurate + which may in turn lead to a sub-optimal sequence, but that + should be rare. + + We do not try to balance avoiding a loop with burning an extra + couple bytes. Those probably couple be handled with special + cases. */ + +bool +use_extvsi (int count, int pos) +{ + rtx operands[3]; + operands[0] = gen_rtx_REG (SImode, 0); + operands[1] = gen_rtx_REG (SImode, 0); + + /* We have a special sequence to sign extract a single bit + object, otherwise compute it as a pair of shifts, first + left, then arithmetic right. The cost of that special + sequence is 8/10 depending on where the bit is. */ + unsigned shift_cost; + if (count == 1) + shift_cost = pos >= 16 ? 
10 : 8; + else + { + unsigned lshift = 32 - (count + pos); + unsigned rshift = 32 - count; + operands[2] = GEN_INT (lshift); + shift_cost = compute_a_shift_length (operands, ASHIFT); + operands[2] = GEN_INT (rshift); + shift_cost += compute_a_shift_length (operands, ASHIFTRT); + } + + /* Cost of hopefully optimized sequence. First we logically shift right + by an adjusted count. Logicals are generally better than arith, + particularly for H8/SX. */ + operands[2] = GEN_INT (pos); + unsigned opt_cost = compute_a_shift_length (operands, LSHIFTRT); + operands[2] = gen_int_mode (~(HOST_WIDE_INT_M1U << count), SImode); + opt_cost += compute_logical_op_length (SImode, AND, operands, NULL); + operands[2] = gen_int_mode (HOST_WIDE_INT_1U << (count - 1), SImode); + opt_cost += compute_logical_op_length (SImode, XOR, operands, NULL); + + /* H8/SX has short form subtraction. */ + if (TARGET_H8300SX && (INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 7)) + opt_cost += 2; + else if (TARGET_H8300SX && (INTVAL (operands[2]) >= 8 && INTVAL (operands[2]) <= 32767)) + opt_cost += 4; + else + opt_cost += 6; + + return opt_cost <= shift_cost; +} + /* Implement PUSH_ROUNDING. On the H8/300, @-sp really pushes a byte if you ask it to - but that's From 7fb9454c748632d148a07c275ea1f77b290b0c2d Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Sun, 10 Dec 2023 10:41:05 -0700 Subject: [PATCH 138/311] [committed] Support uaddv and usubv on the H8 This patch adds uaddv/usubv support on the H8 port to speed up those pesky builtin-overflow tests. It's a variant of something I'd been running for a while -- the major change between the old approach I'd been using and this patch is this version does not expose the CC register until after reload to be consistent with the rest of the H8 port. 
The general approach is to first clear the GPR that's going to hold the overflow status, perform the arithmetic operation (add/sub), then use addx to move the overflow indicator (in the C bit) into the GPR holding the overflow status. That's a significant improvement over the mess of logicals that's generated by the generic code. Handling signed overflow is possible and something I'll probably port to this scheme at some point. It's a bit more complex because we can't trivially move the bit from CCR into the right position in a GPR and other quirks of the H8. This has been regression tested on the H8 without problems. Pushing to the trunk. gcc/ * config/h8300/addsub.md (uaddv4, usubv4): New expanders. (uaddv): New define_insn_and_split plus post-reload pattern. --- gcc/config/h8300/addsub.md | 77 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/gcc/config/h8300/addsub.md b/gcc/config/h8300/addsub.md index b1eb0d201880..32eba9df67aa 100644 --- a/gcc/config/h8300/addsub.md +++ b/gcc/config/h8300/addsub.md @@ -239,3 +239,80 @@ "reload_completed" "xor.w\\t#32768,%e0" [(set_attr "length" "4")]) + +(define_expand "uaddv4" + [(set (match_operand:QHSI 0 "register_operand" "") + (plus:QHSI (match_operand:QHSI 1 "register_operand" "") + (match_operand:QHSI 2 "register_operand" ""))) + (set (pc) + (if_then_else (ltu (match_dup 0) (match_dup 1)) + (label_ref (match_operand 3 "")) + (pc)))] + "") + +(define_insn_and_split "*uaddv" + [(set (match_operand:QHSI2 3 "register_operand" "=&r") + (ltu:QHSI2 (plus:QHSI (match_operand:QHSI 1 "register_operand" "%0") + (match_operand:QHSI 2 "register_operand" "r")) + (match_dup 1))) + (set (match_operand:QHSI 0 "register_operand" "=r") + (plus:QHSI (match_dup 1) (match_dup 2)))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 3) (ltu:QHSI2 (plus:QHSI (match_dup 1) (match_dup 2)) + (match_dup 1))) + (set (match_dup 0) (plus:QHSI (match_dup 1) (match_dup 2))) + (clobber (reg:CC 
CC_REG))])]) + +(define_insn "*uaddv" + [(set (match_operand:QHSI2 3 "register_operand" "=&r") + (ltu:QHSI2 (plus:QHSI (match_operand:QHSI 1 "register_operand" "%0") + (match_operand:QHSI 2 "register_operand" "r")) + (match_dup 1))) + (set (match_operand:QHSI 0 "register_operand" "=r") + (plus (match_dup 1) (match_dup 2))) + (clobber (reg:CC CC_REG))] + "" +{ + if (E_mode == E_QImode) + { + if (E_mode == E_QImode) + return "sub.b\t%X3,%X3\;add.b\t%X2,%X0\;addx\t%X3,%X3"; + else if (E_mode == E_HImode) + return "sub.b\t%X3,%X3\;add.w\t%T2,%T0\;addx\t%X3,%X3"; + else if (E_mode == E_SImode) + return "sub.b\t%X3,%X3\;add.l\t%S2,%S0\;addx\t%X3,%X3"; + } + else if (E_mode == E_HImode) + { + if (E_mode == E_QImode) + return "sub.w\t%T3,%T3\;add.b\t%X2,%X0\;addx\t%X3,%X3"; + else if (E_mode == E_HImode) + return "sub.w\t%T3,%T3\;add.w\t%T2,%T0\;addx\t%X3,%X3"; + else if (E_mode == E_SImode) + return "sub.w\t%T3,%T3\;add.l\t%S2,%S0\;addx\t%X3,%X3"; + } + else if (E_mode == E_SImode) + { + if (E_mode == E_QImode) + return "sub.l\t%S3,%S3\;add.b\t%X2,%X0\;addx\t%X3,%X3"; + else if (E_mode == E_HImode) + return "sub.l\t%S3,%S3\;add.w\t%T2,%T0\;addx\t%X3,%X3"; + else if (E_mode == E_SImode) + return "sub.l\t%S3,%S3\;add.l\t%S2,%S0\;addx\t%X3,%X3"; + } + else + gcc_unreachable (); +} + [(set_attr "length" "6")]) + +(define_expand "usubv4" + [(set (match_operand:QHSI 0 "register_operand" "") + (minus:QHSI (match_operand:QHSI 1 "register_operand" "") + (match_operand:QHSI 2 "register_operand" ""))) + (set (pc) + (if_then_else (ltu (match_dup 1) (match_dup 2)) + (label_ref (match_operand 3 "")) + (pc)))] + "") From c343e4242c9a951ab1d60e5e048e29b165855ba1 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:32:56 -0800 Subject: [PATCH 139/311] c++: Sort built-in traits alphabetically This patch sorts built-in traits alphabetically for better code readability. gcc/cp/ChangeLog: * constraint.cc (diagnose_trait_expr): Sort built-in traits alphabetically. 
* cp-trait.def: Likewise. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. (finish_trait_type): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Sort built-in traits alphabetically. Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 68 ++++++++--------- gcc/cp/cp-trait.def | 12 +-- gcc/cp/semantics.cc | 96 ++++++++++++------------ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 72 +++++++++--------- 4 files changed, 124 insertions(+), 124 deletions(-) diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index d9972d697251..29aa7bb3df84 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3707,18 +3707,36 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_HAS_TRIVIAL_DESTRUCTOR: inform (loc, " %qT is not trivially destructible", t1); break; + case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS: + inform (loc, " %qT does not have unique object representations", t1); + break; case CPTK_HAS_VIRTUAL_DESTRUCTOR: inform (loc, " %qT does not have a virtual destructor", t1); break; case CPTK_IS_ABSTRACT: inform (loc, " %qT is not an abstract class", t1); break; + case CPTK_IS_AGGREGATE: + inform (loc, " %qT is not an aggregate", t1); + break; + case CPTK_IS_ASSIGNABLE: + inform (loc, " %qT is not assignable from %qT", t1, t2); + break; case CPTK_IS_BASE_OF: inform (loc, " %qT is not a base of %qT", t1, t2); break; case CPTK_IS_CLASS: inform (loc, " %qT is not a class", t1); break; + case CPTK_IS_CONSTRUCTIBLE: + if (!t2) + inform (loc, " %qT is not default constructible", t1); + else + inform (loc, " %qT is not constructible from %qE", t1, t2); + break; + case CPTK_IS_CONVERTIBLE: + inform (loc, " %qT is not convertible from %qE", t2, t1); + break; case CPTK_IS_EMPTY: inform (loc, " %qT is not an empty class", t1); break; @@ -3734,6 +3752,18 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_LITERAL_TYPE: inform (loc, " %qT is not a literal type", t1); break; + case CPTK_IS_NOTHROW_ASSIGNABLE: + inform (loc, " %qT is 
not nothrow assignable from %qT", t1, t2); + break; + case CPTK_IS_NOTHROW_CONSTRUCTIBLE: + if (!t2) + inform (loc, " %qT is not nothrow default constructible", t1); + else + inform (loc, " %qT is not nothrow constructible from %qE", t1, t2); + break; + case CPTK_IS_NOTHROW_CONVERTIBLE: + inform (loc, " %qT is not nothrow convertible from %qE", t2, t1); + break; case CPTK_IS_POINTER_INTERCONVERTIBLE_BASE_OF: inform (loc, " %qT is not pointer-interconvertible base of %qT", t1, t2); @@ -3753,50 +3783,20 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_TRIVIAL: inform (loc, " %qT is not a trivial type", t1); break; - case CPTK_IS_UNION: - inform (loc, " %qT is not a union", t1); - break; - case CPTK_IS_AGGREGATE: - inform (loc, " %qT is not an aggregate", t1); - break; - case CPTK_IS_TRIVIALLY_COPYABLE: - inform (loc, " %qT is not trivially copyable", t1); - break; - case CPTK_IS_ASSIGNABLE: - inform (loc, " %qT is not assignable from %qT", t1, t2); - break; case CPTK_IS_TRIVIALLY_ASSIGNABLE: inform (loc, " %qT is not trivially assignable from %qT", t1, t2); break; - case CPTK_IS_NOTHROW_ASSIGNABLE: - inform (loc, " %qT is not nothrow assignable from %qT", t1, t2); - break; - case CPTK_IS_CONSTRUCTIBLE: - if (!t2) - inform (loc, " %qT is not default constructible", t1); - else - inform (loc, " %qT is not constructible from %qE", t1, t2); - break; case CPTK_IS_TRIVIALLY_CONSTRUCTIBLE: if (!t2) inform (loc, " %qT is not trivially default constructible", t1); else inform (loc, " %qT is not trivially constructible from %qE", t1, t2); break; - case CPTK_IS_NOTHROW_CONSTRUCTIBLE: - if (!t2) - inform (loc, " %qT is not nothrow default constructible", t1); - else - inform (loc, " %qT is not nothrow constructible from %qE", t1, t2); + case CPTK_IS_TRIVIALLY_COPYABLE: + inform (loc, " %qT is not trivially copyable", t1); break; - case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS: - inform (loc, " %qT does not have unique object representations", t1); - break; - case 
CPTK_IS_CONVERTIBLE: - inform (loc, " %qT is not convertible from %qE", t2, t1); - break; - case CPTK_IS_NOTHROW_CONVERTIBLE: - inform (loc, " %qT is not nothrow convertible from %qE", t2, t1); + case CPTK_IS_UNION: + inform (loc, " %qT is not a union", t1); break; case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY: inform (loc, " %qT is not a reference that binds to a temporary " diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 8b7fece0cc86..0e48e64b8dd7 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -84,15 +84,15 @@ DEFTRAIT_EXPR (IS_TRIVIALLY_COPYABLE, "__is_trivially_copyable", 1) DEFTRAIT_EXPR (IS_UNION, "__is_union", 1) DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, "__reference_constructs_from_temporary", 2) DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, "__reference_converts_from_temporary", 2) +DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1) +DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1) +DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1) +DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1) +DEFTRAIT_TYPE (UNDERLYING_TYPE, "__underlying_type", 1) + /* FIXME Added space to avoid direct usage in GCC 13. */ DEFTRAIT_EXPR (IS_DEDUCIBLE, "__is_deducible ", 2) -DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1) -DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1) -DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1) -DEFTRAIT_TYPE (UNDERLYING_TYPE, "__underlying_type", 1) -DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1) - /* These traits yield a type pack, not a type, and are represented by cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE tree. 
*/ DEFTRAIT_TYPE (BASES, "__bases", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index efd959d95b9f..23d1f12e3714 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12334,6 +12334,20 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) && classtype_has_nothrow_assign_or_copy_p (type1, true)))); + case CPTK_HAS_NOTHROW_CONSTRUCTOR: + type1 = strip_array_types (type1); + return (trait_expr_value (CPTK_HAS_TRIVIAL_CONSTRUCTOR, type1, type2) + || (CLASS_TYPE_P (type1) + && (t = locate_ctor (type1)) + && maybe_instantiate_noexcept (t) + && TYPE_NOTHROW_P (TREE_TYPE (t)))); + + case CPTK_HAS_NOTHROW_COPY: + type1 = strip_array_types (type1); + return (trait_expr_value (CPTK_HAS_TRIVIAL_COPY, type1, type2) + || (CLASS_TYPE_P (type1) + && classtype_has_nothrow_assign_or_copy_p (type1, false))); + case CPTK_HAS_TRIVIAL_ASSIGN: /* ??? The standard seems to be missing the "or array of such a class type" wording for this trait. */ @@ -12343,25 +12357,11 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) || (CLASS_TYPE_P (type1) && TYPE_HAS_TRIVIAL_COPY_ASSIGN (type1)))); - case CPTK_HAS_NOTHROW_CONSTRUCTOR: - type1 = strip_array_types (type1); - return (trait_expr_value (CPTK_HAS_TRIVIAL_CONSTRUCTOR, type1, type2) - || (CLASS_TYPE_P (type1) - && (t = locate_ctor (type1)) - && maybe_instantiate_noexcept (t) - && TYPE_NOTHROW_P (TREE_TYPE (t)))); - case CPTK_HAS_TRIVIAL_CONSTRUCTOR: type1 = strip_array_types (type1); return (trivial_type_p (type1) || (CLASS_TYPE_P (type1) && TYPE_HAS_TRIVIAL_DFLT (type1))); - case CPTK_HAS_NOTHROW_COPY: - type1 = strip_array_types (type1); - return (trait_expr_value (CPTK_HAS_TRIVIAL_COPY, type1, type2) - || (CLASS_TYPE_P (type1) - && classtype_has_nothrow_assign_or_copy_p (type1, false))); - case CPTK_HAS_TRIVIAL_COPY: /* ??? The standard seems to be missing the "or array of such a class type" wording for this trait. 
*/ @@ -12375,18 +12375,21 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) || (CLASS_TYPE_P (type1) && TYPE_HAS_TRIVIAL_DESTRUCTOR (type1))); - case CPTK_HAS_VIRTUAL_DESTRUCTOR: - return type_has_virtual_destructor (type1); - case CPTK_HAS_UNIQUE_OBJ_REPRESENTATIONS: return type_has_unique_obj_representations (type1); + case CPTK_HAS_VIRTUAL_DESTRUCTOR: + return type_has_virtual_destructor (type1); + case CPTK_IS_ABSTRACT: return ABSTRACT_CLASS_TYPE_P (type1); case CPTK_IS_AGGREGATE: return CP_AGGREGATE_TYPE_P (type1); + case CPTK_IS_ASSIGNABLE: + return is_xible (MODIFY_EXPR, type1, type2); + case CPTK_IS_BASE_OF: return (NON_UNION_CLASS_TYPE_P (type1) && NON_UNION_CLASS_TYPE_P (type2) && (same_type_ignoring_top_level_qualifiers_p (type1, type2) @@ -12395,6 +12398,12 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_CLASS: return NON_UNION_CLASS_TYPE_P (type1); + case CPTK_IS_CONSTRUCTIBLE: + return is_xible (INIT_EXPR, type1, type2); + + case CPTK_IS_CONVERTIBLE: + return is_convertible (type1, type2); + case CPTK_IS_EMPTY: return NON_UNION_CLASS_TYPE_P (type1) && CLASSTYPE_EMPTY_P (type1); @@ -12410,6 +12419,15 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_LITERAL_TYPE: return literal_type_p (type1); + case CPTK_IS_NOTHROW_ASSIGNABLE: + return is_nothrow_xible (MODIFY_EXPR, type1, type2); + + case CPTK_IS_NOTHROW_CONSTRUCTIBLE: + return is_nothrow_xible (INIT_EXPR, type1, type2); + + case CPTK_IS_NOTHROW_CONVERTIBLE: + return is_nothrow_convertible (type1, type2); + case CPTK_IS_POINTER_INTERCONVERTIBLE_BASE_OF: return pointer_interconvertible_base_of_p (type1, type2); @@ -12440,24 +12458,6 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_UNION: return type_code1 == UNION_TYPE; - case CPTK_IS_ASSIGNABLE: - return is_xible (MODIFY_EXPR, type1, type2); - - case CPTK_IS_CONSTRUCTIBLE: - return is_xible (INIT_EXPR, type1, type2); - - case 
CPTK_IS_NOTHROW_ASSIGNABLE: - return is_nothrow_xible (MODIFY_EXPR, type1, type2); - - case CPTK_IS_NOTHROW_CONSTRUCTIBLE: - return is_nothrow_xible (INIT_EXPR, type1, type2); - - case CPTK_IS_CONVERTIBLE: - return is_convertible (type1, type2); - - case CPTK_IS_NOTHROW_CONVERTIBLE: - return is_nothrow_convertible (type1, type2); - case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY: return ref_xes_from_temporary (type1, type2, /*direct_init=*/true); @@ -12570,9 +12570,9 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) return error_mark_node; break; + case CPTK_IS_ABSTRACT: case CPTK_IS_EMPTY: case CPTK_IS_POLYMORPHIC: - case CPTK_IS_ABSTRACT: case CPTK_HAS_VIRTUAL_DESTRUCTOR: if (!check_trait_type (type1, /* kind = */ 3)) return error_mark_node; @@ -12592,12 +12592,12 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) return error_mark_node; break; - case CPTK_IS_TRIVIALLY_ASSIGNABLE: - case CPTK_IS_TRIVIALLY_CONSTRUCTIBLE: + case CPTK_IS_CONVERTIBLE: case CPTK_IS_NOTHROW_ASSIGNABLE: case CPTK_IS_NOTHROW_CONSTRUCTIBLE: - case CPTK_IS_CONVERTIBLE: case CPTK_IS_NOTHROW_CONVERTIBLE: + case CPTK_IS_TRIVIALLY_ASSIGNABLE: + case CPTK_IS_TRIVIALLY_CONSTRUCTIBLE: case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY: case CPTK_REF_CONVERTS_FROM_TEMPORARY: if (!check_trait_type (type1) @@ -12616,8 +12616,8 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_CLASS: case CPTK_IS_ENUM: - case CPTK_IS_UNION: case CPTK_IS_SAME: + case CPTK_IS_UNION: break; case CPTK_IS_LAYOUT_COMPATIBLE: @@ -12680,25 +12680,25 @@ finish_trait_type (cp_trait_kind kind, tree type1, tree type2, switch (kind) { - case CPTK_UNDERLYING_TYPE: - return finish_underlying_type (type1); - case CPTK_REMOVE_CV: return cv_unqualified (type1); + case CPTK_REMOVE_CVREF: + if (TYPE_REF_P (type1)) + type1 = TREE_TYPE (type1); + return cv_unqualified (type1); + case CPTK_REMOVE_REFERENCE: if (TYPE_REF_P (type1)) type1 = TREE_TYPE 
(type1); return type1; - case CPTK_REMOVE_CVREF: - if (TYPE_REF_P (type1)) - type1 = TREE_TYPE (type1); - return cv_unqualified (type1); - case CPTK_TYPE_PACK_ELEMENT: return finish_type_pack_element (type1, type2, complain); + case CPTK_UNDERLYING_TYPE: + return finish_underlying_type (type1); + #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \ case CPTK_##CODE: #include "cp-trait.def" diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index f343e153e56c..2223f08a6285 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -8,9 +8,21 @@ #if !__has_builtin (__builtin_bit_cast) # error "__has_builtin (__builtin_bit_cast) failed" #endif +#if !__has_builtin (__builtin_is_constant_evaluated) +# error "__has_builtin (__builtin_is_constant_evaluated) failed" +#endif +#if !__has_builtin (__builtin_is_corresponding_member) +# error "__has_builtin (__builtin_is_corresponding_member) failed" +#endif +#if !__has_builtin (__builtin_is_pointer_interconvertible_with_class) +# error "__has_builtin (__builtin_is_pointer_interconvertible_with_class) failed" +#endif #if !__has_builtin (__builtin_launder) # error "__has_builtin (__builtin_launder) failed" #endif +#if !__has_builtin (__builtin_source_location) +# error "__has_builtin (__builtin_source_location) failed" +#endif #if !__has_builtin (__has_nothrow_assign) # error "__has_builtin (__has_nothrow_assign) failed" #endif @@ -44,12 +56,21 @@ #if !__has_builtin (__is_aggregate) # error "__has_builtin (__is_aggregate) failed" #endif +#if !__has_builtin (__is_assignable) +# error "__has_builtin (__is_assignable) failed" +#endif #if !__has_builtin (__is_base_of) # error "__has_builtin (__is_base_of) failed" #endif #if !__has_builtin (__is_class) # error "__has_builtin (__is_class) failed" #endif +#if !__has_builtin (__is_constructible) +# error "__has_builtin (__is_constructible) failed" +#endif +#if !__has_builtin (__is_convertible) +# error 
"__has_builtin (__is_convertible) failed" +#endif #if !__has_builtin (__is_empty) # error "__has_builtin (__is_empty) failed" #endif @@ -65,6 +86,15 @@ #if !__has_builtin (__is_literal_type) # error "__has_builtin (__is_literal_type) failed" #endif +#if !__has_builtin (__is_nothrow_assignable) +# error "__has_builtin (__is_nothrow_assignable) failed" +#endif +#if !__has_builtin (__is_nothrow_constructible) +# error "__has_builtin (__is_nothrow_constructible) failed" +#endif +#if !__has_builtin (__is_nothrow_convertible) +# error "__has_builtin (__is_nothrow_convertible) failed" +#endif #if !__has_builtin (__is_pointer_interconvertible_base_of) # error "__has_builtin (__is_pointer_interconvertible_base_of) failed" #endif @@ -98,51 +128,21 @@ #if !__has_builtin (__is_union) # error "__has_builtin (__is_union) failed" #endif -#if !__has_builtin (__underlying_type) -# error "__has_builtin (__underlying_type) failed" -#endif -#if !__has_builtin (__is_assignable) -# error "__has_builtin (__is_assignable) failed" -#endif -#if !__has_builtin (__is_constructible) -# error "__has_builtin (__is_constructible) failed" -#endif -#if !__has_builtin (__is_nothrow_assignable) -# error "__has_builtin (__is_nothrow_assignable) failed" -#endif -#if !__has_builtin (__is_nothrow_constructible) -# error "__has_builtin (__is_nothrow_constructible) failed" -#endif #if !__has_builtin (__reference_constructs_from_temporary) # error "__has_builtin (__reference_constructs_from_temporary) failed" #endif #if !__has_builtin (__reference_converts_from_temporary) # error "__has_builtin (__reference_converts_from_temporary) failed" #endif -#if !__has_builtin (__builtin_is_constant_evaluated) -# error "__has_builtin (__builtin_is_constant_evaluated) failed" -#endif -#if !__has_builtin (__builtin_source_location) -# error "__has_builtin (__builtin_source_location) failed" -#endif -#if !__has_builtin (__builtin_is_corresponding_member) -# error "__has_builtin (__builtin_is_corresponding_member) failed" 
-#endif -#if !__has_builtin (__builtin_is_pointer_interconvertible_with_class) -# error "__has_builtin (__builtin_is_pointer_interconvertible_with_class) failed" -#endif -#if !__has_builtin (__is_convertible) -# error "__has_builtin (__is_convertible) failed" -#endif -#if !__has_builtin (__is_nothrow_convertible) -# error "__has_builtin (__is_nothrow_convertible) failed" -#endif #if !__has_builtin (__remove_cv) # error "__has_builtin (__remove_cv) failed" #endif -#if !__has_builtin (__remove_reference) -# error "__has_builtin (__remove_reference) failed" -#endif #if !__has_builtin (__remove_cvref) # error "__has_builtin (__remove_cvref) failed" #endif +#if !__has_builtin (__remove_reference) +# error "__has_builtin (__remove_reference) failed" +#endif +#if !__has_builtin (__underlying_type) +# error "__has_builtin (__underlying_type) failed" +#endif From 400cd0c26cf86fe75b0e4d42f9976b3125bcfd43 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:32:57 -0800 Subject: [PATCH 140/311] c-family, c++: Look up built-in traits via identifier node Since RID_MAX soon reaches 255 and all built-in traits are used approximately once in a C++ translation unit, this patch removes all RID values for built-in traits and uses the identifier node to look up the specific trait. Rather than holding traits as keywords, we set all trait identifiers as cik_trait, which is a new cp_identifier_kind. As cik_reserved_for_udlit was unused and cp_identifier_kind is 3 bits, we replaced the unused field with the new cik_trait. Also, the later patch handles a subsequent token to the built-in identifier so that we accept the use of non-function-like built-in trait identifiers. gcc/c-family/ChangeLog: * c-common.cc (c_common_reswords): Remove all mappings of built-in traits. * c-common.h (enum rid): Remove all RID values for built-in traits. gcc/cp/ChangeLog: * cp-objcp-common.cc (names_builtin_p): Remove all RID value cases for built-in traits. 
Check for built-in traits via the new cik_trait kind. * cp-tree.h (enum cp_trait_kind): Set its underlying type to addr_space_t. (struct cp_trait): New struct to hold trait information. (cp_traits): New array to hold a mapping to all traits. (cik_reserved_for_udlit): Rename to ... (cik_trait): ... this. (IDENTIFIER_ANY_OP_P): Exclude cik_trait. (IDENTIFIER_TRAIT_P): New macro to detect cik_trait. * lex.cc (cp_traits): Define its values, declared in cp-tree.h. (init_cp_traits): New function to set cik_trait and IDENTIFIER_CP_INDEX for all built-in trait identifiers. (cxx_init): Call init_cp_traits function. * parser.cc (cp_lexer_lookup_trait): New function to look up a built-in trait by IDENTIFIER_CP_INDEX. (cp_lexer_lookup_trait_expr): Likewise, look up an expression-yielding built-in trait. (cp_lexer_lookup_trait_type): Likewise, look up a type-yielding built-in trait. (cp_keyword_starts_decl_specifier_p): Remove all RID value cases for built-in traits. (cp_lexer_next_token_is_decl_specifier_keyword): Handle type-yielding built-in traits. (cp_parser_primary_expression): Remove all RID value cases for built-in traits. Handle expression-yielding built-in traits. (cp_parser_trait): Handle cp_trait instead of enum rid. (cp_parser_simple_type_specifier): Remove all RID value cases for built-in traits. Handle type-yielding built-in traits. 
Co-authored-by: Patrick Palka Signed-off-by: Ken Matsui --- gcc/c-family/c-common.cc | 7 --- gcc/c-family/c-common.h | 5 -- gcc/cp/cp-objcp-common.cc | 8 +-- gcc/cp/cp-tree.h | 32 +++++++++--- gcc/cp/lex.cc | 34 ++++++++++++ gcc/cp/parser.cc | 105 +++++++++++++++++++++++--------------- 6 files changed, 126 insertions(+), 65 deletions(-) diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc index d175054dddb1..0f1de44a3481 100644 --- a/gcc/c-family/c-common.cc +++ b/gcc/c-family/c-common.cc @@ -560,13 +560,6 @@ const struct c_common_resword c_common_reswords[] = { "wchar_t", RID_WCHAR, D_CXXONLY }, { "while", RID_WHILE, 0 }, -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \ - { NAME, RID_##CODE, D_CXXONLY }, -#include "cp/cp-trait.def" -#undef DEFTRAIT - /* An alias for __is_same. */ - { "__is_same_as", RID_IS_SAME, D_CXXONLY }, - /* C++ transactional memory. */ { "synchronized", RID_SYNCHRONIZED, D_CXX_OBJC | D_TRANSMEM }, { "atomic_noexcept", RID_ATOMIC_NOEXCEPT, D_CXXONLY | D_TRANSMEM }, diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index cb9b6f301d87..62d76c87cc00 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -168,11 +168,6 @@ enum rid RID_BUILTIN_LAUNDER, RID_BUILTIN_BIT_CAST, -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \ - RID_##CODE, -#include "cp/cp-trait.def" -#undef DEFTRAIT - /* C++11 */ RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT, diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc index 9439c4dc7444..ee88df5767bb 100644 --- a/gcc/cp/cp-objcp-common.cc +++ b/gcc/cp/cp-objcp-common.cc @@ -565,6 +565,10 @@ names_builtin_p (const char *name) } } + /* Check for built-in traits. */ + if (IDENTIFIER_TRAIT_P (id)) + return true; + /* Also detect common reserved C++ words that aren't strictly built-in functions. 
*/ switch (C_RID_CODE (id)) @@ -578,10 +582,6 @@ names_builtin_p (const char *name) case RID_BUILTIN_ASSOC_BARRIER: case RID_BUILTIN_BIT_CAST: case RID_OFFSETOF: -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \ - case RID_##CODE: -#include "cp-trait.def" -#undef DEFTRAIT return true; default: break; diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index cb89d372b234..cbf280ec4546 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -1237,7 +1237,7 @@ enum cp_identifier_kind { cik_simple_op = 4, /* Non-assignment operator name. */ cik_assign_op = 5, /* An assignment operator name. */ cik_conv_op = 6, /* Conversion operator name. */ - cik_reserved_for_udlit = 7, /* Not yet in use */ + cik_trait = 7, /* Built-in trait name. */ cik_max }; @@ -1282,9 +1282,9 @@ enum cp_identifier_kind { & IDENTIFIER_KIND_BIT_0 (NODE)) /* True if this identifier is for any operator name (including - conversions). Value 4, 5, 6 or 7. */ + conversions). Value 4, 5, or 6. */ #define IDENTIFIER_ANY_OP_P(NODE) \ - (IDENTIFIER_KIND_BIT_2 (NODE)) + (IDENTIFIER_KIND_BIT_2 (NODE) && !IDENTIFIER_TRAIT_P (NODE)) /* True if this identifier is for an overloaded operator. Values 4, 5. */ #define IDENTIFIER_OVL_OP_P(NODE) \ @@ -1297,12 +1297,18 @@ enum cp_identifier_kind { & IDENTIFIER_KIND_BIT_0 (NODE)) /* True if this identifier is the name of a type-conversion - operator. Value 7. */ + operator. Value 6. */ #define IDENTIFIER_CONV_OP_P(NODE) \ (IDENTIFIER_ANY_OP_P (NODE) \ & IDENTIFIER_KIND_BIT_1 (NODE) \ & (!IDENTIFIER_KIND_BIT_0 (NODE))) +/* True if this identifier is the name of a built-in trait. */ +#define IDENTIFIER_TRAIT_P(NODE) \ + (IDENTIFIER_KIND_BIT_0 (NODE) \ + & IDENTIFIER_KIND_BIT_1 (NODE) \ + & IDENTIFIER_KIND_BIT_2 (NODE)) + /* True if this identifier is a new or delete operator. */ #define IDENTIFIER_NEWDEL_OP_P(NODE) \ (IDENTIFIER_OVL_OP_P (NODE) \ @@ -1386,16 +1392,26 @@ struct GTY (()) tree_argument_pack_select { int index; }; -/* The different kinds of traits that we encounter. 
*/ - -enum cp_trait_kind -{ +/* The different kinds of traits that we encounter. The size is limited to + addr_space_t since a trait is looked up by IDENTIFIER_CP_INDEX. */ +enum cp_trait_kind : addr_space_t { #define DEFTRAIT(TCC, CODE, NAME, ARITY) \ CPTK_##CODE, #include "cp-trait.def" #undef DEFTRAIT }; +/* The trait type. */ +struct cp_trait { + const char *name; + cp_trait_kind kind; + short arity; + bool type; +}; + +/* The trait table indexed by cp_trait_kind. */ +extern const struct cp_trait cp_traits[]; + /* The types that we are processing. */ #define TRAIT_EXPR_TYPE1(NODE) \ (((struct tree_trait_expr *)TRAIT_EXPR_CHECK (NODE))->type1) diff --git a/gcc/cp/lex.cc b/gcc/cp/lex.cc index 64bcfb181968..a939e2e5f13b 100644 --- a/gcc/cp/lex.cc +++ b/gcc/cp/lex.cc @@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" static int interface_strcmp (const char *); +static void init_cp_traits (void); static void init_cp_pragma (void); static tree parse_strconst_pragma (const char *, int); @@ -97,6 +98,19 @@ ovl_op_info_t ovl_op_info[2][OVL_OP_MAX] = unsigned char ovl_op_mapping[MAX_TREE_CODES]; unsigned char ovl_op_alternate[OVL_OP_MAX]; +/* The trait table, declared in cp-tree.h. */ +const cp_trait cp_traits[] = +{ +#define DEFTRAIT(TCC, CODE, NAME, ARITY) \ + { NAME, CPTK_##CODE, ARITY, (TCC == tcc_type) }, +#include "cp-trait.def" +#undef DEFTRAIT +}; +/* The trait table cannot have more than 255 (addr_space_t) entries since + the index is retrieved through IDENTIFIER_CP_INDEX. */ +static_assert(ARRAY_SIZE (cp_traits) <= 255, + "cp_traits array cannot have more than 255 entries"); + /* Get the name of the kind of identifier T. */ const char * @@ -283,6 +297,25 @@ init_reswords (void) } } +/* Initialize the C++ traits. 
*/ +static void +init_cp_traits (void) +{ + tree id; + + for (unsigned int i = 0; i < ARRAY_SIZE (cp_traits); ++i) + { + id = get_identifier (cp_traits[i].name); + IDENTIFIER_CP_INDEX (id) = cp_traits[i].kind; + set_identifier_kind (id, cik_trait); + } + + /* An alias for __is_same. */ + id = get_identifier ("__is_same_as"); + IDENTIFIER_CP_INDEX (id) = CPTK_IS_SAME; + set_identifier_kind (id, cik_trait); +} + static void init_cp_pragma (void) { @@ -324,6 +357,7 @@ cxx_init (void) input_location = BUILTINS_LOCATION; init_reswords (); + init_cp_traits (); init_tree (); init_cp_semantics (); init_operators (); diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index de7af150781b..f23ef6be2db6 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -246,6 +246,12 @@ static void cp_lexer_start_debugging (cp_lexer *) ATTRIBUTE_UNUSED; static void cp_lexer_stop_debugging (cp_lexer *) ATTRIBUTE_UNUSED; +static const cp_trait *cp_lexer_lookup_trait + (const cp_token *); +static const cp_trait *cp_lexer_lookup_trait_expr + (const cp_token *); +static const cp_trait *cp_lexer_lookup_trait_type + (const cp_token *); static cp_token_cache *cp_token_cache_new (cp_token *, cp_token *); @@ -1173,12 +1179,6 @@ cp_keyword_starts_decl_specifier_p (enum rid keyword) case RID_CONSTEVAL: return true; -#define DEFTRAIT_TYPE(CODE, NAME, ARITY) \ - case RID_##CODE: -#include "cp-trait.def" -#undef DEFTRAIT_TYPE - return true; - default: if (keyword >= RID_FIRST_INT_N && keyword < RID_FIRST_INT_N + NUM_INT_N_ENTS @@ -1188,6 +1188,44 @@ cp_keyword_starts_decl_specifier_p (enum rid keyword) } } +/* Look ups the corresponding built-in trait if a given token is + a built-in trait. Otherwise, returns nullptr. 
*/ + +static const cp_trait * +cp_lexer_lookup_trait (const cp_token *token) +{ + if (token->type == CPP_NAME && IDENTIFIER_TRAIT_P (token->u.value)) + return &cp_traits[IDENTIFIER_CP_INDEX (token->u.value)]; + + return nullptr; +} + +/* Similarly, but only if the token is an expression-yielding + built-in trait. */ + +static const cp_trait * +cp_lexer_lookup_trait_expr (const cp_token *token) +{ + const cp_trait *trait = cp_lexer_lookup_trait (token); + if (trait && !trait->type) + return trait; + + return nullptr; +} + +/* Similarly, but only if the token is a type-yielding + built-in trait. */ + +static const cp_trait * +cp_lexer_lookup_trait_type (const cp_token *token) +{ + const cp_trait *trait = cp_lexer_lookup_trait (token); + if (trait && trait->type) + return trait; + + return nullptr; +} + /* Return true if the next token is a keyword for a decl-specifier. */ static bool @@ -1196,6 +1234,8 @@ cp_lexer_next_token_is_decl_specifier_keyword (cp_lexer *lexer) cp_token *token; token = cp_lexer_peek_token (lexer); + if (cp_lexer_lookup_trait_type (token)) + return true; return cp_keyword_starts_decl_specifier_p (token->keyword); } @@ -2861,7 +2901,7 @@ static void cp_parser_late_parsing_default_args static tree cp_parser_sizeof_operand (cp_parser *, enum rid); static cp_expr cp_parser_trait - (cp_parser *, enum rid); + (cp_parser *, const cp_trait *); static bool cp_parser_declares_only_class_p (cp_parser *); static void cp_parser_set_storage_class @@ -6055,12 +6095,6 @@ cp_parser_primary_expression (cp_parser *parser, case RID_OFFSETOF: return cp_parser_builtin_offsetof (parser); -#define DEFTRAIT_EXPR(CODE, NAME, ARITY) \ - case RID_##CODE: -#include "cp-trait.def" -#undef DEFTRAIT_EXPR - return cp_parser_trait (parser, token->keyword); - // C++ concepts case RID_REQUIRES: return cp_parser_requires_expression (parser); @@ -6099,6 +6133,9 @@ cp_parser_primary_expression (cp_parser *parser, `::' as the beginning of a qualified-id, or the "operator" keyword. 
*/ case CPP_NAME: + if (const cp_trait* trait = cp_lexer_lookup_trait_expr (token)) + return cp_parser_trait (parser, trait); + /* FALLTHRU */ case CPP_SCOPE: case CPP_TEMPLATE_ID: case CPP_NESTED_NAME_SPECIFIER: @@ -11029,28 +11066,13 @@ cp_parser_builtin_offsetof (cp_parser *parser) /* Parse a builtin trait expression or type. */ static cp_expr -cp_parser_trait (cp_parser* parser, enum rid keyword) +cp_parser_trait (cp_parser* parser, const cp_trait* trait) { - cp_trait_kind kind; + const cp_trait_kind kind = trait->kind; tree type1, type2 = NULL_TREE; - bool binary = false; - bool variadic = false; - bool type = false; - - switch (keyword) - { -#define DEFTRAIT(TCC, CODE, NAME, ARITY) \ - case RID_##CODE: \ - kind = CPTK_##CODE; \ - binary = (ARITY == 2); \ - variadic = (ARITY == -1); \ - type = (TCC == tcc_type); \ - break; -#include "cp-trait.def" -#undef DEFTRAIT - default: - gcc_unreachable (); - } + const bool binary = (trait->arity == 2); + const bool variadic = (trait->arity == -1); + const bool type = trait->type; /* Get location of initial token. */ location_t start_loc = cp_lexer_peek_token (parser->lexer)->location; @@ -20126,20 +20148,21 @@ cp_parser_simple_type_specifier (cp_parser* parser, return type; -#define DEFTRAIT_TYPE(CODE, NAME, ARITY) \ - case RID_##CODE: -#include "cp-trait.def" -#undef DEFTRAIT_TYPE - type = cp_parser_trait (parser, token->keyword); + default: + break; + } + + /* If token is a type-yielding built-in traits, parse it. 
*/ + const cp_trait* trait = cp_lexer_lookup_trait_type (token); + if (trait) + { + type = cp_parser_trait (parser, trait); if (decl_specs) cp_parser_set_decl_spec_type (decl_specs, type, token, /*type_definition_p=*/false); return type; - - default: - break; } /* If token is an already-parsed decltype not followed by ::, From df3559d951ba6572e254a1bd1ef9a34b6e543325 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:32:58 -0800 Subject: [PATCH 141/311] c++: Accept the use of built-in trait identifiers This patch accepts the use of built-in trait identifiers when they are actually not used as traits. Specifically, we check if the subsequent token is '(' for ordinary built-in traits or is '<' only for the special __type_pack_element built-in trait. If those identifiers are used differently, the parser treats them as normal identifiers. This allows us to accept code like: struct __is_pointer {};. gcc/cp/ChangeLog: * parser.cc (cp_lexer_lookup_trait): Rename to ... (cp_lexer_peek_trait): ... this. Handle a subsequent token for the corresponding built-in trait. (cp_lexer_lookup_trait_expr): Rename to ... (cp_lexer_peek_trait_expr): ... this. (cp_lexer_lookup_trait_type): Rename to ... (cp_lexer_peek_trait_type): ... this. (cp_lexer_next_token_is_decl_specifier_keyword): Call cp_lexer_peek_trait_type. (cp_parser_simple_type_specifier): Likewise. (cp_parser_primary_expression): Call cp_lexer_peek_trait_expr. 
Signed-off-by: Ken Matsui --- gcc/cp/parser.cc | 53 +++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index f23ef6be2db6..9e76426566b1 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -246,12 +246,12 @@ static void cp_lexer_start_debugging (cp_lexer *) ATTRIBUTE_UNUSED; static void cp_lexer_stop_debugging (cp_lexer *) ATTRIBUTE_UNUSED; -static const cp_trait *cp_lexer_lookup_trait - (const cp_token *); -static const cp_trait *cp_lexer_lookup_trait_expr - (const cp_token *); -static const cp_trait *cp_lexer_lookup_trait_type - (const cp_token *); +static const cp_trait *cp_lexer_peek_trait + (cp_lexer *); +static const cp_trait *cp_lexer_peek_trait_expr + (cp_lexer *); +static const cp_trait *cp_lexer_peek_trait_type + (cp_lexer *); static cp_token_cache *cp_token_cache_new (cp_token *, cp_token *); @@ -1188,15 +1188,29 @@ cp_keyword_starts_decl_specifier_p (enum rid keyword) } } -/* Look ups the corresponding built-in trait if a given token is - a built-in trait. Otherwise, returns nullptr. */ +/* Peeks the corresponding built-in trait if the first token is + a built-in trait and the second token is either `(' or `<' depending + on the trait. Otherwise, returns nullptr. */ static const cp_trait * -cp_lexer_lookup_trait (const cp_token *token) +cp_lexer_peek_trait (cp_lexer *lexer) { - if (token->type == CPP_NAME && IDENTIFIER_TRAIT_P (token->u.value)) - return &cp_traits[IDENTIFIER_CP_INDEX (token->u.value)]; + const cp_token *token1 = cp_lexer_peek_token (lexer); + if (token1->type == CPP_NAME && IDENTIFIER_TRAIT_P (token1->u.value)) + { + const cp_trait &trait = cp_traits[IDENTIFIER_CP_INDEX (token1->u.value)]; + const bool is_pack_element = (trait.kind == CPTK_TYPE_PACK_ELEMENT); + /* Check if the subsequent token is a `<' token to + __type_pack_element or is a `(' token to everything else. 
*/ + const cp_token *token2 = cp_lexer_peek_nth_token (lexer, 2); + if (is_pack_element && token2->type != CPP_LESS) + return nullptr; + if (!is_pack_element && token2->type != CPP_OPEN_PAREN) + return nullptr; + + return &trait; + } return nullptr; } @@ -1204,9 +1218,9 @@ cp_lexer_lookup_trait (const cp_token *token) built-in trait. */ static const cp_trait * -cp_lexer_lookup_trait_expr (const cp_token *token) +cp_lexer_peek_trait_expr (cp_lexer *lexer) { - const cp_trait *trait = cp_lexer_lookup_trait (token); + const cp_trait *trait = cp_lexer_peek_trait (lexer); if (trait && !trait->type) return trait; @@ -1217,9 +1231,9 @@ cp_lexer_lookup_trait_expr (const cp_token *token) built-in trait. */ static const cp_trait * -cp_lexer_lookup_trait_type (const cp_token *token) +cp_lexer_peek_trait_type (cp_lexer *lexer) { - const cp_trait *trait = cp_lexer_lookup_trait (token); + const cp_trait *trait = cp_lexer_peek_trait (lexer); if (trait && trait->type) return trait; @@ -1233,9 +1247,10 @@ cp_lexer_next_token_is_decl_specifier_keyword (cp_lexer *lexer) { cp_token *token; - token = cp_lexer_peek_token (lexer); - if (cp_lexer_lookup_trait_type (token)) + if (cp_lexer_peek_trait_type (lexer)) return true; + + token = cp_lexer_peek_token (lexer); return cp_keyword_starts_decl_specifier_p (token->keyword); } @@ -6133,7 +6148,7 @@ cp_parser_primary_expression (cp_parser *parser, `::' as the beginning of a qualified-id, or the "operator" keyword. */ case CPP_NAME: - if (const cp_trait* trait = cp_lexer_lookup_trait_expr (token)) + if (const cp_trait* trait = cp_lexer_peek_trait_expr (parser->lexer)) return cp_parser_trait (parser, trait); /* FALLTHRU */ case CPP_SCOPE: @@ -20153,7 +20168,7 @@ cp_parser_simple_type_specifier (cp_parser* parser, } /* If token is a type-yielding built-in traits, parse it. 
*/ - const cp_trait* trait = cp_lexer_lookup_trait_type (token); + const cp_trait* trait = cp_lexer_peek_trait_type (parser->lexer); if (trait) { type = cp_parser_trait (parser, trait); From e410303f768fa7b020e46f3bd7d28381144e5340 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Fri, 8 Dec 2023 15:55:49 -0500 Subject: [PATCH 142/311] c++: trait patch tweak As Patrick suggested elsewhere, let's move this into the default case. gcc/cp/ChangeLog: * parser.cc (cp_parser_simple_type_specifier): Move trait handling to default label. --- gcc/cp/parser.cc | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 9e76426566b1..b987324f6691 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -20164,22 +20164,22 @@ cp_parser_simple_type_specifier (cp_parser* parser, return type; default: + /* If token is a type-yielding built-in traits, parse it. */ + const cp_trait* trait = cp_lexer_peek_trait_type (parser->lexer); + if (trait) + { + type = cp_parser_trait (parser, trait); + if (decl_specs) + cp_parser_set_decl_spec_type (decl_specs, type, + token, + /*type_definition_p=*/false); + + return type; + } + break; } - /* If token is a type-yielding built-in traits, parse it. */ - const cp_trait* trait = cp_lexer_peek_trait_type (parser->lexer); - if (trait) - { - type = cp_parser_trait (parser, trait); - if (decl_specs) - cp_parser_set_decl_spec_type (decl_specs, type, - token, - /*type_definition_p=*/false); - - return type; - } - /* If token is an already-parsed decltype not followed by ::, it's a simple-type-specifier. */ if (token->type == CPP_DECLTYPE From 7a585f14a140edcb3bc32770855144df8e15e28d Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:32:59 -0800 Subject: [PATCH 143/311] c++: Implement __is_array built-in trait This patch implements built-in trait for std::is_array. gcc/cp/ChangeLog: * cp-trait.def: Define __is_array. 
* constraint.cc (diagnose_trait_expr): Handle CPTK_IS_ARRAY. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_array. * g++.dg/ext/is_array.C: New test. Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 +++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 ++++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 +++ gcc/testsuite/g++.dg/ext/is_array.C | 28 ++++++++++++++++++++++++ 5 files changed, 39 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_array.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 29aa7bb3df84..d75132e8e82a 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3719,6 +3719,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_AGGREGATE: inform (loc, " %qT is not an aggregate", t1); break; + case CPTK_IS_ARRAY: + inform (loc, " %qT is not an array", t1); + break; case CPTK_IS_ASSIGNABLE: inform (loc, " %qT is not assignable from %qT", t1, t2); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 0e48e64b8dd7..759f10a35328 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -59,6 +59,7 @@ DEFTRAIT_EXPR (HAS_UNIQUE_OBJ_REPRESENTATIONS, "__has_unique_object_representati DEFTRAIT_EXPR (HAS_VIRTUAL_DESTRUCTOR, "__has_virtual_destructor", 1) DEFTRAIT_EXPR (IS_ABSTRACT, "__is_abstract", 1) DEFTRAIT_EXPR (IS_AGGREGATE, "__is_aggregate", 1) +DEFTRAIT_EXPR (IS_ARRAY, "__is_array", 1) DEFTRAIT_EXPR (IS_ASSIGNABLE, "__is_assignable", 2) DEFTRAIT_EXPR (IS_BASE_OF, "__is_base_of", 2) DEFTRAIT_EXPR (IS_CLASS, "__is_class", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 23d1f12e3714..399a33a82a81 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12387,6 +12387,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_AGGREGATE: return CP_AGGREGATE_TYPE_P (type1); + case CPTK_IS_ARRAY: + return type_code1 == ARRAY_TYPE; + case 
CPTK_IS_ASSIGNABLE: return is_xible (MODIFY_EXPR, type1, type2); @@ -12614,6 +12617,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) return error_mark_node; break; + case CPTK_IS_ARRAY: case CPTK_IS_CLASS: case CPTK_IS_ENUM: case CPTK_IS_SAME: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 2223f08a6285..6b9437f7c472 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -56,6 +56,9 @@ #if !__has_builtin (__is_aggregate) # error "__has_builtin (__is_aggregate) failed" #endif +#if !__has_builtin (__is_array) +# error "__has_builtin (__is_array) failed" +#endif #if !__has_builtin (__is_assignable) # error "__has_builtin (__is_assignable) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_array.C b/gcc/testsuite/g++.dg/ext/is_array.C new file mode 100644 index 000000000000..facfed5c7cbd --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_array.C @@ -0,0 +1,28 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) +#define SA_TEST_CATEGORY(TRAIT, X, expect) \ + SA(TRAIT(X) == expect); \ + SA(TRAIT(const X) == expect); \ + SA(TRAIT(volatile X) == expect); \ + SA(TRAIT(const volatile X) == expect) + +SA_TEST_CATEGORY(__is_array, int[2], true); +SA_TEST_CATEGORY(__is_array, int[], true); +SA_TEST_CATEGORY(__is_array, int[2][3], true); +SA_TEST_CATEGORY(__is_array, int[][3], true); +SA_TEST_CATEGORY(__is_array, float*[2], true); +SA_TEST_CATEGORY(__is_array, float*[], true); +SA_TEST_CATEGORY(__is_array, float*[2][3], true); +SA_TEST_CATEGORY(__is_array, float*[][3], true); +SA_TEST_CATEGORY(__is_array, ClassType[2], true); +SA_TEST_CATEGORY(__is_array, ClassType[], true); +SA_TEST_CATEGORY(__is_array, ClassType[2][3], true); +SA_TEST_CATEGORY(__is_array, ClassType[][3], true); + +// Sanity check.
+SA_TEST_CATEGORY(__is_array, ClassType, false); From ea0a0e6bf1470e48cc5f337bc1be40c40ac8e5ed Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:01 -0800 Subject: [PATCH 144/311] c++: Implement __is_bounded_array built-in trait This patch implements built-in trait for std::is_bounded_array. gcc/cp/ChangeLog: * cp-trait.def: Define __is_bounded_array. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_BOUNDED_ARRAY. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_bounded_array. * g++.dg/ext/is_bounded_array.C: New test. Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 ++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ gcc/testsuite/g++.dg/ext/is_bounded_array.C | 38 +++++++++++++++++++++ 5 files changed, 49 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_bounded_array.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index d75132e8e82a..2311bab28c48 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3728,6 +3728,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_BASE_OF: inform (loc, " %qT is not a base of %qT", t1, t2); break; + case CPTK_IS_BOUNDED_ARRAY: + inform (loc, " %qT is not a bounded array", t1); + break; case CPTK_IS_CLASS: inform (loc, " %qT is not a class", t1); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 759f10a35328..0e93e2b71145 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -62,6 +62,7 @@ DEFTRAIT_EXPR (IS_AGGREGATE, "__is_aggregate", 1) DEFTRAIT_EXPR (IS_ARRAY, "__is_array", 1) DEFTRAIT_EXPR (IS_ASSIGNABLE, "__is_assignable", 2) DEFTRAIT_EXPR (IS_BASE_OF, "__is_base_of", 2) +DEFTRAIT_EXPR (IS_BOUNDED_ARRAY, "__is_bounded_array", 1) DEFTRAIT_EXPR (IS_CLASS, "__is_class", 1) DEFTRAIT_EXPR (IS_CONSTRUCTIBLE, "__is_constructible", -1) DEFTRAIT_EXPR (IS_CONVERTIBLE,
"__is_convertible", 2) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 399a33a82a81..ecbd858b6dd8 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12398,6 +12398,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) && (same_type_ignoring_top_level_qualifiers_p (type1, type2) || DERIVED_FROM_P (type1, type2))); + case CPTK_IS_BOUNDED_ARRAY: + return type_code1 == ARRAY_TYPE && TYPE_DOMAIN (type1); + case CPTK_IS_CLASS: return NON_UNION_CLASS_TYPE_P (type1); @@ -12618,6 +12621,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) break; case CPTK_IS_ARRAY: + case CPTK_IS_BOUNDED_ARRAY: case CPTK_IS_CLASS: case CPTK_IS_ENUM: case CPTK_IS_SAME: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 6b9437f7c472..4cfb817788cd 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -65,6 +65,9 @@ #if !__has_builtin (__is_base_of) # error "__has_builtin (__is_base_of) failed" #endif +#if !__has_builtin (__is_bounded_array) +# error "__has_builtin (__is_bounded_array) failed" +#endif #if !__has_builtin (__is_class) # error "__has_builtin (__is_class) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_bounded_array.C b/gcc/testsuite/g++.dg/ext/is_bounded_array.C new file mode 100644 index 000000000000..346790eba127 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_bounded_array.C @@ -0,0 +1,38 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_CONST(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT) + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +SA_TEST_CATEGORY(__is_bounded_array, int[2], true);
+SA_TEST_CATEGORY(__is_bounded_array, int[], false); +SA_TEST_CATEGORY(__is_bounded_array, int[2][3], true); +SA_TEST_CATEGORY(__is_bounded_array, int[][3], false); +SA_TEST_CATEGORY(__is_bounded_array, float*[2], true); +SA_TEST_CATEGORY(__is_bounded_array, float*[], false); +SA_TEST_CATEGORY(__is_bounded_array, float*[2][3], true); +SA_TEST_CATEGORY(__is_bounded_array, float*[][3], false); +SA_TEST_CATEGORY(__is_bounded_array, ClassType[2], true); +SA_TEST_CATEGORY(__is_bounded_array, ClassType[], false); +SA_TEST_CATEGORY(__is_bounded_array, ClassType[2][3], true); +SA_TEST_CATEGORY(__is_bounded_array, ClassType[][3], false); +SA_TEST_CATEGORY(__is_bounded_array, int(*)[2], false); +SA_TEST_CATEGORY(__is_bounded_array, int(*)[], false); +SA_TEST_CATEGORY(__is_bounded_array, int(&)[2], false); +SA_TEST_CONST(__is_bounded_array, int(&)[], false); + +// Sanity check. +SA_TEST_CATEGORY(__is_bounded_array, ClassType, false); +SA_TEST_CONST(__is_bounded_array, void(), false); From 8769777bf74a844b05521bf51bed4db4c9ca66d4 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:03 -0800 Subject: [PATCH 145/311] c++: Implement __is_scoped_enum built-in trait This patch implements built-in trait for std::is_scoped_enum. gcc/cp/ChangeLog: * cp-trait.def: Define __is_scoped_enum. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SCOPED_ENUM. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_scoped_enum. * g++.dg/ext/is_scoped_enum.C: New test. 
Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 + gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 ++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 + gcc/testsuite/g++.dg/ext/is_scoped_enum.C | 67 +++++++++++++++++++++++ 5 files changed, 78 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_scoped_enum.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 2311bab28c48..062dc404ccf5 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3783,6 +3783,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_SAME: inform (loc, " %qT is not the same as %qT", t1, t2); break; + case CPTK_IS_SCOPED_ENUM: + inform (loc, " %qT is not a scoped enum", t1); + break; case CPTK_IS_STD_LAYOUT: inform (loc, " %qT is not an standard layout type", t1); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 0e93e2b71145..9d848f6f77de 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -78,6 +78,7 @@ DEFTRAIT_EXPR (IS_POINTER_INTERCONVERTIBLE_BASE_OF, "__is_pointer_interconvertib DEFTRAIT_EXPR (IS_POD, "__is_pod", 1) DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1) DEFTRAIT_EXPR (IS_SAME, "__is_same", 2) +DEFTRAIT_EXPR (IS_SCOPED_ENUM, "__is_scoped_enum", 1) DEFTRAIT_EXPR (IS_STD_LAYOUT, "__is_standard_layout", 1) DEFTRAIT_EXPR (IS_TRIVIAL, "__is_trivial", 1) DEFTRAIT_EXPR (IS_TRIVIALLY_ASSIGNABLE, "__is_trivially_assignable", 2) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index ecbd858b6dd8..2bb545513557 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12446,6 +12446,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_SAME: return same_type_p (type1, type2); + case CPTK_IS_SCOPED_ENUM: + return SCOPED_ENUM_P (type1); + case CPTK_IS_STD_LAYOUT: return std_layout_type_p (type1); @@ -12625,6 +12628,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_CLASS: case CPTK_IS_ENUM: case CPTK_IS_SAME: + case CPTK_IS_SCOPED_ENUM: 
case CPTK_IS_UNION: break; diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 4cfb817788cd..744cfb3b42fb 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -116,6 +116,9 @@ #if !__has_builtin (__is_same_as) # error "__has_builtin (__is_same_as) failed" #endif +#if !__has_builtin (__is_scoped_enum) +# error "__has_builtin (__is_scoped_enum) failed" +#endif #if !__has_builtin (__is_standard_layout) # error "__has_builtin (__is_standard_layout) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_scoped_enum.C b/gcc/testsuite/g++.dg/ext/is_scoped_enum.C new file mode 100644 index 000000000000..a563b6ee67d1 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_scoped_enum.C @@ -0,0 +1,67 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_FN(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +enum class E { e1, e2 }; +SA_TEST_CATEGORY(__is_scoped_enum, E, true); +enum class Ec : char { e1, e2 }; +SA_TEST_CATEGORY(__is_scoped_enum, Ec, true); + +// negative tests +enum U { u1, u2 }; +SA_TEST_CATEGORY(__is_scoped_enum, U, false); +enum F : int { f1, f2 }; +SA_TEST_CATEGORY(__is_scoped_enum, F, false); +struct S; +SA_TEST_CATEGORY(__is_scoped_enum, S, false); +struct S { }; +SA_TEST_CATEGORY(__is_scoped_enum, S, false); + +SA_TEST_CATEGORY(__is_scoped_enum, int, false); +SA_TEST_CATEGORY(__is_scoped_enum, int[], false); +SA_TEST_CATEGORY(__is_scoped_enum, int[2], false); +SA_TEST_CATEGORY(__is_scoped_enum, int[][2], false); +SA_TEST_CATEGORY(__is_scoped_enum, int[2][3], false); +SA_TEST_CATEGORY(__is_scoped_enum, int*, false);
+SA_TEST_CATEGORY(__is_scoped_enum, int&, false); +SA_TEST_CATEGORY(__is_scoped_enum, int*&, false); +SA_TEST_FN(__is_scoped_enum, int(), false); +SA_TEST_FN(__is_scoped_enum, int(*)(), false); +SA_TEST_FN(__is_scoped_enum, int(&)(), false); + +enum opaque_unscoped : short; +enum class opaque_scoped; +enum class opaque_scoped_with_base : long; + +SA_TEST_CATEGORY(__is_scoped_enum, opaque_unscoped, false); +SA_TEST_CATEGORY(__is_scoped_enum, opaque_scoped, true); +SA_TEST_CATEGORY(__is_scoped_enum, opaque_scoped_with_base, true); + +enum unscoped { + u_is_scoped = __is_scoped_enum(unscoped), +}; +SA( ! unscoped::u_is_scoped ); + +enum unscoped_fixed : char { + uf_is_scoped = __is_scoped_enum(unscoped_fixed), +}; +SA( ! unscoped_fixed::uf_is_scoped ); + +enum class scoped { + is_scoped = __is_scoped_enum(scoped), +}; +SA( (bool) scoped::is_scoped ); From bd3776c03b00e6106d3609eb6cfcc99c0320b0c7 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:05 -0800 Subject: [PATCH 146/311] c++: Implement __is_member_pointer built-in trait This patch implements built-in trait for std::is_member_pointer. gcc/cp/ChangeLog: * cp-trait.def: Define __is_member_pointer. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_MEMBER_POINTER. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_member_pointer. * g++.dg/ext/is_member_pointer.C: New test. 
Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 ++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ gcc/testsuite/g++.dg/ext/is_member_pointer.C | 30 ++++++++++++++++++++ 5 files changed, 41 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_member_pointer.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 062dc404ccf5..fb150e02ea92 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3758,6 +3758,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_LITERAL_TYPE: inform (loc, " %qT is not a literal type", t1); break; + case CPTK_IS_MEMBER_POINTER: + inform (loc, " %qT is not a member pointer", t1); + break; case CPTK_IS_NOTHROW_ASSIGNABLE: inform (loc, " %qT is not nothrow assignable from %qT", t1, t2); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 9d848f6f77de..e17f5eaeac41 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -71,6 +71,7 @@ DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1) DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1) DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2) DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1) +DEFTRAIT_EXPR (IS_MEMBER_POINTER, "__is_member_pointer", 1) DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2) DEFTRAIT_EXPR (IS_NOTHROW_CONSTRUCTIBLE, "__is_nothrow_constructible", -1) DEFTRAIT_EXPR (IS_NOTHROW_CONVERTIBLE, "__is_nothrow_convertible", 2) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 2bb545513557..a462ed35d897 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12425,6 +12425,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_LITERAL_TYPE: return literal_type_p (type1); + case CPTK_IS_MEMBER_POINTER: + return TYPE_PTRMEM_P (type1); + case CPTK_IS_NOTHROW_ASSIGNABLE: return is_nothrow_xible (MODIFY_EXPR, type1, type2); @@ -12627,6 +12630,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, 
tree type2) case CPTK_IS_BOUNDED_ARRAY: case CPTK_IS_CLASS: case CPTK_IS_ENUM: + case CPTK_IS_MEMBER_POINTER: case CPTK_IS_SAME: case CPTK_IS_SCOPED_ENUM: case CPTK_IS_UNION: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 744cfb3b42fb..349fae7104e7 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -92,6 +92,9 @@ #if !__has_builtin (__is_literal_type) # error "__has_builtin (__is_literal_type) failed" #endif +#if !__has_builtin (__is_member_pointer) +# error "__has_builtin (__is_member_pointer) failed" +#endif #if !__has_builtin (__is_nothrow_assignable) # error "__has_builtin (__is_nothrow_assignable) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_member_pointer.C b/gcc/testsuite/g++.dg/ext/is_member_pointer.C new file mode 100644 index 000000000000..7ee2e3ab90c9 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_member_pointer.C @@ -0,0 +1,30 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_NON_VOLATILE(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT) + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +SA_TEST_CATEGORY(__is_member_pointer, int (ClassType::*), true); +SA_TEST_CATEGORY(__is_member_pointer, ClassType (ClassType::*), true); + +SA_TEST_NON_VOLATILE(__is_member_pointer, int (ClassType::*)(int), true); +SA_TEST_NON_VOLATILE(__is_member_pointer, int (ClassType::*)(int) const, true); +SA_TEST_NON_VOLATILE(__is_member_pointer, int (ClassType::*)(float, ...), true); +SA_TEST_NON_VOLATILE(__is_member_pointer, ClassType (ClassType::*)(ClassType), true); +SA_TEST_NON_VOLATILE(__is_member_pointer, + float (ClassType::*)(int, float, int[], int&), true); + +// Sanity
check. +SA_TEST_CATEGORY(__is_member_pointer, ClassType, false); From 7c62a418f8a14149f28956ba779538000f4ea358 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:07 -0800 Subject: [PATCH 147/311] c++: Implement __is_member_function_pointer built-in trait This patch implements built-in trait for std::is_member_function_pointer. gcc/cp/ChangeLog: * cp-trait.def: Define __is_member_function_pointer. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_MEMBER_FUNCTION_POINTER. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_member_function_pointer. * g++.dg/ext/is_member_function_pointer.C: New test. Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 ++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ .../g++.dg/ext/is_member_function_pointer.C | 31 +++++++++++++++++++ 5 files changed, 42 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_member_function_pointer.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index fb150e02ea92..1efc7983039a 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3758,6 +3758,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_LITERAL_TYPE: inform (loc, " %qT is not a literal type", t1); break; + case CPTK_IS_MEMBER_FUNCTION_POINTER: + inform (loc, " %qT is not a member function pointer", t1); + break; case CPTK_IS_MEMBER_POINTER: inform (loc, " %qT is not a member pointer", t1); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index e17f5eaeac41..03a5cc280205 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -71,6 +71,7 @@ DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1) DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1) DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2) DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1) +DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER,
"__is_member_function_pointer", 1) DEFTRAIT_EXPR (IS_MEMBER_POINTER, "__is_member_pointer", 1) DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2) DEFTRAIT_EXPR (IS_NOTHROW_CONSTRUCTIBLE, "__is_nothrow_constructible", -1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index a462ed35d897..e8224e819338 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12425,6 +12425,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_LITERAL_TYPE: return literal_type_p (type1); + case CPTK_IS_MEMBER_FUNCTION_POINTER: + return TYPE_PTRMEMFUNC_P (type1); + case CPTK_IS_MEMBER_POINTER: return TYPE_PTRMEM_P (type1); @@ -12630,6 +12633,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_BOUNDED_ARRAY: case CPTK_IS_CLASS: case CPTK_IS_ENUM: + case CPTK_IS_MEMBER_FUNCTION_POINTER: case CPTK_IS_MEMBER_POINTER: case CPTK_IS_SAME: case CPTK_IS_SCOPED_ENUM: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 349fae7104e7..fb17680d3b06 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -92,6 +92,9 @@ #if !__has_builtin (__is_literal_type) # error "__has_builtin (__is_literal_type) failed" #endif +#if !__has_builtin (__is_member_function_pointer) +# error "__has_builtin (__is_member_function_pointer) failed" +#endif #if !__has_builtin (__is_member_pointer) # error "__has_builtin (__is_member_pointer) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_member_function_pointer.C b/gcc/testsuite/g++.dg/ext/is_member_function_pointer.C new file mode 100644 index 000000000000..555123e8f078 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_member_function_pointer.C @@ -0,0 +1,31 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_FN(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ +
SA(TRAIT(const TYPE) == EXPECT); + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +// Positive tests. +SA_TEST_FN(__is_member_function_pointer, int (ClassType::*) (int), true); +SA_TEST_FN(__is_member_function_pointer, int (ClassType::*) (int) const, true); +SA_TEST_FN(__is_member_function_pointer, int (ClassType::*) (float, ...), true); +SA_TEST_FN(__is_member_function_pointer, ClassType (ClassType::*) (ClassType), true); +SA_TEST_FN(__is_member_function_pointer, float (ClassType::*) (int, float, int[], int&), true); + +// Negative tests. +SA_TEST_CATEGORY(__is_member_function_pointer, int (ClassType::*), false); +SA_TEST_CATEGORY(__is_member_function_pointer, ClassType (ClassType::*), false); + +// Sanity check. +SA_TEST_CATEGORY(__is_member_function_pointer, ClassType, false); From f4007cb78e428c7a82b85b7c1c4104946b98c933 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:09 -0800 Subject: [PATCH 148/311] c++: Implement __is_member_object_pointer built-in trait This patch implements built-in trait for std::is_member_object_pointer. gcc/cp/ChangeLog: * cp-trait.def: Define __is_member_object_pointer. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_MEMBER_OBJECT_POINTER. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_member_object_pointer. * g++.dg/ext/is_member_object_pointer.C: New test. 
Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 ++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ .../g++.dg/ext/is_member_object_pointer.C | 30 +++++++++++++++++++ 5 files changed, 41 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_member_object_pointer.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 1efc7983039a..204b9989b6a6 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3761,6 +3761,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_MEMBER_FUNCTION_POINTER: inform (loc, " %qT is not a member function pointer", t1); break; + case CPTK_IS_MEMBER_OBJECT_POINTER: + inform (loc, " %qT is not a member object pointer", t1); + break; case CPTK_IS_MEMBER_POINTER: inform (loc, " %qT is not a member pointer", t1); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 03a5cc280205..f5efffdfc99a 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -72,6 +72,7 @@ DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1) DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2) DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1) DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, "__is_member_function_pointer", 1) +DEFTRAIT_EXPR (IS_MEMBER_OBJECT_POINTER, "__is_member_object_pointer", 1) DEFTRAIT_EXPR (IS_MEMBER_POINTER, "__is_member_pointer", 1) DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2) DEFTRAIT_EXPR (IS_NOTHROW_CONSTRUCTIBLE, "__is_nothrow_constructible", -1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index e8224e819338..5478c4bd3bc9 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12428,6 +12428,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_MEMBER_FUNCTION_POINTER: return TYPE_PTRMEMFUNC_P (type1); + case CPTK_IS_MEMBER_OBJECT_POINTER: + return TYPE_PTRDATAMEM_P (type1); + case CPTK_IS_MEMBER_POINTER: return TYPE_PTRMEM_P (type1); @@ -12634,6 +12637,7 @@ 
finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_CLASS: case CPTK_IS_ENUM: case CPTK_IS_MEMBER_FUNCTION_POINTER: + case CPTK_IS_MEMBER_OBJECT_POINTER: case CPTK_IS_MEMBER_POINTER: case CPTK_IS_SAME: case CPTK_IS_SCOPED_ENUM: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index fb17680d3b06..b5797075d524 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -95,6 +95,9 @@ #if !__has_builtin (__is_member_function_pointer) # error "__has_builtin (__is_member_function_pointer) failed" #endif +#if !__has_builtin (__is_member_object_pointer) +# error "__has_builtin (__is_member_object_pointer) failed" +#endif #if !__has_builtin (__is_member_pointer) # error "__has_builtin (__is_member_pointer) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_member_object_pointer.C b/gcc/testsuite/g++.dg/ext/is_member_object_pointer.C new file mode 100644 index 000000000000..835e48c8f8ef --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_member_object_pointer.C @@ -0,0 +1,30 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_NON_VOLATILE(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT) + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +// Positive tests. +SA_TEST_CATEGORY(__is_member_object_pointer, int (ClassType::*), true); +SA_TEST_CATEGORY(__is_member_object_pointer, ClassType (ClassType::*), true); + +// Negative tests.
+SA_TEST_NON_VOLATILE(__is_member_object_pointer, int (ClassType::*) (int), false); +SA_TEST_NON_VOLATILE(__is_member_object_pointer, int (ClassType::*) (float, ...), false); +SA_TEST_NON_VOLATILE(__is_member_object_pointer, ClassType (ClassType::*) (ClassType), false); +SA_TEST_NON_VOLATILE(__is_member_object_pointer, float (ClassType::*) (int, float, int[], int&), false); + +// Sanity check. +SA_TEST_CATEGORY(__is_member_object_pointer, ClassType, false); From 5bca9af579a06bbf7160613f948ecf2092299437 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:11 -0800 Subject: [PATCH 149/311] c++: Implement __is_reference built-in trait This patch implements built-in trait for std::is_reference. gcc/cp/ChangeLog: * cp-trait.def: Define __is_reference. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_REFERENCE. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_reference. * g++.dg/ext/is_reference.C: New test. 
Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 +++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 +++ gcc/testsuite/g++.dg/ext/is_reference.C | 34 ++++++++++++++++++++++++ 5 files changed, 45 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_reference.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 204b9989b6a6..aa42017f67c5 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3789,6 +3789,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_POLYMORPHIC: inform (loc, " %qT is not a polymorphic type", t1); break; + case CPTK_IS_REFERENCE: + inform (loc, " %qT is not a reference", t1); + break; case CPTK_IS_SAME: inform (loc, " %qT is not the same as %qT", t1, t2); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index f5efffdfc99a..2d82ed3dd359 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -80,6 +80,7 @@ DEFTRAIT_EXPR (IS_NOTHROW_CONVERTIBLE, "__is_nothrow_convertible", 2) DEFTRAIT_EXPR (IS_POINTER_INTERCONVERTIBLE_BASE_OF, "__is_pointer_interconvertible_base_of", 2) DEFTRAIT_EXPR (IS_POD, "__is_pod", 1) DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1) +DEFTRAIT_EXPR (IS_REFERENCE, "__is_reference", 1) DEFTRAIT_EXPR (IS_SAME, "__is_same", 2) DEFTRAIT_EXPR (IS_SCOPED_ENUM, "__is_scoped_enum", 1) DEFTRAIT_EXPR (IS_STD_LAYOUT, "__is_standard_layout", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 5478c4bd3bc9..4af5a60e16a4 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12452,6 +12452,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_POLYMORPHIC: return CLASS_TYPE_P (type1) && TYPE_POLYMORPHIC_P (type1); + case CPTK_IS_REFERENCE: + return type_code1 == REFERENCE_TYPE; + case CPTK_IS_SAME: return same_type_p (type1, type2); @@ -12639,6 +12642,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_MEMBER_FUNCTION_POINTER: case 
CPTK_IS_MEMBER_OBJECT_POINTER: case CPTK_IS_MEMBER_POINTER: + case CPTK_IS_REFERENCE: case CPTK_IS_SAME: case CPTK_IS_SCOPED_ENUM: case CPTK_IS_UNION: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index b5797075d524..b667b5c33ac6 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -119,6 +119,9 @@ #if !__has_builtin (__is_polymorphic) # error "__has_builtin (__is_polymorphic) failed" #endif +#if !__has_builtin (__is_reference) +# error "__has_builtin (__is_reference) failed" +#endif #if !__has_builtin (__is_same) # error "__has_builtin (__is_same) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_reference.C b/gcc/testsuite/g++.dg/ext/is_reference.C new file mode 100644 index 000000000000..b5ce4db7afd8 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_reference.C @@ -0,0 +1,34 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +// Positive tests. +SA_TEST_CATEGORY(__is_reference, int&, true); +SA_TEST_CATEGORY(__is_reference, ClassType&, true); +SA(__is_reference(int(&)(int))); +SA_TEST_CATEGORY(__is_reference, int&&, true); +SA_TEST_CATEGORY(__is_reference, ClassType&&, true); +SA(__is_reference(int(&&)(int))); +SA_TEST_CATEGORY(__is_reference, IncompleteClass&, true); + +// Negative tests +SA_TEST_CATEGORY(__is_reference, void, false); +SA_TEST_CATEGORY(__is_reference, int*, false); +SA_TEST_CATEGORY(__is_reference, int[3], false); +SA(!__is_reference(int(int))); +SA(!__is_reference(int(*const)(int))); +SA(!__is_reference(int(*volatile)(int))); +SA(!__is_reference(int(*const volatile)(int))); + +// Sanity check.
+SA_TEST_CATEGORY(__is_reference, ClassType, false); +SA_TEST_CATEGORY(__is_reference, IncompleteClass, false); From ffc08431f5c89a20dab21f958bb408d6a604cf54 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:13 -0800 Subject: [PATCH 150/311] c++: Implement __is_function built-in trait This patch implements built-in trait for std::is_function. gcc/cp/ChangeLog: * cp-trait.def: Define __is_function. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_function. * g++.dg/ext/is_function.C: New test. Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 ++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 4 ++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ gcc/testsuite/g++.dg/ext/is_function.C | 58 ++++++++++++++++++++++++ 5 files changed, 69 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_function.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index aa42017f67c5..4bea60897911 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3752,6 +3752,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_FINAL: inform (loc, " %qT is not a final class", t1); break; + case CPTK_IS_FUNCTION: + inform (loc, " %qT is not a function", t1); + break; case CPTK_IS_LAYOUT_COMPATIBLE: inform (loc, " %qT is not layout compatible with %qT", t1, t2); break; diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 2d82ed3dd359..89712f186674 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -69,6 +69,7 @@ DEFTRAIT_EXPR (IS_CONVERTIBLE, "__is_convertible", 2) DEFTRAIT_EXPR (IS_EMPTY, "__is_empty", 1) DEFTRAIT_EXPR (IS_ENUM, "__is_enum", 1) DEFTRAIT_EXPR (IS_FINAL, "__is_final", 1) +DEFTRAIT_EXPR (IS_FUNCTION, "__is_function", 1) DEFTRAIT_EXPR (IS_LAYOUT_COMPATIBLE, "__is_layout_compatible", 2) DEFTRAIT_EXPR (IS_LITERAL_TYPE, "__is_literal_type", 1) 
DEFTRAIT_EXPR (IS_MEMBER_FUNCTION_POINTER, "__is_member_function_pointer", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 4af5a60e16a4..4a95af477e71 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12419,6 +12419,9 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_FINAL: return CLASS_TYPE_P (type1) && CLASSTYPE_FINAL (type1); + case CPTK_IS_FUNCTION: + return type_code1 == FUNCTION_TYPE; + case CPTK_IS_LAYOUT_COMPATIBLE: return layout_compatible_type_p (type1, type2); @@ -12639,6 +12642,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_BOUNDED_ARRAY: case CPTK_IS_CLASS: case CPTK_IS_ENUM: + case CPTK_IS_FUNCTION: case CPTK_IS_MEMBER_FUNCTION_POINTER: case CPTK_IS_MEMBER_OBJECT_POINTER: case CPTK_IS_MEMBER_POINTER: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index b667b5c33ac6..5215da27d6f6 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -86,6 +86,9 @@ #if !__has_builtin (__is_final) # error "__has_builtin (__is_final) failed" #endif +#if !__has_builtin (__is_function) +# error "__has_builtin (__is_function) failed" +#endif #if !__has_builtin (__is_layout_compatible) # error "__has_builtin (__is_layout_compatible) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_function.C b/gcc/testsuite/g++.dg/ext/is_function.C new file mode 100644 index 000000000000..2e1594b12ad2 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_function.C @@ -0,0 +1,58 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +struct A +{ void fn(); }; + +template<typename> +struct AHolder { }; + +template<class T, class U> +struct AHolder<U T::*> +{
using type = U; }; + +// Positive tests. +SA(__is_function(int (int))); +SA(__is_function(ClassType (ClassType))); +SA(__is_function(float (int, float, int[], int&))); +SA(__is_function(int (int, ...))); +SA(__is_function(bool (ClassType) const)); +SA(__is_function(AHolder<decltype(&A::fn)>::type)); + +void fn(); +SA(__is_function(decltype(fn))); + +// Negative tests. +SA_TEST_CATEGORY(__is_function, int, false); +SA_TEST_CATEGORY(__is_function, int*, false); +SA_TEST_CATEGORY(__is_function, int&, false); +SA_TEST_CATEGORY(__is_function, void, false); +SA_TEST_CATEGORY(__is_function, void*, false); +SA_TEST_CATEGORY(__is_function, void**, false); +SA_TEST_CATEGORY(__is_function, std::nullptr_t, false); + +SA_TEST_CATEGORY(__is_function, AbstractClass, false); +SA(!__is_function(int(&)(int))); +SA(!__is_function(int(*)(int))); + +SA_TEST_CATEGORY(__is_function, A, false); +SA_TEST_CATEGORY(__is_function, decltype(&A::fn), false); + +struct FnCallOverload +{ void operator()(); }; +SA_TEST_CATEGORY(__is_function, FnCallOverload, false); + +// Sanity check. +SA_TEST_CATEGORY(__is_function, ClassType, false); +SA_TEST_CATEGORY(__is_function, IncompleteClass, false); +SA_TEST_CATEGORY(__is_function, IncompleteUnion, false); From 692080219c2ecf2cde9360a3b179f491e867afb8 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:15 -0800 Subject: [PATCH 151/311] c++: Implement __is_object built-in trait This patch implements built-in trait for std::is_object. gcc/cp/ChangeLog: * cp-trait.def: Define __is_object. * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_OBJECT. * semantics.cc (trait_expr_value): Likewise. (finish_trait_expr): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __is_object. * g++.dg/ext/is_object.C: New test.
Signed-off-by: Ken Matsui --- gcc/cp/constraint.cc | 3 +++ gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 6 +++++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 +++ gcc/testsuite/g++.dg/ext/is_object.C | 29 ++++++++++++++++++++++++ 5 files changed, 42 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/is_object.C diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 4bea60897911..eeacead52a5a 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3782,6 +3782,9 @@ diagnose_trait_expr (tree expr, tree args) case CPTK_IS_NOTHROW_CONVERTIBLE: inform (loc, " %qT is not nothrow convertible from %qE", t2, t1); break; + case CPTK_IS_OBJECT: + inform (loc, " %qT is not an object type", t1); + break; case CPTK_IS_POINTER_INTERCONVERTIBLE_BASE_OF: inform (loc, " %qT is not pointer-interconvertible base of %qT", t1, t2); diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 89712f186674..b833efff26ed 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -78,6 +78,7 @@ DEFTRAIT_EXPR (IS_MEMBER_POINTER, "__is_member_pointer", 1) DEFTRAIT_EXPR (IS_NOTHROW_ASSIGNABLE, "__is_nothrow_assignable", 2) DEFTRAIT_EXPR (IS_NOTHROW_CONSTRUCTIBLE, "__is_nothrow_constructible", -1) DEFTRAIT_EXPR (IS_NOTHROW_CONVERTIBLE, "__is_nothrow_convertible", 2) +DEFTRAIT_EXPR (IS_OBJECT, "__is_object", 1) DEFTRAIT_EXPR (IS_POINTER_INTERCONVERTIBLE_BASE_OF, "__is_pointer_interconvertible_base_of", 2) DEFTRAIT_EXPR (IS_POD, "__is_pod", 1) DEFTRAIT_EXPR (IS_POLYMORPHIC, "__is_polymorphic", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 4a95af477e71..6420159e0b0f 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12446,6 +12446,11 @@ trait_expr_value (cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_NOTHROW_CONVERTIBLE: return is_nothrow_convertible (type1, type2); + case CPTK_IS_OBJECT: + return (type_code1 != FUNCTION_TYPE + && type_code1 != REFERENCE_TYPE + && type_code1 != VOID_TYPE); + case 
CPTK_IS_POINTER_INTERCONVERTIBLE_BASE_OF: return pointer_interconvertible_base_of_p (type1, type2); @@ -12646,6 +12651,7 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) case CPTK_IS_MEMBER_FUNCTION_POINTER: case CPTK_IS_MEMBER_OBJECT_POINTER: case CPTK_IS_MEMBER_POINTER: + case CPTK_IS_OBJECT: case CPTK_IS_REFERENCE: case CPTK_IS_SAME: case CPTK_IS_SCOPED_ENUM: diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 5215da27d6f6..2242276f6336 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -113,6 +113,9 @@ #if !__has_builtin (__is_nothrow_convertible) # error "__has_builtin (__is_nothrow_convertible) failed" #endif +#if !__has_builtin (__is_object) +# error "__has_builtin (__is_object) failed" +#endif #if !__has_builtin (__is_pointer_interconvertible_base_of) # error "__has_builtin (__is_pointer_interconvertible_base_of) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/is_object.C b/gcc/testsuite/g++.dg/ext/is_object.C new file mode 100644 index 000000000000..5c759a5ef693 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_object.C @@ -0,0 +1,29 @@ +// { dg-do compile { target c++11 } } + +#include <testsuite_tr1.h> + +using namespace __gnu_test; + +#define SA(X) static_assert((X),#X) + +#define SA_TEST_NON_VOLATILE(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT) + +#define SA_TEST_CATEGORY(TRAIT, TYPE, EXPECT) \ + SA(TRAIT(TYPE) == EXPECT); \ + SA(TRAIT(const TYPE) == EXPECT); \ + SA(TRAIT(volatile TYPE) == EXPECT); \ + SA(TRAIT(const volatile TYPE) == EXPECT) + +SA_TEST_NON_VOLATILE(__is_object, int (int), false); +SA_TEST_NON_VOLATILE(__is_object, ClassType (ClassType), false); +SA_TEST_NON_VOLATILE(__is_object, + float (int, float, int[], int&), false); +SA_TEST_CATEGORY(__is_object, int&, false); +SA_TEST_CATEGORY(__is_object, ClassType&, false); +SA_TEST_NON_VOLATILE(__is_object, int(&)(int), false);
+SA_TEST_CATEGORY(__is_object, void, false); + +// Sanity check. +SA_TEST_CATEGORY(__is_object, ClassType, true); From 40dd7a5fe5d098a6ca2c77d7c42919abb37611b0 Mon Sep 17 00:00:00 2001 From: Ken Matsui Date: Wed, 6 Dec 2023 21:33:17 -0800 Subject: [PATCH 152/311] c++: Implement __remove_pointer built-in trait This patch implements built-in trait for std::remove_pointer. gcc/cp/ChangeLog: * cp-trait.def: Define __remove_pointer. * semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER. gcc/testsuite/ChangeLog: * g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer. * g++.dg/ext/remove_pointer.C: New test. Signed-off-by: Ken Matsui --- gcc/cp/cp-trait.def | 1 + gcc/cp/semantics.cc | 5 +++ gcc/testsuite/g++.dg/ext/has-builtin-1.C | 3 ++ gcc/testsuite/g++.dg/ext/remove_pointer.C | 51 +++++++++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 gcc/testsuite/g++.dg/ext/remove_pointer.C diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index b833efff26ed..394f006f20f2 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -95,6 +95,7 @@ DEFTRAIT_EXPR (REF_CONSTRUCTS_FROM_TEMPORARY, "__reference_constructs_from_tempo DEFTRAIT_EXPR (REF_CONVERTS_FROM_TEMPORARY, "__reference_converts_from_temporary", 2) DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1) DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1) +DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1) DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1) DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1) DEFTRAIT_TYPE (UNDERLYING_TYPE, "__underlying_type", 1) diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 6420159e0b0f..973b44f75ae9 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -12726,6 +12726,11 @@ finish_trait_type (cp_trait_kind kind, tree type1, tree type2, type1 = TREE_TYPE (type1); return cv_unqualified (type1); + case CPTK_REMOVE_POINTER: + if (TYPE_PTR_P (type1)) + type1 = TREE_TYPE (type1); + return type1; + case 
CPTK_REMOVE_REFERENCE: if (TYPE_REF_P (type1)) type1 = TREE_TYPE (type1); diff --git a/gcc/testsuite/g++.dg/ext/has-builtin-1.C b/gcc/testsuite/g++.dg/ext/has-builtin-1.C index 2242276f6336..02b4b4d745d3 100644 --- a/gcc/testsuite/g++.dg/ext/has-builtin-1.C +++ b/gcc/testsuite/g++.dg/ext/has-builtin-1.C @@ -167,6 +167,9 @@ #if !__has_builtin (__remove_cvref) # error "__has_builtin (__remove_cvref) failed" #endif +#if !__has_builtin (__remove_pointer) +# error "__has_builtin (__remove_pointer) failed" +#endif #if !__has_builtin (__remove_reference) # error "__has_builtin (__remove_reference) failed" #endif diff --git a/gcc/testsuite/g++.dg/ext/remove_pointer.C b/gcc/testsuite/g++.dg/ext/remove_pointer.C new file mode 100644 index 000000000000..7b13db93950e --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/remove_pointer.C @@ -0,0 +1,51 @@ +// { dg-do compile { target c++11 } } + +#define SA(X) static_assert((X),#X) + +SA(__is_same(__remove_pointer(int), int)); +SA(__is_same(__remove_pointer(int*), int)); +SA(__is_same(__remove_pointer(int**), int*)); + +SA(__is_same(__remove_pointer(const int*), const int)); +SA(__is_same(__remove_pointer(const int**), const int*)); +SA(__is_same(__remove_pointer(int* const), int)); +SA(__is_same(__remove_pointer(int** const), int*)); +SA(__is_same(__remove_pointer(int* const* const), int* const)); + +SA(__is_same(__remove_pointer(volatile int*), volatile int)); +SA(__is_same(__remove_pointer(volatile int**), volatile int*)); +SA(__is_same(__remove_pointer(int* volatile), int)); +SA(__is_same(__remove_pointer(int** volatile), int*)); +SA(__is_same(__remove_pointer(int* volatile* volatile), int* volatile)); + +SA(__is_same(__remove_pointer(const volatile int*), const volatile int)); +SA(__is_same(__remove_pointer(const volatile int**), const volatile int*)); +SA(__is_same(__remove_pointer(const int* volatile), const int)); +SA(__is_same(__remove_pointer(volatile int* const), volatile int)); +SA(__is_same(__remove_pointer(int* const 
volatile), int)); +SA(__is_same(__remove_pointer(const int** volatile), const int*)); +SA(__is_same(__remove_pointer(volatile int** const), volatile int*)); +SA(__is_same(__remove_pointer(int** const volatile), int*)); +SA(__is_same(__remove_pointer(int* const* const volatile), int* const)); +SA(__is_same(__remove_pointer(int* volatile* const volatile), int* volatile)); +SA(__is_same(__remove_pointer(int* const volatile* const volatile), int* const volatile)); + +SA(__is_same(__remove_pointer(int&), int&)); +SA(__is_same(__remove_pointer(const int&), const int&)); +SA(__is_same(__remove_pointer(volatile int&), volatile int&)); +SA(__is_same(__remove_pointer(const volatile int&), const volatile int&)); + +SA(__is_same(__remove_pointer(int&&), int&&)); +SA(__is_same(__remove_pointer(const int&&), const int&&)); +SA(__is_same(__remove_pointer(volatile int&&), volatile int&&)); +SA(__is_same(__remove_pointer(const volatile int&&), const volatile int&&)); + +SA(__is_same(__remove_pointer(int[3]), int[3])); +SA(__is_same(__remove_pointer(const int[3]), const int[3])); +SA(__is_same(__remove_pointer(volatile int[3]), volatile int[3])); +SA(__is_same(__remove_pointer(const volatile int[3]), const volatile int[3])); + +SA(__is_same(__remove_pointer(int(int)), int(int))); +SA(__is_same(__remove_pointer(int(*const)(int)), int(int))); +SA(__is_same(__remove_pointer(int(*volatile)(int)), int(int))); +SA(__is_same(__remove_pointer(int(*const volatile)(int)), int(int))); From fa99f7d12b87f36d3c38349fcdcfca074564858d Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Sun, 10 Dec 2023 14:20:32 -0500 Subject: [PATCH 153/311] doc: small tweak Mention Objective-C++ here to be consistent with the surrounding C/ObjC lines. gcc/ChangeLog: * doc/invoke.texi (-fpermissive): Mention ObjC++ for -Wnarrowing. 
--- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d4e689b64c01..15f3a86e7688 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6246,7 +6246,7 @@ that have their own flag: -Wimplicit-int @r{(C and Objective-C only)} -Wincompatible-pointer-types @r{(C and Objective-C only)} -Wint-conversion @r{(C and Objective-C only)} --Wnarrowing @r{(C++)} +-Wnarrowing @r{(C++ and Objective-C++ only)} -Wreturn-mismatch @r{(C and Objective-C only)} } From 1e462fb480d38de5f9a4578bbe5c5bc66a01a9ed Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Wed, 6 Dec 2023 20:42:27 +0100 Subject: [PATCH 154/311] Fortran: function returning contiguous class array [PR105543] gcc/fortran/ChangeLog: PR fortran/105543 * resolve.cc (resolve_symbol): For a CLASS-valued function having a RESULT clause, ensure that attr.class_ok is set for its symbol as well as for its resolved result variable. gcc/testsuite/ChangeLog: PR fortran/105543 * gfortran.dg/contiguous_13.f90: New test. --- gcc/fortran/resolve.cc | 5 +++++ gcc/testsuite/gfortran.dg/contiguous_13.f90 | 22 +++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/contiguous_13.f90 diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 166b702cd9a6..4fe0e7202e5d 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -16102,6 +16102,11 @@ resolve_symbol (gfc_symbol *sym) specification_expr = saved_specification_expr; } + /* For a CLASS-valued function with a result variable, affirm that it has + been resolved also when looking at the symbol 'sym'. 
*/ + if (mp_flag && sym->ts.type == BT_CLASS && sym->result->attr.class_ok) + sym->attr.class_ok = sym->result->attr.class_ok; + if (sym->ts.type == BT_CLASS && sym->attr.class_ok && sym->ts.u.derived && CLASS_DATA (sym)) { diff --git a/gcc/testsuite/gfortran.dg/contiguous_13.f90 b/gcc/testsuite/gfortran.dg/contiguous_13.f90 new file mode 100644 index 000000000000..8c6784432c99 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/contiguous_13.f90 @@ -0,0 +1,22 @@ +! { dg-do compile } +! PR fortran/105543 - function returning contiguous class array +! Contributed by martin + +module func_contiguous + implicit none + type :: a + end type a +contains + function create1 () result(x) + class(a), dimension(:), contiguous, pointer :: x + end + function create2 () + class(a), dimension(:), contiguous, pointer :: create2 + end + function create3 () result(x) + class(*), dimension(:), contiguous, pointer :: x + end + function create4 () + class(*), dimension(:), contiguous, pointer :: create4 + end +end module func_contiguous From 99c5fa3ba6b5f54a7d632bd9da0e07dd85a4b669 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Fri, 8 Dec 2023 13:57:31 +0100 Subject: [PATCH 155/311] Fortran: allow NULL() for POINTER, OPTIONAL, CONTIGUOUS dummy [PR111503] gcc/fortran/ChangeLog: PR fortran/111503 * expr.cc (gfc_is_simply_contiguous): Determine characteristics of NULL() from optional MOLD argument, otherwise treat as contiguous. * primary.cc (gfc_variable_attr): Derive attributes of NULL(MOLD) from MOLD. gcc/testsuite/ChangeLog: PR fortran/111503 * gfortran.dg/contiguous_14.f90: New test. 
--- gcc/fortran/expr.cc | 14 ++++++++ gcc/fortran/primary.cc | 4 ++- gcc/testsuite/gfortran.dg/contiguous_14.f90 | 39 +++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gfortran.dg/contiguous_14.f90 diff --git a/gcc/fortran/expr.cc b/gcc/fortran/expr.cc index c668baeef8c3..709f3c3cbef4 100644 --- a/gcc/fortran/expr.cc +++ b/gcc/fortran/expr.cc @@ -5958,6 +5958,20 @@ gfc_is_simply_contiguous (gfc_expr *expr, bool strict, bool permit_element) if (expr->expr_type == EXPR_ARRAY) return true; + if (expr->expr_type == EXPR_NULL) + { + /* F2018:16.9.144 NULL ([MOLD]): + "If MOLD is present, the characteristics are the same as MOLD." + "If MOLD is absent, the characteristics of the result are + determined by the entity with which the reference is associated." + F2018:15.3.2.2 characteristics attributes include CONTIGUOUS. */ + if (expr->ts.type == BT_UNKNOWN) + return true; + else + return (gfc_variable_attr (expr, NULL).contiguous + || gfc_variable_attr (expr, NULL).allocatable); + } + if (expr->expr_type == EXPR_FUNCTION) { if (expr->value.function.isym) diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc index 7278932b634a..f8a1c09d190b 100644 --- a/gcc/fortran/primary.cc +++ b/gcc/fortran/primary.cc @@ -2627,7 +2627,9 @@ gfc_variable_attr (gfc_expr *expr, gfc_typespec *ts) gfc_component *comp; bool has_inquiry_part; - if (expr->expr_type != EXPR_VARIABLE && expr->expr_type != EXPR_FUNCTION) + if (expr->expr_type != EXPR_VARIABLE + && expr->expr_type != EXPR_FUNCTION + && !(expr->expr_type == EXPR_NULL && expr->ts.type != BT_UNKNOWN)) gfc_internal_error ("gfc_variable_attr(): Expression isn't a variable"); sym = expr->symtree->n.sym; diff --git a/gcc/testsuite/gfortran.dg/contiguous_14.f90 b/gcc/testsuite/gfortran.dg/contiguous_14.f90 new file mode 100644 index 000000000000..21e42311e9c7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/contiguous_14.f90 @@ -0,0 +1,39 @@ +! { dg-do compile } +! 
PR fortran/111503 - passing NULL() to POINTER, OPTIONAL, CONTIGUOUS dummy + +program test + implicit none + integer, pointer, contiguous :: p(:) => null() + integer, allocatable, target :: a(:) + type t + integer, pointer, contiguous :: p(:) => null() + integer, allocatable :: a(:) + end type t + type(t), target :: z + class(t), allocatable, target :: c + print *, is_contiguous (p) + allocate (t :: c) + call one (p) + call one () + call one (null ()) + call one (null (p)) + call one (a) + call one (null (a)) + call one (z% p) + call one (z% a) + call one (null (z% p)) + call one (null (z% a)) + call one (c% p) + call one (c% a) + call one (null (c% p)) + call one (null (c% a)) +contains + subroutine one (x) + integer, pointer, optional, contiguous, intent(in) :: x(:) + print *, present (x) + if (present (x)) then + print *, "->", associated (x) + if (associated (x)) stop 99 + end if + end subroutine one +end From 02ecdaab7a50f4505fd905effb6d238d773dc813 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sun, 10 Dec 2023 19:46:05 +0000 Subject: [PATCH 156/311] aarch64: Add -funwind-tables to some tests The .cfi scans in these tests failed for *-elf targets because those targets don't enable .eh_frame info by default. gcc/testsuite/ * gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables. * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. 
--- gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c index a2de55773af7..98922aaeae09 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c @@ -1,4 +1,4 @@ -// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } // { dg-final { check-function-bodies "**" "" } } void ns_callee (); diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c index ed999d085603..4250fe7984cd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c @@ -1,4 +1,4 @@ -// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } // { dg-final { check-function-bodies "**" "" } } __attribute__((aarch64_vector_pcs)) void ns_callee (); diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c index be9b5cc04102..e3d9bc274d84 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c @@ -1,4 +1,4 @@ -// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } +// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } // { dg-final { check-function-bodies "**" "" } } #include From 23ea0bc2cf042d74c4adfe26a57cf96b1d837a91 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sun, 10 Dec 2023 
19:46:05 +0000 Subject: [PATCH 157/311] aarch64: Skip some SME register save tests on BE Big-endian targets need to save Z8-Z15 in the same order as the registers would appear for D8-D15, because the layout is mandated by the EH ABI. BE targets therefore use ST1D instead of the normal STR for those registers (but not for others). That difference is already tested elsewhere and isn't important for the SME tests. This patch therefore restricts the affected tests to LE. gcc/testsuite/ * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that contain Z8-Z23 saves to little-endian. * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 6 +++--- gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c | 6 +++--- gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c index e3d9bc274d84..6238ab80da2f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c @@ -14,7 +14,7 @@ struct callbacks { }; /* -** n_caller: { target lp64 } +** n_caller: { target { lp64 && aarch64_little_endian } } ** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! ** cntd x16 ** str x16, \[sp, #?16\] @@ -114,7 +114,7 @@ n_caller (struct callbacks *c) } /* -** s_caller: { target lp64 } +** s_caller: { target { lp64 && aarch64_little_endian } } ** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! ** cntd x16 ** str x16, \[sp, #?16\] @@ -214,7 +214,7 @@ s_caller (struct callbacks *c) [[arm::streaming]] } /* -** sc_caller: +** sc_caller: { target aarch64_little_endian } ** stp x29, x30, \[sp, #?-32\]! 
** mov x29, sp ** cntd x16 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c index f44724df32f4..c909b34ff5ef 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c @@ -7,7 +7,7 @@ svint8_t produce_z0 (); void consume_z0 (svint8_t); /* -** test_z0: +** test_z0: { target aarch64_little_endian } ** ... ** smstop sm ** bl produce_z0 @@ -32,7 +32,7 @@ svint8x4_t produce_z3 (); void consume_z3 (svint8x4_t); /* -** test_z3: +** test_z3: { target aarch64_little_endian } ** ... ** smstop sm ** bl produce_z3 @@ -61,7 +61,7 @@ svbool_t produce_p0 (); void consume_p0 (svbool_t); /* -** test_p0: +** test_p0: { target aarch64_little_endian } ** ... ** smstop sm ** bl produce_p0 diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c index 20ff4b87d94b..4bb637f4781d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c @@ -265,7 +265,7 @@ n_ls_vector_pcs () } /* -** n_ls_sve_pcs: +** n_ls_sve_pcs: { target aarch64_little_endian } ** sub sp, sp, #?16 ** cntd x16 ** str x16, \[sp\] From 8c1df4022546cc783c4ab666751c9a2e9b3637b0 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sun, 10 Dec 2023 19:46:05 +0000 Subject: [PATCH 158/311] aarch64: XFAIL some SME tests for BE The z0_z23 tests rely on being able to propagate: (1) set of double-register z0-z1 (2) copy of z0 to z28 (3) use of z28 to a use of z0. On LE targets it's regcprop that does this. But regcprop punts on (2) because of: https://gcc.gnu.org/pipermail/gcc-patches/2002-July/081990.html This patch therefore XFAILs the affected tests. gcc/testsuite/ * gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c: XFAIL z0_z23 tests for big-endian. * gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c: Likewise. 
* gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c: Likewise. 
* gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c: Likewise. * gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c | 2 +- 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c | 2 +- gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c | 2 +- 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c | 2 +- 48 files changed, 48 insertions(+), 48 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c index 17b952eeae9d..fe90d0467608 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svbfloat16x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c index a4361ed1a9e1..26cd6a8e5590 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svfloat16x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c index dbc91650943b..8de1d66db08f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svfloat32x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c index c893d31f760d..d198f6029b3a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svfloat64x2_t, z28, svuzp 
(z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c index 54607d45b847..b623b1bfc966 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svint16x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c index 9b1a81f1fdec..62fb290d39b6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svint32x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c index 91527ce31079..3cf32f2848a0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svint64x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c index 406d227272e7..e9d35a47a5c9 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svint8x2_t, z28, svuzp (z0)) /* -** 
uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.b - z29\.b}, z0\.b, z23\.b ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c index 9990a5982e15..62a705884e1b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svuint16x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c index 4bdcaa34f5d7..5fd4093c9185 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svuint32x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c index 95b69d1f7112..e18a66918493 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svuint64x2_t, z28, svuzp (z0)) /* -** uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c index 52002920d236..244ec93a4173 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzp_z28_z0, svuint8x2_t, z28, svuzp (z0)) /* -** 
uzp_z28_z0_z23: +** uzp_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.b - z29\.b}, z0\.b, z23\.b ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c index 6d9ae624deee..9b35be0b26de 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svbfloat16x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c index e8add862b47b..f8cd75c37182 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svfloat16x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c index d4487f45082b..447d973a2983 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svfloat32x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c index 8f7bc9ff23aa..27c6195b4252 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, 
svfloat64x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c index 2cfe7afcfe11..60885a73668b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svint16x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c index bd583bb5d847..074a48f400a6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svint32x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c index 4b2aa57345c7..ed17535738ce 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svint64x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c index fe5098716c47..d351acbd7ec6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c @@ -59,7 +59,7 @@ 
TEST_XN (uzpq_z28_z0, svint8x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c index 69a7aa644741..c4de6a23abd5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svuint16x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c index 94a89c80bf21..1feb93643374 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svuint32x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c index 779906cacc9a..470d9347fb3e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svuint64x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c index 04fe9e6dbfe7..0213ad85bfd6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c +++ 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c @@ -59,7 +59,7 @@ TEST_XN (uzpq_z28_z0, svuint8x2_t, z28, svuzpq (z0)) /* -** uzpq_z28_z0_z23: +** uzpq_z28_z0_z23: { xfail aarch64_big_endian } ** uzp {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c index 53fb2fafc7aa..116d91d9221f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svbfloat16x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c index c404cfa7d40a..578803b4cf5c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svfloat16x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c index 24f19001b89f..ecce2c94a3e8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svfloat32x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c index 6c27bdb868da..3e8ecc0d1f01 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svfloat64x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c index ebe26f1ad46a..4cecf808382f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svint16x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c index 8969a89cd1af..1e367270c05f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svint32x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c index 68ca3fafdf53..4a485ae55431 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svint64x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c index 2c98222c9007..24ef2e1e7f02 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svint8x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.b - z29\.b}, z0\.b, z23\.b ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c index 8ed76db922c5..e57e3a61d133 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svuint16x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.h - z29\.h}, z0\.h, z23\.h ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c index 3970d3e20a1e..f96b7b58bcdc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svuint32x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.s - z29\.s}, z0\.s, z23\.s ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c index 7aa1d601508d..bec1e650d198 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svuint64x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.d - z29\.d}, z0\.d, z23\.d ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c index 716edb4912de..b16989c09098 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c @@ -59,7 +59,7 @@ TEST_XN (zip_z28_z0, svuint8x2_t, z28, svzip (z0)) /* -** zip_z28_z0_z23: +** zip_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.b - z29\.b}, z0\.b, z23\.b ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c index d9432cbdd3ca..a21175506f7d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svbfloat16x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c index 928ec546b185..dd799bef6478 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svfloat16x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c index 7cf9b43bcc60..57f90b18b8b6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svfloat32x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c index 
3360f0a3214d..3f18f4d15d66 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svfloat64x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c index 130c094a0d4a..1aebc9701aa4 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svint16x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c index cb353cba5d69..e830622b99a0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svint32x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c index 8791a664dd10..226c6fb574f0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svint64x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c index 83a17565c77a..a9836125943a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svint8x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c index e45ba0389099..9e420c43c5d2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svuint16x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c index 893b956449f5..d767d2d315e8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svuint32x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c index 4cbf69a67ce3..25a2170e999c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svuint64x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c index 1e1ee8bb458c..5ee90582ff51 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c @@ -59,7 +59,7 @@ TEST_XN (zipq_z28_z0, svuint8x2_t, z28, svzipq (z0)) /* -** zipq_z28_z0_z23: +** zipq_z28_z0_z23: { xfail aarch64_big_endian } ** zip {z28\.q - z29\.q}, z0\.q, z23\.q ** ret */ From f5c8d6bc050a8a6120aff2be25b6892d91baac99 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sun, 10 Dec 2023 19:46:06 +0000 Subject: [PATCH 159/311] aarch64: Fix SMSTART/SMSTOP save/restore for BE VNx16QI (the SVE register byte mode) is the only SVE mode for which LD1 and LDR result in the same register layout for big-endian. It is therefore the only mode for which we allow LDR and STR to be used for big-endian SVE moves. The SME support sometimes needs to use LDR and STR to save and restore Z register contents around an SMSTART/SMSTOP SM. It therefore needs to use VNx16QI regardless of the type of value that is stored in the Z registers. gcc/ PR target/112930 * config/aarch64/aarch64.cc (aarch64_sme_mode_switch_regs::add_reg): Force specific SVE modes for single registers as well as structures. --- gcc/config/aarch64/aarch64.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 5cffdabc62e5..2a64053f675c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -4956,14 +4956,17 @@ aarch64_sme_mode_switch_regs::add_reg (machine_mode mode, unsigned int regno) gcc_assert ((vec_flags & VEC_STRUCT) || end_regno == regno + 1); for (; regno < end_regno; regno++) { + /* Force the mode of SVE saves and restores even for single registers. + This is necessary because big-endian targets only allow LDR Z and + STR Z to be used with byte modes. 
*/ machine_mode submode = mode; - if (vec_flags & VEC_STRUCT) + if (vec_flags & VEC_SVE_PRED) + submode = VNx16BImode; + else if (vec_flags & VEC_SVE_DATA) + submode = SVE_BYTE_MODE; + else if (vec_flags & VEC_STRUCT) { - if (vec_flags & VEC_SVE_PRED) - submode = VNx16BImode; - else if (vec_flags & VEC_SVE_DATA) - submode = SVE_BYTE_MODE; - else if (vec_flags & VEC_PARTIAL) + if (vec_flags & VEC_PARTIAL) submode = V8QImode; else submode = V16QImode; From b3aed459634654d295a1d00e6c149565ced7a9a2 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sun, 10 Dec 2023 19:46:07 +0000 Subject: [PATCH 160/311] aarch64: Fix invalid subregs for BE svread/write_za Multi-register svread_za and svwrite_za are implemented using one pattern per register count, with the register contents being bitcast on entry (for writes) or return (for reads). Previously we relied on subregs for this, with the subreg for reads being handled by target-independent code. But using subregs isn't correct for many big-endian cases, where following subreg rules often requires actual instructions. The semantics are instead supposed to be those of svreinterpret. gcc/ PR target/112931 PR target/112933 * config/aarch64/aarch64-protos.h (aarch64_sve_reinterpret): Declare. * config/aarch64/aarch64.cc (aarch64_sve_reinterpret): New function. * config/aarch64/aarch64-sve-builtins-sme.cc (svread_za_impl::expand) (svwrite_za_impl::expand): Use it to cast the SVE register to the right mode. 
--- gcc/config/aarch64/aarch64-protos.h | 1 + .../aarch64/aarch64-sve-builtins-sme.cc | 5 +++-- gcc/config/aarch64/aarch64.cc | 22 +++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index d1af7f40891e..eaf74a725e70 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -789,6 +789,7 @@ bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx); bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); +rtx aarch64_sve_reinterpret (machine_mode, rtx); bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); machine_mode aarch64_sve_int_mode (machine_mode); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc index 8d06a72f3849..047a333ef47b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc @@ -365,7 +365,8 @@ public: expand (function_expander &e) const override { machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode; - return e.use_exact_insn (code_for_aarch64_sme_read (mode)); + rtx res = e.use_exact_insn (code_for_aarch64_sme_read (mode)); + return aarch64_sve_reinterpret (e.result_mode (), res); } }; @@ -457,7 +458,7 @@ public: expand (function_expander &e) const override { machine_mode mode = e.vectors_per_tuple () == 4 ? 
VNx8DImode : VNx4DImode; - e.args[1] = lowpart_subreg (mode, e.args[1], e.tuple_mode (1)); + e.args[1] = aarch64_sve_reinterpret (mode, e.args[1]); return e.use_exact_insn (code_for_aarch64_sme_write (mode)); } }; diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 2a64053f675c..0889ceb7db17 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3226,6 +3226,28 @@ aarch64_split_simd_move (rtx dst, rtx src) } } +/* Return a register that contains SVE value X reinterpreted as SVE mode MODE. + The semantics are those of svreinterpret rather than those of subregs; + see the comment at the head of aarch64-sve.md for details about the + difference. */ + +rtx +aarch64_sve_reinterpret (machine_mode mode, rtx x) +{ + if (GET_MODE (x) == mode) + return x; + + /* can_change_mode_class must only return true if subregs and svreinterprets + have the same semantics. */ + if (targetm.can_change_mode_class (GET_MODE (x), mode, FP_REGS)) + return lowpart_subreg (mode, x, GET_MODE (x)); + + rtx res = gen_reg_rtx (mode); + x = force_reg (GET_MODE (x), x); + emit_insn (gen_aarch64_sve_reinterpret (mode, res, x)); + return res; +} + bool aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, machine_mode ymode, rtx y) From 5a4faf915575c690a25f0522dccc5b8d82909f10 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Sun, 10 Dec 2023 13:39:30 -0700 Subject: [PATCH 161/311] [PATCH 2/5] [ifcvt] optimize x=c ? (y shift_op z):y by RISC-V Zicond like insns op=[ASHIFT, ASHIFTRT, LSHIFTRT, ROTATE, ROTATERT] Conditional op, if zero rd = (rc == 0) ? (rs1 op rs2) : rs1 --> czero.nez rd, rs2, rc op rd, rs1, rd Conditional op, if non-zero rd = (rc != 0) ? (rs1 op rs2) : rs1 --> czero.eqz rd, rs2, rc op rd, rs1, rd gcc/ChangeLog: * ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for shift like op. gcc/testsuite/ChangeLog: * gcc.target/riscv/zicond_ifcvt_opt.c: Add tests for shift like op.
Co-authored-by: Xiao Zeng --- gcc/ifcvt.cc | 4 +- .../gcc.target/riscv/zicond_ifcvt_opt.c | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index e4eda1a68375..6ac91b8cbb33 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -2920,7 +2920,9 @@ noce_cond_zero_binary_op_supported (rtx op) { enum rtx_code opcode = GET_CODE (op); - if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR) + if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR + || opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT + || opcode == ROTATE || opcode == ROTATERT) return true; return false; diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c index dcb21c15d1a7..efed199627e5 100644 --- a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c +++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c @@ -562,5 +562,58 @@ test_XOR_eqz_x_2_reverse_bin_oprands (long x, long z, long c) return x; } +long +test_ShiftLeft_eqz (long x, long y, long z, long c) +{ + if (c) + x = y << z; + else + x = y; + return x; +} + +long +test_ShiftR_eqz (long x, long y, long z, long c) +{ + if (c) + x = y >> z; + else + x = y; + return x; +} + +unsigned long +test_ShiftR_logical_eqz (unsigned long x, unsigned long y, unsigned long z, + unsigned long c) +{ + if (c) + x = y >> z; + else + x = y; + return x; +} + +unsigned long +test_RotateL_eqz (unsigned long x, unsigned long y, unsigned long z, + unsigned long c) +{ + if (c) + x = (y << z) | (y >> (64 - z)); + else + x = y; + return x; +} + +unsigned long +test_RotateR_eqz (unsigned long x, unsigned long y, unsigned long z, + unsigned long c) +{ + if (c) + x = (y >> z) | (y << (64 - z)); + else + x = y; + return x; +} + /* { dg-final { scan-assembler-times {czero\.eqz} 28 } } */ /* { dg-final { scan-assembler-times {czero\.nez} 28 } } */ From 748766b8f6237cd23b8e5f998fae39b31d2664d0 Mon Sep 17 00:00:00 2001 
From: Tom Tromey Date: Sat, 9 Dec 2023 09:19:30 -0700 Subject: [PATCH 162/311] Add some new DW_IDX_* constants I've reimplemented the .debug_names code in GDB -- it was quite far from being correct, and the new implementation is much closer to what is specified by DWARF. However, the new writer in GDB needs to emit some symbol properties, so that the reader can be fully functional. This patch adds a few new DW_IDX_* constants, and tries to document the existing extensions as well. (My patch series adds more documentation of these to the GDB manual as well.) include/ChangeLog 2023-12-10 Tom Tromey * dwarf2.def (DW_IDX_GNU_internal, DW_IDX_GNU_external): Comment. (DW_IDX_GNU_main, DW_IDX_GNU_language, DW_IDX_GNU_linkage_name): New constants. --- include/dwarf2.def | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/dwarf2.def b/include/dwarf2.def index 7ab3ee611fd4..75b75d901884 100644 --- a/include/dwarf2.def +++ b/include/dwarf2.def @@ -802,8 +802,17 @@ DW_IDX (DW_IDX_parent, 4) DW_IDX (DW_IDX_type_hash, 5) DW_IDX_DUP (DW_IDX_lo_user, 0x2000) DW_IDX (DW_IDX_hi_user, 0x3fff) +/* Internal linkage. A flag. */ DW_IDX (DW_IDX_GNU_internal, 0x2000) +/* External linkage. A flag. Note that gdb no longer generates this; + the default is to assume external linkage. */ DW_IDX (DW_IDX_GNU_external, 0x2001) +/* This entry is the program's entry point. A flag. */ +DW_IDX (DW_IDX_GNU_main, 0x2002) +/* Language for this entry. A DW_LANG_* value. */ +DW_IDX (DW_IDX_GNU_language, 0x2003) +/* This entry is a linkage name. A flag. */ +DW_IDX (DW_IDX_GNU_linkage_name, 0x2004) DW_END_IDX /* DWARF5 Unit type header encodings */ From 4a6613e2a417512077ea39b5097c0c602055f028 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 11 Dec 2023 00:17:32 +0000 Subject: [PATCH 163/311] Daily bump.
--- gcc/ChangeLog | 48 ++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 8 +++ gcc/cp/ChangeLog | 134 ++++++++++++++++++++++++++++++++++++++ gcc/fortran/ChangeLog | 15 +++++ gcc/testsuite/ChangeLog | 138 ++++++++++++++++++++++++++++++++++++++++ include/ChangeLog | 6 ++ 7 files changed, 350 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 68754648cbbe..6724048f5172 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,51 @@ +2023-12-10 Fei Gao + Xiao Zeng + + * ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for shift + like op. + +2023-12-10 Richard Sandiford + + PR target/112931 + PR target/112933 + * config/aarch64/aarch64-protos.h (aarch64_sve_reinterpret): Declare. + * config/aarch64/aarch64.cc (aarch64_sve_reinterpret): New function. + * config/aarch64/aarch64-sve-builtins-sme.cc (svread_za_impl::expand) + (svwrite_za_impl::expand): Use it to cast the SVE register to the + right mode. + +2023-12-10 Richard Sandiford + + PR target/112930 + * config/aarch64/aarch64.cc (aarch64_sme_mode_switch_regs::add_reg): + Force specific SVE modes for single registers as well as structures. + +2023-12-10 Jason Merrill + + * doc/invoke.texi (-fpermissive): Mention ObjC++ for -Wnarrowing. + +2023-12-10 Jeff Law + + * config/h8300/addsub.md (uaddv4, usubv4): New expanders. + (uaddv): New define_insn_and_split plus post-reload pattern. + +2023-12-10 Jeff Law + + * config/h8300/h8300-protos.h (use_extvsi): Prototype. + * config/h8300/combiner.md: Two new define_insn_and_split patterns + to implement signed bitfield extractions. + * config/h8300/h8300.cc (use_extvsi): New function. + +2023-12-10 Jeff Law + + * config/h8300/combiner.md (single bit signed bitfield extraction): Fix + length computation when the bit we want is in the low half word. + +2023-12-10 Jeff Law + + * config/h8300/h8300.cc (compute_a_shift_length): Fix computation + of logical shifts on the H8/SX. 
+ 2023-12-09 Jakub Jelinek PR tree-optimization/112887 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 19d30f166542..6a5191bd9774 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231210 +20231211 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 4cae882759db..97e2cfdc449f 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,11 @@ +2023-12-10 Ken Matsui + Patrick Palka + + * c-common.cc (c_common_reswords): Remove all mappings of + built-in traits. + * c-common.h (enum rid): Remove all RID values for built-in + traits. + 2023-12-07 Andrew Pinski Jakub Jelinek diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 58f84237d787..d493135edca9 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,137 @@ +2023-12-10 Ken Matsui + + * cp-trait.def: Define __remove_pointer. + * semantics.cc (finish_trait_type): Handle CPTK_REMOVE_POINTER. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_object. + * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_OBJECT. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_function. + * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_FUNCTION. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_reference. + * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_REFERENCE. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_member_object_pointer. + * constraint.cc (diagnose_trait_expr): Handle + CPTK_IS_MEMBER_OBJECT_POINTER. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_member_function_pointer. + * constraint.cc (diagnose_trait_expr): Handle + CPTK_IS_MEMBER_FUNCTION_POINTER. + * semantics.cc (trait_expr_value): Likewise. 
+ (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_member_pointer. + * constraint.cc (diagnose_trait_expr): Handle + CPTK_IS_MEMBER_POINTER. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_scoped_enum. + * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_SCOPED_ENUM. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_bounded_array. + * constraint.cc (diagnose_trait_expr): Handle + CPTK_IS_BOUNDED_ARRAY. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Ken Matsui + + * cp-trait.def: Define __is_array. + * constraint.cc (diagnose_trait_expr): Handle CPTK_IS_ARRAY. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + +2023-12-10 Jason Merrill + + * parser.cc (cp_parser_simple_type_specifier): Move trait + handling to default label. + +2023-12-10 Ken Matsui + + * parser.cc (cp_lexer_lookup_trait): Rename to ... + (cp_lexer_peek_trait): ... this. Handle a subsequent token for + the corresponding built-in trait. + (cp_lexer_lookup_trait_expr): Rename to ... + (cp_lexer_peek_trait_expr): ... this. + (cp_lexer_lookup_trait_type): Rename to ... + (cp_lexer_peek_trait_type): ... this. + (cp_lexer_next_token_is_decl_specifier_keyword): Call + cp_lexer_peek_trait_type. + (cp_parser_simple_type_specifier): Likewise. + (cp_parser_primary_expression): Call cp_lexer_peek_trait_expr. + +2023-12-10 Ken Matsui + Patrick Palka + + * cp-objcp-common.cc (names_builtin_p): Remove all RID value + cases for built-in traits. Check for built-in traits via + the new cik_trait kind. + * cp-tree.h (enum cp_trait_kind): Set its underlying type to + addr_space_t. + (struct cp_trait): New struct to hold trait information. + (cp_traits): New array to hold a mapping to all traits. 
+ (cik_reserved_for_udlit): Rename to ... + (cik_trait): ... this. + (IDENTIFIER_ANY_OP_P): Exclude cik_trait. + (IDENTIFIER_TRAIT_P): New macro to detect cik_trait. + * lex.cc (cp_traits): Define its values, declared in cp-tree.h. + (init_cp_traits): New function to set cik_trait and + IDENTIFIER_CP_INDEX for all built-in trait identifiers. + (cxx_init): Call init_cp_traits function. + * parser.cc (cp_lexer_lookup_trait): New function to look up a + built-in trait by IDENTIFIER_CP_INDEX. + (cp_lexer_lookup_trait_expr): Likewise, look up an + expression-yielding built-in trait. + (cp_lexer_lookup_trait_type): Likewise, look up a type-yielding + built-in trait. + (cp_keyword_starts_decl_specifier_p): Remove all RID value cases + for built-in traits. + (cp_lexer_next_token_is_decl_specifier_keyword): Handle + type-yielding built-in traits. + (cp_parser_primary_expression): Remove all RID value cases for + built-in traits. Handle expression-yielding built-in traits. + (cp_parser_trait): Handle cp_trait instead of enum rid. + (cp_parser_simple_type_specifier): Remove all RID value cases + for built-in traits. Handle type-yielding built-in traits. + +2023-12-10 Ken Matsui + + * constraint.cc (diagnose_trait_expr): Sort built-in traits + alphabetically. + * cp-trait.def: Likewise. + * semantics.cc (trait_expr_value): Likewise. + (finish_trait_expr): Likewise. + (finish_trait_type): Likewise. + 2023-12-09 Jakub Jelinek * parser.cc (cp_parser_statement, cp_parser_expression_statement, diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 04b711199ff6..daab6e223b46 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,18 @@ +2023-12-10 Harald Anlauf + + PR fortran/111503 + * expr.cc (gfc_is_simply_contiguous): Determine characteristics of + NULL() from optional MOLD argument, otherwise treat as contiguous. + * primary.cc (gfc_variable_attr): Derive attributes of NULL(MOLD) + from MOLD. 
+ +2023-12-10 Harald Anlauf + + PR fortran/105543 + * resolve.cc (resolve_symbol): For a CLASS-valued function having a + RESULT clause, ensure that attr.class_ok is set for its symbol as + well as for its resolved result variable. + 2023-12-08 Tobias Burnus * dump-parse-tree.cc (show_omp_node): Handle EXEC_OMP_ALLOCATE diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 73aa606733f7..36db275f8669 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,141 @@ +2023-12-10 Fei Gao + Xiao Zeng + + * gcc.target/riscv/zicond_ifcvt_opt.c: Add tests for shift like op. + +2023-12-10 Richard Sandiford + + * gcc.target/aarch64/sme2/acle-asm/uzp_bf16_x2.c: XFAIL z0_z23 tests + for big-endian. + * gcc.target/aarch64/sme2/acle-asm/uzp_f16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_s16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_s32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_s64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_s8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_u16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_u32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_u64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzp_u8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_bf16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_f16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_s16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_s32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_s64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_s8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_u16_x2.c: Likewise. 
+ * gcc.target/aarch64/sme2/acle-asm/uzpq_u32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_u64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/uzpq_u8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_bf16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_f16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_s16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_s32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_s64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_s8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_u16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_u32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_u64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zip_u8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_bf16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_f16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_s16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_s32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_s64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_s8_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_u16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_u32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_u64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/zipq_u8_x2.c: Likewise. + +2023-12-10 Richard Sandiford + + * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that + contain Z8-Z23 saves to little-endian. + * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. + * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. 
+ +2023-12-10 Richard Sandiford + + * gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables. + * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. + * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. + +2023-12-10 Harald Anlauf + + PR fortran/111503 + * gfortran.dg/contiguous_14.f90: New test. + +2023-12-10 Harald Anlauf + + PR fortran/105543 + * gfortran.dg/contiguous_13.f90: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __remove_pointer. + * g++.dg/ext/remove_pointer.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __is_object. + * g++.dg/ext/is_object.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __is_function. + * g++.dg/ext/is_function.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __is_reference. + * g++.dg/ext/is_reference.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of + __is_member_object_pointer. + * g++.dg/ext/is_member_object_pointer.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of + __is_member_function_pointer. + * g++.dg/ext/is_member_function_pointer.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of + __is_member_pointer. + * g++.dg/ext/is_member_pointer.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __is_scoped_enum. + * g++.dg/ext/is_scoped_enum.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of + __is_bounded_array. + * g++.dg/ext/is_bounded_array.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Test existence of __is_array. + * g++.dg/ext/is_array.C: New test. + +2023-12-10 Ken Matsui + + * g++.dg/ext/has-builtin-1.C: Sort built-in traits alphabetically. 
+ 2023-12-09 Jakub Jelinek PR tree-optimization/112887 diff --git a/include/ChangeLog b/include/ChangeLog index 1907ce61099d..1afa9f846de8 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,9 @@ +2023-12-10 Tom Tromey + + * dwarf2.def (DW_IDX_GNU_internal, DW_IDX_GNU_external): Comment. + (DW_IDX_GNU_main, DW_IDX_GNU_language, DW_IDX_GNU_linkage_name): + New constants. + 2023-12-01 Jason Merrill * demangle.h (enum demangle_component_type): Add From 46e342b985e6b4058db73875103cced2666e84e2 Mon Sep 17 00:00:00 2001 From: Haochen Gui Date: Mon, 11 Dec 2023 08:40:34 +0800 Subject: [PATCH 164/311] rs6000: Enable lrintsi2 on old archs with stfiwx enabled The powerpc 32-bit processors (e.g. 7450) support the "fctiw" instruction, but the instruction can't be generated on such platforms as the insn is guarded by TARGET_POPCNTD. The root cause is that SImode in float registers is only supported from Power7 onwards. Actually, the implementation of "fctiw" only needs stfiwx, which is supported by the old 32-bit processors. This patch enables the "fctiw" expander for these processors. gcc/ PR target/112707 * config/rs6000/rs6000.md (expand lrintsi2): New. (insn lrintsi2): Rename to... (*lrintsi): ...this. (lrintsi_di): New. gcc/testsuite/ PR target/112707 * gcc.target/powerpc/pr112707-1.c: New. 
--- gcc/config/rs6000/rs6000.md | 30 ++++++++++++++++++- gcc/testsuite/gcc.target/powerpc/pr112707-1.c | 16 ++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr112707-1.c diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 28482e3617e6..3d9491769fc4 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6740,7 +6740,27 @@ "fctid %0,%1" [(set_attr "type" "fp")]) -(define_insn "lrintsi2" +(define_expand "lrintsi2" + [(set (match_operand:SI 0 "gpc_reg_operand" "=d") + (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")] + UNSPEC_FCTIW))] + "TARGET_HARD_FLOAT && TARGET_STFIWX" +{ + /* For those old archs in which SImode can't be hold in float registers, + call lrintsi_di to put the result in DImode then convert it via + stack. */ + if (!TARGET_POPCNTD) + { + rtx tmp = gen_reg_rtx (DImode); + emit_insn (gen_lrintsi_di (tmp, operands[1])); + rtx stack = rs6000_allocate_stack_temp (SImode, false, true); + emit_insn (gen_stfiwx (stack, tmp)); + emit_move_insn (operands[0], stack); + DONE; + } +}) + +(define_insn "*lrintsi" [(set (match_operand:SI 0 "gpc_reg_operand" "=d") (unspec:SI [(match_operand:SFDF 1 "gpc_reg_operand" "")] UNSPEC_FCTIW))] @@ -6748,6 +6768,14 @@ "fctiw %0,%1" [(set_attr "type" "fp")]) +(define_insn "lrintsi_di" + [(set (match_operand:DI 0 "gpc_reg_operand" "=d") + (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")] + UNSPEC_FCTIW))] + "TARGET_HARD_FLOAT && !TARGET_POPCNTD" + "fctiw %0,%1" + [(set_attr "type" "fp")]) + (define_insn "btrunc2" [(set (match_operand:SFDF 0 "gpc_reg_operand" "=d,wa") (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "d,wa")] diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-1.c b/gcc/testsuite/gcc.target/powerpc/pr112707-1.c new file mode 100644 index 000000000000..cce6bd7f6907 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr112707-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O2 -mdejagnu-cpu=7450 -fno-math-errno" } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-final { scan-assembler-times {\mfctiw\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstfiwx\M} 2 } } */ + +int test1 (double a) +{ + return __builtin_irint (a); +} + +int test2 (float a) +{ + return __builtin_irint (a); +} From ae226cb1ee17d61c416c9d4d8c5a142788b8afff Mon Sep 17 00:00:00 2001 From: Haochen Gui Date: Mon, 11 Dec 2023 08:41:55 +0800 Subject: [PATCH 165/311] rs6000: Guard fctid on PowerPC64 and PowerPC476 fctid is only supported on 64-bit Power processors and powerpc 476. It should be guarded by this condition. The patch fixes the issue. gcc/ PR target/112707 * config/rs6000/rs6000.h (TARGET_FCTID): Define. * config/rs6000/rs6000.md (lrintdi2): Add guard TARGET_FCTID. * (lrounddi2): Replace TARGET_FPRND with TARGET_FCTID. gcc/testsuite/ PR target/112707 * gcc.target/powerpc/pr112707.h: New. * gcc.target/powerpc/pr112707-2.c: New. * gcc.target/powerpc/pr112707-3.c: New. * gcc.target/powerpc/pr88558-p7.c: Check fctid on ilp32 and has_arch_ppc64 as it's now guarded by powerpc64. * gcc.target/powerpc/pr88558-p8.c: Likewise. * gfortran.dg/nint_p7.f90: Add powerpc64 target requirement as lrounddi2 is now guarded by powerpc64. 
--- gcc/config/rs6000/rs6000.h | 2 ++ gcc/config/rs6000/rs6000.md | 4 ++-- gcc/testsuite/gcc.target/powerpc/pr112707-2.c | 9 +++++++++ gcc/testsuite/gcc.target/powerpc/pr112707-3.c | 9 +++++++++ gcc/testsuite/gcc.target/powerpc/pr112707.h | 10 ++++++++++ gcc/testsuite/gcc.target/powerpc/pr88558-p7.c | 2 +- gcc/testsuite/gcc.target/powerpc/pr88558-p8.c | 2 +- gcc/testsuite/gfortran.dg/nint_p7.f90 | 1 + 8 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr112707-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr112707-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr112707.h diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 326c45221e9c..df44b86fb05c 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -467,6 +467,8 @@ extern int rs6000_vector_align[]; #define TARGET_FCFIDUS TARGET_POPCNTD #define TARGET_FCTIDUZ TARGET_POPCNTD #define TARGET_FCTIWUZ TARGET_POPCNTD +/* Only powerpc64 and powerpc476 support fctid. 
*/ +#define TARGET_FCTID (TARGET_POWERPC64 || rs6000_cpu == PROCESSOR_PPC476) #define TARGET_CTZ TARGET_MODULO #define TARGET_EXTSWSLI (TARGET_MODULO && TARGET_POWERPC64) #define TARGET_MADDLD TARGET_MODULO diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3d9491769fc4..58126628ca04 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6736,7 +6736,7 @@ [(set (match_operand:DI 0 "gpc_reg_operand" "=d") (unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")] UNSPEC_FCTID))] - "TARGET_HARD_FLOAT" + "TARGET_HARD_FLOAT && TARGET_FCTID" "fctid %0,%1" [(set_attr "type" "fp")]) @@ -6830,7 +6830,7 @@ (set (match_operand:DI 0 "gpc_reg_operand") (unspec:DI [(match_dup 2)] UNSPEC_FCTID))] - "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FPRND" + "TARGET_HARD_FLOAT && TARGET_VSX && TARGET_FCTID" { operands[2] = gen_reg_rtx (mode); }) diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-2.c b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c new file mode 100644 index 000000000000..672e00691eaf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr112707-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=7450 -fno-math-errno" } */ +/* { dg-require-effective-target ilp32 } */ +/* { dg-skip-if "" { has_arch_ppc64 } } */ +/* { dg-final { scan-assembler-not {\mfctid\M} } } */ + +/* powerpc 7450 doesn't support ppc64 (-m32 -mpowerpc64), so skips it. 
*/ + +#include "pr112707.h" diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707-3.c b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c new file mode 100644 index 000000000000..924338fd3900 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr112707-3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-math-errno -mdejagnu-cpu=476fp" } */ +/* { dg-require-effective-target ilp32 } */ + +/* powerpc 476fp has hard float enabled which is required by fctid */ + +#include "pr112707.h" + +/* { dg-final { scan-assembler-times {\mfctid\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr112707.h b/gcc/testsuite/gcc.target/powerpc/pr112707.h new file mode 100644 index 000000000000..e427dc6a72e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr112707.h @@ -0,0 +1,10 @@ +long long test1 (double a) +{ + return __builtin_llrint (a); +} + +long long test2 (float a) +{ + return __builtin_llrint (a); +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c index 3932656c5fd9..2fa0b997e52f 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p7.c @@ -6,7 +6,7 @@ #include "pr88558.h" /* { dg-final { scan-assembler-times {\mfctid\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target { ilp32 && has_arch_ppc64 } } } } */ /* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mfctiw\M} 4 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {\mstfiwx\M} 2 { target lp64 } } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c index 1afc8fd4f0d1..fffb5b88c209 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c +++ b/gcc/testsuite/gcc.target/powerpc/pr88558-p8.c @@ -7,7 +7,7 @@ #include "pr88558.h" /* { dg-final { 
scan-assembler-times {\mfctid\M} 4 { target lp64 } } } */ -/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target ilp32 } } } */ +/* { dg-final { scan-assembler-times {\mfctid\M} 2 { target { ilp32 && has_arch_ppc64 } } } } */ /* { dg-final { scan-assembler-times {\mfctiw\M} 2 { target lp64 } } } */ /* { dg-final { scan-assembler-times {\mfctiw\M} 4 { target ilp32 } } } */ /* { dg-final { scan-assembler-times {\mmfvsrwz\M} 2 { target lp64 } } } */ diff --git a/gcc/testsuite/gfortran.dg/nint_p7.f90 b/gcc/testsuite/gfortran.dg/nint_p7.f90 index 2239824a7fb1..c23eb6783bc3 100644 --- a/gcc/testsuite/gfortran.dg/nint_p7.f90 +++ b/gcc/testsuite/gfortran.dg/nint_p7.f90 @@ -2,6 +2,7 @@ ! { dg-do compile { target { powerpc*-*-* } } } ! { dg-require-effective-target powerpc_vsx_ok } ! { dg-options "-O2 -mdejagnu-cpu=power7 -ffast-math" } +! { dg-require-effective-target has_arch_ppc64 } ! { dg-final { scan-assembler-times "xsrdpi" 2 } } subroutine test_nint(x4,x8) From 2c2df123de432356d28ee7e8a9627dc9e80a399d Mon Sep 17 00:00:00 2001 From: liuhongt Date: Fri, 8 Dec 2023 11:19:52 +0800 Subject: [PATCH 166/311] Support vpcmov for V4HF/V4BF/V2HF/V2BF under TARGET_XOP. gcc/ChangeLog: PR target/112904 * config/i386/mmx.md (*xop_pcmov_): New define_insn. gcc/testsuite/ChangeLog: * g++.target/i386/pr112904.C: New test. 
--- gcc/config/i386/mmx.md | 22 +++++++++++++++++++ gcc/testsuite/g++.target/i386/pr112904.C | 27 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 gcc/testsuite/g++.target/i386/pr112904.C diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index a07a921b7397..06d6c57876bc 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -4136,6 +4136,17 @@ [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) +(define_insn "*xop_pcmov_" + [(set (match_operand:V4F_64 0 "register_operand" "=x") + (if_then_else:V4F_64 + (match_operand:V4F_64 3 "register_operand" "x") + (match_operand:V4F_64 1 "register_operand" "x") + (match_operand:V4F_64 2 "register_operand" "x")))] + "TARGET_XOP && TARGET_MMX_WITH_SSE" + "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + (define_insn "*xop_pcmov_" [(set (match_operand:VI_16_32 0 "register_operand" "=x") (if_then_else:VI_16_32 @@ -4147,6 +4158,17 @@ [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) +(define_insn "*xop_pcmov_" + [(set (match_operand:V2F_32 0 "register_operand" "=x") + (if_then_else:V2F_32 + (match_operand:V2F_32 3 "register_operand" "x") + (match_operand:V2F_32 1 "register_operand" "x") + (match_operand:V2F_32 2 "register_operand" "x")))] + "TARGET_XOP" + "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "sse4arg") + (set_attr "mode" "TI")]) + ;; XOP permute instructions (define_insn "mmx_ppermv64" [(set (match_operand:V8QI 0 "register_operand" "=x") diff --git a/gcc/testsuite/g++.target/i386/pr112904.C b/gcc/testsuite/g++.target/i386/pr112904.C new file mode 100644 index 000000000000..556be9211971 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr112904.C @@ -0,0 +1,27 @@ +typedef _Float16 v4hf __attribute__((vector_size(8))); +typedef short v4hi __attribute__((vector_size(8))); +typedef _Float16 v2hf __attribute__((vector_size(4))); +typedef short v2hi __attribute__((vector_size(4))); + +typedef __bf16 v4bf 
__attribute__((vector_size(8))); +typedef __bf16 v2bf __attribute__((vector_size(4))); + +v4hf foo(v4hf a, v4hf b, v4hi c) +{ + return c ? a : b; +} + +v2hf foo1(v2hf a, v2hf b, v2hi c) +{ + return c ? a : b; +} + +v4bf foo(v4bf a, v4bf b, v4hi c) +{ + return c ? a : b; +} + +v2bf foo1(v2bf a, v2bf b, v2hi c) +{ + return c ? a : b; +} From ab3daffcbf35566d468c3028e48068a481048baf Mon Sep 17 00:00:00 2001 From: Nathaniel Shead Date: Thu, 23 Nov 2023 23:15:19 +1100 Subject: [PATCH 167/311] c++: Clear uninstantiated template friend when instantiating [PR104234] Otherwise attempting to get the originating module declaration ICEs because the DECL_CHAIN of an instantiated friend template is no longer its context. PR c++/104234 PR c++/112580 gcc/cp/ChangeLog: * pt.cc (tsubst_template_decl): Clear DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P. gcc/testsuite/ChangeLog: * g++.dg/modules/pr104234.C: New test. Signed-off-by: Nathaniel Shead --- gcc/cp/pt.cc | 2 ++ gcc/testsuite/g++.dg/modules/pr104234.C | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 gcc/testsuite/g++.dg/modules/pr104234.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 669d2ad65c3a..e9dcdb32c6cf 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -14790,6 +14790,8 @@ tsubst_template_decl (tree t, tree args, tsubst_flags_t complain, if (PRIMARY_TEMPLATE_P (t)) DECL_PRIMARY_TEMPLATE (r) = r; + DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P (r) = false; + if (!lambda_fntype && !class_p) { /* Record this non-type partial instantiation. 
*/ diff --git a/gcc/testsuite/g++.dg/modules/pr104234.C b/gcc/testsuite/g++.dg/modules/pr104234.C new file mode 100644 index 000000000000..d81f0d435bcf --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr104234.C @@ -0,0 +1,16 @@ +// PR c++/104234 +// { dg-additional-options "-fmodules-ts" } + +template struct _Node_handle_common { + template friend class _Rb_tree; +}; +struct _Hashtable { + using node_type = _Node_handle_common; + node_type __trans_tmp_1; +}; +template class _Rb_tree { + struct _Rb_tree_impl { + _Rb_tree_impl(); + } _M_impl; +}; +_Rb_tree _M_tmap_; From 4719b6f5ae4d758f193a17bbd5fb6cbacd702a23 Mon Sep 17 00:00:00 2001 From: Nathaniel Shead Date: Sat, 28 Oct 2023 16:04:52 +1100 Subject: [PATCH 168/311] c++: Fix noexcept checking for trivial operations [PR96090] This patch stops eager folding of trivial operations (construction and assignment) from occurring when checking for noexceptness. This was previously done in PR c++/53025, but only for copy/move construction, and the __is_nothrow_xible builtins did not receive the same treatment when they were added. To handle `is_nothrow_default_constructible`, the patch also ensures that when no parameters are passed we do value initialisation instead of just building the constructor call: in particular, value-initialisation doesn't necessarily actually invoke the constructor for trivial default constructors, and so we need to handle this case as well. This is contrary to the proposed resolution of CWG2820; for now we just ensure it matches the behaviour of the `noexcept` operator and create testcases formalising this, and if that issue gets accepted we can revisit. PR c++/96090 PR c++/100470 gcc/cp/ChangeLog: * call.cc (build_over_call): Prevent folding of trivial special members when checking for noexcept. * method.cc (constructible_expr): Perform value-initialisation for empty parameter lists. (is_nothrow_xible): Treat as noexcept operator. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/noexcept81.C: New test. 
* g++.dg/ext/is_nothrow_constructible7.C: New test. * g++.dg/ext/is_nothrow_constructible8.C: New test. Signed-off-by: Nathaniel Shead --- gcc/cp/call.cc | 17 ++--- gcc/cp/method.cc | 19 ++++-- gcc/testsuite/g++.dg/cpp0x/noexcept81.C | 37 +++++++++++ .../g++.dg/ext/is_nothrow_constructible7.C | 20 ++++++ .../g++.dg/ext/is_nothrow_constructible8.C | 64 +++++++++++++++++++ 5 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept81.C create mode 100644 gcc/testsuite/g++.dg/ext/is_nothrow_constructible7.C create mode 100644 gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index c7efc5b077a3..4f0abf8e93fa 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -10247,15 +10247,16 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) /* Avoid actually calling copy constructors and copy assignment operators, if possible. */ - if (! flag_elide_constructors && !force_elide) + if (!force_elide + && (!flag_elide_constructors + /* It's unsafe to elide the operation when handling + a noexcept-expression, it may evaluate to the wrong + value (c++/53025, c++/96090). */ + || cp_noexcept_operand != 0)) /* Do things the hard way. */; - else if (cand->num_convs == 1 - && (DECL_COPY_CONSTRUCTOR_P (fn) - || DECL_MOVE_CONSTRUCTOR_P (fn)) - /* It's unsafe to elide the constructor when handling - a noexcept-expression, it may evaluate to the wrong - value (c++/53025). 
*/ - && (force_elide || cp_noexcept_operand == 0)) + else if (cand->num_convs == 1 + && (DECL_COPY_CONSTRUCTOR_P (fn) + || DECL_MOVE_CONSTRUCTOR_P (fn))) { tree targ; tree arg = argarray[num_artificial_parms_for (fn)]; diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index a70dd5d6adc1..26e6eb79946c 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -2091,6 +2091,7 @@ constructible_expr (tree to, tree from) { tree expr; cp_unevaluated cp_uneval_guard; + const int len = TREE_VEC_LENGTH (from); if (CLASS_TYPE_P (to)) { tree ctype = to; @@ -2098,11 +2099,16 @@ constructible_expr (tree to, tree from) if (!TYPE_REF_P (to)) to = cp_build_reference_type (to, /*rval*/false); tree ob = build_stub_object (to); - vec_alloc (args, TREE_VEC_LENGTH (from)); - for (tree arg : tree_vec_range (from)) - args->quick_push (build_stub_object (arg)); - expr = build_special_member_call (ob, complete_ctor_identifier, &args, - ctype, LOOKUP_NORMAL, tf_none); + if (len == 0) + expr = build_value_init (ctype, tf_none); + else + { + vec_alloc (args, len); + for (tree arg : tree_vec_range (from)) + args->quick_push (build_stub_object (arg)); + expr = build_special_member_call (ob, complete_ctor_identifier, &args, + ctype, LOOKUP_NORMAL, tf_none); + } if (expr == error_mark_node) return error_mark_node; /* The current state of the standard vis-a-vis LWG 2116 is that @@ -2120,7 +2126,6 @@ constructible_expr (tree to, tree from) } else { - const int len = TREE_VEC_LENGTH (from); if (len == 0) return build_value_init (strip_array_types (to), tf_none); if (len > 1) @@ -2216,7 +2221,9 @@ is_trivially_xible (enum tree_code code, tree to, tree from) bool is_nothrow_xible (enum tree_code code, tree to, tree from) { + ++cp_noexcept_operand; tree expr = is_xible_helper (code, to, from, /*trivial*/false); + --cp_noexcept_operand; if (expr == NULL_TREE || expr == error_mark_node) return false; return expr_noexcept_p (expr, tf_none); diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept81.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept81.C new file mode 100644 index 000000000000..8310f7d910ab --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/noexcept81.C @@ -0,0 +1,37 @@ +// { dg-do compile { target c++11 } } +// PR c++/96090 + +struct yesthrow_t { + yesthrow_t() noexcept(false) = default; + yesthrow_t(const yesthrow_t&) noexcept(false) = default; + yesthrow_t(yesthrow_t&&) noexcept(false) = default; + yesthrow_t& operator=(const yesthrow_t&) noexcept(false) = default; + yesthrow_t& operator=(yesthrow_t&&) noexcept(false) = default; +}; + +yesthrow_t yes; +static_assert(not noexcept(yesthrow_t(static_cast(yes))), ""); +static_assert(not noexcept(yesthrow_t(static_cast(yes))), ""); +static_assert(not noexcept(yes = static_cast(yes)), ""); +static_assert(not noexcept(yes = static_cast(yes)), ""); + +// Note: this is value-initialisation, and thus by [dcl.init.general] p9 +// a trivial non-user-provided non-deleted default constructor is not called. +// However, CWG2820 proposes to change this behaviour. 
+static_assert(noexcept(yesthrow_t()), ""); + +struct nothrow_t { + nothrow_t() noexcept(true) = default; + nothrow_t(const nothrow_t&) noexcept(true) = default; + nothrow_t(nothrow_t&&) noexcept(true) = default; + nothrow_t& operator=(const nothrow_t&) noexcept(true) = default; + nothrow_t& operator=(nothrow_t&&) noexcept(true) = default; +}; + +nothrow_t no; +static_assert(noexcept(nothrow_t()), ""); +static_assert(noexcept(nothrow_t(static_cast(no))), ""); +static_assert(noexcept(nothrow_t(static_cast(no))), ""); +static_assert(noexcept(no = static_cast(no)), ""); +static_assert(noexcept(no = static_cast(no)), ""); + diff --git a/gcc/testsuite/g++.dg/ext/is_nothrow_constructible7.C b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible7.C new file mode 100644 index 000000000000..b63b13ac52f6 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible7.C @@ -0,0 +1,20 @@ +// { dg-do compile { target c++11 } } +// PR c++/100470 + +struct S1{ + S1(S1&&) noexcept(false); +}; +struct S2{ + S2(S2&&) noexcept(false) = default; +}; +struct S3{ + S3(S3&&) noexcept(false){} +}; +struct S4{ + S4(S4&&) = default; +}; + +static_assert(!__is_nothrow_constructible(S1, S1), ""); +static_assert(!__is_nothrow_constructible(S2, S2), ""); +static_assert(!__is_nothrow_constructible(S3, S3), ""); +static_assert( __is_nothrow_constructible(S4, S4), ""); diff --git a/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C new file mode 100644 index 000000000000..c2a0b93ae971 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C @@ -0,0 +1,64 @@ +// { dg-do compile { target c++11 } } +// PR c++/96090 + +template +constexpr bool is_nothrow_default_constructible_v + = __is_nothrow_constructible(T); +template +constexpr bool is_nothrow_copy_constructible_v + = __is_nothrow_constructible(T, const T&); +template +constexpr bool is_nothrow_move_constructible_v + = __is_nothrow_constructible(T, T&&); +template 
+constexpr bool is_nothrow_copy_assignable_v + = __is_nothrow_assignable(T, const T&); +template +constexpr bool is_nothrow_move_assignable_v + = __is_nothrow_assignable(T, T&&); + +struct yesthrow_t { + yesthrow_t() noexcept(false) = default; + yesthrow_t(const yesthrow_t&) noexcept(false) = default; + yesthrow_t(yesthrow_t&&) noexcept(false) = default; + yesthrow_t& operator=(const yesthrow_t&) noexcept(false) = default; + yesthrow_t& operator=(yesthrow_t&&) noexcept(false) = default; +}; + +static_assert(not is_nothrow_copy_constructible_v, ""); +static_assert(not is_nothrow_copy_assignable_v, ""); +static_assert(not is_nothrow_move_constructible_v, ""); +static_assert(not is_nothrow_move_assignable_v, ""); + +// Note: by [meta.unary.prop] p9 this should be value-initialisation, +// and thus by [dcl.init.general] p9 a trivial non-user-provided +// non-deleted default constructor is not called. +// However, CWG2820 proposes to change this behaviour. +static_assert(is_nothrow_default_constructible_v, ""); + +struct nothrow_t { + nothrow_t() noexcept(true) = default; + nothrow_t(const nothrow_t&) noexcept(true) = default; + nothrow_t(nothrow_t&&) noexcept(true) = default; + nothrow_t& operator=(const nothrow_t&) noexcept(true) = default; + nothrow_t& operator=(nothrow_t&&) noexcept(true) = default; +}; + +static_assert(is_nothrow_default_constructible_v, ""); +static_assert(is_nothrow_copy_constructible_v, ""); +static_assert(is_nothrow_copy_assignable_v, ""); +static_assert(is_nothrow_move_constructible_v, ""); +static_assert(is_nothrow_move_assignable_v, ""); + +struct A { A() noexcept(false) = default; }; +struct B { B(const B&) noexcept(false) = default; }; +struct C { C(C&&) noexcept(false) = default; }; +struct D { D& operator=(const D&) noexcept(false) = default; }; +struct E { E& operator=(E&&) noexcept(false) = default; }; + +static_assert(is_nothrow_default_constructible_v, ""); // see above +static_assert(not is_nothrow_copy_constructible_v, ""); 
+static_assert(not is_nothrow_move_constructible_v, ""); +static_assert(not is_nothrow_copy_assignable_v, ""); +static_assert(not is_nothrow_move_assignable_v, ""); + From ec201e2a6021d144797f5558e4d08a2385de7a63 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Sun, 10 Dec 2023 22:21:58 -0700 Subject: [PATCH 169/311] [PATCH 3/5] [ifcvt] optimize x=c ? (y AND z) : y by RISC-V Zicond like insns Take the following case for example. CFLAGS: -march=rv64gc_zbb_zicond -mabi=lp64d -O2 long test_AND_ceqz (long x, long y, long z, long c) { if (c) x = y & z; else x = y; return x; } Before patch: and a2,a1,a2 czero.eqz a0,a2,a3 czero.nez a3,a1,a3 or a0,a3,a0 ret After patch: and a0,a1,a2 czero.nez a1,a1,a3 or a0,a1,a0 ret Co-authored-by: Xiao Zeng gcc/ChangeLog: * ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND. (noce_bbs_ok_for_cond_zero_arith): Likewise. (noce_try_cond_zero_arith): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/zicond_ifcvt_opt.c: Add TCs for AND. --- gcc/ifcvt.cc | 71 +++++--- .../gcc.target/riscv/zicond_ifcvt_opt.c | 163 +++++++++++++++++- 2 files changed, 212 insertions(+), 22 deletions(-) diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index 6ac91b8cbb33..9e5e93680c78 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -2922,7 +2922,7 @@ noce_cond_zero_binary_op_supported (rtx op) if (opcode == PLUS || opcode == MINUS || opcode == IOR || opcode == XOR || opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT - || opcode == ROTATE || opcode == ROTATERT) + || opcode == ROTATE || opcode == ROTATERT || opcode == AND) return true; return false; @@ -2952,6 +2952,7 @@ get_base_reg (rtx exp) static bool noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr, + rtx *bin_exp_ptr, enum rtx_code *czero_code_ptr, rtx *a_ptr, rtx **to_replace) { @@ -2996,7 +2997,7 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr, { common = b; bin_op1 = XEXP (bin_exp, 1); - czero_code = reverse + czero_code = 
(reverse ^ (GET_CODE (bin_exp) == AND)) ? noce_reversed_cond_code (if_info) : GET_CODE (cond); } @@ -3012,6 +3013,7 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr, return false; *common_ptr = common; + *bin_exp_ptr = bin_exp; *czero_code_ptr = czero_code; *a_ptr = a; @@ -3025,38 +3027,67 @@ noce_bbs_ok_for_cond_zero_arith (struct noce_if_info *if_info, rtx *common_ptr, static int noce_try_cond_zero_arith (struct noce_if_info *if_info) { - rtx target, a; + rtx target, rtmp, a; rtx_insn *seq; machine_mode mode = GET_MODE (if_info->x); rtx common = NULL_RTX; enum rtx_code czero_code = UNKNOWN; + rtx bin_exp = NULL_RTX; + enum rtx_code bin_code = UNKNOWN; rtx non_zero_op = NULL_RTX; rtx *to_replace = NULL; - if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &czero_code, &a, - &to_replace)) + if (!noce_bbs_ok_for_cond_zero_arith (if_info, &common, &bin_exp, &czero_code, + &a, &to_replace)) return false; - non_zero_op = *to_replace; - start_sequence (); - /* If x is used in both input and out like x = c ? x + z : x, - use a new reg to avoid modifying x */ - if (common && rtx_equal_p (common, if_info->x)) - target = gen_reg_rtx (mode); - else - target = if_info->x; + bin_code = GET_CODE (bin_exp); - target = noce_emit_czero (if_info, czero_code, non_zero_op, target); - if (!target || !to_replace) + if (bin_code == AND) { - end_sequence (); - return false; - } + rtmp = gen_reg_rtx (mode); + noce_emit_move_insn (rtmp, a); - *to_replace = target; - noce_emit_move_insn (if_info->x, a); + target = noce_emit_czero (if_info, czero_code, common, if_info->x); + if (!target) + { + end_sequence (); + return false; + } + + target = expand_simple_binop (mode, IOR, rtmp, target, if_info->x, 0, + OPTAB_WIDEN); + if (!target) + { + end_sequence (); + return false; + } + + if (target != if_info->x) + noce_emit_move_insn (if_info->x, target); + } + else + { + non_zero_op = *to_replace; + /* If x is used in both input and out like x = c ? 
x + z : x, + use a new reg to avoid modifying x */ + if (common && rtx_equal_p (common, if_info->x)) + target = gen_reg_rtx (mode); + else + target = if_info->x; + + target = noce_emit_czero (if_info, czero_code, non_zero_op, target); + if (!target || !to_replace) + { + end_sequence (); + return false; + } + + *to_replace = target; + noce_emit_move_insn (if_info->x, a); + } seq = end_ifcvt_sequence (if_info); if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info)) diff --git a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c index efed199627e5..a02a2757d2cf 100644 --- a/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c +++ b/gcc/testsuite/gcc.target/riscv/zicond_ifcvt_opt.c @@ -615,5 +615,164 @@ test_RotateR_eqz (unsigned long x, unsigned long y, unsigned long z, return x; } -/* { dg-final { scan-assembler-times {czero\.eqz} 28 } } */ -/* { dg-final { scan-assembler-times {czero\.nez} 28 } } */ +long +test_AND_ceqz (long x, long y, long z, long c) +{ + if (c) + x = y & z; + else + x = y; + return x; +} + +long +test_AND_ceqz_x (long x, long z, long c) +{ + if (c) + x = x & z; + + return x; +} + +long +test_AND_nez (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = y & z; + return x; +} + +long +test_AND_nez_x (long x, long z, long c) +{ + if (c) + { + } + else + x = x & z; + return x; +} + +long +test_AND_nez_2 (long x, long y, long z, long c) +{ + if (!c) + x = y & z; + else + x = y; + return x; +} + +long +test_AND_nez_x_2 (long x, long z, long c) +{ + if (!c) + x = x & z; + + return x; +} + +long +test_AND_eqz_2 (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = y & z; + return x; +} + +long +test_AND_eqz_x_2 (long x, long z, long c) +{ + if (!c) + { + } + else + x = x & z; + return x; +} + +long +test_AND_ceqz_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = z & y; + else + x = y; + return x; +} + +long +test_AND_ceqz_x_reverse_bin_oprands (long 
x, long z, long c) +{ + if (c) + x = z & x; + + return x; +} + +long +test_AND_nez_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (c) + x = y; + else + x = z & y; + return x; +} + +long +test_AND_nez_x_reverse_bin_oprands (long x, long z, long c) +{ + if (c) + { + } + else + x = z & x; + return x; +} + +long +test_AND_nez_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = z & y; + else + x = y; + return x; +} + +long +test_AND_nez_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + x = z & x; + + return x; +} + +long +test_AND_eqz_2_reverse_bin_oprands (long x, long y, long z, long c) +{ + if (!c) + x = y; + else + x = z & y; + return x; +} + +long +test_AND_eqz_x_2_reverse_bin_oprands (long x, long z, long c) +{ + if (!c) + { + } + else + x = z & x; + return x; +} +/* { dg-final { scan-assembler-times {czero\.eqz} 36 } } */ +/* { dg-final { scan-assembler-times {czero\.nez} 36 } } */ From a536d235d2204f3ff9be126fec3acbc032db3086 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 11:00:14 +0800 Subject: [PATCH 170/311] RISC-V: Remove poly selftest when --preference=fixed-vlmax This patch fixes multiple ICEs in full coverage testing: cc1: internal compiler error: in riscv_legitimize_poly_move, at config/riscv/riscv.cc:2456^M 0x1fd8d78 riscv_legitimize_poly_move^M ../../../../gcc/gcc/config/riscv/riscv.cc:2456^M 0x1fd9518 riscv_legitimize_move(machine_mode, rtx_def*, rtx_def*)^M ../../../../gcc/gcc/config/riscv/riscv.cc:2583^M 0x2936820 gen_movdi(rtx_def*, rtx_def*)^M ../../../../gcc/gcc/config/riscv/riscv.md:2099^M 0x11a0f28 rtx_insn* insn_gen_fn::operator()(rtx_def*, rtx_def*) const^M ../../../../gcc/gcc/recog.h:431^M 0x13cf2f9 emit_move_insn_1(rtx_def*, rtx_def*)^M ../../../../gcc/gcc/expr.cc:4553^M 0x13d010c emit_move_insn(rtx_def*, rtx_def*)^M ../../../../gcc/gcc/expr.cc:4723^M 0x216f5e0 run_poly_int_selftest^M ../../../../gcc/gcc/config/riscv/riscv-selftests.cc:185^M 0x21701e6 
run_poly_int_selftests^M ../../../../gcc/gcc/config/riscv/riscv-selftests.cc:226^M 0x2172109 selftest::riscv_run_selftests()^M ../../../../gcc/gcc/config/riscv/riscv-selftests.cc:371^M 0x3b8067b selftest::run_tests()^M ../../../../gcc/gcc/selftest-run-tests.cc:112^M 0x1ad90ee toplev::run_self_tests()^M ../../../../gcc/gcc/toplev.cc:2209^M Running target riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax The root cause is that we are testing POLY value computation under FIXED-VLMAX and hit an ICE in this code: if (BYTES_PER_RISCV_VECTOR.is_constant ()) { gcc_assert (value.is_constant ()); -----> assert failed. riscv_emit_move (dest, GEN_INT (value.to_constant ())); return; } For example, a poly value [15, 16] is computed by csrr vlenb + multiple scalar integer instructions. However, such a compile-time unknown value only needs to be computed for scalable vectors, that is when !BYTES_PER_RISCV_VECTOR.is_constant (), since csrr vlenb = [16, 0] when -march=rv64gcv --param=riscv-autovec-preference=fixed-vlmax and we have no chance to compute a compile-time POLY value. Also, we never reach the situation of computing a compile-time unknown value for a FIXED-VLMAX vector. So disable the POLY selftest for FIXED-VLMAX. gcc/ChangeLog: * config/riscv/riscv-selftests.cc (riscv_run_selftests): Remove poly self test when FIXED-VLMAX. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/poly-selftest-1.c: New test. 
--- gcc/config/riscv/riscv-selftests.cc | 14 +++++++++++++- .../gcc.target/riscv/rvv/base/poly-selftest-1.c | 12 ++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/poly-selftest-1.c diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc index 0ac17fb70a1f..289916b999e2 100644 --- a/gcc/config/riscv/riscv-selftests.cc +++ b/gcc/config/riscv/riscv-selftests.cc @@ -368,7 +368,19 @@ namespace selftest { void riscv_run_selftests (void) { - run_poly_int_selftests (); + if (!BYTES_PER_RISCV_VECTOR.is_constant ()) + /* We can know POLY value = [4, 4] when BYTES_PER_RISCV_VECTOR + is !is_constant () since we can use csrr vlenb and scalar shift + instruction to compute such POLY value and store it into a scalar + register. Whereas, we can't know [4, 4] when it is specified as + FIXED-VLMAX since BYTES_PER_RISCV_VECTOR = 16 for -march=rv64gcv + and csrr vlenb is 16 which is totally unrelated to any + compile-time unknown POLY value. + + Since we never need to compute a compile-time unknown POLY value + when --param=riscv-autovec-preference=fixed-vlmax, disable poly + selftests in such situation. */ + run_poly_int_selftests (); run_const_vector_selftests (); run_broadcast_selftests (); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/poly-selftest-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/poly-selftest-1.c new file mode 100644 index 000000000000..0f128ac26b2e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/poly-selftest-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O0 -fself-test=$srcdir/selftests --param=riscv-autovec-preference=fixed-vlmax -S" } */ + +/* Verify that -fself-test does not fail on a non empty source. 
*/ + +int i; void bar(); void foo() +{ + while (i--) + bar(); +} + +/* { dg-regexp {^-fself-test: [0-9]+ pass\(es\) in [.0-9]+ seconds$|.*: note: self-tests are not enabled in this build$} } */ From 237951066bc52559e43e76172baa7d1dec4cee75 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 11 Dec 2023 08:34:15 +0100 Subject: [PATCH 171/311] extend.texi: Mark builtin arguments with @var{...} In many cases we just specify types for the builtin arguments, in other cases types and names with @var{name} syntax, and in other case with just name. Shall we tweak that somehow? If the argument names are unimportant, perhaps it is fine to leave that out, but shouldn't we always use @var{...} around the parameter names when specified? On Fri, Dec 01, 2023 at 10:43:57AM -0700, Sandra Loosemore wrote: > Yup. The Texinfo manual says: "When using @deftypefn command and > variations, you should mark parameter names with @var to distinguish these > from data type names, keywords, and other parts of the literal syntax of the > programming language." Here is a patch which does that (but not adding types to where they were missing, that will be harder to search for). 
2023-12-11 Jakub Jelinek * doc/extend.texi (__sync_fetch_and_add, __sync_fetch_and_sub, __sync_fetch_and_or, __sync_fetch_and_and, __sync_fetch_and_xor, __sync_fetch_and_nand, __sync_add_and_fetch, __sync_sub_and_fetch, __sync_or_and_fetch, __sync_and_and_fetch, __sync_xor_and_fetch, __sync_nand_and_fetch, __sync_bool_compare_and_swap, __sync_val_compare_and_swap, __sync_lock_test_and_set, __sync_lock_release, __atomic_load_n, __atomic_load, __atomic_store_n, __atomic_store, __atomic_exchange_n, __atomic_exchange, __atomic_compare_exchange_n, __atomic_compare_exchange, __atomic_add_fetch, __atomic_sub_fetch, __atomic_and_fetch, __atomic_xor_fetch, __atomic_or_fetch, __atomic_nand_fetch, __atomic_fetch_add, __atomic_fetch_sub, __atomic_fetch_and, __atomic_fetch_xor, __atomic_fetch_or, __atomic_fetch_nand, __atomic_test_and_set, __atomic_clear, __atomic_thread_fence, __atomic_signal_fence, __atomic_always_lock_free, __atomic_is_lock_free, __builtin_add_overflow, __builtin_sadd_overflow, __builtin_saddl_overflow, __builtin_saddll_overflow, __builtin_uadd_overflow, __builtin_uaddl_overflow, __builtin_uaddll_overflow, __builtin_sub_overflow, __builtin_ssub_overflow, __builtin_ssubl_overflow, __builtin_ssubll_overflow, __builtin_usub_overflow, __builtin_usubl_overflow, __builtin_usubll_overflow, __builtin_mul_overflow, __builtin_smul_overflow, __builtin_smull_overflow, __builtin_smulll_overflow, __builtin_umul_overflow, __builtin_umull_overflow, __builtin_umulll_overflow, __builtin_add_overflow_p, __builtin_sub_overflow_p, __builtin_mul_overflow_p, __builtin_addc, __builtin_addcl, __builtin_addcll, __builtin_subc, __builtin_subcl, __builtin_subcll, __builtin_alloca, __builtin_alloca_with_align, __builtin_alloca_with_align_and_max, __builtin_speculation_safe_value, __builtin_nan, __builtin_nand32, __builtin_nand64, __builtin_nand128, __builtin_nanf, __builtin_nanl, __builtin_nanf@var{n}, __builtin_nanf@var{n}x, __builtin_nans, __builtin_nansd32, __builtin_nansd64, 
__builtin_nansd128, __builtin_nansf, __builtin_nansl, __builtin_nansf@var{n}, __builtin_nansf@var{n}x, __builtin_ffs, __builtin_clz, __builtin_ctz, __builtin_clrsb, __builtin_popcount, __builtin_parity, __builtin_bswap16, __builtin_bswap32, __builtin_bswap64, __builtin_bswap128, __builtin_extend_pointer, __builtin_goacc_parlevel_id, __builtin_goacc_parlevel_size, vec_clrl, vec_clrr, vec_mulh, vec_mul, vec_div, vec_dive, vec_mod, __builtin_rx_mvtc): Use @var{...} around parameter names. (vec_rl, vec_sl, vec_sr, vec_sra): Likewise. Use @var{...} also around A, B and R in description. --- gcc/doc/extend.texi | 294 ++++++++++++++++++++++---------------------- 1 file changed, 147 insertions(+), 147 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c074b360f78f..e8b5e771f7a0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -13045,12 +13045,12 @@ variables to be protected. The list is ignored by GCC which treats it as empty. GCC interprets an empty list as meaning that all globally accessible variables should be protected. 
-@defbuiltin{@var{type} __sync_fetch_and_add (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_fetch_and_sub (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_fetch_and_or (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_fetch_and_and (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_fetch_and_xor (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_fetch_and_nand (@var{type} *ptr, @var{type} value, ...)} +@defbuiltin{@var{type} __sync_fetch_and_add (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_fetch_and_sub (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_fetch_and_or (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_fetch_and_and (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_fetch_and_xor (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_fetch_and_nand (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} These built-in functions perform the operation suggested by the name, and returns the value that had previously been in memory. That is, operations on integer operands have the following semantics. Operations on pointer @@ -13070,13 +13070,13 @@ type. It must not be a boolean type. as @code{*ptr = ~(tmp & value)} instead of @code{*ptr = ~tmp & value}. 
@enddefbuiltin -@defbuiltin{@var{type} __sync_add_and_fetch (@var{type} *ptr, @ - @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_sub_and_fetch (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_or_and_fetch (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_and_and_fetch (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_xor_and_fetch (@var{type} *ptr, @var{type} value, ...)} -@defbuiltinx{@var{type} __sync_nand_and_fetch (@var{type} *ptr, @var{type} value, ...)} +@defbuiltin{@var{type} __sync_add_and_fetch (@var{type} *@var{ptr}, @ + @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_sub_and_fetch (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_or_and_fetch (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_and_and_fetch (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_xor_and_fetch (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} +@defbuiltinx{@var{type} __sync_nand_and_fetch (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} These built-in functions perform the operation suggested by the name, and return the new value. That is, operations on integer operands have the following semantics. Operations on pointer operands are performed as @@ -13095,8 +13095,8 @@ as @code{*ptr = ~(*ptr & value)} instead of @code{*ptr = ~*ptr & value}. 
@enddefbuiltin -@defbuiltin{bool __sync_bool_compare_and_swap (@var{type} *ptr, @var{type} oldval, @var{type} newval, ...)} -@defbuiltinx{@var{type} __sync_val_compare_and_swap (@var{type} *ptr, @var{type} oldval, @var{type} newval, ...)} +@defbuiltin{bool __sync_bool_compare_and_swap (@var{type} *@var{ptr}, @var{type} @var{oldval}, @var{type} @var{newval}, ...)} +@defbuiltinx{@var{type} __sync_val_compare_and_swap (@var{type} *@var{ptr}, @var{type} @var{oldval}, @var{type} @var{newval}, ...)} These built-in functions perform an atomic compare and swap. That is, if the current value of @code{*@var{ptr}} is @var{oldval}, then write @var{newval} into @@ -13111,7 +13111,7 @@ of @code{*@var{ptr}} before the operation. This built-in function issues a full memory barrier. @enddefbuiltin -@defbuiltin{@var{type} __sync_lock_test_and_set (@var{type} *ptr, @var{type} value, ...)} +@defbuiltin{@var{type} __sync_lock_test_and_set (@var{type} *@var{ptr}, @var{type} @var{value}, ...)} This built-in function, as described by Intel, is not a traditional test-and-set operation, but rather an atomic exchange operation. It writes @var{value} into @code{*@var{ptr}}, and returns the previous contents of @@ -13131,7 +13131,7 @@ be globally visible yet, and previous memory loads may not yet be satisfied. @enddefbuiltin -@defbuiltin{void __sync_lock_release (@var{type} *ptr, ...)} +@defbuiltin{void __sync_lock_release (@var{type} *@var{ptr}, ...)} This built-in function releases the lock acquired by @code{__sync_lock_test_and_set}. Normally this means writing the constant 0 to @code{*@var{ptr}}. @@ -13248,7 +13248,7 @@ reserved for the memory order. The remainder of the signed int is reserved for target use and should be 0. Use of the predefined atomic values ensures proper usage. 
-@defbuiltin{@var{type} __atomic_load_n (@var{type} *ptr, int memorder)} +@defbuiltin{@var{type} __atomic_load_n (@var{type} *@var{ptr}, int @var{memorder})} This built-in function implements an atomic load operation. It returns the contents of @code{*@var{ptr}}. @@ -13258,13 +13258,13 @@ and @code{__ATOMIC_CONSUME}. @enddefbuiltin -@defbuiltin{void __atomic_load (@var{type} *ptr, @var{type} *ret, int memorder)} +@defbuiltin{void __atomic_load (@var{type} *@var{ptr}, @var{type} *@var{ret}, int @var{memorder})} This is the generic version of an atomic load. It returns the contents of @code{*@var{ptr}} in @code{*@var{ret}}. @enddefbuiltin -@defbuiltin{void __atomic_store_n (@var{type} *ptr, @var{type} val, int memorder)} +@defbuiltin{void __atomic_store_n (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} This built-in function implements an atomic store operation. It writes @code{@var{val}} into @code{*@var{ptr}}. @@ -13273,13 +13273,13 @@ The valid memory order variants are @enddefbuiltin -@defbuiltin{void __atomic_store (@var{type} *ptr, @var{type} *val, int memorder)} +@defbuiltin{void __atomic_store (@var{type} *@var{ptr}, @var{type} *@var{val}, int @var{memorder})} This is the generic version of an atomic store. It stores the value of @code{*@var{val}} into @code{*@var{ptr}}. @enddefbuiltin -@defbuiltin{@var{type} __atomic_exchange_n (@var{type} *ptr, @var{type} val, int memorder)} +@defbuiltin{@var{type} __atomic_exchange_n (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} This built-in function implements an atomic exchange operation. It writes @var{val} into @code{*@var{ptr}}, and returns the previous contents of @code{*@var{ptr}}. @@ -13288,14 +13288,14 @@ All memory order variants are valid. 
@enddefbuiltin -@defbuiltin{void __atomic_exchange (@var{type} *ptr, @var{type} *val, @var{type} *ret, int memorder)} +@defbuiltin{void __atomic_exchange (@var{type} *@var{ptr}, @var{type} *@var{val}, @var{type} *@var{ret}, int @var{memorder})} This is the generic version of an atomic exchange. It stores the contents of @code{*@var{val}} into @code{*@var{ptr}}. The original value of @code{*@var{ptr}} is copied into @code{*@var{ret}}. @enddefbuiltin -@defbuiltin{bool __atomic_compare_exchange_n (@var{type} *ptr, @var{type} *expected, @var{type} desired, bool weak, int success_memorder, int failure_memorder)} +@defbuiltin{bool __atomic_compare_exchange_n (@var{type} *@var{ptr}, @var{type} *@var{expected}, @var{type} @var{desired}, bool @var{weak}, int @var{success_memorder}, int @var{failure_memorder})} This built-in function implements an atomic compare and exchange operation. This compares the contents of @code{*@var{ptr}} with the contents of @code{*@var{expected}}. If equal, the operation is a @emph{read-modify-write} @@ -13319,7 +13319,7 @@ stronger order than that specified by @var{success_memorder}. @enddefbuiltin -@defbuiltin{bool __atomic_compare_exchange (@var{type} *ptr, @var{type} *expected, @var{type} *desired, bool weak, int success_memorder, int failure_memorder)} +@defbuiltin{bool __atomic_compare_exchange (@var{type} *@var{ptr}, @var{type} *@var{expected}, @var{type} *@var{desired}, bool @var{weak}, int @var{success_memorder}, int @var{failure_memorder})} This built-in function implements the generic version of @code{__atomic_compare_exchange}. The function is virtually identical to @code{__atomic_compare_exchange_n}, except the desired value is also a @@ -13327,12 +13327,12 @@ pointer. 
@enddefbuiltin -@defbuiltin{@var{type} __atomic_add_fetch (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_sub_fetch (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_and_fetch (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_xor_fetch (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_or_fetch (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_nand_fetch (@var{type} *ptr, @var{type} val, int memorder)} +@defbuiltin{@var{type} __atomic_add_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_sub_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_and_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_xor_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_or_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_nand_fetch (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} These built-in functions perform the operation suggested by the name, and return the result of the operation. Operations on pointer arguments are performed as if the operands were of the @code{uintptr_t} type. That is, @@ -13348,12 +13348,12 @@ type. It must not be a boolean type. All memory orders are valid. 
@enddefbuiltin -@defbuiltin{@var{type} __atomic_fetch_add (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_fetch_sub (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_fetch_and (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_fetch_xor (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_fetch_or (@var{type} *ptr, @var{type} val, int memorder)} -@defbuiltinx{@var{type} __atomic_fetch_nand (@var{type} *ptr, @var{type} val, int memorder)} +@defbuiltin{@var{type} __atomic_fetch_add (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_fetch_sub (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_fetch_and (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_fetch_xor (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_fetch_or (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} +@defbuiltinx{@var{type} __atomic_fetch_nand (@var{type} *@var{ptr}, @var{type} @var{val}, int @var{memorder})} These built-in functions perform the operation suggested by the name, and return the value that had previously been in @code{*@var{ptr}}. Operations on pointer arguments are performed as if the operands were of @@ -13370,7 +13370,7 @@ The same constraints on arguments apply as for the corresponding @enddefbuiltin -@defbuiltin{bool __atomic_test_and_set (void *ptr, int memorder)} +@defbuiltin{bool __atomic_test_and_set (void *@var{ptr}, int @var{memorder})} This built-in function performs an atomic test-and-set operation on the byte at @code{*@var{ptr}}. The byte is set to some implementation @@ -13383,7 +13383,7 @@ All memory orders are valid. 
@enddefbuiltin -@defbuiltin{void __atomic_clear (bool *ptr, int memorder)} +@defbuiltin{void __atomic_clear (bool *@var{ptr}, int @var{memorder})} This built-in function performs an atomic clear operation on @code{*@var{ptr}}. After the operation, @code{*@var{ptr}} contains 0. @@ -13398,7 +13398,7 @@ The valid memory order variants are @enddefbuiltin -@defbuiltin{void __atomic_thread_fence (int memorder)} +@defbuiltin{void __atomic_thread_fence (int @var{memorder})} This built-in function acts as a synchronization fence between threads based on the specified memory order. @@ -13407,7 +13407,7 @@ All memory orders are valid. @enddefbuiltin -@defbuiltin{void __atomic_signal_fence (int memorder)} +@defbuiltin{void __atomic_signal_fence (int @var{memorder})} This built-in function acts as a synchronization fence between a thread and signal handlers based in the same thread. @@ -13416,7 +13416,7 @@ All memory orders are valid. @enddefbuiltin -@defbuiltin{bool __atomic_always_lock_free (size_t size, void *ptr)} +@defbuiltin{bool __atomic_always_lock_free (size_t @var{size}, void *@var{ptr})} This built-in function returns @code{true} if objects of @var{size} bytes always generate lock-free atomic instructions for the target architecture. @@ -13433,7 +13433,7 @@ if (__atomic_always_lock_free (sizeof (long long), 0)) @enddefbuiltin -@defbuiltin{bool __atomic_is_lock_free (size_t size, void *ptr)} +@defbuiltin{bool __atomic_is_lock_free (size_t @var{size}, void *@var{ptr})} This built-in function returns @code{true} if objects of @var{size} bytes always generate lock-free atomic instructions for the target architecture. If @@ -13451,13 +13451,13 @@ compiler may also ignore this parameter. The following built-in functions allow performing simple arithmetic operations together with checking whether the operations overflowed. 
-@defbuiltin{bool __builtin_add_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)} -@defbuiltinx{bool __builtin_sadd_overflow (int a, int b, int *res)} -@defbuiltinx{bool __builtin_saddl_overflow (long int a, long int b, long int *res)} -@defbuiltinx{bool __builtin_saddll_overflow (long long int a, long long int b, long long int *res)} -@defbuiltinx{bool __builtin_uadd_overflow (unsigned int a, unsigned int b, unsigned int *res)} -@defbuiltinx{bool __builtin_uaddl_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)} -@defbuiltinx{bool __builtin_uaddll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)} +@defbuiltin{bool __builtin_add_overflow (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} *@var{res})} +@defbuiltinx{bool __builtin_sadd_overflow (int @var{a}, int @var{b}, int *@var{res})} +@defbuiltinx{bool __builtin_saddl_overflow (long int @var{a}, long int @var{b}, long int *@var{res})} +@defbuiltinx{bool __builtin_saddll_overflow (long long int @var{a}, long long int @var{b}, long long int *@var{res})} +@defbuiltinx{bool __builtin_uadd_overflow (unsigned int @var{a}, unsigned int @var{b}, unsigned int *@var{res})} +@defbuiltinx{bool __builtin_uaddl_overflow (unsigned long int @var{a}, unsigned long int @var{b}, unsigned long int *@var{res})} +@defbuiltinx{bool __builtin_uaddll_overflow (unsigned long long int @var{a}, unsigned long long int @var{b}, unsigned long long int *@var{res})} These built-in functions promote the first two operands into infinite precision signed type and perform addition on those promoted operands. The result is then @@ -13477,13 +13477,13 @@ after addition, conditional jump on carry etc. 
@enddefbuiltin -@defbuiltin{bool __builtin_sub_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)} -@defbuiltinx{bool __builtin_ssub_overflow (int a, int b, int *res)} -@defbuiltinx{bool __builtin_ssubl_overflow (long int a, long int b, long int *res)} -@defbuiltinx{bool __builtin_ssubll_overflow (long long int a, long long int b, long long int *res)} -@defbuiltinx{bool __builtin_usub_overflow (unsigned int a, unsigned int b, unsigned int *res)} -@defbuiltinx{bool __builtin_usubl_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)} -@defbuiltinx{bool __builtin_usubll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)} +@defbuiltin{bool __builtin_sub_overflow (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} *@var{res})} +@defbuiltinx{bool __builtin_ssub_overflow (int @var{a}, int @var{b}, int *@var{res})} +@defbuiltinx{bool __builtin_ssubl_overflow (long int @var{a}, long int @var{b}, long int *@var{res})} +@defbuiltinx{bool __builtin_ssubll_overflow (long long int @var{a}, long long int @var{b}, long long int *@var{res})} +@defbuiltinx{bool __builtin_usub_overflow (unsigned int @var{a}, unsigned int @var{b}, unsigned int *@var{res})} +@defbuiltinx{bool __builtin_usubl_overflow (unsigned long int @var{a}, unsigned long int @var{b}, unsigned long int *@var{res})} +@defbuiltinx{bool __builtin_usubll_overflow (unsigned long long int @var{a}, unsigned long long int @var{b}, unsigned long long int *@var{res})} These built-in functions are similar to the add overflow checking built-in functions above, except they perform subtraction, subtract the second argument @@ -13491,13 +13491,13 @@ from the first one, instead of addition. 
@enddefbuiltin -@defbuiltin{bool __builtin_mul_overflow (@var{type1} a, @var{type2} b, @var{type3} *res)} -@defbuiltinx{bool __builtin_smul_overflow (int a, int b, int *res)} -@defbuiltinx{bool __builtin_smull_overflow (long int a, long int b, long int *res)} -@defbuiltinx{bool __builtin_smulll_overflow (long long int a, long long int b, long long int *res)} -@defbuiltinx{bool __builtin_umul_overflow (unsigned int a, unsigned int b, unsigned int *res)} -@defbuiltinx{bool __builtin_umull_overflow (unsigned long int a, unsigned long int b, unsigned long int *res)} -@defbuiltinx{bool __builtin_umulll_overflow (unsigned long long int a, unsigned long long int b, unsigned long long int *res)} +@defbuiltin{bool __builtin_mul_overflow (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} *@var{res})} +@defbuiltinx{bool __builtin_smul_overflow (int @var{a}, int @var{b}, int *@var{res})} +@defbuiltinx{bool __builtin_smull_overflow (long int @var{a}, long int @var{b}, long int *@var{res})} +@defbuiltinx{bool __builtin_smulll_overflow (long long int @var{a}, long long int @var{b}, long long int *@var{res})} +@defbuiltinx{bool __builtin_umul_overflow (unsigned int @var{a}, unsigned int @var{b}, unsigned int *@var{res})} +@defbuiltinx{bool __builtin_umull_overflow (unsigned long int @var{a}, unsigned long int @var{b}, unsigned long int *@var{res})} +@defbuiltinx{bool __builtin_umulll_overflow (unsigned long long int @var{a}, unsigned long long int @var{b}, unsigned long long int *@var{res})} These built-in functions are similar to the add overflow checking built-in functions above, except they perform multiplication, instead of addition. @@ -13507,9 +13507,9 @@ functions above, except they perform multiplication, instead of addition. The following built-in functions allow checking if simple arithmetic operation would overflow. 
-@defbuiltin{bool __builtin_add_overflow_p (@var{type1} a, @var{type2} b, @var{type3} c)} -@defbuiltinx{bool __builtin_sub_overflow_p (@var{type1} a, @var{type2} b, @var{type3} c)} -@defbuiltinx{bool __builtin_mul_overflow_p (@var{type1} a, @var{type2} b, @var{type3} c)} +@defbuiltin{bool __builtin_add_overflow_p (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} @var{c})} +@defbuiltinx{bool __builtin_sub_overflow_p (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} @var{c})} +@defbuiltinx{bool __builtin_mul_overflow_p (@var{type1} @var{a}, @var{type2} @var{b}, @var{type3} @var{c})} These built-in functions are similar to @code{__builtin_add_overflow}, @code{__builtin_sub_overflow}, or @code{__builtin_mul_overflow}, except that @@ -13549,9 +13549,9 @@ after addition, conditional jump on carry etc. @enddefbuiltin -@defbuiltin{{unsigned int} __builtin_addc (unsigned int a, unsigned int b, unsigned int carry_in, unsigned int *carry_out)} -@defbuiltinx{{unsigned long int} __builtin_addcl (unsigned long int a, unsigned long int b, unsigned int carry_in, unsigned long int *carry_out)} -@defbuiltinx{{unsigned long long int} __builtin_addcll (unsigned long long int a, unsigned long long int b, unsigned long long int carry_in, unsigned long long int *carry_out)} +@defbuiltin{{unsigned int} __builtin_addc (unsigned int @var{a}, unsigned int @var{b}, unsigned int @var{carry_in}, unsigned int *@var{carry_out})} +@defbuiltinx{{unsigned long int} __builtin_addcl (unsigned long int @var{a}, unsigned long int @var{b}, unsigned int @var{carry_in}, unsigned long int *@var{carry_out})} +@defbuiltinx{{unsigned long long int} __builtin_addcll (unsigned long long int @var{a}, unsigned long long int @var{b}, unsigned long long int @var{carry_in}, unsigned long long int *@var{carry_out})} These built-in functions are equivalent to: @smallexample @@ -13571,9 +13571,9 @@ emitted if one of them (preferrably the third one) has only values @enddefbuiltin -@defbuiltin{{unsigned int} 
__builtin_subc (unsigned int a, unsigned int b, unsigned int carry_in, unsigned int *carry_out)} -@defbuiltinx{{unsigned long int} __builtin_subcl (unsigned long int a, unsigned long int b, unsigned int carry_in, unsigned long int *carry_out)} -@defbuiltinx{{unsigned long long int} __builtin_subcll (unsigned long long int a, unsigned long long int b, unsigned long long int carry_in, unsigned long long int *carry_out)} +@defbuiltin{{unsigned int} __builtin_subc (unsigned int @var{a}, unsigned int @var{b}, unsigned int @var{carry_in}, unsigned int *@var{carry_out})} +@defbuiltinx{{unsigned long int} __builtin_subcl (unsigned long int @var{a}, unsigned long int @var{b}, unsigned int @var{carry_in}, unsigned long int *@var{carry_out})} +@defbuiltinx{{unsigned long long int} __builtin_subcll (unsigned long long int @var{a}, unsigned long long int @var{b}, unsigned long long int @var{carry_in}, unsigned long long int *@var{carry_out})} These built-in functions are equivalent to: @smallexample @@ -14355,7 +14355,7 @@ for all target libcs, but in all cases they will gracefully fallback to libc calls. These built-in functions appear both with and without the @code{__builtin_} prefix. -@defbuiltin{{void *} __builtin_alloca (size_t size)} +@defbuiltin{{void *} __builtin_alloca (size_t @var{size})} The @code{__builtin_alloca} function must be called at block scope. The function allocates an object @var{size} bytes large on the stack of the calling function. The object is aligned on the default stack @@ -14395,7 +14395,7 @@ where GCC provides them as an extension. @enddefbuiltin -@defbuiltin{{void *} __builtin_alloca_with_align (size_t size, size_t alignment)} +@defbuiltin{{void *} __builtin_alloca_with_align (size_t @var{size}, size_t @var{alignment})} The @code{__builtin_alloca_with_align} function must be called at block scope. The function allocates an object @var{size} bytes large on the stack of the calling function. 
The allocated object is aligned on @@ -14442,7 +14442,7 @@ an extension. @xref{Variable Length}, for details. @enddefbuiltin -@defbuiltin{{void *} __builtin_alloca_with_align_and_max (size_t size, size_t alignment, size_t max_size)} +@defbuiltin{{void *} __builtin_alloca_with_align_and_max (size_t @var{size}, size_t @var{alignment}, size_t @var{max_size})} Similar to @code{__builtin_alloca_with_align} but takes an extra argument specifying an upper bound for @var{size} in case its value cannot be computed at compile time, for use by @option{-fstack-usage}, @option{-Wstack-usage} @@ -14495,7 +14495,7 @@ recognized in such contexts. @enddefbuiltin -@defbuiltin{@var{type} __builtin_speculation_safe_value (@var{type} val, @var{type} failval)} +@defbuiltin{@var{type} __builtin_speculation_safe_value (@var{type} @var{val}, @var{type} @var{failval})} This built-in function can be used to help mitigate against unsafe speculative execution. @var{type} may be any integral type or any @@ -15227,7 +15227,7 @@ argument. GCC treats this parameter as type-generic, which means it does not do default promotion from float to double. @enddefbuiltin -@defbuiltin{double __builtin_nan (const char *str)} +@defbuiltin{double __builtin_nan (const char *@var{str})} This is an implementation of the ISO C99 function @code{nan}. Since ISO C99 defines this function in terms of @code{strtod}, which we @@ -15244,68 +15244,68 @@ consumed by @code{strtol}, is evaluated early enough that it is considered a compile-time constant. @enddefbuiltin -@defbuiltin{_Decimal32 __builtin_nand32 (const char *str)} +@defbuiltin{_Decimal32 __builtin_nand32 (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{_Decimal32}. @enddefbuiltin -@defbuiltin{_Decimal64 __builtin_nand64 (const char *str)} +@defbuiltin{_Decimal64 __builtin_nand64 (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{_Decimal64}. 
@enddefbuiltin -@defbuiltin{_Decimal128 __builtin_nand128 (const char *str)} +@defbuiltin{_Decimal128 __builtin_nand128 (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{_Decimal128}. @enddefbuiltin -@defbuiltin{float __builtin_nanf (const char *str)} +@defbuiltin{float __builtin_nanf (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{float}. @enddefbuiltin -@defbuiltin{{long double} __builtin_nanl (const char *str)} +@defbuiltin{{long double} __builtin_nanl (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{long double}. @enddefbuiltin -@defbuiltin{_Float@var{n} __builtin_nanf@var{n} (const char *str)} +@defbuiltin{_Float@var{n} __builtin_nanf@var{n} (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{_Float@var{n}}. @enddefbuiltin -@defbuiltin{_Float@var{n}x __builtin_nanf@var{n}x (const char *str)} +@defbuiltin{_Float@var{n}x __builtin_nanf@var{n}x (const char *@var{str})} Similar to @code{__builtin_nan}, except the return type is @code{_Float@var{n}x}. @enddefbuiltin -@defbuiltin{double __builtin_nans (const char *str)} +@defbuiltin{double __builtin_nans (const char *@var{str})} Similar to @code{__builtin_nan}, except the significand is forced to be a signaling NaN@. The @code{nans} function is proposed by @uref{https://www.open-std.org/jtc1/sc22/wg14/www/docs/n965.htm,,WG14 N965}. @enddefbuiltin -@defbuiltin{_Decimal32 __builtin_nansd32 (const char *str)} +@defbuiltin{_Decimal32 __builtin_nansd32 (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{_Decimal32}. @enddefbuiltin -@defbuiltin{_Decimal64 __builtin_nansd64 (const char *str)} +@defbuiltin{_Decimal64 __builtin_nansd64 (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{_Decimal64}. 
@enddefbuiltin -@defbuiltin{_Decimal128 __builtin_nansd128 (const char *str)} +@defbuiltin{_Decimal128 __builtin_nansd128 (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{_Decimal128}. @enddefbuiltin -@defbuiltin{float __builtin_nansf (const char *str)} +@defbuiltin{float __builtin_nansf (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{float}. @enddefbuiltin -@defbuiltin{{long double} __builtin_nansl (const char *str)} +@defbuiltin{{long double} __builtin_nansl (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{long double}. @enddefbuiltin -@defbuiltin{_Float@var{n} __builtin_nansf@var{n} (const char *str)} +@defbuiltin{_Float@var{n} __builtin_nansf@var{n} (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{_Float@var{n}}. @enddefbuiltin -@defbuiltin{_Float@var{n}x __builtin_nansf@var{n}x (const char *str)} +@defbuiltin{_Float@var{n}x __builtin_nansf@var{n}x (const char *@var{str})} Similar to @code{__builtin_nans}, except the return type is @code{_Float@var{n}x}. @enddefbuiltin @@ -15324,32 +15324,32 @@ With @code{-ffinite-math-only} option this built-in function will always return 0. @enddefbuiltin -@defbuiltin{int __builtin_ffs (int x)} +@defbuiltin{int __builtin_ffs (int @var{x})} Returns one plus the index of the least significant 1-bit of @var{x}, or if @var{x} is zero, returns zero. @enddefbuiltin -@defbuiltin{int __builtin_clz (unsigned int x)} +@defbuiltin{int __builtin_clz (unsigned int @var{x})} Returns the number of leading 0-bits in @var{x}, starting at the most significant bit position. If @var{x} is 0, the result is undefined. @enddefbuiltin -@defbuiltin{int __builtin_ctz (unsigned int x)} +@defbuiltin{int __builtin_ctz (unsigned int @var{x})} Returns the number of trailing 0-bits in @var{x}, starting at the least significant bit position. If @var{x} is 0, the result is undefined. 
@enddefbuiltin -@defbuiltin{int __builtin_clrsb (int x)} +@defbuiltin{int __builtin_clrsb (int @var{x})} Returns the number of leading redundant sign bits in @var{x}, i.e.@: the number of bits following the most significant bit that are identical to it. There are no special cases for 0 or other values. @enddefbuiltin -@defbuiltin{int __builtin_popcount (unsigned int x)} +@defbuiltin{int __builtin_popcount (unsigned int @var{x})} Returns the number of 1-bits in @var{x}. @enddefbuiltin -@defbuiltin{int __builtin_parity (unsigned int x)} +@defbuiltin{int __builtin_parity (unsigned int @var{x})} Returns the parity of @var{x}, i.e.@: the number of 1-bits in @var{x} modulo 2. @enddefbuiltin @@ -15582,29 +15582,29 @@ Returns the first argument raised to the power of the second. Unlike the @code{pow} function no guarantees about precision and rounding are made. @enddefbuiltin -@defbuiltin{uint16_t __builtin_bswap16 (uint16_t x)} +@defbuiltin{uint16_t __builtin_bswap16 (uint16_t @var{x})} Returns @var{x} with the order of the bytes reversed; for example, @code{0xaabb} becomes @code{0xbbaa}. Byte here always means exactly 8 bits. @enddefbuiltin -@defbuiltin{uint32_t __builtin_bswap32 (uint32_t x)} +@defbuiltin{uint32_t __builtin_bswap32 (uint32_t @var{x})} Similar to @code{__builtin_bswap16}, except the argument and return types are 32-bit. @enddefbuiltin -@defbuiltin{uint64_t __builtin_bswap64 (uint64_t x)} +@defbuiltin{uint64_t __builtin_bswap64 (uint64_t @var{x})} Similar to @code{__builtin_bswap32}, except the argument and return types are 64-bit. @enddefbuiltin -@defbuiltin{uint128_t __builtin_bswap128 (uint128_t x)} +@defbuiltin{uint128_t __builtin_bswap128 (uint128_t @var{x})} Similar to @code{__builtin_bswap64}, except the argument and return types are 128-bit. Only supported on targets when 128-bit types are supported. 
@enddefbuiltin -@defbuiltin{Pmode __builtin_extend_pointer (void * x)} +@defbuiltin{Pmode __builtin_extend_pointer (void * @var{x})} On targets where the user visible pointer size is smaller than the size of an actual hardware address this function returns the extended user pointer. Targets where this is true included ILP32 mode on x86_64 or @@ -15612,12 +15612,12 @@ Aarch64. This function is mainly useful when writing inline assembly code. @enddefbuiltin -@defbuiltin{int __builtin_goacc_parlevel_id (int x)} +@defbuiltin{int __builtin_goacc_parlevel_id (int @var{x})} Returns the openacc gang, worker or vector id depending on whether @var{x} is 0, 1 or 2. @enddefbuiltin -@defbuiltin{int __builtin_goacc_parlevel_size (int x)} +@defbuiltin{int __builtin_goacc_parlevel_size (int @var{x})} Returns the openacc gang, worker or vector size depending on whether @var{x} is 0, 1 or 2. @enddefbuiltin @@ -22920,9 +22920,9 @@ implemented by the @code{vctzdm} instruction. @smallexample @exdent vector signed char -@exdent vec_clrl (vector signed char a, unsigned int n); +@exdent vec_clrl (vector signed char @var{a}, unsigned int @var{n}); @exdent vector unsigned char -@exdent vec_clrl (vector unsigned char a, unsigned int n); +@exdent vec_clrl (vector unsigned char @var{a}, unsigned int @var{n}); @end smallexample Clear the left-most @code{(16 - n)} bytes of vector argument @code{a}, as if implemented by the @code{vclrlb} instruction on a big-endian target @@ -22932,9 +22932,9 @@ value of @code{n} that is greater than 16 is treated as if it equaled 16. 
@smallexample @exdent vector signed char -@exdent vec_clrr (vector signed char a, unsigned int n); +@exdent vec_clrr (vector signed char @var{a}, unsigned int @var{n}); @exdent vector unsigned char -@exdent vec_clrr (vector unsigned char a, unsigned int n); +@exdent vec_clrr (vector unsigned char @var{a}, unsigned int @var{n}); @end smallexample Clear the right-most @code{(16 - n)} bytes of vector argument @code{a}, as if implemented by the @code{vclrrb} instruction on a big-endian target @@ -23388,9 +23388,9 @@ Vector Integer Multiply/Divide/Modulo @smallexample @exdent vector signed int -@exdent vec_mulh (vector signed int a, vector signed int b); +@exdent vec_mulh (vector signed int @var{a}, vector signed int @var{b}); @exdent vector unsigned int -@exdent vec_mulh (vector unsigned int a, vector unsigned int b); +@exdent vec_mulh (vector unsigned int @var{a}, vector unsigned int @var{b}); @end smallexample For each integer value @code{i} from 0 to 3, do the following. The integer @@ -23400,9 +23400,9 @@ into word element @code{i} of the vector returned. @smallexample @exdent vector signed long long -@exdent vec_mulh (vector signed long long a, vector signed long long b); +@exdent vec_mulh (vector signed long long @var{a}, vector signed long long @var{b}); @exdent vector unsigned long long -@exdent vec_mulh (vector unsigned long long a, vector unsigned long long b); +@exdent vec_mulh (vector unsigned long long @var{a}, vector unsigned long long @var{b}); @end smallexample For each integer value @code{i} from 0 to 1, do the following. The integer @@ -23412,9 +23412,9 @@ are placed into doubleword element @code{i} of the vector returned. 
@smallexample @exdent vector unsigned long long -@exdent vec_mul (vector unsigned long long a, vector unsigned long long b); +@exdent vec_mul (vector unsigned long long @var{a}, vector unsigned long long @var{b}); @exdent vector signed long long -@exdent vec_mul (vector signed long long a, vector signed long long b); +@exdent vec_mul (vector signed long long @var{a}, vector signed long long @var{b}); @end smallexample For each integer value @code{i} from 0 to 1, do the following. The integer @@ -23424,9 +23424,9 @@ are placed into doubleword element @code{i} of the vector returned. @smallexample @exdent vector signed int -@exdent vec_div (vector signed int a, vector signed int b); +@exdent vec_div (vector signed int @var{a}, vector signed int @var{b}); @exdent vector unsigned int -@exdent vec_div (vector unsigned int a, vector unsigned int b); +@exdent vec_div (vector unsigned int @var{a}, vector unsigned int @var{b}); @end smallexample For each integer value @code{i} from 0 to 3, do the following. The integer in @@ -23437,9 +23437,9 @@ the vector returned. If an attempt is made to perform any of the divisions @smallexample @exdent vector signed long long -@exdent vec_div (vector signed long long a, vector signed long long b); +@exdent vec_div (vector signed long long @var{a}, vector signed long long @var{b}); @exdent vector unsigned long long -@exdent vec_div (vector unsigned long long a, vector unsigned long long b); +@exdent vec_div (vector unsigned long long @var{a}, vector unsigned long long @var{b}); @end smallexample For each integer value @code{i} from 0 to 1, do the following. The integer in @@ -23451,9 +23451,9 @@ the quotient is undefined. 
@smallexample @exdent vector signed int -@exdent vec_dive (vector signed int a, vector signed int b); +@exdent vec_dive (vector signed int @var{a}, vector signed int @var{b}); @exdent vector unsigned int -@exdent vec_dive (vector unsigned int a, vector unsigned int b); +@exdent vec_dive (vector unsigned int @var{a}, vector unsigned int @var{b}); @end smallexample For each integer value @code{i} from 0 to 3, do the following. The integer in @@ -23465,9 +23465,9 @@ divisions ÷ 0 then the quotient is undefined. @smallexample @exdent vector signed long long -@exdent vec_dive (vector signed long long a, vector signed long long b); +@exdent vec_dive (vector signed long long @var{a}, vector signed long long @var{b}); @exdent vector unsigned long long -@exdent vec_dive (vector unsigned long long a, vector unsigned long long b); +@exdent vec_dive (vector unsigned long long @var{a}, vector unsigned long long @var{b}); @end smallexample For each integer value @code{i} from 0 to 1, do the following. The integer in @@ -23479,9 +23479,9 @@ quotient cannot be represented in 64 bits, or if an attempt is made to perform @smallexample @exdent vector signed int -@exdent vec_mod (vector signed int a, vector signed int b); +@exdent vec_mod (vector signed int @var{a}, vector signed int @var{b}); @exdent vector unsigned int -@exdent vec_mod (vector unsigned int a, vector unsigned int b); +@exdent vec_mod (vector unsigned int @var{a}, vector unsigned int @var{b}); @end smallexample For each integer value @code{i} from 0 to 3, do the following. The integer in @@ -23492,9 +23492,9 @@ the vector returned. 
If an attempt is made to perform any of the divisions @smallexample @exdent vector signed long long -@exdent vec_mod (vector signed long long a, vector signed long long b); +@exdent vec_mod (vector signed long long @var{a}, vector signed long long @var{b}); @exdent vector unsigned long long -@exdent vec_mod (vector unsigned long long a, vector unsigned long long b); +@exdent vec_mod (vector unsigned long long @var{a}, vector unsigned long long @var{b}); @end smallexample For each integer value @code{i} from 0 to 1, do the following. The integer in @@ -23509,14 +23509,14 @@ immediate value is either 0, 1, 2 or 3. @findex vec_genpcvm @smallexample -@exdent vector unsigned __int128 vec_rl (vector unsigned __int128 A, - vector unsigned __int128 B); -@exdent vector signed __int128 vec_rl (vector signed __int128 A, - vector unsigned __int128 B); +@exdent vector unsigned __int128 vec_rl (vector unsigned __int128 @var{A}, + vector unsigned __int128 @var{B}); +@exdent vector signed __int128 vec_rl (vector signed __int128 @var{A}, + vector unsigned __int128 @var{B}); @end smallexample -Result value: Each element of R is obtained by rotating the corresponding element -of A left by the number of bits specified by the corresponding element of B. +Result value: Each element of @var{R} is obtained by rotating the corresponding element +of @var{A} left by the number of bits specified by the corresponding element of @var{B}. @smallexample @@ -23550,28 +23550,28 @@ input. The shift is obtained from the third input in the 7-bit field bits [125:131] where all bits counted from zero at the left. 
@smallexample -@exdent vector unsigned __int128 vec_sl(vector unsigned __int128 A, vector unsigned __int128 B); -@exdent vector signed __int128 vec_sl(vector signed __int128 A, vector unsigned __int128 B); +@exdent vector unsigned __int128 vec_sl(vector unsigned __int128 @var{A}, vector unsigned __int128 @var{B}); +@exdent vector signed __int128 vec_sl(vector signed __int128 @var{A}, vector unsigned __int128 @var{B}); @end smallexample -Result value: Each element of R is obtained by shifting the corresponding element of -A left by the number of bits specified by the corresponding element of B. +Result value: Each element of @var{R} is obtained by shifting the corresponding element of +@var{A} left by the number of bits specified by the corresponding element of @var{B}. @smallexample -@exdent vector unsigned __int128 vec_sr(vector unsigned __int128 A, vector unsigned __int128 B); -@exdent vector signed __int128 vec_sr(vector signed __int128 A, vector unsigned __int128 B); +@exdent vector unsigned __int128 vec_sr(vector unsigned __int128 @var{A}, vector unsigned __int128 @var{B}); +@exdent vector signed __int128 vec_sr(vector signed __int128 @var{A}, vector unsigned __int128 @var{B}); @end smallexample -Result value: Each element of R is obtained by shifting the corresponding element of -A right by the number of bits specified by the corresponding element of B. +Result value: Each element of @var{R} is obtained by shifting the corresponding element of +@var{A} right by the number of bits specified by the corresponding element of @var{B}. 
@smallexample -@exdent vector unsigned __int128 vec_sra(vector unsigned __int128 A, vector unsigned __int128 B); -@exdent vector signed __int128 vec_sra(vector signed __int128 A, vector unsigned __int128 B); +@exdent vector unsigned __int128 vec_sra(vector unsigned __int128 @var{A}, vector unsigned __int128 @var{B}); +@exdent vector signed __int128 vec_sra(vector signed __int128 @var{A}, vector unsigned __int128 @var{B}); @end smallexample -Result value: Each element of R is obtained by arithmetic shifting the corresponding -element of A right by the number of bits specified by the corresponding element of B. +Result value: Each element of @var{R} is obtained by arithmetic shifting the corresponding +element of @var{A} right by the number of bits specified by the corresponding element of @var{B}. @smallexample @exdent vector unsigned __int128 vec_mule (vector unsigned long long, @@ -24350,7 +24350,7 @@ Generates the @code{mvtaclo} machine instruction to set the bottom 32 bits of the accumulator. @enddefbuiltin -@defbuiltin{void __builtin_rx_mvtc (int reg, int val)} +@defbuiltin{void __builtin_rx_mvtc (int @var{reg}, int @var{val})} Generates the @code{mvtc} machine instruction which sets control register number @code{reg} to @code{val}. @enddefbuiltin From 330bb06485b18bae3effe83b45b97dbdcd543367 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 14:43:34 +0800 Subject: [PATCH 172/311] RISC-V: Fix ICE in extract_single_source This patch fixes the following ICE in VSETVL PASS: bug.c:39:1: internal compiler error: Segmentation fault 39 | } | ^ 0x1ad5a08 crash_signal ../../../../gcc/gcc/toplev.cc:316 0x7f7f55feb90f ??? 
./signal/../sysdeps/unix/sysv/linux/x86_64/libc_sigaction.c:0 0x218d7c7 extract_single_source ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:583 0x218d95d extract_single_source ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:604 0x218fbc5 pre_vsetvl::compute_lcm_local_properties() ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:2703 0x2190ef4 pre_vsetvl::earliest_fuse_vsetvl_info() ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:2890 0x2193e62 pass_vsetvl::lazy_vsetvl() ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:3537 0x219406a pass_vsetvl::execute(function*) ../../../../gcc/gcc/config/riscv/riscv-vsetvl.cc:3584 The root cause is that we have a case where the def info cannot be traced: (insn 208 327 333 27 (use (reg/i:DI 10 a0)) "bug.c":36:1 -1 (nil)) The fix is to conservatively disable any optimization in this situation, i.e. when the AVL def_info cannot be traced. Committed as it is obvious. gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (extract_single_source): Fix ICE. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c: New test.
--- gcc/config/riscv/riscv-vsetvl.cc | 2 + .../riscv/rvv/vsetvl/avl_use_bug-1.c | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 90477f331d7c..ed5a2b58ab04 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -579,6 +579,8 @@ extract_single_source (set_info *set) if (!set->insn ()->is_phi ()) return nullptr; hash_set sets = get_all_sets (set, true, false, true); + if (sets.is_empty ()) + return nullptr; insn_info *first_insn = (*sets.begin ())->insn (); if (first_insn->is_artificial ()) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c new file mode 100644 index 000000000000..330221c2d7b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64 --param=riscv-autovec-lmul=m8 --param=riscv-autovec-preference=fixed-vlmax -O2" } */ + +struct a_struct +{ + unsigned char a_character; +}; + +struct a_struct an_array[5]; +struct a_struct *a_ptr; +int yabba = 1; + +int +f (a, b) + unsigned char a; + unsigned long b; +{ + long i, j, p, q, r, s; + + if (b != (unsigned long) 0) + { + if (yabba) + return -1; + s = 4000000 / b; + for (i = 0; i < 11; i++) + { + for (j = 0; j < 256; j++) + { + if (((p - s < 0) ? -s : 0) < (( q - s < 0) ? -s : q)) + r = i; + } + } + } + + if (yabba) + return 0; + a_ptr = &an_array[a]; + a_ptr->a_character = (unsigned char) r; +} From 7e854b58084c131fceca9e8fa9dcc7469972e69d Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Sat, 9 Dec 2023 12:06:29 +0800 Subject: [PATCH 173/311] RISC-V: Support highest overlap for wv instructions According to RVV ISA, we can allow vwadd.wv v2, v2, v3 overlap. 
Before this patch: nop vsetivli zero,4,e8,m4,tu,ma vle16.v v8,0(a0) vmv8r.v v0,v8 vwsub.wv v0,v8,v12 nop addi a4,a0,100 vle16.v v8,0(a4) vmv8r.v v24,v8 vwsub.wv v24,v8,v12 nop addi a4,a0,200 vle16.v v8,0(a4) vmv8r.v v16,v8 vwsub.wv v16,v8,v12 nop After this patch: nop vsetivli zero,4,e8,m4,tu,ma vle16.v v0,0(a0) vwsub.wv v0,v0,v4 nop addi a4,a0,100 vle16.v v24,0(a4) vwsub.wv v24,v24,v28 nop addi a4,a0,200 vle16.v v16,0(a4) vwsub.wv v16,v16,v20 PR target/112431 gcc/ChangeLog: * config/riscv/vector.md: Support highest overlap for wv instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr112431-39.c: New test. * gcc.target/riscv/rvv/base/pr112431-40.c: New test. * gcc.target/riscv/rvv/base/pr112431-41.c: New test. --- gcc/config/riscv/vector.md | 88 +++++----- .../gcc.target/riscv/rvv/base/pr112431-39.c | 158 ++++++++++++++++++ .../gcc.target/riscv/rvv/base/pr112431-40.c | 94 +++++++++++ .../gcc.target/riscv/rvv/base/pr112431-41.c | 62 +++++++ 4 files changed, 360 insertions(+), 42 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index bace1a2852c6..a1284fd3251f 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3776,46 +3776,48 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_sub" - [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 
"const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTI - (match_operand:VWEXTI 3 "register_operand" " vr, vr") + (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr") (any_extend:VWEXTI - (match_operand: 4 "register_operand" " vr, vr"))) - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] "TARGET_VECTOR" "vwsub.wv\t%0,%3,%4%p1" [(set_attr "type" "viwalu") - (set_attr "mode" "")]) + (set_attr "mode" "") + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_add" - [(set (match_operand:VWEXTI 0 "register_operand" "=&vr,&vr") + [(set (match_operand:VWEXTI 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") (if_then_else:VWEXTI (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + (match_operand 6 
"const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTI (any_extend:VWEXTI - (match_operand: 4 "register_operand" " vr, vr")) - (match_operand:VWEXTI 3 "register_operand" " vr, vr")) - (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")) + (match_operand:VWEXTI 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")) + (match_operand:VWEXTI 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] "TARGET_VECTOR" "vwadd.wv\t%0,%3,%4%p1" [(set_attr "type" "viwalu") - (set_attr "mode" "")]) + (set_attr "mode" "") + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen__scalar" [(set (match_operand:VWEXTI 0 "register_operand" "=vr, vr") @@ -7054,54 +7056,56 @@ (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_add" - [(set (match_operand:VWEXTF 0 "register_operand" "=&vr, &vr") + [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") (if_then_else:VWEXTF (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (match_operand 9 "const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK") + 
(match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (plus:VWEXTF (float_extend:VWEXTF - (match_operand: 4 "register_operand" " vr, vr")) - (match_operand:VWEXTF 3 "register_operand" " vr, vr")) - (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr")) + (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr")) + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] "TARGET_VECTOR" "vfwadd.wv\t%0,%3,%4%p1" [(set_attr "type" "vfwalu") (set_attr "mode" "") (set (attr "frm_mode") - (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) + (symbol_ref "riscv_vector::get_frm_mode (operands[9])")) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen_sub" - [(set (match_operand:VWEXTF 0 "register_operand" "=&vr, &vr") + [(set (match_operand:VWEXTF 0 "register_operand" "=vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, vd, vr, ?&vr, ?&vr") (if_then_else:VWEXTF (unspec: - [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") - (match_operand 5 "vector_length_operand" " rK, rK") - (match_operand 6 "const_int_operand" " i, i") - (match_operand 7 "const_int_operand" " i, i") - (match_operand 8 "const_int_operand" " i, i") - (match_operand 9 "const_int_operand" " i, i") + [(match_operand: 1 "vector_mask_operand" " vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1, vm,Wc1,vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, rK, 
rK, rK") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") + (match_operand 9 "const_int_operand" " i, i, i, i, i, i, i, i, i, i, i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) (minus:VWEXTF - (match_operand:VWEXTF 3 "register_operand" " vr, vr") + (match_operand:VWEXTF 3 "register_operand" " vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr, vr") (float_extend:VWEXTF - (match_operand: 4 "register_operand" " vr, vr"))) - (match_operand:VWEXTF 2 "vector_merge_operand" " vu, 0")))] + (match_operand: 4 "register_operand" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84, vr, vr"))) + (match_operand:VWEXTF 2 "vector_merge_operand" " vu, vu, 0, 0, vu, vu, 0, 0, vu, vu, 0, 0, vu, 0")))] "TARGET_VECTOR" "vfwsub.wv\t%0,%3,%4%p1" [(set_attr "type" "vfwalu") (set_attr "mode" "") (set (attr "frm_mode") - (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) + (symbol_ref "riscv_vector::get_frm_mode (operands[9])")) + (set_attr "group_overlap" "W21,W21,W21,W21,W42,W42,W42,W42,W84,W84,W84,W84,none,none")]) (define_insn "@pred_single_widen__scalar" [(set (match_operand:VWEXTF 0 "register_operand" "=vr, vr") diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c new file mode 100644 index 000000000000..47820dd29f0b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-39.c @@ -0,0 +1,158 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v0 = __riscv_vwsub_wv_i16m2_tu (v0, v0, __riscv_vreinterpret_v_i16m1_i8m1 
(__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v1 = __riscv_vwsub_wv_i16m2_tu (v1, v1, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v2 = __riscv_vwsub_wv_i16m2_tu (v2, v2, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v3 = __riscv_vwsub_wv_i16m2_tu (v3, v3, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v4 = __riscv_vwsub_wv_i16m2_tu (v4, v4, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v5 = __riscv_vwsub_wv_i16m2_tu (v5, v5, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v6 = __riscv_vwsub_wv_i16m2_tu (v6, v6, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v7 = __riscv_vwsub_wv_i16m2_tu (v7, v7, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v8 = __riscv_vwsub_wv_i16m2_tu (v8, v8, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v9 = __riscv_vwsub_wv_i16m2_tu (v9, v9, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v10 = 
__riscv_vle16_v_i16m2 (in, 4);in+=100; + v10 = __riscv_vwsub_wv_i16m2_tu (v10, v10, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v10, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v11 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v11 = __riscv_vwsub_wv_i16m2_tu (v11, v11, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v11, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v12 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v12 = __riscv_vwsub_wv_i16m2_tu (v12, v12, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v12, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v13 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v13 = __riscv_vwsub_wv_i16m2_tu (v13, v13, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v13, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v14 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v14 = __riscv_vwsub_wv_i16m2_tu (v14, v14, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v14, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m1_t v15_n = __riscv_vle8_v_i8m1 (in, 4);in+=100; + vint16m2_t v15 = __riscv_vwcvt_x_x_v_i16m2 (v15_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m2 (out, 4, v0, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v3, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v4, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v5, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v6, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v7, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v8, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v9, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v10, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v11, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v12, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v13, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v14, 4);out+=100; + 
__riscv_vsse16_v_i16m2 (out, 4, v15, 4);out+=100; + } +} + +void +foo2 (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m2_t v0 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v0 = __riscv_vwadd_wv_i16m2_tu (v0, v0, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v1 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v1 = __riscv_vwadd_wv_i16m2_tu (v1, v1, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v2 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v2 = __riscv_vwadd_wv_i16m2_tu (v2, v2, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v3 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v3 = __riscv_vwadd_wv_i16m2_tu (v3, v3, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v3, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v4 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v4 = __riscv_vwadd_wv_i16m2_tu (v4, v4, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v4, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v5 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v5 = __riscv_vwadd_wv_i16m2_tu (v5, v5, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v5, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v6 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v6 = __riscv_vwadd_wv_i16m2_tu (v6, v6, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v6, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v7 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v7 = __riscv_vwadd_wv_i16m2_tu (v7, v7, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v7, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v8 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v8 = __riscv_vwadd_wv_i16m2_tu (v8, v8, __riscv_vreinterpret_v_i16m1_i8m1 
(__riscv_vget_v_i16m2_i16m1 (v8, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v9 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v9 = __riscv_vwadd_wv_i16m2_tu (v9, v9, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v9, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v10 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v10 = __riscv_vwadd_wv_i16m2_tu (v10, v10, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v10, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v11 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v11 = __riscv_vwadd_wv_i16m2_tu (v11, v11, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v11, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v12 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v12 = __riscv_vwadd_wv_i16m2_tu (v12, v12, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v12, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v13 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v13 = __riscv_vwadd_wv_i16m2_tu (v13, v13, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v13, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m2_t v14 = __riscv_vle16_v_i16m2 (in, 4);in+=100; + v14 = __riscv_vwadd_wv_i16m2_tu (v14, v14, __riscv_vreinterpret_v_i16m1_i8m1 (__riscv_vget_v_i16m2_i16m1 (v14, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m1_t v15_n = __riscv_vle8_v_i8m1 (in, 4);in+=100; + vint16m2_t v15 = __riscv_vwcvt_x_x_v_i16m2 (v15_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m2 (out, 4, v0, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v3, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v4, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v5, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v6, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v7, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v8, 4);out+=100; + 
__riscv_vsse16_v_i16m2 (out, 4, v9, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v10, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v11, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v12, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v13, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v14, 4);out+=100; + __riscv_vsse16_v_i16m2 (out, 4, v15, 4);out+=100; + } +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c new file mode 100644 index 000000000000..e44b80105793 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-40.c @@ -0,0 +1,94 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m4_t v0 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v0 = __riscv_vwsub_wv_i16m4_tu (v0, v0, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v1 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v1 = __riscv_vwsub_wv_i16m4_tu (v1, v1, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v2 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v2 = __riscv_vwsub_wv_i16m4_tu (v2, v2, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v3 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v3 = __riscv_vwsub_wv_i16m4_tu (v3, v3, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v3, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v4 = __riscv_vle16_v_i16m4 
(in, 4);in+=100; + v4 = __riscv_vwsub_wv_i16m4_tu (v4, v4, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v4, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v5 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v5 = __riscv_vwsub_wv_i16m4_tu (v5, v5, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v5, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v6 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v6 = __riscv_vwsub_wv_i16m4_tu (v6, v6, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v6, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m2_t v7_n = __riscv_vle8_v_i8m2 (in, 4);in+=100; + vint16m4_t v7 = __riscv_vwcvt_x_x_v_i16m4 (v7_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m4 (out, 4, v0, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v3, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v4, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v5, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v6, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v7, 4);out+=100; + } +} + +void +foo2 (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m4_t v0 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v0 = __riscv_vwadd_wv_i16m4_tu (v0, v0, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v1 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v1 = __riscv_vwadd_wv_i16m4_tu (v1, v1, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v2 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v2 = __riscv_vwadd_wv_i16m4_tu (v2, v2, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v3 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v3 = __riscv_vwadd_wv_i16m4_tu 
(v3, v3, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v3, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v4 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v4 = __riscv_vwadd_wv_i16m4_tu (v4, v4, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v4, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v5 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v5 = __riscv_vwadd_wv_i16m4_tu (v5, v5, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v5, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m4_t v6 = __riscv_vle16_v_i16m4 (in, 4);in+=100; + v6 = __riscv_vwadd_wv_i16m4_tu (v6, v6, __riscv_vreinterpret_v_i16m2_i8m2 (__riscv_vget_v_i16m4_i16m2 (v6, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m2_t v7_n = __riscv_vle8_v_i8m2 (in, 4);in+=100; + vint16m4_t v7 = __riscv_vwcvt_x_x_v_i16m4 (v7_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m4 (out, 4, v0, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v3, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v4, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v5, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v6, 4);out+=100; + __riscv_vsse16_v_i16m4 (out, 4, v7, 4);out+=100; + } +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c new file mode 100644 index 000000000000..dc27006f6f9a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-41.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void +foo (void *in, void *out, int n) +{ + 
for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m8_t v0 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v0 = __riscv_vwsub_wv_i16m8_tu (v0, v0, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m8_t v1 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v1 = __riscv_vwsub_wv_i16m8_tu (v1, v1, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m8_t v2 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v2 = __riscv_vwsub_wv_i16m8_tu (v2, v2, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m4_t v3_n = __riscv_vle8_v_i8m4 (in, 4);in+=100; + vint16m8_t v3 = __riscv_vwcvt_x_x_v_i16m8 (v3_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m8 (out, 4, v0, 4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v3, 4);out+=100; + } +} + +void +foo2 (void *in, void *out, int n) +{ + for (int i = 0; i < n; i++) + { + asm volatile("nop" ::: "memory"); + vint16m8_t v0 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v0 = __riscv_vwadd_wv_i16m8_tu (v0, v0, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v0, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m8_t v1 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v1 = __riscv_vwadd_wv_i16m8_tu (v1, v1, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v1, 1)), 4); + asm volatile("nop" ::: "memory"); + vint16m8_t v2 = __riscv_vle16_v_i16m8 (in, 4);in+=100; + v2 = __riscv_vwadd_wv_i16m8_tu (v2, v2, __riscv_vreinterpret_v_i16m4_i8m4 (__riscv_vget_v_i16m8_i16m4 (v2, 1)), 4); + asm volatile("nop" ::: "memory"); + vint8m4_t v3_n = __riscv_vle8_v_i8m4 (in, 4);in+=100; + vint16m8_t v3 = __riscv_vwcvt_x_x_v_i16m8 (v3_n, 4); + + asm volatile("nop" ::: "memory"); + __riscv_vsse16_v_i16m8 (out, 4, v0, 
4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v1, 4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v2, 4);out+=100; + __riscv_vsse16_v_i16m8 (out, 4, v3, 4);out+=100; + } +} + +/* { dg-final { scan-assembler-not {vmv1r} } } */ +/* { dg-final { scan-assembler-not {vmv2r} } } */ +/* { dg-final { scan-assembler-not {vmv4r} } } */ +/* { dg-final { scan-assembler-not {vmv8r} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ From 53e954a673a0d6ac80ab1f0591ea4f751e67374c Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Mon, 11 Dec 2023 09:20:49 +0100 Subject: [PATCH 174/311] Testsuite, Darwin: actually skip test Previous commit xfailed instead of skipping, but we really want to skip. gcc/testsuite/ChangeLog: * gcc.target/i386/libcall-1.c: Skip on darwin. --- gcc/testsuite/gcc.target/i386/libcall-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/libcall-1.c b/gcc/testsuite/gcc.target/i386/libcall-1.c index a40ff5620fb1..cb95aca7a29a 100644 --- a/gcc/testsuite/gcc.target/i386/libcall-1.c +++ b/gcc/testsuite/gcc.target/i386/libcall-1.c @@ -4,6 +4,6 @@ /* { dg-do compile { target int128 } } */ /* { dg-options "-O2 -mcmodel=large" } */ /* { dg-final { scan-assembler "globl\t__divti3" } } */ -/* { dg-xfail-if "PR90698" { *-*-darwin* } } */ +/* { dg-skip-if "PR90698" { *-*-darwin* } } */ __int128 a, b; void foo () { a = a / b; } From 037f57025921e026d520c1eda26a54853e4cb245 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Thu, 9 Nov 2023 16:03:11 +0800 Subject: [PATCH 175/311] Simplify vector ((VCE (a cmp b ? -1 : 0)) < 0) ? c : d to just (VCE ((a cmp b) ? (VCE c) : (VCE d))). While working on PR112443, I noticed some missed optimizations: after we fold _mm{,256}_blendv_epi8/pd/ps into gimple, the backend fails to combine it back to v{,p}blendv{b,ps,pd} since the pattern is too complicated, so I think we should handle it at the gimple level.
The dump is like _1 = c_3(D) >= { 0, 0, 0, 0 }; _2 = VEC_COND_EXPR <_1, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>; _7 = VIEW_CONVERT_EXPR(_2); _8 = VIEW_CONVERT_EXPR(b_6(D)); _9 = VIEW_CONVERT_EXPR(a_5(D)); _10 = _7 < { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; _11 = VEC_COND_EXPR <_10, _8, _9>; It can be optimized to _1 = c_2(D) >= { 0, 0, 0, 0 }; _6 = VEC_COND_EXPR <_1, b_5(D), a_4(D)>; since _7 is either -1 or 0, the selection of _7 < 0 ? _8 : _9 should be equal to _1 ? b : a as long as TYPE_PRECISION of the component type of the second VEC_COND_EXPR is less than or equal to that of the first one. The patch adds a gimple pattern to handle that. gcc/ChangeLog: * match.pd (VCE (a cmp b ? -1 : 0) < 0) ? c : d ---> (VCE ((a cmp b) ? (VCE:c) : (VCE:d))): New gimple simplification. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512vl-blendv-3.c: New test. * gcc.target/i386/blendv-3.c: New test. --- gcc/match.pd | 22 +++++++++ .../gcc.target/i386/avx512vl-blendv-3.c | 6 +++ gcc/testsuite/gcc.target/i386/blendv-3.c | 46 +++++++++++++++++++ 3 files changed, 74 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c create mode 100644 gcc/testsuite/gcc.target/i386/blendv-3.c diff --git a/gcc/match.pd b/gcc/match.pd index 4d554ba47215..5deddd57d21e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5190,6 +5190,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (optimize_vectors_before_lowering_p () && types_match (@0, @3)) (vec_cond (bit_and @0 (bit_not @3)) @2 @1))) +/* ((VCE (a cmp b ? -1 : 0)) < 0) ? c : d is just + (VCE ((a cmp b) ? (VCE c) : (VCE d))) when TYPE_PRECISION of the + component type of the outer vec_cond is greater equal the inner one.
*/ +(for cmp (simple_comparison) + (simplify + (vec_cond + (lt (view_convert@5 (vec_cond@6 (cmp@4 @0 @1) + integer_all_onesp + integer_zerop)) + integer_zerop) @2 @3) + (if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (@0)) + && VECTOR_INTEGER_TYPE_P (TREE_TYPE (@5)) + && !TYPE_UNSIGNED (TREE_TYPE (@5)) + && VECTOR_TYPE_P (TREE_TYPE (@6)) + && VECTOR_TYPE_P (type) + && tree_int_cst_le (TYPE_SIZE (TREE_TYPE (type)), + TYPE_SIZE (TREE_TYPE (TREE_TYPE (@6)))) + && TYPE_SIZE (type) == TYPE_SIZE (TREE_TYPE (@6))) + (with { tree vtype = TREE_TYPE (@6);} + (view_convert:type + (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3))))))) + /* c1 ? c2 ? a : b : b --> (c1 & c2) ? a : b */ (simplify (vec_cond @0 (vec_cond:s @1 @2 @3) @3) diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c new file mode 100644 index 000000000000..2777e72ab5f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512bw -O2" } */ +/* { dg-final { scan-assembler-times {vp?blendv(?:b|p[sd])[ \t]*} 6 } } */ +/* { dg-final { scan-assembler-not {vpcmp} } } */ + +#include "blendv-3.c" diff --git a/gcc/testsuite/gcc.target/i386/blendv-3.c b/gcc/testsuite/gcc.target/i386/blendv-3.c new file mode 100644 index 000000000000..fa0fb067a73d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/blendv-3.c @@ -0,0 +1,46 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times {vp?blendv(?:b|p[sd])[ \t]*} 6 } } */ +/* { dg-final { scan-assembler-not {vpcmp} } } */ + +#include <immintrin.h> + +__m256i +foo (__m256i a, __m256i b, __m256i c) +{ + return _mm256_blendv_epi8 (a, b, ~c < 0); +} + +__m256d +foo1 (__m256d a, __m256d b, __m256i c) +{ + __m256i d = ~c < 0; + return _mm256_blendv_pd (a, b, (__m256d)d); +} + +__m256 +foo2 (__m256 a, __m256 b, __m256i c) +{ + __m256i d = ~c < 0; + return _mm256_blendv_ps (a, b, (__m256)d); +} + +__m128i
+foo4 (__m128i a, __m128i b, __m128i c) +{ + return _mm_blendv_epi8 (a, b, ~c < 0); +} + +__m128d +foo5 (__m128d a, __m128d b, __m128i c) +{ + __m128i d = ~c < 0; + return _mm_blendv_pd (a, b, (__m128d)d); +} + +__m128 +foo6 (__m128 a, __m128 b, __m128i c) +{ + __m128i d = ~c < 0; + return _mm_blendv_ps (a, b, (__m128)d); +} From 0af729c69fa977dd2901f698f1ffbaa34a0ebbcf Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Mon, 11 Dec 2023 09:33:47 +0100 Subject: [PATCH 176/311] Testsuite, i386: mark test as requiring dfp Test currently fails on darwin with: error: decimal floating-point not supported for this target gcc/testsuite/ChangeLog: * gcc.target/i386/pr112445.c: Require dfp. --- gcc/testsuite/gcc.target/i386/pr112445.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/i386/pr112445.c b/gcc/testsuite/gcc.target/i386/pr112445.c index 91ed4212290c..1434ab483ffd 100644 --- a/gcc/testsuite/gcc.target/i386/pr112445.c +++ b/gcc/testsuite/gcc.target/i386/pr112445.c @@ -1,4 +1,5 @@ /* { dg-do compile { target int128 } } */ +/* { dg-require-effective-target dfp } */ /* { dg-options "-O -march=cascadelake -fwrapv" } */ typedef _Decimal64 d64; From d9dd06ad51b7479f09acb88adf404664a1e18b2a Mon Sep 17 00:00:00 2001 From: Robin Dapp Date: Fri, 8 Dec 2023 12:50:01 +0100 Subject: [PATCH 177/311] RISC-V: Recognize stepped series in expand_vec_perm_const. We currently try to recognize various forms of stepped (const_vector) sequence variants in expand_const_vector. Because of complications with canonicalization and encoding it is easier to identify such patterns in expand_vec_perm_const_1 already where perm.series_p () is available. This patch introduces shuffle_series as new permutation pattern and tries to recognize series like [base0 base1 base1 + step ...]. If such a series is found the series is expanded by expand_vec_series and a gather is emitted. 
On top the patch fixes the step recognition in expand_const_vector for stepped series where such a series would end up before. This fixes several execution failures when running code compiled for a scalable vector size of 128 on a target with vlen = 256 or higher. The problem was only noticed there because the encoding for a reversed [2 2]-element vector ("3 2 1 0") is { [1 2], [0 2], [1 4] }. Some testcases that failed were: vect-alias-check-18.c vect-alias-check-1.F90 pr64365.c On a 128-bit target, only the first two elements are used. The third element causing the complications only comes into effect at vlen = 256. With this patch the testsuite results are similar with vlen = 128, vlen = 256 as well as vlen = 512 (apart from the fixed-vlmax tests of course). gcc/ChangeLog: PR target/112853 * config/riscv/riscv-v.cc (expand_const_vector): Fix step calculation. (modulo_sel_indices): Also perform modulo for variable-length constants. (shuffle_series): Recognize series permutations. (expand_vec_perm_const_1): Add shuffle_series. --- gcc/config/riscv/riscv-v.cc | 66 +++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 9b99d0aca844..484c690c3db5 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1378,12 +1378,15 @@ expand_const_vector (rtx target, rtx src) { base0, base1, base1 + step, base1 + step * 2, ... } */ rtx base0 = builder.elt (0); rtx base1 = builder.elt (1); - rtx step = builder.elt (2); + rtx base2 = builder.elt (2); + + scalar_mode elem_mode = GET_MODE_INNER (mode); + rtx step = simplify_binary_operation (MINUS, elem_mode, base2, base1); + /* Step 1 - { base1, base1 + step, base1 + step * 2, ... } */ rtx tmp = gen_reg_rtx (mode); expand_vec_series (tmp, base1, step); /* Step 2 - { base0, base1, base1 + step, base1 + step * 2, ... 
} */ - scalar_mode elem_mode = GET_MODE_INNER (mode); if (!rtx_equal_p (base0, const0_rtx)) base0 = force_reg (elem_mode, base0); @@ -3395,6 +3398,63 @@ shuffle_extract_and_slide1up_patterns (struct expand_vec_perm_d *d) return true; } +static bool +shuffle_series_patterns (struct expand_vec_perm_d *d) +{ + if (!d->one_vector_p || d->perm.encoding ().npatterns () != 1) + return false; + + poly_int64 el1 = d->perm[0]; + poly_int64 el2 = d->perm[1]; + poly_int64 el3 = d->perm[2]; + + poly_int64 step1 = el2 - el1; + poly_int64 step2 = el3 - el2; + + bool need_insert = false; + bool have_series = false; + + /* Check for a full series. */ + if (known_ne (step1, 0) && d->perm.series_p (0, 1, el1, step1)) + have_series = true; + + /* Check for a series starting at the second element. */ + else if (known_ne (step2, 0) && d->perm.series_p (1, 1, el2, step2)) + { + have_series = true; + need_insert = true; + } + + if (!have_series) + return false; + + /* Get a vector int-mode to be used for the permute selector. */ + machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); + insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode); + + /* We need to be able to insert an element and shift the vector. */ + if (need_insert && icode == CODE_FOR_nothing) + return false; + + /* Success! */ + if (d->testing_p) + return true; + + /* Create the series. */ + machine_mode eltmode = Pmode; + rtx series = gen_reg_rtx (sel_mode); + expand_vec_series (series, gen_int_mode (need_insert ? el2 : el1, eltmode), + gen_int_mode (need_insert ? step2 : step1, eltmode)); + + /* Insert the remaining element if necessary. */ + if (need_insert) + emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode))); + + emit_vlmax_gather_insn (d->target, d->op0, series); + + return true; +} + /* Recognize the pattern that can be shuffled by generic approach. 
*/ static bool @@ -3475,6 +3535,8 @@ expand_vec_perm_const_1 (struct expand_vec_perm_d *d) return true; if (shuffle_extract_and_slide1up_patterns (d)) return true; + if (shuffle_series_patterns (d)) + return true; if (shuffle_generic_patterns (d)) return true; return false; From 63194a0e8ede9e15dfa01c6ec7aeea8f7702d3b7 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 17:44:24 +0800 Subject: [PATCH 178/311] RISC-V: Rename test[NFC] Since I want to commit multiple tests which are fixing vsetvl bugs, rename it to make testcases more easier maintain. Committed as it is obvious. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c: Moved to... * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-1.c: ...here. --- .../riscv/rvv/vsetvl/{avl_use_bug-1.c => vsetvl_bug-1.c} | 2 ++ 1 file changed, 2 insertions(+) rename gcc/testsuite/gcc.target/riscv/rvv/vsetvl/{avl_use_bug-1.c => vsetvl_bug-1.c} (99%) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-1.c similarity index 99% rename from gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c rename to gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-1.c index 330221c2d7b4..45727a035b48 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-1.c @@ -6,10 +6,12 @@ struct a_struct unsigned char a_character; }; + struct a_struct an_array[5]; struct a_struct *a_ptr; int yabba = 1; + int f (a, b) unsigned char a; From f9b4dbb8ac75f96c4897ba9aafcaf0bbad4fbe44 Mon Sep 17 00:00:00 2001 From: Iain Buclaw Date: Thu, 7 Dec 2023 11:55:12 +0100 Subject: [PATCH 179/311] d: Merge upstream dmd, druntime 2bbf64907c, phobos b64bfbf91 D front-end changes: - Import dmd v2.106.0. D runtime changes: - Import druntime v2.106.0. Phobos changes: - Import phobos v2.106.0. gcc/d/ChangeLog: * Make-lang.in (D_FRONTEND_OBJS): Rename d/common-string.o to d/common-smallbuffer.o. 
* dmd/MERGE: Merge upstream dmd 2bbf64907c. * dmd/VERSION: Bump version to v2.106.0. * modules.cc (layout_moduleinfo_fields): Update for new front-end interface. (layout_moduleinfo): Likewise. libphobos/ChangeLog: * libdruntime/MERGE: Merge upstream druntime 2bbf64907c. * src/MERGE: Merge upstream phobos b64bfbf91. --- gcc/d/Make-lang.in | 2 +- gcc/d/dmd/MERGE | 2 +- gcc/d/dmd/VERSION | 2 +- gcc/d/dmd/aggregate.d | 10 - gcc/d/dmd/aggregate.h | 1 - gcc/d/dmd/attrib.d | 67 ------ gcc/d/dmd/attrib.h | 9 - gcc/d/dmd/canthrow.d | 2 +- gcc/d/dmd/common/README.md | 2 +- gcc/d/dmd/common/file.d | 15 +- gcc/d/dmd/common/{string.d => smallbuffer.d} | 49 ++-- gcc/d/dmd/cparse.d | 8 + gcc/d/dmd/dcast.d | 12 +- gcc/d/dmd/denum.d | 7 - gcc/d/dmd/dimport.d | 16 -- gcc/d/dmd/dmodule.d | 36 ++- gcc/d/dmd/dsymbol.d | 172 -------------- gcc/d/dmd/dsymbol.h | 5 +- gcc/d/dmd/dsymbolsem.d | 214 +++++++++++++++++ gcc/d/dmd/dtemplate.d | 7 +- gcc/d/dmd/enum.h | 1 - gcc/d/dmd/escape.d | 2 +- gcc/d/dmd/expressionsem.d | 2 +- gcc/d/dmd/hdrgen.d | 27 +++ gcc/d/dmd/import.h | 1 - gcc/d/dmd/initsem.d | 20 +- gcc/d/dmd/module.h | 1 + gcc/d/dmd/nspace.d | 14 -- gcc/d/dmd/nspace.h | 1 - gcc/d/dmd/parse.d | 12 +- gcc/d/dmd/root/file.d | 2 +- gcc/d/dmd/root/filename.d | 4 +- gcc/d/dmd/root/speller.d | 2 +- gcc/d/dmd/root/string.d | 2 +- gcc/d/dmd/typesem.d | 58 +++++ gcc/d/modules.cc | 4 +- .../fail_compilation/misc_parser_err_cov1.d | 2 +- gcc/testsuite/gdc.test/runnable/dbitfields.d | 34 +++ libphobos/libdruntime/MERGE | 2 +- libphobos/libdruntime/core/cpuid.d | 7 +- libphobos/src/MERGE | 2 +- libphobos/src/std/algorithm/searching.d | 218 +++++++----------- libphobos/src/std/conv.d | 5 +- libphobos/src/std/range/package.d | 24 +- libphobos/src/std/uni/package.d | 12 + 45 files changed, 579 insertions(+), 518 deletions(-) rename gcc/d/dmd/common/{string.d => smallbuffer.d} (82%) diff --git a/gcc/d/Make-lang.in b/gcc/d/Make-lang.in index b3007a96bd07..a0d4d7cbeb40 100644 --- a/gcc/d/Make-lang.in +++ 
b/gcc/d/Make-lang.in @@ -95,7 +95,7 @@ D_FRONTEND_OBJS = \ d/common-bitfields.o \ d/common-file.o \ d/common-outbuffer.o \ - d/common-string.o \ + d/common-smallbuffer.o \ d/compiler.o \ d/cond.o \ d/constfold.o \ diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index aa0062c10eb9..5edcee1c84df 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -ff57fec51558013b25cadb7e83da9f4675915d56 +2bbf64907cbbb483d003e0a8fcf8b502e4883799 The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. diff --git a/gcc/d/dmd/VERSION b/gcc/d/dmd/VERSION index 41fdc654b149..8c95cd04f804 100644 --- a/gcc/d/dmd/VERSION +++ b/gcc/d/dmd/VERSION @@ -1 +1 @@ -v2.106.0-rc.1 +v2.106.0 diff --git a/gcc/d/dmd/aggregate.d b/gcc/d/dmd/aggregate.d index 307bb0171c47..352ca88f470d 100644 --- a/gcc/d/dmd/aggregate.d +++ b/gcc/d/dmd/aggregate.d @@ -178,16 +178,6 @@ extern (C++) abstract class AggregateDeclaration : ScopeDsymbol return sc2; } - override final void setScope(Scope* sc) - { - // Might need a scope to resolve forward references. The check for - // semanticRun prevents unnecessary setting of _scope during deferred - // setScope phases for aggregates which already finished semantic(). - // See https://issues.dlang.org/show_bug.cgi?id=16607 - if (semanticRun < PASS.semanticdone) - ScopeDsymbol.setScope(sc); - } - /*************************************** * Returns: * The total number of fields minus the number of hidden fields. 
diff --git a/gcc/d/dmd/aggregate.h b/gcc/d/dmd/aggregate.h index cd8f1a15fbda..98fa6bd1bb00 100644 --- a/gcc/d/dmd/aggregate.h +++ b/gcc/d/dmd/aggregate.h @@ -113,7 +113,6 @@ public: Sizeok sizeok; // set when structsize contains valid data virtual Scope *newScope(Scope *sc); - void setScope(Scope *sc) override final; virtual void finalizeSize() = 0; uinteger_t size(const Loc &loc) override final; bool fill(const Loc &loc, Expressions &elements, bool ctorinit); diff --git a/gcc/d/dmd/attrib.d b/gcc/d/dmd/attrib.d index 251e2e88ca5c..faf04890e8e4 100644 --- a/gcc/d/dmd/attrib.d +++ b/gcc/d/dmd/attrib.d @@ -123,19 +123,6 @@ extern (C++) abstract class AttribDeclaration : Dsymbol return sc; } - override void setScope(Scope* sc) - { - Dsymbols* d = include(sc); - //printf("\tAttribDeclaration::setScope '%s', d = %p\n",toChars(), d); - if (d) - { - Scope* sc2 = newScope(sc); - d.foreachDsymbol( s => s.setScope(sc2) ); - if (sc2 != sc) - sc2.pop(); - } - } - override void importAll(Scope* sc) { Dsymbols* d = include(sc); @@ -338,14 +325,6 @@ extern (C++) final class DeprecatedDeclaration : StorageClassDeclaration return scx; } - override void setScope(Scope* sc) - { - //printf("DeprecatedDeclaration::setScope() %p\n", this); - if (decl) - Dsymbol.setScope(sc); // for forward reference - return AttribDeclaration.setScope(sc); - } - override void accept(Visitor v) { v.visit(this); @@ -433,13 +412,6 @@ extern (C++) final class CPPMangleDeclaration : AttribDeclaration sc.aligndecl, sc.inlining); } - override void setScope(Scope* sc) - { - if (decl) - Dsymbol.setScope(sc); // for forward reference - return AttribDeclaration.setScope(sc); - } - override const(char)* toChars() const { return toString().ptr; @@ -703,13 +675,6 @@ extern (C++) final class AnonDeclaration : AttribDeclaration return new AnonDeclaration(loc, isunion, Dsymbol.arraySyntaxCopy(decl)); } - override void setScope(Scope* sc) - { - if (decl) - Dsymbol.setScope(sc); - return AttribDeclaration.setScope(sc); - 
} - override void setFieldOffset(AggregateDeclaration ad, ref FieldState fieldState, bool isunion) { //printf("\tAnonDeclaration::setFieldOffset %s %p\n", isunion ? "union" : "struct", this); @@ -913,11 +878,6 @@ extern (C++) class ConditionalDeclaration : AttribDeclaration } } - override void setScope(Scope* sc) - { - include(sc).foreachDsymbol( s => s.setScope(sc) ); - } - override void accept(Visitor v) { v.visit(this); @@ -983,13 +943,6 @@ extern (C++) final class StaticIfDeclaration : ConditionalDeclaration } } - override void setScope(Scope* sc) - { - // do not evaluate condition before semantic pass - // But do set the scope, in case we need it for forward referencing - Dsymbol.setScope(sc); - } - override void importAll(Scope* sc) { // do not evaluate condition before semantic pass @@ -1104,13 +1057,6 @@ extern (C++) final class StaticForeachDeclaration : AttribDeclaration // change this to give semantics to documentation comments on static foreach declarations } - override void setScope(Scope* sc) - { - // do not evaluate condition before semantic pass - // But do set the scope, in case we need it for forward referencing - Dsymbol.setScope(sc); - } - override void importAll(Scope* sc) { // do not evaluate aggregate before semantic pass @@ -1209,11 +1155,6 @@ extern (C++) final class MixinDeclaration : AttribDeclaration return new MixinDeclaration(loc, Expression.arraySyntaxCopy(exps)); } - override void setScope(Scope* sc) - { - Dsymbol.setScope(sc); - } - override const(char)* kind() const { return "mixin"; @@ -1264,14 +1205,6 @@ extern (C++) final class UserAttributeDeclaration : AttribDeclaration return sc2; } - override void setScope(Scope* sc) - { - //printf("UserAttributeDeclaration::setScope() %p\n", this); - if (decl) - Dsymbol.setScope(sc); // for forward reference of UDAs - return AttribDeclaration.setScope(sc); - } - extern (D) static Expressions* concat(Expressions* udas1, Expressions* udas2) { Expressions* udas; diff --git a/gcc/d/dmd/attrib.h 
b/gcc/d/dmd/attrib.h index efea9af950cc..98c5e5219777 100644 --- a/gcc/d/dmd/attrib.h +++ b/gcc/d/dmd/attrib.h @@ -26,7 +26,6 @@ public: virtual Dsymbols *include(Scope *sc); virtual Scope *newScope(Scope *sc); - void setScope(Scope *sc) override; void importAll(Scope *sc) override; void addComment(const utf8_t *comment) override; const char *kind() const override; @@ -61,7 +60,6 @@ public: DeprecatedDeclaration *syntaxCopy(Dsymbol *s) override; Scope *newScope(Scope *sc) override; - void setScope(Scope *sc) override; void accept(Visitor *v) override { v->visit(this); } }; @@ -84,7 +82,6 @@ public: CPPMangleDeclaration *syntaxCopy(Dsymbol *s) override; Scope *newScope(Scope *sc) override; - void setScope(Scope *sc) override; const char *toChars() const override; void accept(Visitor *v) override { v->visit(this); } }; @@ -135,7 +132,6 @@ public: unsigned anonalignsize; // size of anonymous struct for alignment purposes AnonDeclaration *syntaxCopy(Dsymbol *s) override; - void setScope(Scope *sc) override; void setFieldOffset(AggregateDeclaration *ad, FieldState& fieldState, bool isunion) override; const char *kind() const override; AnonDeclaration *isAnonDeclaration() override { return this; } @@ -163,7 +159,6 @@ public: bool oneMember(Dsymbol **ps, Identifier *ident) override final; Dsymbols *include(Scope *sc) override; void addComment(const utf8_t *comment) override final; - void setScope(Scope *sc) override; void accept(Visitor *v) override { v->visit(this); } }; @@ -176,7 +171,6 @@ public: StaticIfDeclaration *syntaxCopy(Dsymbol *s) override; Dsymbols *include(Scope *sc) override; - void setScope(Scope *sc) override; void importAll(Scope *sc) override; StaticIfDeclaration *isStaticIfDeclaration() override { return this; } const char *kind() const override; @@ -196,7 +190,6 @@ public: bool oneMember(Dsymbol **ps, Identifier *ident) override; Dsymbols *include(Scope *sc) override; void addComment(const utf8_t *comment) override; - void setScope(Scope *sc) 
override; void importAll(Scope *sc) override; const char *kind() const override; void accept(Visitor *v) override { v->visit(this); } @@ -223,7 +216,6 @@ public: d_bool compiled; MixinDeclaration *syntaxCopy(Dsymbol *s) override; - void setScope(Scope *sc) override; const char *kind() const override; void accept(Visitor *v) override { v->visit(this); } }; @@ -239,7 +231,6 @@ public: UserAttributeDeclaration *syntaxCopy(Dsymbol *s) override; Scope *newScope(Scope *sc) override; - void setScope(Scope *sc) override; Expressions *getAttributes(); const char *kind() const override; void accept(Visitor *v) override { v->visit(this); } diff --git a/gcc/d/dmd/canthrow.d b/gcc/d/dmd/canthrow.d index 67305922df64..5a608a9986d9 100644 --- a/gcc/d/dmd/canthrow.d +++ b/gcc/d/dmd/canthrow.d @@ -22,7 +22,6 @@ import dmd.declaration; import dmd.dsymbol; import dmd.errorsink; import dmd.expression; -import dmd.expressionsem; import dmd.func; import dmd.globals; import dmd.init; @@ -81,6 +80,7 @@ CT canThrow(Expression e, FuncDeclaration func, ErrorSink eSink) if (!f.isDtorDeclaration()) errorSupplementalInferredAttr(f, 10, false, STC.nothrow_); + import dmd.expressionsem : checkOverriddenDtor; f.checkOverriddenDtor(null, e.loc, dd => dd.type.toTypeFunction().isnothrow, "not nothrow"); } else if (func) diff --git a/gcc/d/dmd/common/README.md b/gcc/d/dmd/common/README.md index 853fd4ff502a..ad507c75bd49 100644 --- a/gcc/d/dmd/common/README.md +++ b/gcc/d/dmd/common/README.md @@ -5,4 +5,4 @@ | [bitfields.d](https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/bitfields.d) | Pack multiple boolean fields into bit fields | | [file.d](https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/file.d) | Functions and objects dedicated to file I/O and management | | [outbuffer.d](https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/outbuffer.d) | An expandable buffer in which you can write text or binary data | -| 
[string.d](https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/string.d) | Common string functions including filename manipulation | +| [string.d](https://github.com/dlang/dmd/blob/master/compiler/src/dmd/common/smallbuffer.d) | Common string functions including filename manipulation | diff --git a/gcc/d/dmd/common/file.d b/gcc/d/dmd/common/file.d index 076f357e50b7..704110f50bd6 100644 --- a/gcc/d/dmd/common/file.d +++ b/gcc/d/dmd/common/file.d @@ -17,13 +17,13 @@ module dmd.common.file; import core.stdc.errno : errno; import core.stdc.stdio : fprintf, remove, rename, stderr; import core.stdc.stdlib : exit; -import core.stdc.string : strerror; +import core.stdc.string : strerror, strlen; import core.sys.windows.winbase; import core.sys.windows.winnt; import core.sys.posix.fcntl; import core.sys.posix.unistd; -import dmd.common.string; +import dmd.common.smallbuffer; nothrow: @@ -129,7 +129,8 @@ struct FileMapping(Datum) enum openFlags = CREATE_ALWAYS; } - handle = filename.asDString.extendedPathThen!(p => CreateFileW(p.ptr, createFileMode, 0, null, openFlags, FILE_ATTRIBUTE_NORMAL, null)); + handle = filename[0 .. strlen(filename)]. + extendedPathThen!(p => CreateFileW(p.ptr, createFileMode, 0, null, openFlags, FILE_ATTRIBUTE_NORMAL, null)); if (handle == invalidHandle) { static if (is(Datum == const)) @@ -312,7 +313,7 @@ struct FileMapping(Datum) else version(Windows) { import core.sys.windows.winbase; - if (deleteme.asDString.extendedPathThen!(p => DeleteFileW(p.ptr)) == 0) + if (deleteme[0 .. strlen(deleteme)].extendedPathThen!(p => DeleteFileW(p.ptr)) == 0) { fprintf(stderr, "DeleteFileW error %d\n", GetLastError()); return false; @@ -447,8 +448,8 @@ struct FileMapping(Datum) else version(Windows) { import core.sys.windows.winbase; - auto r = oldname.asDString.extendedPathThen!( - p1 => filename.asDString.extendedPathThen!(p2 => MoveFileExW(p1.ptr, p2.ptr, MOVEFILE_REPLACE_EXISTING)) + auto r = oldname[0 .. 
strlen(oldname)].extendedPathThen!( + p1 => filename[0 .. strlen(filename)].extendedPathThen!(p2 => MoveFileExW(p1.ptr, p2.ptr, MOVEFILE_REPLACE_EXISTING)) ); if (r == 0) { @@ -483,7 +484,7 @@ extern(D) static bool writeFile(const(char)* name, const void[] data) nothrow else version (Windows) { DWORD numwritten; // here because of the gotos - const nameStr = name.asDString; + const nameStr = name[0 .. strlen(name)]; // work around Windows file path length limitation // (see documentation for extendedPathThen). HANDLE h = nameStr.extendedPathThen! diff --git a/gcc/d/dmd/common/string.d b/gcc/d/dmd/common/smallbuffer.d similarity index 82% rename from gcc/d/dmd/common/string.d rename to gcc/d/dmd/common/smallbuffer.d index 9453a3474da8..ec0eaae647f8 100644 --- a/gcc/d/dmd/common/string.d +++ b/gcc/d/dmd/common/smallbuffer.d @@ -4,11 +4,11 @@ * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved * Authors: Walter Bright, https://www.digitalmars.com * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) - * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/string.d, common/_string.d) - * Documentation: https://dlang.org/phobos/dmd_common_string.html - * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/common/string.d + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/smallbuffer.d, common/_smallbuffer.d) + * Documentation: https://dlang.org/phobos/dmd_common_smallbuffer.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/common/smallbuffer */ -module dmd.common.string; +module dmd.common.smallbuffer; nothrow: @@ -106,34 +106,12 @@ unittest assert(b[] !is buf[]); } -/** -Converts a zero-terminated C string to a D slice. Takes linear time and allocates no memory. - -Params: -stringz = the C string to be converted - -Returns: -a slice comprehending the string. The terminating 0 is not part of the slice. 
-*/ -auto asDString(C)(C* stringz) pure @nogc nothrow -{ - import core.stdc.string : strlen; - return stringz[0 .. strlen(stringz)]; -} - -/// -unittest -{ - const char* p = "123".ptr; - assert(p.asDString == "123"); -} - /** (Windows only) Converts a narrow string to a wide string using `buffer` as strorage. Returns a slice managed by `buffer` containing the converted string. The terminating zero is not part of the returned slice, but is guaranteed to follow it. */ -version(Windows) wchar[] toWStringz(const(char)[] narrow, ref SmallBuffer!wchar buffer) nothrow +version(Windows) wchar[] toWStringz(scope const(char)[] narrow, ref SmallBuffer!wchar buffer) nothrow { import core.sys.windows.winnls : MultiByteToWideChar; import dmd.common.file : CodePage; @@ -141,16 +119,17 @@ version(Windows) wchar[] toWStringz(const(char)[] narrow, ref SmallBuffer!wchar if (narrow is null) return null; - const requiredLength = MultiByteToWideChar(CodePage, 0, narrow.ptr, cast(int) narrow.length, buffer.ptr, cast(int) buffer.length); - if (requiredLength < cast(int) buffer.length) + size_t length; + int i; + while (1) { - buffer[requiredLength] = 0; - return buffer[0 .. requiredLength]; + // https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar + length = MultiByteToWideChar(CodePage, 0, narrow.ptr, cast(int) narrow.length, buffer.ptr, cast(int) buffer.length); + if (length < buffer.length) + break; + buffer.create(length + 1); + assert(++i == 1); // ensure loop should only execute once or twice } - - buffer.create(requiredLength + 1); - const length = MultiByteToWideChar(CodePage, 0, narrow.ptr, cast(int) narrow.length, buffer.ptr, requiredLength); - assert(length == requiredLength); buffer[length] = 0; return buffer[0 .. 
length]; } diff --git a/gcc/d/dmd/cparse.d b/gcc/d/dmd/cparse.d index ed5f1f8b9a21..89a594823aed 100644 --- a/gcc/d/dmd/cparse.d +++ b/gcc/d/dmd/cparse.d @@ -1890,6 +1890,14 @@ final class CParser(AST) : Parser!AST if (specifier.alignExps) error("no alignment-specifier for typedef declaration"); // C11 6.7.5-2 + if (specifier.vector_size) + { + auto length = new AST.IntegerExp(token.loc, specifier.vector_size / dt.size(), AST.Type.tuns32); + auto tsa = new AST.TypeSArray(dt, length); + dt = new AST.TypeVector(tsa); + specifier.vector_size = 0; // used it up + } + bool isalias = true; if (auto ts = dt.isTypeStruct()) { diff --git a/gcc/d/dmd/dcast.d b/gcc/d/dmd/dcast.d index 14c67f062a32..bb86b080be6e 100644 --- a/gcc/d/dmd/dcast.d +++ b/gcc/d/dmd/dcast.d @@ -68,7 +68,6 @@ Expression implicitCastTo(Expression e, Scope* sc, Type t) Expression visit(Expression e) { // printf("Expression.implicitCastTo(%s of type %s) => %s\n", e.toChars(), e.type.toChars(), t.toChars()); - if (const match = (sc && sc.flags & SCOPE.Cfile) ? 
e.cimplicitConvTo(t) : e.implicitConvTo(t)) { // no need for an extra cast when matching is exact @@ -802,8 +801,8 @@ extern(C++) MATCH implicitConvTo(Expression e, Type t) return result; } - else if (tb.ty == Tvector && (typeb.ty == Tarray || typeb.ty == Tsarray)) - { + else if (tb.ty == Tvector && (typeb.ty == Tarray || typeb.ty == Tsarray || typeb.ty == Tpointer)) + { // Tpointer because ImportC eagerly converts Tsarray to Tpointer result = MATCH.exact; // Convert array literal to vector type TypeVector tv = tb.isTypeVector(); @@ -1487,6 +1486,10 @@ MATCH cimplicitConvTo(Expression e, Type t) if (tb.equals(typeb)) return MATCH.exact; + + if (tb.isTypeVector() || typeb.isTypeVector()) + return implicitConvTo(e, t); // permissive checking doesn't apply to vectors + if ((typeb.isintegral() || typeb.isfloating()) && (tb.isintegral() || tb.isfloating())) return MATCH.convert; @@ -2298,9 +2301,10 @@ Expression castTo(Expression e, Scope* sc, Type t, Type att = null) ae.type = tp; } } - else if (tb.ty == Tvector && (typeb.ty == Tarray || typeb.ty == Tsarray)) + else if (tb.ty == Tvector && (typeb.ty == Tarray || typeb.ty == Tsarray || typeb.ty == Tpointer)) { // Convert array literal to vector type + // The Tpointer case comes from C eagerly converting Tsarray to Tpointer TypeVector tv = tb.isTypeVector(); TypeSArray tbase = tv.basetype.isTypeSArray(); assert(tbase.ty == Tsarray); diff --git a/gcc/d/dmd/denum.d b/gcc/d/dmd/denum.d index 797f6ee0a2df..5713be1e99e7 100644 --- a/gcc/d/dmd/denum.d +++ b/gcc/d/dmd/denum.d @@ -83,13 +83,6 @@ extern (C++) final class EnumDeclaration : ScopeDsymbol return ed; } - override void setScope(Scope* sc) - { - if (semanticRun > PASS.initial) - return; - ScopeDsymbol.setScope(sc); - } - override bool oneMember(Dsymbol* ps, Identifier ident) { if (isAnonymous()) diff --git a/gcc/d/dmd/dimport.d b/gcc/d/dmd/dimport.d index 0132e49cbedb..5c01a9f5889b 100644 --- a/gcc/d/dmd/dimport.d +++ b/gcc/d/dmd/dimport.d @@ -305,22 +305,6 @@ extern 
(C++) final class Import : Dsymbol return this; } - override void setScope(Scope* sc) - { - Dsymbol.setScope(sc); - if (aliasdecls.length) - { - if (!mod) - importAll(sc); - - sc = sc.push(mod); - sc.visibility = visibility; - foreach (ad; aliasdecls) - ad.setScope(sc); - sc = sc.pop(); - } - } - override bool overloadInsert(Dsymbol s) { /* Allow multiple imports with the same package base, but disallow diff --git a/gcc/d/dmd/dmodule.d b/gcc/d/dmd/dmodule.d index 5f5de6390fbc..d096e437cf94 100644 --- a/gcc/d/dmd/dmodule.d +++ b/gcc/d/dmd/dmodule.d @@ -33,6 +33,7 @@ import dmd.errorsink; import dmd.expression; import dmd.expressionsem; import dmd.file_manager; +import dmd.func; import dmd.globals; import dmd.id; import dmd.identifier; @@ -969,7 +970,7 @@ extern (C++) final class Module : Package * If this works out well, it can be extended to all modules * before any semantic() on any of them. */ - setScope(sc); // remember module scope for semantic + this.setScope(sc); // remember module scope for semantic for (size_t i = 0; i < members.length; i++) { Dsymbol s = (*members)[i]; @@ -1576,3 +1577,36 @@ private const(char)[] processSource (const(ubyte)[] src, Module mod) return buf; } + +/******************************************* + * Look for member of the form: + * const(MemberInfo)[] getMembers(string); + * Returns NULL if not found + */ +extern(C++) FuncDeclaration findGetMembers(ScopeDsymbol dsym) +{ + import dmd.opover : search_function; + Dsymbol s = search_function(dsym, Id.getmembers); + FuncDeclaration fdx = s ? 
s.isFuncDeclaration() : null; + version (none) + { + // Finish + __gshared TypeFunction tfgetmembers; + if (!tfgetmembers) + { + Scope sc; + sc.eSink = global.errorSink; + auto parameters = new Parameters(); + Parameters* p = new Parameter(STC.in_, Type.tchar.constOf().arrayOf(), null, null); + parameters.push(p); + Type tret = null; + TypeFunction tf = new TypeFunction(parameters, tret, VarArg.none, LINK.d); + tfgetmembers = tf.dsymbolSemantic(Loc.initial, &sc).isTypeFunction(); + } + if (fdx) + fdx = fdx.overloadExactMatch(tfgetmembers); + } + if (fdx && fdx.isVirtual()) + fdx = null; + return fdx; +} diff --git a/gcc/d/dmd/dsymbol.d b/gcc/d/dmd/dsymbol.d index a52745fcc0e4..8f5a292a284c 100644 --- a/gcc/d/dmd/dsymbol.d +++ b/gcc/d/dmd/dsymbol.d @@ -31,7 +31,6 @@ import dmd.dmodule; import dmd.dversion; import dmd.dscope; import dmd.dstruct; -import dmd.dsymbolsem; import dmd.dtemplate; import dmd.errors; import dmd.expression; @@ -44,11 +43,9 @@ import dmd.lexer; import dmd.location; import dmd.mtype; import dmd.nspace; -import dmd.opover; import dmd.root.aav; import dmd.root.rmem; import dmd.rootobject; -import dmd.root.speller; import dmd.root.string; import dmd.statement; import dmd.staticassert; @@ -386,40 +383,6 @@ extern (C++) class Dsymbol : ASTNode return '`' ~ cstr.toDString() ~ "`\0"; } - final bool checkDeprecated(const ref Loc loc, Scope* sc) - { - if (global.params.useDeprecated == DiagnosticReporting.off) - return false; - if (!this.isDeprecated()) - return false; - // Don't complain if we're inside a deprecated symbol's scope - if (sc.isDeprecated()) - return false; - // Don't complain if we're inside a template constraint - // https://issues.dlang.org/show_bug.cgi?id=21831 - if (sc.flags & SCOPE.constraint) - return false; - - const(char)* message = null; - for (Dsymbol p = this; p; p = p.parent) - { - message = p.depdecl ? 
p.depdecl.getMessage() : null; - if (message) - break; - } - if (message) - deprecation(loc, "%s `%s` is deprecated - %s", kind, toPrettyChars, message); - else - deprecation(loc, "%s `%s` is deprecated", kind, toPrettyChars); - - if (auto ti = sc.parent ? sc.parent.isInstantiated() : null) - ti.printInstantiationTrace(Classification.deprecation); - else if (auto ti = sc.parent ? sc.parent.isTemplateInstance() : null) - ti.printInstantiationTrace(Classification.deprecation); - - return true; - } - /********************************** * Determine which Module a Dsymbol is in. */ @@ -749,113 +712,10 @@ extern (C++) class Dsymbol : ASTNode return toAlias(); } - /************************************* - * Set scope for future semantic analysis so we can - * deal better with forward references. - */ - void setScope(Scope* sc) - { - //printf("Dsymbol::setScope() %p %s, %p stc = %llx\n", this, toChars(), sc, sc.stc); - if (!sc.nofree) - sc.setNoFree(); // may need it even after semantic() finishes - _scope = sc; - if (sc.depdecl) - depdecl = sc.depdecl; - if (!userAttribDecl) - userAttribDecl = sc.userAttribDecl; - } - void importAll(Scope* sc) { } - extern (D) final Dsymbol search_correct(Identifier ident) - { - /*************************************************** - * Search for symbol with correct spelling. - */ - extern (D) Dsymbol symbol_search_fp(const(char)[] seed, out int cost) - { - /* If not in the lexer's string table, it certainly isn't in the symbol table. - * Doing this first is a lot faster. 
- */ - if (!seed.length) - return null; - Identifier id = Identifier.lookup(seed); - if (!id) - return null; - cost = 0; // all the same cost - Dsymbol s = this; - Module.clearCache(); - return s.search(Loc.initial, id, IgnoreErrors); - } - - if (global.gag) - return null; // don't do it for speculative compiles; too time consuming - // search for exact name first - if (auto s = this.search(Loc.initial, ident, IgnoreErrors)) - return s; - return speller!symbol_search_fp(ident.toString()); - } - - /*************************************** - * Search for identifier id as a member of `this`. - * `id` may be a template instance. - * - * Params: - * loc = location to print the error messages - * sc = the scope where the symbol is located - * id = the id of the symbol - * flags = the search flags which can be `SearchLocalsOnly` or `IgnorePrivateImports` - * - * Returns: - * symbol found, NULL if not - */ - extern (D) final Dsymbol searchX(const ref Loc loc, Scope* sc, RootObject id, int flags) - { - //printf("Dsymbol::searchX(this=%p,%s, ident='%s')\n", this, toChars(), ident.toChars()); - Dsymbol s = toAlias(); - Dsymbol sm; - if (Declaration d = s.isDeclaration()) - { - if (d.inuse) - { - .error(loc, "circular reference to `%s`", d.toPrettyChars()); - return null; - } - } - switch (id.dyncast()) - { - case DYNCAST.identifier: - sm = s.search(loc, cast(Identifier)id, flags); - break; - case DYNCAST.dsymbol: - { - // It's a template instance - //printf("\ttemplate instance id\n"); - Dsymbol st = cast(Dsymbol)id; - TemplateInstance ti = st.isTemplateInstance(); - sm = s.search(loc, ti.name); - if (!sm) - return null; - sm = sm.toAlias(); - TemplateDeclaration td = sm.isTemplateDeclaration(); - if (!td) - return null; // error but handled later - ti.tempdecl = td; - if (!ti.semanticRun) - ti.dsymbolSemantic(sc); - sm = ti.toAlias(); - break; - } - case DYNCAST.type: - case DYNCAST.expression: - default: - assert(0); - } - return sm; - } - bool overloadInsert(Dsymbol s) { 
//printf("Dsymbol::overloadInsert('%s')\n", s.toChars()); @@ -1468,38 +1328,6 @@ public: return "ScopeDsymbol"; } - /******************************************* - * Look for member of the form: - * const(MemberInfo)[] getMembers(string); - * Returns NULL if not found - */ - final FuncDeclaration findGetMembers() - { - Dsymbol s = search_function(this, Id.getmembers); - FuncDeclaration fdx = s ? s.isFuncDeclaration() : null; - version (none) - { - // Finish - __gshared TypeFunction tfgetmembers; - if (!tfgetmembers) - { - Scope sc; - sc.eSink = global.errorSink; - auto parameters = new Parameters(); - Parameters* p = new Parameter(STC.in_, Type.tchar.constOf().arrayOf(), null, null); - parameters.push(p); - Type tret = null; - TypeFunction tf = new TypeFunction(parameters, tret, VarArg.none, LINK.d); - tfgetmembers = tf.dsymbolSemantic(Loc.initial, &sc).isTypeFunction(); - } - if (fdx) - fdx = fdx.overloadExactMatch(tfgetmembers); - } - if (fdx && fdx.isVirtual()) - fdx = null; - return fdx; - } - /******************************** * Insert Dsymbol in table. 
* Params: diff --git a/gcc/d/dmd/dsymbol.h b/gcc/d/dmd/dsymbol.h index e0c2046bf902..15c997027dae 100644 --- a/gcc/d/dmd/dsymbol.h +++ b/gcc/d/dmd/dsymbol.h @@ -205,7 +205,6 @@ public: const char *locToChars(); bool equals(const RootObject * const o) const override; bool isAnonymous() const; - bool checkDeprecated(const Loc &loc, Scope *sc); Module *getModule(); bool isCsymbol(); Module *getAccessModule(); @@ -228,7 +227,6 @@ public: virtual const char *kind() const; virtual Dsymbol *toAlias(); // resolve real symbol virtual Dsymbol *toAlias2(); - virtual void setScope(Scope *sc); virtual void importAll(Scope *sc); virtual bool overloadInsert(Dsymbol *s); virtual uinteger_t size(const Loc &loc); @@ -342,7 +340,6 @@ public: bool isforwardRef() override final; static void multiplyDefined(const Loc &loc, Dsymbol *s1, Dsymbol *s2); const char *kind() const override; - FuncDeclaration *findGetMembers(); virtual Dsymbol *symtabInsert(Dsymbol *s); virtual Dsymbol *symtabLookup(Dsymbol *s, Identifier *id); bool hasStaticCtorOrDtor() override; @@ -431,3 +428,5 @@ public: void addMember(Dsymbol *dsym, Scope *sc, ScopeDsymbol *sds); Dsymbol *search(Dsymbol *d, const Loc &loc, Identifier *ident, int flags = SearchLocalsOnly); +bool checkDeprecated(Dsymbol *d, const Loc &loc, Scope *sc); +void setScope(Dsymbol *d, Scope *sc); diff --git a/gcc/d/dmd/dsymbolsem.d b/gcc/d/dmd/dsymbolsem.d index 430377ff8743..060abfe18966 100644 --- a/gcc/d/dmd/dsymbolsem.d +++ b/gcc/d/dmd/dsymbolsem.d @@ -212,6 +212,39 @@ const(char)* getMessage(DeprecatedDeclaration dd) return dd.msgstr; } +bool checkDeprecated(Dsymbol d, const ref Loc loc, Scope* sc) +{ + if (global.params.useDeprecated == DiagnosticReporting.off) + return false; + if (!d.isDeprecated()) + return false; + // Don't complain if we're inside a deprecated symbol's scope + if (sc.isDeprecated()) + return false; + // Don't complain if we're inside a template constraint + // https://issues.dlang.org/show_bug.cgi?id=21831 + if (sc.flags 
& SCOPE.constraint) + return false; + + const(char)* message = null; + for (Dsymbol p = d; p; p = p.parent) + { + message = p.depdecl ? p.depdecl.getMessage() : null; + if (message) + break; + } + if (message) + deprecation(loc, "%s `%s` is deprecated - %s", d.kind, d.toPrettyChars, message); + else + deprecation(loc, "%s `%s` is deprecated", d.kind, d.toPrettyChars); + + if (auto ti = sc.parent ? sc.parent.isInstantiated() : null) + ti.printInstantiationTrace(Classification.deprecation); + else if (auto ti = sc.parent ? sc.parent.isTemplateInstance() : null) + ti.printInstantiationTrace(Classification.deprecation); + + return true; +} // Returns true if a contract can appear without a function body. package bool allowsContractWithoutBody(FuncDeclaration funcdecl) @@ -7811,6 +7844,37 @@ extern(C++) Dsymbol search(Dsymbol d, const ref Loc loc, Identifier ident, int f return v.result; } +Dsymbol search_correct(Dsymbol d, Identifier ident) +{ + /*************************************************** + * Search for symbol with correct spelling. + */ + Dsymbol symbol_search_fp(const(char)[] seed, out int cost) + { + /* If not in the lexer's string table, it certainly isn't in the symbol table. + * Doing this first is a lot faster. 
+ */ + if (!seed.length) + return null; + Identifier id = Identifier.lookup(seed); + if (!id) + return null; + cost = 0; // all the same cost + Dsymbol s = d; + Module.clearCache(); + return s.search(Loc.initial, id, IgnoreErrors); + } + + if (global.gag) + return null; // don't do it for speculative compiles; too time consuming + // search for exact name first + if (auto s = d.search(Loc.initial, ident, IgnoreErrors)) + return s; + + import dmd.root.speller : speller; + return speller!symbol_search_fp(ident.toString()); +} + private extern(C++) class SearchVisitor : Visitor { alias visit = Visitor.visit; @@ -8407,3 +8471,153 @@ private extern(C++) class SearchVisitor : Visitor return setResult(s); } } +/************************************* + * Set scope for future semantic analysis so we can + * deal better with forward references. + * + * Params: + * d = dsymbol for which the scope is set + * sc = scope that is used to set the value + */ +extern(C++) void setScope(Dsymbol d, Scope* sc) +{ + scope setScopeVisitor = new SetScopeVisitor(sc); + d.accept(setScopeVisitor); +} + +private extern(C++) class SetScopeVisitor : Visitor +{ + alias visit = typeof(super).visit; + Scope* sc; + + this(Scope* sc) + { + this.sc = sc; + } + + override void visit(Dsymbol d) + { + //printf("Dsymbol::setScope() %p %s, %p stc = %llx\n", d, d.toChars(), sc, sc.stc); + if (!sc.nofree) + sc.setNoFree(); // may need it even after semantic() finishes + d._scope = sc; + if (sc.depdecl) + d.depdecl = sc.depdecl; + if (!d.userAttribDecl) + d.userAttribDecl = sc.userAttribDecl; + } + + override void visit(Import i) + { + visit(cast(Dsymbol)i); + if (i.aliasdecls.length) + { + if (!i.mod) + i.importAll(sc); + + sc = sc.push(i.mod); + sc.visibility = i.visibility; + foreach (ad; i.aliasdecls) + ad.setScope(sc); + sc = sc.pop(); + } + } + + override void visit(Nspace ns) + { + visit(cast(Dsymbol)ns); + if (ns.members) + { + assert(sc); + sc = sc.push(ns); + sc.linkage = LINK.cpp; // namespaces 
default to C++ linkage + sc.parent = ns; + ns.members.foreachDsymbol(s => s.setScope(sc)); + sc.pop(); + } + } + + override void visit(EnumDeclaration ed) + { + if (ed.semanticRun > PASS.initial) + return; + visit(cast(Dsymbol)ed); + } + + override void visit(AggregateDeclaration ad) + { + // Might need a scope to resolve forward references. The check for + // semanticRun prevents unnecessary setting of _scope during deferred + // setScope phases for aggregates which already finished semantic(). + // See https://issues.dlang.org/show_bug.cgi?id=16607 + if (ad.semanticRun < PASS.semanticdone) + visit(cast(Dsymbol)ad); + } + + override void visit(AttribDeclaration atr) + { + Dsymbols* d = atr.include(sc); + //printf("\tAttribDeclaration::setScope '%s', d = %p\n",toChars(), d); + if (d) + { + Scope* sc2 = atr.newScope(sc); + d.foreachDsymbol( s => s.setScope(sc2) ); + if (sc2 != sc) + sc2.pop(); + } + } + + override void visit(DeprecatedDeclaration dd) + { + //printf("DeprecatedDeclaration::setScope() %p\n", this); + if (dd.decl) + visit(cast(Dsymbol)dd); // for forward reference + visit(cast(AttribDeclaration)dd); + } + + override void visit(CPPMangleDeclaration cppmd) + { + if (cppmd.decl) + visit(cast(Dsymbol)cppmd); // for forward reference + visit(cast(AttribDeclaration)cppmd); + } + + override void visit(AnonDeclaration anond) + { + if (anond.decl) + visit(cast(Dsymbol)anond); // for forward reference + visit(cast(AttribDeclaration)anond); + } + + override void visit(ConditionalDeclaration condd) + { + condd.include(sc).foreachDsymbol( s => s.setScope(sc) ); + } + + override void visit(StaticIfDeclaration sid) + { + // do not evaluate condition before semantic pass + // But do set the scope, in case we need it for forward referencing + visit(cast(Dsymbol)sid); // for forward reference + } + + override void visit(StaticForeachDeclaration sfd) + { + // do not evaluate condition before semantic pass + // But do set the scope, in case we need it for forward 
referencing + visit(cast(Dsymbol)sfd); // for forward reference + } + + override void visit(MixinDeclaration md) + { + visit(cast(Dsymbol)md); + } + + override void visit(UserAttributeDeclaration uad) + { + //printf("UserAttributeDeclaration::setScope() %p\n", this); + if (uad.decl) + visit(cast(Dsymbol)uad); + visit(cast(AttribDeclaration)uad); + } +} diff --git a/gcc/d/dmd/dtemplate.d b/gcc/d/dmd/dtemplate.d index 037e0d01196c..326d66364b8f 100644 --- a/gcc/d/dmd/dtemplate.d +++ b/gcc/d/dmd/dtemplate.d @@ -7515,7 +7515,12 @@ extern (C++) class TemplateInstance : ScopeDsymbol } //printf("\t-. mi = %s\n", mi.toPrettyChars()); - assert(!memberOf || (!memberOf.isRoot() && mi.isRoot()), "can only re-append from non-root to root module"); + if (memberOf) // already appended to some module + { + assert(mi.isRoot(), "can only re-append to a root module"); + if (memberOf.isRoot()) + return null; // no need to move to another root module + } Dsymbols* a = mi.members; a.push(this); diff --git a/gcc/d/dmd/enum.h b/gcc/d/dmd/enum.h index e17e8cf5b0af..5f91ead9e174 100644 --- a/gcc/d/dmd/enum.h +++ b/gcc/d/dmd/enum.h @@ -46,7 +46,6 @@ public: bool inuse(bool v); EnumDeclaration *syntaxCopy(Dsymbol *s) override; - void setScope(Scope *sc) override; bool oneMember(Dsymbol **ps, Identifier *ident) override; Type *getType() override; const char *kind() const override; diff --git a/gcc/d/dmd/escape.d b/gcc/d/dmd/escape.d index e25fc84234e4..f928b08503a0 100644 --- a/gcc/d/dmd/escape.d +++ b/gcc/d/dmd/escape.d @@ -2343,7 +2343,7 @@ void finishScopeParamInference(FuncDeclaration funcdecl, ref TypeFunction f) VarDeclaration[10] tmp = void; size_t dim = (funcdecl.vthis !is null) + (funcdecl.parameters ? 
funcdecl.parameters.length : 0); - import dmd.common.string : SmallBuffer; + import dmd.common.smallbuffer : SmallBuffer; auto sb = SmallBuffer!VarDeclaration(dim, tmp[]); VarDeclaration[] array = sb[]; diff --git a/gcc/d/dmd/expressionsem.d b/gcc/d/dmd/expressionsem.d index e6b90183b511..1664bf22dca0 100644 --- a/gcc/d/dmd/expressionsem.d +++ b/gcc/d/dmd/expressionsem.d @@ -4387,7 +4387,7 @@ private extern (C++) final class ExpressionSemanticVisitor : Visitor auto e = initializerToExpression(init, t, (sc.flags & SCOPE.Cfile) != 0); if (!e) { - error(cle.loc, "cannot convert initializer `%s` to expression", init.toChars()); + error(cle.loc, "cannot convert initializer `%s` to expression", toChars(init)); return setError(); } result = e; diff --git a/gcc/d/dmd/hdrgen.d b/gcc/d/dmd/hdrgen.d index ac2dda3e89f8..0944ade4c287 100644 --- a/gcc/d/dmd/hdrgen.d +++ b/gcc/d/dmd/hdrgen.d @@ -1968,6 +1968,10 @@ private void visitVarDecl(VarDeclaration v, bool anywritten, ref OutBuffer buf, v._init.initializerToBuffer(buf, &hgs); } + const commentIt = hgs.importcHdr && isSpecialCName(v.ident); + if (commentIt) + buf.writestring("/+"); + if (anywritten) { buf.writestring(", "); @@ -2000,8 +2004,31 @@ private void visitVarDecl(VarDeclaration v, bool anywritten, ref OutBuffer buf, buf.writestring(" = "); vinit(v); } + if (commentIt) + buf.writestring("+/"); } +/************************************* + * The names __DATE__, __TIME__,__EOF__, __VENDOR__, __TIMESTAMP__, __VERSION__ + * are special to the D lexer and cannot be used as D source variable names. 
+ * Params: + * id = name to check + * Returns: + * true if special C name + */ +private bool isSpecialCName(Identifier id) +{ + auto s = id.toString(); + if (s.length >= 7 && s[0] == '_' && s[1] == '_' && + (id == Id.DATE || + id == Id.TIME || + id == Id.EOFX || + id == Id.VENDOR || + id == Id.TIMESTAMP || + id == Id.VERSIONX)) + return true; + return false; +} /********************************************* * Print expression to buffer. diff --git a/gcc/d/dmd/import.h b/gcc/d/dmd/import.h index aeb3621f1ebc..624cd7406a3f 100644 --- a/gcc/d/dmd/import.h +++ b/gcc/d/dmd/import.h @@ -43,7 +43,6 @@ public: Import *syntaxCopy(Dsymbol *s) override; // copy only syntax trees void importAll(Scope *sc) override; Dsymbol *toAlias() override; - void setScope(Scope* sc) override; bool overloadInsert(Dsymbol *s) override; Import *isImport() override { return this; } diff --git a/gcc/d/dmd/initsem.d b/gcc/d/dmd/initsem.d index 76c2d8916b0b..6d31f956c8e5 100644 --- a/gcc/d/dmd/initsem.d +++ b/gcc/d/dmd/initsem.d @@ -199,7 +199,7 @@ extern(C++) Initializer initializerSemantic(Initializer init, Scope* sc, ref Typ uint length; const(uint) amax = 0x80000000; bool errors = false; - //printf("ArrayInitializer::semantic(%s), ai: %s %p\n", t.toChars(), i.toChars(), i); + //printf("ArrayInitializer::semantic(%s), ai: %s\n", t.toChars(), toChars(i)); if (i.sem) // if semantic() already run { return i; @@ -600,7 +600,17 @@ extern(C++) Initializer initializerSemantic(Initializer init, Scope* sc, ref Typ Initializer visitC(CInitializer ci) { - //printf("CInitializer::semantic() tx: %s t: %s ci: %s\n", (tx ? tx.toChars() : "".ptr), t.toChars(), ci.toChars()); + //printf("CInitializer::semantic() tx: %s t: %s ci: %s\n", (tx ? 
tx.toChars() : "".ptr), t.toChars(), toChars(ci)); + static if (0) + if (auto ts = tx.isTypeStruct()) + { + import dmd.common.outbuffer; + OutBuffer buf; + HdrGenStage hgs; + toCBuffer(ts.sym, buf, hgs); + printf("%s\n", buf.peekChars()); + } + /* Rewrite CInitializer into ExpInitializer, ArrayInitializer, or StructInitializer */ t = t.toBasetype(); @@ -794,6 +804,7 @@ extern(C++) Initializer initializerSemantic(Initializer init, Scope* sc, ref Typ for (size_t index = 0; index < ci.initializerList.length; ) { CInitializer cprev; + size_t indexprev; L1: DesigInit di = ci.initializerList[index]; Designators* dlist = di.designatorList; @@ -827,6 +838,7 @@ extern(C++) Initializer initializerSemantic(Initializer init, Scope* sc, ref Typ /* The peeling didn't work, so unpeel it */ ci = cprev; + index = indexprev; di = ci.initializerList[index]; goto L2; } @@ -837,12 +849,14 @@ extern(C++) Initializer initializerSemantic(Initializer init, Scope* sc, ref Typ { if (fieldi == nfields) break; - if (index == 0 && ci.initializerList.length == 1 && di.initializer.isCInitializer()) + if (/*index == 0 && ci.initializerList.length == 1 &&*/ di.initializer.isCInitializer()) { /* Try peeling off this set of { } and see if it works */ cprev = ci; ci = di.initializer.isCInitializer(); + indexprev = index; + index = 0; goto L1; } diff --git a/gcc/d/dmd/module.h b/gcc/d/dmd/module.h index 92efc1656da7..cab0b0a4c1be 100644 --- a/gcc/d/dmd/module.h +++ b/gcc/d/dmd/module.h @@ -169,3 +169,4 @@ struct ModuleDeclaration }; extern void getLocalClasses(Module* mod, Array& aclasses); +FuncDeclaration *findGetMembers(ScopeDsymbol *dsym); diff --git a/gcc/d/dmd/nspace.d b/gcc/d/dmd/nspace.d index a49e0bf0cc2c..22c6e63a465a 100644 --- a/gcc/d/dmd/nspace.d +++ b/gcc/d/dmd/nspace.d @@ -85,20 +85,6 @@ extern (C++) final class Nspace : ScopeDsymbol return ns; } - override void setScope(Scope* sc) - { - ScopeDsymbol.setScope(sc); - if (members) - { - assert(sc); - sc = sc.push(this); - sc.linkage = 
LINK.cpp; // namespaces default to C++ linkage - sc.parent = this; - members.foreachDsymbol(s => s.setScope(sc)); - sc.pop(); - } - } - override bool hasPointers() { //printf("Nspace::hasPointers() %s\n", toChars()); diff --git a/gcc/d/dmd/nspace.h b/gcc/d/dmd/nspace.h index 7d30402c5953..701cc935eb57 100644 --- a/gcc/d/dmd/nspace.h +++ b/gcc/d/dmd/nspace.h @@ -21,7 +21,6 @@ class Nspace final : public ScopeDsymbol public: Expression *identExp; Nspace *syntaxCopy(Dsymbol *s) override; - void setScope(Scope *sc) override; bool hasPointers() override; void setFieldOffset(AggregateDeclaration *ad, FieldState& fieldState, bool isunion) override; const char *kind() const override; diff --git a/gcc/d/dmd/parse.d b/gcc/d/dmd/parse.d index f9d174ab14f2..b6f30b93f47d 100644 --- a/gcc/d/dmd/parse.d +++ b/gcc/d/dmd/parse.d @@ -8428,7 +8428,12 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer AST.TemplateParameters* tpl = null; nextToken(); - if (token.value == TOK.leftParenthesis) + if (token.value != TOK.leftParenthesis) + { + error("expected `(` following `is`, not `%s`", token.toChars()); + goto Lerr; + } + else { nextToken(); if (token.value == TOK.identifier && peekNext() == TOK.leftParenthesis) @@ -8476,11 +8481,6 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer else check(TOK.rightParenthesis); } - else - { - error("`type identifier : specialization` expected following `is`"); - goto Lerr; - } e = new AST.IsExp(loc, targ, ident, tok, tspec, tok2, tpl); break; } diff --git a/gcc/d/dmd/root/file.d b/gcc/d/dmd/root/file.d index 1fb105682ea3..fdf13d4e8b02 100644 --- a/gcc/d/dmd/root/file.d +++ b/gcc/d/dmd/root/file.d @@ -24,7 +24,7 @@ import dmd.root.rmem; import dmd.root.string; import dmd.common.file; -import dmd.common.string; +import dmd.common.smallbuffer; nothrow: diff --git a/gcc/d/dmd/root/filename.d b/gcc/d/dmd/root/filename.d index 631c08c44d27..8f31f2120488 100644 --- a/gcc/d/dmd/root/filename.d +++ b/gcc/d/dmd/root/filename.d @@ -37,7 +37,7 @@ version 
(Windows) import core.sys.windows.windef; import core.sys.windows.winnls; - import dmd.common.string : extendedPathThen; + import dmd.common.smallbuffer : extendedPathThen; extern (Windows) DWORD GetFullPathNameW(LPCWSTR, DWORD, LPWSTR, LPWSTR*) nothrow @nogc; extern (Windows) void SetLastError(DWORD) nothrow @nogc; @@ -1177,7 +1177,7 @@ version(Windows) */ private auto toWStringzThen(alias F)(const(char)[] str) nothrow { - import dmd.common.string : SmallBuffer, toWStringz; + import dmd.common.smallbuffer : SmallBuffer, toWStringz; if (!str.length) return F(""w.ptr); diff --git a/gcc/d/dmd/root/speller.d b/gcc/d/dmd/root/speller.d index b646bdda0ccf..7ad08b7216e9 100644 --- a/gcc/d/dmd/root/speller.d +++ b/gcc/d/dmd/root/speller.d @@ -42,7 +42,7 @@ private: import core.stdc.stdlib; import core.stdc.string; -import dmd.common.string : SmallBuffer; +import dmd.common.smallbuffer : SmallBuffer; enum isSearchFunction(alias fun) = is(searchFunctionType!fun); alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}()); diff --git a/gcc/d/dmd/root/string.d b/gcc/d/dmd/root/string.d index 8b204ab4cad5..5ee81a9b63dd 100644 --- a/gcc/d/dmd/root/string.d +++ b/gcc/d/dmd/root/string.d @@ -69,7 +69,7 @@ The return value of `T` auto toCStringThen(alias dg)(const(char)[] src) nothrow { import dmd.root.rmem : mem; - import dmd.common.string : SmallBuffer; + import dmd.common.smallbuffer : SmallBuffer; const len = src.length + 1; char[512] small = void; diff --git a/gcc/d/dmd/typesem.d b/gcc/d/dmd/typesem.d index 8795002cd154..2063a954b996 100644 --- a/gcc/d/dmd/typesem.d +++ b/gcc/d/dmd/typesem.d @@ -372,6 +372,64 @@ private void resolveHelper(TypeQualified mt, const ref Loc loc, Scope* sc, Dsymb pt = t.merge(); } +/*************************************** + * Search for identifier id as a member of `this`. + * `id` may be a template instance. 
+ * + * Params: + * loc = location to print the error messages + * sc = the scope where the symbol is located + * id = the id of the symbol + * flags = the search flags which can be `SearchLocalsOnly` or `IgnorePrivateImports` + * + * Returns: + * symbol found, NULL if not + */ +private Dsymbol searchX(Dsymbol dsym, const ref Loc loc, Scope* sc, RootObject id, int flags) +{ + //printf("Dsymbol::searchX(this=%p,%s, ident='%s')\n", this, toChars(), ident.toChars()); + Dsymbol s = dsym.toAlias(); + Dsymbol sm; + if (Declaration d = s.isDeclaration()) + { + if (d.inuse) + { + .error(loc, "circular reference to `%s`", d.toPrettyChars()); + return null; + } + } + switch (id.dyncast()) + { + case DYNCAST.identifier: + sm = s.search(loc, cast(Identifier)id, flags); + break; + case DYNCAST.dsymbol: + { + // It's a template instance + //printf("\ttemplate instance id\n"); + Dsymbol st = cast(Dsymbol)id; + TemplateInstance ti = st.isTemplateInstance(); + sm = s.search(loc, ti.name); + if (!sm) + return null; + sm = sm.toAlias(); + TemplateDeclaration td = sm.isTemplateDeclaration(); + if (!td) + return null; // error but handled later + ti.tempdecl = td; + if (!ti.semanticRun) + ti.dsymbolSemantic(sc); + sm = ti.toAlias(); + break; + } + case DYNCAST.type: + case DYNCAST.expression: + default: + assert(0); + } + return sm; +} + /****************************************** * We've mistakenly parsed `t` as a type. * Redo `t` as an Expression only if there are no type modifiers. 
diff --git a/gcc/d/modules.cc b/gcc/d/modules.cc index e3c1ef9f82ec..250743e8dbee 100644 --- a/gcc/d/modules.cc +++ b/gcc/d/modules.cc @@ -503,7 +503,7 @@ layout_moduleinfo_fields (Module *decl, tree type) if (decl->sshareddtor) layout_moduleinfo_field (ptr_type_node, type, offset); - if (decl->findGetMembers ()) + if (findGetMembers (decl)) layout_moduleinfo_field (ptr_type_node, type, offset); if (decl->sictor) @@ -571,7 +571,7 @@ layout_moduleinfo (Module *decl) aimports_dim--; } - sgetmembers = decl->findGetMembers (); + sgetmembers = findGetMembers (decl); size_t flags = 0; if (decl->sctor) diff --git a/gcc/testsuite/gdc.test/fail_compilation/misc_parser_err_cov1.d b/gcc/testsuite/gdc.test/fail_compilation/misc_parser_err_cov1.d index 9de436b01198..a170b77b88c2 100644 --- a/gcc/testsuite/gdc.test/fail_compilation/misc_parser_err_cov1.d +++ b/gcc/testsuite/gdc.test/fail_compilation/misc_parser_err_cov1.d @@ -7,7 +7,7 @@ fail_compilation/misc_parser_err_cov1.d(30): Error: basic type expected, not `)` fail_compilation/misc_parser_err_cov1.d(31): Error: `__traits(identifier, args...)` expected fail_compilation/misc_parser_err_cov1.d(31): Error: semicolon expected following auto declaration, not `o` fail_compilation/misc_parser_err_cov1.d(31): Error: expression expected, not `)` -fail_compilation/misc_parser_err_cov1.d(32): Error: `type identifier : specialization` expected following `is` +fail_compilation/misc_parser_err_cov1.d(32): Error: expected `(` following `is`, not `;` fail_compilation/misc_parser_err_cov1.d(33): Error: semicolon expected following auto declaration, not `auto` fail_compilation/misc_parser_err_cov1.d(33): Error: found `+` when expecting `(` following `mixin` fail_compilation/misc_parser_err_cov1.d(35): Error: `key:value` expected for associative array literal diff --git a/gcc/testsuite/gdc.test/runnable/dbitfields.d b/gcc/testsuite/gdc.test/runnable/dbitfields.d index 0d1877a7bfdf..aa154c7bbb21 100644 --- 
a/gcc/testsuite/gdc.test/runnable/dbitfields.d +++ b/gcc/testsuite/gdc.test/runnable/dbitfields.d @@ -173,6 +173,39 @@ static assert(test7u() == 1); static assert(test7s() == -1); static assert(test7s2() == -2); +/******************************************/ +// https://issues.dlang.org/show_bug.cgi?id=24257 + +struct S24257 +{ + uint : 15; + bool done : 1; +} + +bool advance() +{ + S24257 n; + n.done = false; + n.done = true; + return n.done; +} + +bool retard() +{ + S24257 n; + n.done = true; + n.done = false; + return n.done; +} + +static assert(advance() == true); + +void test24257() +{ + assert(advance() == true); + assert(retard() == false); +} + /******************************************/ int main() @@ -184,6 +217,7 @@ int main() test5(); test6(); test7(); + test24257(); return 0; } diff --git a/libphobos/libdruntime/MERGE b/libphobos/libdruntime/MERGE index aa0062c10eb9..5edcee1c84df 100644 --- a/libphobos/libdruntime/MERGE +++ b/libphobos/libdruntime/MERGE @@ -1,4 +1,4 @@ -ff57fec51558013b25cadb7e83da9f4675915d56 +2bbf64907cbbb483d003e0a8fcf8b502e4883799 The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. diff --git a/libphobos/libdruntime/core/cpuid.d b/libphobos/libdruntime/core/cpuid.d index 9c5735728b52..62edbac34f33 100644 --- a/libphobos/libdruntime/core/cpuid.d +++ b/libphobos/libdruntime/core/cpuid.d @@ -628,16 +628,17 @@ void getAMDcacheinfo() if (max_extended_cpuid >= 0x8000_0006) { // AMD K6-III or K6-2+ or later. 
- ubyte numcores = 1; + uint numcores = 1; if (max_extended_cpuid >= 0x8000_0008) { + // read the number of physical cores (minus 1) from the 8 lowest ECX bits version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx"; } else asm pure nothrow @nogc { mov EAX, 0x8000_0008; cpuid; - mov numcores, CL; + mov numcores, ECX; } - ++numcores; + numcores = (numcores & 0xFF) + 1; if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; } diff --git a/libphobos/src/MERGE b/libphobos/src/MERGE index 1b20d58c1e22..3c0e1b28d316 100644 --- a/libphobos/src/MERGE +++ b/libphobos/src/MERGE @@ -1,4 +1,4 @@ -17bafda797296e04f40f16a9660e5a9685392db4 +b64bfbf911fcd1675ae9792545649c9d45bb907e The first line of this file holds the git revision number of the last merge done from the dlang/phobos repository. diff --git a/libphobos/src/std/algorithm/searching.d b/libphobos/src/std/algorithm/searching.d index 68979057f545..4526aa22bc02 100644 --- a/libphobos/src/std/algorithm/searching.d +++ b/libphobos/src/std/algorithm/searching.d @@ -2895,94 +2895,100 @@ if (isForwardRange!R1 && ifTestable!(typeof(haystack.front), unaryFun!pred)) assert(findSkip!isWhite(s) == 2); } +private struct FindSplitResult(ubyte emptyRangeIndex, Types...) +{ + this(Types vals) + { + asTuple = typeof(asTuple)(vals); + } + void opAssign(typeof(asTuple) rhs) + { + asTuple = rhs; + } + Tuple!Types asTuple; + alias asTuple this; + + static if (hasConstEmptyMember!(typeof(asTuple[emptyRangeIndex]))) + { + bool opCast(T : bool)() const => !asTuple[emptyRangeIndex].empty; + } + else + { + bool opCast(T : bool)() => !asTuple[emptyRangeIndex].empty; + } +} + /** These functions find the first occurrence of `needle` in `haystack` and then split `haystack` as follows. -`findSplit` returns a tuple `result` containing $(I three) ranges. 
`result[0]` -is the portion of `haystack` before `needle`, `result[1]` is the portion of -`haystack` that matches `needle`, and `result[2]` is the portion of `haystack` -after the match. If `needle` was not found, `result[0]` comprehends `haystack` +$(PANEL +`findSplit` returns a tuple `result` containing $(I three) ranges. +$(UL +$(LI `result[0]` is the portion of `haystack` before `needle`) +$(LI `result[1]` is the portion of +`haystack` that matches `needle`) +$(LI `result[2]` is the portion of `haystack` +after the match.) +) +If `needle` was not found, `result[0]` comprehends `haystack` entirely and `result[1]` and `result[2]` are empty. -`findSplitBefore` returns a tuple `result` containing two ranges. `result[0]` is -the portion of `haystack` before `needle`, and `result[1]` is the balance of -`haystack` starting with the match. If `needle` was not found, `result[0]` +`findSplitBefore` returns a tuple `result` containing two ranges. +$(UL +$(LI `result[0]` is the portion of `haystack` before `needle`) +$(LI `result[1]` is the balance of `haystack` starting with the match.) +) +If `needle` was not found, `result[0]` comprehends `haystack` entirely and `result[1]` is empty. `findSplitAfter` returns a tuple `result` containing two ranges. -`result[0]` is the portion of `haystack` up to and including the -match, and `result[1]` is the balance of `haystack` starting -after the match. If `needle` was not found, `result[0]` is empty +$(UL +$(LI `result[0]` is the portion of `haystack` up to and including the +match) +$(LI `result[1]` is the balance of `haystack` starting +after the match.) +) +If `needle` was not found, `result[0]` is empty and `result[1]` is `haystack`. - +) +$(P In all cases, the concatenation of the returned ranges spans the entire `haystack`. If `haystack` is a random-access range, all three components of the tuple have the same type as `haystack`. 
Otherwise, `haystack` must be a $(REF_ALTTEXT forward range, isForwardRange, std,range,primitives) and -the type of `result[0]` and `result[1]` is the same as $(REF takeExactly, -std,range). +the type of `result[0]` (and `result[1]` for `findSplit`) is the same as +the result of $(REF takeExactly, std,range). For more information about `pred` see $(LREF find). - +) Params: - pred = Predicate to use for comparing needle against haystack. - haystack = The range to search. - needle = What to look for. + pred = Predicate to compare 2 elements. + haystack = The forward range to search. + needle = The forward range to look for. Returns: -A sub-type of `Tuple!()` of the split portions of `haystack` (see above for -details). This sub-type of `Tuple!()` has `opCast` defined for `bool`. This -`opCast` returns `true` when the separating `needle` was found -and `false` otherwise. +A sub-type of $(REF Tuple, std, typecons) of the split portions of `haystack` (see above for +details). This sub-type of `Tuple` defines `opCast!bool`, which +returns `true` when the separating `needle` was found and `false` otherwise. 
See_Also: $(LREF find) */ auto findSplit(alias pred = "a == b", R1, R2)(R1 haystack, R2 needle) if (isForwardRange!R1 && isForwardRange!R2) { - static struct Result(S1, S2) if (isForwardRange!S1 && - isForwardRange!S2) - { - this(S1 pre, S1 separator, S2 post) - { - asTuple = typeof(asTuple)(pre, separator, post); - } - void opAssign(typeof(asTuple) rhs) - { - asTuple = rhs; - } - Tuple!(S1, S1, S2) asTuple; - static if (hasConstEmptyMember!(typeof(asTuple[1]))) - { - bool opCast(T : bool)() const - { - return !asTuple[1].empty; - } - } - else - { - bool opCast(T : bool)() - { - return !asTuple[1].empty; - } - } - alias asTuple this; - } - static if (isSomeString!R1 && isSomeString!R2 || (isRandomAccessRange!R1 && hasSlicing!R1 && hasLength!R1 && hasLength!R2)) { auto balance = find!pred(haystack, needle); immutable pos1 = haystack.length - balance.length; immutable pos2 = balance.empty ? pos1 : pos1 + needle.length; - return Result!(typeof(haystack[0 .. pos1]), - typeof(haystack[pos2 .. haystack.length]))(haystack[0 .. pos1], - haystack[pos1 .. pos2], - haystack[pos2 .. haystack.length]); + alias Slice = typeof(haystack[0 .. pos1]); + return FindSplitResult!(1, Slice, Slice, Slice)( + haystack[0 .. pos1], haystack[pos1 .. pos2], haystack[pos2 .. 
haystack.length]); } else { @@ -3011,10 +3017,11 @@ if (isForwardRange!R1 && isForwardRange!R2) { pos1 = pos2; } - return Result!(typeof(takeExactly(original, pos1)), - typeof(h))(takeExactly(original, pos1), - takeExactly(haystack, pos2 - pos1), - h); + return FindSplitResult!(1, + typeof(takeExactly(original, pos1)), + typeof(takeExactly(original, pos1)), typeof(h))( + takeExactly(original, pos1), + takeExactly(haystack, pos2 - pos1), h); } } @@ -3022,43 +3029,14 @@ if (isForwardRange!R1 && isForwardRange!R2) auto findSplitBefore(alias pred = "a == b", R1, R2)(R1 haystack, R2 needle) if (isForwardRange!R1 && isForwardRange!R2) { - static struct Result(S1, S2) if (isForwardRange!S1 && - isForwardRange!S2) - { - this(S1 pre, S2 post) - { - asTuple = typeof(asTuple)(pre, post); - } - void opAssign(typeof(asTuple) rhs) - { - asTuple = rhs; - } - Tuple!(S1, S2) asTuple; - static if (hasConstEmptyMember!(typeof(asTuple[1]))) - { - bool opCast(T : bool)() const - { - return !asTuple[1].empty; - } - } - else - { - bool opCast(T : bool)() - { - return !asTuple[1].empty; - } - } - alias asTuple this; - } - static if (isSomeString!R1 && isSomeString!R2 || (isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && hasLength!R2)) { auto balance = find!pred(haystack, needle); immutable pos = haystack.length - balance.length; - return Result!(typeof(haystack[0 .. pos]), - typeof(haystack[pos .. haystack.length]))(haystack[0 .. pos], - haystack[pos .. haystack.length]); + return FindSplitResult!(1, + typeof(haystack[0 .. pos]), typeof(haystack[0 .. pos]))( + haystack[0 .. pos], haystack[pos .. 
haystack.length]); } else { @@ -3088,9 +3066,9 @@ if (isForwardRange!R1 && isForwardRange!R2) pos1 = pos2; haystack = h; } - return Result!(typeof(takeExactly(original, pos1)), - typeof(haystack))(takeExactly(original, pos1), - haystack); + return FindSplitResult!(1, + typeof(takeExactly(original, pos1)), typeof(haystack))( + takeExactly(original, pos1), haystack); } } @@ -3098,47 +3076,19 @@ if (isForwardRange!R1 && isForwardRange!R2) auto findSplitAfter(alias pred = "a == b", R1, R2)(R1 haystack, R2 needle) if (isForwardRange!R1 && isForwardRange!R2) { - static struct Result(S1, S2) if (isForwardRange!S1 && - isForwardRange!S2) - { - this(S1 pre, S2 post) - { - asTuple = typeof(asTuple)(pre, post); - } - void opAssign(typeof(asTuple) rhs) - { - asTuple = rhs; - } - Tuple!(S1, S2) asTuple; - static if (hasConstEmptyMember!(typeof(asTuple[1]))) - { - bool opCast(T : bool)() const - { - return !asTuple[0].empty; - } - } - else - { - bool opCast(T : bool)() - { - return !asTuple[0].empty; - } - } - alias asTuple this; - } - static if (isSomeString!R1 && isSomeString!R2 || isRandomAccessRange!R1 && hasLength!R1 && hasSlicing!R1 && hasLength!R2) { auto balance = find!pred(haystack, needle); immutable pos = balance.empty ? 0 : haystack.length - balance.length + needle.length; - return Result!(typeof(haystack[0 .. pos]), - typeof(haystack[pos .. haystack.length]))(haystack[0 .. pos], - haystack[pos .. haystack.length]); + return FindSplitResult!(0, + typeof(haystack[0 .. pos]), typeof(haystack[0 .. pos]))( + haystack[0 .. pos], haystack[pos .. 
haystack.length]); } else { import std.range : takeExactly; + alias Res = FindSplitResult!(0, typeof(takeExactly(haystack, 0)), typeof(haystack)); auto original = haystack.save; auto h = haystack.save; auto n = needle.save; @@ -3148,9 +3098,7 @@ if (isForwardRange!R1 && isForwardRange!R2) if (h.empty) { // Failed search - return Result!(typeof(takeExactly(original, 0)), - typeof(original))(takeExactly(original, 0), - original); + return Res(takeExactly(original, 0), original); } if (binaryFun!pred(h.front, n.front)) { @@ -3166,9 +3114,7 @@ if (isForwardRange!R1 && isForwardRange!R2) pos2 = ++pos1; } } - return Result!(typeof(takeExactly(original, pos2)), - typeof(h))(takeExactly(original, pos2), - h); + return Res(takeExactly(original, pos2), h); } } @@ -3185,12 +3131,12 @@ if (isForwardRange!R1 && isForwardRange!R2) } else assert(0); - // works with const aswell - if (const split = "dlang-rocks".findSplit("-")) + // findSplitBefore returns 2 ranges + if (const split = [2, 3, 2, 3, 4, 1].findSplitBefore!"a > b"([2, 2])) { - assert(split[0] == "dlang"); - assert(split[1] == "-"); - assert(split[2] == "rocks"); + assert(split[0] == [2, 3, 2]); + // [3, 4] each greater than [2, 2] + assert(split[1] == [3, 4, 1]); } else assert(0); } diff --git a/libphobos/src/std/conv.d b/libphobos/src/std/conv.d index 4248e4b9d44b..3a5338148944 100644 --- a/libphobos/src/std/conv.d +++ b/libphobos/src/std/conv.d @@ -4848,8 +4848,9 @@ private S textImpl(S, U...)(U args) static foreach (arg; args) { static if ( - isSomeChar!(typeof(arg)) || isSomeString!(typeof(arg)) || - ( isInputRange!(typeof(arg)) && isSomeChar!(ElementType!(typeof(arg))) ) + isSomeChar!(typeof(arg)) + || isSomeString!(typeof(arg)) + || ( isInputRange!(typeof(arg)) && isSomeChar!(ElementType!(typeof(arg))) ) ) app.put(arg); else static if ( diff --git a/libphobos/src/std/range/package.d b/libphobos/src/std/range/package.d index 1b4f233d94a3..c985015a7af4 100644 --- a/libphobos/src/std/range/package.d +++ 
b/libphobos/src/std/range/package.d @@ -1025,7 +1025,18 @@ if (Ranges.length > 0 && } else { - @property bool empty() => frontIndex >= backIndex; + @property bool empty() + { + if (frontIndex == 0) + { + // special handling: we might be in Range.init state! + // For instance, `format!"%s"` uses Range.init to ensure + // that formatting is possible. + // In that case, we must still behave in an internally consistent way. + return source[0].empty; + } + return frontIndex >= backIndex; + } } static if (allSatisfy!(isForwardRange, R)) @@ -1705,6 +1716,17 @@ pure @safe nothrow @nogc unittest } } +/// https://issues.dlang.org/show_bug.cgi?id=24243 +pure @safe nothrow unittest +{ + import std.algorithm.iteration : filter; + + auto range = chain([2], [3].filter!"a"); + + // This might happen in format!"%s"(range), for instance. + assert(typeof(range).init.empty); +} + /** Choose one of two ranges at runtime depending on a Boolean condition. diff --git a/libphobos/src/std/uni/package.d b/libphobos/src/std/uni/package.d index 9903d6c28c35..fec7e5f78ac7 100644 --- a/libphobos/src/std/uni/package.d +++ b/libphobos/src/std/uni/package.d @@ -7706,6 +7706,12 @@ public: return this.tupleof == other.tupleof; } + // Define a default toHash to allow AA usage + size_t toHash() const @trusted + { + return hashOf(slen_, hashOf(small_)); + } + /++ True if this object contains valid extended grapheme cluster. Decoding primitives of this module always return a valid `Grapheme`. @@ -7907,6 +7913,12 @@ static assert(Grapheme.sizeof == size_t.sizeof*4); assert(equal(h[], iota(cast(int)'A', cast(int)'Z'+1))); } +// ensure Grapheme can be used as an AA key. +@safe unittest +{ + int[Grapheme] aa; +} + /++ $(P Does basic case-insensitive comparison of `r1` and `r2`. 
This function uses simpler comparison rule thus achieving better performance From 889341a897d3d2e9fb09de1a1c5e764a2c03424f Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Fri, 8 Dec 2023 16:37:31 +0000 Subject: [PATCH 180/311] aarch64: arm_neon.h - Fix -Wincompatible-pointer-types errors In the Linux kernel, u64/s64 are [un]signed long long, not [un]signed long. This means that when the `arm_neon.h' header is used by the kernel, any use of the `uint64_t' / `int64_t' types needs to be cast to the correct `__builtin_aarch64_simd_di' / `__builtin_aarch64_simd_df' types when calling the relevant ACLE builtins. This patch adds the necessary fixes to ensure that `vstl1_*' and `vldap1_*' intrinsics are correctly defined for use by the kernel. gcc/ChangeLog: * config/aarch64/arm_neon.h (vldap1_lane_u64): Add `const' to `__builtin_aarch64_simd_di *' cast. (vldap1q_lane_u64): Likewise. (vldap1_lane_s64): Cast __src to `const __builtin_aarch64_simd_di *'. (vldap1q_lane_s64): Likewise. (vldap1_lane_f64): Cast __src to `const __builtin_aarch64_simd_df *'. (vldap1q_lane_f64): Cast __src to `const __builtin_aarch64_simd_df *'. (vldap1_lane_p64): Add `const' to `__builtin_aarch64_simd_di *' cast. (vldap1q_lane_p64): Add `const' to `__builtin_aarch64_simd_di *' cast. (vstl1_lane_u64): Remove stray `const'. (vstl1_lane_s64): Cast __src to `__builtin_aarch64_simd_di *'. (vstl1q_lane_s64): Likewise. (vstl1_lane_f64): Cast __src to `__builtin_aarch64_simd_df *'. (vstl1q_lane_f64): Likewise. 
--- gcc/config/aarch64/arm_neon.h | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ef0d75e07ce1..f394de595f74 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -13456,7 +13456,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) { return __builtin_aarch64_vec_ldap1_lanev1di_usus ( - (__builtin_aarch64_simd_di *) __src, __vec, __lane); + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } __extension__ extern __inline uint64x2_t @@ -13464,35 +13464,39 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) { return __builtin_aarch64_vec_ldap1_lanev2di_usus ( - (__builtin_aarch64_simd_di *) __src, __vec, __lane); + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } __extension__ extern __inline int64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) { - return __builtin_aarch64_vec_ldap1_lanev1di (__src, __vec, __lane); + return __builtin_aarch64_vec_ldap1_lanev1di ( + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } __extension__ extern __inline int64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) { - return __builtin_aarch64_vec_ldap1_lanev2di (__src, __vec, __lane); + return __builtin_aarch64_vec_ldap1_lanev2di ( + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } __extension__ extern __inline float64x1_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) { - return __builtin_aarch64_vec_ldap1_lanev1df 
(__src, __vec, __lane); + return __builtin_aarch64_vec_ldap1_lanev1df ( + (const __builtin_aarch64_simd_df *) __src, __vec, __lane); } __extension__ extern __inline float64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) { - return __builtin_aarch64_vec_ldap1_lanev2df (__src, __vec, __lane); + return __builtin_aarch64_vec_ldap1_lanev2df ( + (const __builtin_aarch64_simd_df *) __src, __vec, __lane); } __extension__ extern __inline poly64x1_t @@ -13500,7 +13504,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) { return __builtin_aarch64_vec_ldap1_lanev1di_psps ( - (__builtin_aarch64_simd_di *) __src, __vec, __lane); + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } __extension__ extern __inline poly64x2_t @@ -13508,14 +13512,14 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vldap1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) { return __builtin_aarch64_vec_ldap1_lanev2di_psps ( - (__builtin_aarch64_simd_di *) __src, __vec, __lane); + (const __builtin_aarch64_simd_di *) __src, __vec, __lane); } /* vstl1_lane. 
*/ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vstl1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) +vstl1_lane_u64 (uint64_t *__src, uint64x1_t __vec, const int __lane) { __builtin_aarch64_vec_stl1_lanev1di_sus ((__builtin_aarch64_simd_di *) __src, __vec, __lane); @@ -13533,28 +13537,32 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vstl1_lane_s64 (int64_t *__src, int64x1_t __vec, const int __lane) { - __builtin_aarch64_vec_stl1_lanev1di (__src, __vec, __lane); + __builtin_aarch64_vec_stl1_lanev1di ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vstl1q_lane_s64 (int64_t *__src, int64x2_t __vec, const int __lane) { - __builtin_aarch64_vec_stl1_lanev2di (__src, __vec, __lane); + __builtin_aarch64_vec_stl1_lanev2di ((__builtin_aarch64_simd_di *) __src, + __vec, __lane); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vstl1_lane_f64 (float64_t *__src, float64x1_t __vec, const int __lane) { - __builtin_aarch64_vec_stl1_lanev1df (__src, __vec, __lane); + __builtin_aarch64_vec_stl1_lanev1df ((__builtin_aarch64_simd_df *) __src, + __vec, __lane); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vstl1q_lane_f64 (float64_t *__src, float64x2_t __vec, const int __lane) { - __builtin_aarch64_vec_stl1_lanev2df (__src, __vec, __lane); + __builtin_aarch64_vec_stl1_lanev2df ((__builtin_aarch64_simd_df *) __src, + __vec, __lane); } __extension__ extern __inline void From 61f5b64ae6481e4adb69d007bd1112987bde67ce Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Mon, 11 Dec 2023 09:40:03 +0100 Subject: [PATCH 181/311] Testsuite, asan, darwin: Adjust output pattern Since the last import from upstream libsanitizer, 
the output has changed and now looks more like this: READ of size 6 at 0x7ff7beb2a144 thread T0 #0 0x101cf7796 in MemcmpInterceptorCommon(void*, int (*)(void const*, void const*, unsigned long), void const*, void const*, unsigned long) sanitizer_common_interceptors.inc:813 #1 0x101cf7b99 in memcmp sanitizer_common_interceptors.inc:840 #2 0x108a0c39f in __stack_chk_guard+0xf (dyld:x86_64+0x8039f) so let's adjust the pattern accordingly. gcc/testsuite/ChangeLog: * c-c++-common/asan/memcmp-1.c: Adjust pattern on darwin. --- gcc/testsuite/c-c++-common/asan/memcmp-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/asan/memcmp-1.c b/gcc/testsuite/c-c++-common/asan/memcmp-1.c index 0d08c098e0ea..31fd6e498510 100644 --- a/gcc/testsuite/c-c++-common/asan/memcmp-1.c +++ b/gcc/testsuite/c-c++-common/asan/memcmp-1.c @@ -21,4 +21,4 @@ main () /* { dg-output " #\[2-9\] 0x\[0-9a-f\]+ +(in _*main|\[(\])\[^\n\r]*(\n|\r\n|\r)" { target { ! *-*-darwin* } } } */ /* { dg-output " #\[0-9\] 0x\[0-9a-f\]+ +(in MemcmpInterceptorCommon|\[(\])\[^\n\r]*(\n|\r\n|\r)" { target *-*-darwin* } } */ -/* { dg-output " #\[1-9\] 0x\[0-9a-f\]+ +(in wrap_(memcmp|bcmp)|\[(\])\[^\n\r]*(\n|\r\n|\r)" { target *-*-darwin* } } */ +/* { dg-output " #\[1-9\] 0x\[0-9a-f\]+ +(in (memcmp|bcmp)|\[(\])\[^\n\r]*(\n|\r\n|\r)" { target *-*-darwin* } } */ From 4db744149b6ed46314107187bb25e142c729f442 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 19:58:43 +0800 Subject: [PATCH 182/311] RISC-V: Robostify shuffle index used by vrgather and fix regression Notice there are some regression FAILs: FAIL: gcc.target/riscv/rvv/autovec/pr110950.c -O3 -ftree-vectorize scan-assembler-times vslide1up\\.vx 1 FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c -std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax scan-assembler-times vrgather\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 19 FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/perm-4.c -std=c99 
-O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax scan-assembler-times vrgatherei16\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 12 FAIL: gcc.target/riscv/rvv/autovec/vls/perm-4.c -O3 -ftree-vectorize --param riscv-autovec-preference=scalable scan-assembler-times vrgather\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 19 FAIL: gcc.target/riscv/rvv/autovec/vls/perm-4.c -O3 -ftree-vectorize --param riscv-autovec-preference=scalable scan-assembler-times vrgatherei16\\.vv\\tv[0-9]+,\\s*v[0-9]+,\\s*v[0-9]+ 12 pr110950 is not a regression; adapting the testcase is enough. The remaining FAILs, which are caused by this patch: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d9dd06ad51b7479f09acb88adf404664a1e18b2a need to be fixed. Robustify the gather index to fix those FAILs. gcc/ChangeLog: * config/riscv/riscv-v.cc (get_gather_index_mode): New function. (shuffle_series_patterns): Robustify shuffle index. (shuffle_generic_patterns): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr110950.c: Adapt test. --- gcc/config/riscv/riscv-v.cc | 80 +++++++++++-------- .../gcc.target/riscv/rvv/autovec/pr110950.c | 2 +- 2 files changed, 49 insertions(+), 33 deletions(-) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 484c690c3db5..944b37b5df7f 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2923,6 +2923,39 @@ struct expand_vec_perm_d bool testing_p; }; +/* Return the appropriate index mode for gather instructions. */ +opt_machine_mode +get_gather_index_mode (struct expand_vec_perm_d *d) +{ + machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); + poly_uint64 nunits = GET_MODE_NUNITS (d->vmode); + + if (GET_MODE_INNER (d->vmode) == QImode) + { + if (nunits.is_constant ()) + { + /* If indice is LMUL8 CONST_VECTOR and any element value + exceed the range of 0 ~ 255, Forbid such permutation + since we need vector HI mode to hold such indice and + we don't have it. 
*/ + if (!d->perm.all_in_range_p (0, 255) + && !get_vector_mode (HImode, nunits).exists (&sel_mode)) + return opt_machine_mode (); + } + else + { + /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv. + Otherwise, it could overflow the index range. */ + if (!get_vector_mode (HImode, nunits).exists (&sel_mode)) + return opt_machine_mode (); + } + } + else if (riscv_get_v_regno_alignment (sel_mode) > 1 + && GET_MODE_INNER (sel_mode) != HImode) + sel_mode = get_vector_mode (HImode, nunits).require (); + return sel_mode; +} + /* Recognize the patterns that we can use merge operation to shuffle the vectors. The value of Each element (index i) in selector can only be either i or nunits + i. We will check the pattern is actually monotonic. @@ -3428,12 +3461,10 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) if (!have_series) return false; - /* Get a vector int-mode to be used for the permute selector. */ - machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); - insn_code icode = optab_handler (vec_shl_insert_optab, sel_mode); - - /* We need to be able to insert an element and shift the vector. */ - if (need_insert && icode == CODE_FOR_nothing) + /* Disable shuffle if we can't find an appropriate integer index mode for + gather. */ + machine_mode sel_mode; + if (!get_gather_index_mode (d).exists (&sel_mode)) return false; /* Success! */ @@ -3448,7 +3479,12 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) /* Insert the remaining element if necessary. 
*/ if (need_insert) - emit_insn (GEN_FCN (icode) (series, series, gen_int_mode (el1, eltmode))); + { + insn_code icode = code_for_pred_slide (UNSPEC_VSLIDE1UP, sel_mode); + rtx ops[] + = {series, series, gen_int_mode (el1, GET_MODE_INNER (sel_mode))}; + emit_vlmax_insn (icode, BINARY_OP, ops); + } emit_vlmax_gather_insn (d->target, d->op0, series); @@ -3460,36 +3496,16 @@ shuffle_series_patterns (struct expand_vec_perm_d *d) static bool shuffle_generic_patterns (struct expand_vec_perm_d *d) { - machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); - poly_uint64 nunits = GET_MODE_NUNITS (d->vmode); + machine_mode sel_mode; /* We don't enable SLP for non-power of 2 NPATTERNS. */ if (!pow2p_hwi (d->perm.encoding().npatterns ())) return false; - if (GET_MODE_INNER (d->vmode) == QImode) - { - if (nunits.is_constant ()) - { - /* If indice is LMUL8 CONST_VECTOR and any element value - exceed the range of 0 ~ 255, Forbid such permutation - since we need vector HI mode to hold such indice and - we don't have it. */ - if (!d->perm.all_in_range_p (0, 255) - && !get_vector_mode (HImode, nunits).exists (&sel_mode)) - return false; - } - else - { - /* Permuting two SEW8 variable-length vectors need vrgatherei16.vv. - Otherwise, it could overflow the index range. */ - if (!get_vector_mode (HImode, nunits).exists (&sel_mode)) - return false; - } - } - else if (riscv_get_v_regno_alignment (sel_mode) > 1 - && GET_MODE_INNER (sel_mode) != HImode) - sel_mode = get_vector_mode (HImode, nunits).require (); + /* Disable shuffle if we can't find an appropriate integer index mode for + gather. */ + if (!get_gather_index_mode (d).exists (&sel_mode)) + return false; /* Success! 
*/ if (d->testing_p) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c index b927f1ea92ab..17dd43973416 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110950.c @@ -9,4 +9,4 @@ void b() { c[a] = d[-a]; } -/* { dg-final { scan-assembler-times {vslide1up\.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vrgather} 1 } } */ From 0a569f87cd8436abaa6ac8c0da6e441152904e67 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Mon, 11 Dec 2023 20:16:28 +0800 Subject: [PATCH 183/311] RTL-SSA: Fix ICE on record_use of RTL_SSA for RISC-V VSETVL PASS This patch fixes an ICE in record_use during RTL_SSA initialization in the RISC-V backend VSETVL pass. This is the ICE: 0x11a8603 partial_subreg_p(machine_mode, machine_mode) ../../../../gcc/gcc/rtl.h:3187 0x3b695eb rtl_ssa::function_info::record_use(rtl_ssa::function_info::build_info&, rtl_ssa::insn_info*, rtx_obj_reference) ../../../../gcc/gcc/rtl-ssa/insns.cc:524 In record_use: if (HARD_REGISTER_NUM_P (regno) && partial_subreg_p (use->mode (), mode)) The assertion fails in partial_subreg_p, which is: inline bool partial_subreg_p (machine_mode outermode, machine_mode innermode) { /* Modes involved in a subreg must be ordered. In particular, we must always know at compile time whether the subreg is paradoxical. */ poly_int64 outer_prec = GET_MODE_PRECISION (outermode); poly_int64 inner_prec = GET_MODE_PRECISION (innermode); gcc_checking_assert (ordered_p (outer_prec, inner_prec)); -----> causes the ICE. return maybe_lt (outer_prec, inner_prec); } The RISC-V VSETVL pass is an advanced lazy vsetvl insertion pass that runs after RA (register allocation). The root cause is that we have a pattern (reduction instruction) that includes both VLA (length-agnostic) and VLS (fixed-length) modes. 
(insn 168 173 170 31 (set (reg:RVVM1SI 101 v5 [311]) (unspec:RVVM1SI [ (unspec:V32BI [ (const_vector:V32BI [ (const_int 1 [0x1]) repeated x32 ]) (reg:DI 30 t5 [312]) (const_int 2 [0x2]) repeated x2 (reg:SI 66 vl) (reg:SI 67 vtype) ] UNSPEC_VPREDICATE) (unspec:RVVM1SI [ (reg:V32SI 96 v0 [orig:185 vect__96.40 ] [185]) -----> VLS mode NUNITS = 32 elements. (reg:RVVM1SI 113 v17 [439]) -----> VLA mode NUNITS = [8, 8] elements. ] UNSPEC_REDUC_XOR) (unspec:RVVM1SI [ (reg:SI 0 zero) ] UNSPEC_VUNDEF) ] UNSPEC_REDUC)) 15948 {pred_redxorv32si} In this case, record_use is trying to check partial_subreg_p (use->mode (), mode) for RTX = (reg:V32SI 96 v0 [orig:185 vect__96.40 ] [185]). use->mode () == V32SImode, whereas mode == RVVM1SImode. It then ICEs because the two modes are not ordered (!ordered_p). Fix this by setting the use mode to the register's raw mode (reg_raw_mode[regno]), the biggest mode, which is the natural fallback. gcc/ChangeLog: * rtl-ssa/insns.cc (function_info::record_use): Add !ordered_p case. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c: New test. --- gcc/rtl-ssa/insns.cc | 11 +++++++--- .../riscv/rvv/vsetvl/vsetvl_bug-2.c | 21 +++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c diff --git a/gcc/rtl-ssa/insns.cc b/gcc/rtl-ssa/insns.cc index 2fa48e0dacd2..a54168d5c5f7 100644 --- a/gcc/rtl-ssa/insns.cc +++ b/gcc/rtl-ssa/insns.cc @@ -520,9 +520,14 @@ function_info::record_use (build_info &bi, insn_info *insn, // the instruction (unusually) references the same register in two // different but equal-sized modes. 
gcc_checking_assert (use->insn () == insn); - if (HARD_REGISTER_NUM_P (regno) - && partial_subreg_p (use->mode (), mode)) - use->set_mode (mode); + if (HARD_REGISTER_NUM_P (regno)) + { + if (!ordered_p (GET_MODE_PRECISION (use->mode ()), + GET_MODE_PRECISION (mode))) + use->set_mode (reg_raw_mode[regno]); + else if (partial_subreg_p (use->mode (), mode)) + use->set_mode (mode); + } use->record_reference (ref, false); } } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c new file mode 100644 index 000000000000..bbc02eab8180 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d --param=riscv-autovec-lmul=m4 -O3 -fomit-frame-pointer -funroll-loops" } */ + +int safe_lshift_func_int32_t_s_s_left, safe_lshift_func_int32_t_s_s_right, + safe_sub_func_uint64_t_u_u_ui2, safe_mul_func_uint64_t_u_u_ui2, g_79_2, + g_97_l_439; +void g_97(int * __restrict l_437) +{ + for (; g_97_l_439; g_97_l_439 += 1) + for (char l_502 = 0; l_502 < 4; l_502++) + { + int __trans_tmp_14 = ((safe_lshift_func_int32_t_s_s_right >= 2 + || safe_lshift_func_int32_t_s_s_left) + ? 1 : safe_lshift_func_int32_t_s_s_right); + long __trans_tmp_15 = __trans_tmp_14 * safe_mul_func_uint64_t_u_u_ui2; + unsigned short __trans_tmp_16 = -__trans_tmp_15; + int __trans_tmp_7 + = (__trans_tmp_16 ^ 65535UL) - safe_sub_func_uint64_t_u_u_ui2; + *l_437 ^= (short)(__trans_tmp_7 ^ g_79_2); + } +} From 8d2e5ad7f1723c9125c2d511c281b4fe62ff29cd Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Mon, 11 Dec 2023 13:38:19 +0100 Subject: [PATCH 184/311] ada: Fix Ada bootstrap on FreeBSD Ada bootstrap on FreeBSD/amd64 was also broken by the recent warning changes: terminals.c: In function 'allocate_pty_desc': terminals.c:1200:12: error: implicit declaration of function 'openpty'; did you mean 'openat'? 
[-Wimplicit-function-declaration] 1200 | status = openpty (&master_fd, &slave_fd, NULL, NULL, NULL); | ^~~~~~~ | openat terminals.c: At top level: terminals.c:1268:9: warning: "TABDLY" redefined 1268 | #define TABDLY 0 | ^~~~~~ In file included from /usr/include/termios.h:38, from terminals.c:1109: /usr/include/sys/_termios.h:111:9: note: this is the location of the previous definition 111 | #define TABDLY 0x00000004 /* tab delay mask */ | ^~~~~~ make[7]: *** [../gcc-interface/Makefile:302: terminals.o] Error 1 Fixed by including the necessary header and guarding the fallback definition of TABDLY. This allowed a 64-bit-only bootstrap on x86_64-unknown-freebsd14.0 to complete successfully. 2023-12-11 Rainer Orth gcc/ada: * terminals.c [__FreeBSD__]: Include <libutil.h>. (TABDLY): Only define if missing. --- gcc/ada/terminals.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gcc/ada/terminals.c b/gcc/ada/terminals.c index 14de0feb32ac..21784b1202b8 100644 --- a/gcc/ada/terminals.c +++ b/gcc/ada/terminals.c @@ -1125,6 +1125,9 @@ __gnat_setup_winsize (void *desc ATTRIBUTE_UNUSED, #if defined (__APPLE__) # include <util.h> #endif +#if defined (__FreeBSD__) +# include <libutil.h> +#endif #define CDISABLE _POSIX_VDISABLE @@ -1265,11 +1268,13 @@ allocate_pty_desc (pty_desc **desc) { #ifndef NLDLY #define NLDLY 0 #define CRDLY 0 -#define TABDLY 0 #define BSDLY 0 #define VTDLY 0 #define FFDLY 0 #endif +#ifndef TABDLY +#define TABDLY 0 +#endif /* child_setup_tty - set terminal properties * From 2505a8b41d3b74a545755a278f3750a29c1340b6 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 11 Dec 2023 15:08:07 +0100 Subject: [PATCH 185/311] OpenMP: Minor '!$omp allocators' cleanup gcc/fortran/ChangeLog: * trans-openmp.cc (gfc_omp_call_add_alloc, gfc_omp_call_is_alloc): Set 'fn spec'. libgomp/ChangeLog: * libgomp_g.h (GOMP_add_alloc, GOMP_is_alloc): Add. 
--- gcc/fortran/trans-openmp.cc | 8 ++++++-- libgomp/libgomp_g.h | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 9e166c94f8e6..95184920cf70 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -8361,8 +8361,10 @@ gfc_omp_call_add_alloc (tree ptr) if (fn == NULL_TREE) { fn = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + tree att = build_tree_list (NULL_TREE, build_string (4, ". R ")); + att = tree_cons (get_identifier ("fn spec"), att, TYPE_ATTRIBUTES (fn)); + fn = build_type_attribute_variant (fn, att); fn = build_fn_decl ("GOMP_add_alloc", fn); -/* FIXME: attributes. */ } return build_call_expr_loc (input_location, fn, 1, ptr); } @@ -8380,7 +8382,9 @@ gfc_omp_call_is_alloc (tree ptr) fn = build_function_type_list (boolean_type_node, ptr_type_node, NULL_TREE); fn = build_fn_decl ("GOMP_is_alloc", fn); -/* FIXME: attributes. */ + tree att = build_tree_list (NULL_TREE, build_string (4, ". R ")); + att = tree_cons (get_identifier ("fn spec"), att, TYPE_ATTRIBUTES (fn)); + fn = build_type_attribute_variant (fn, att); } return build_call_expr_loc (input_location, fn, 1, ptr); } diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 95046312ae9b..ec619f255f2a 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -366,6 +366,9 @@ extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned, /* allocator.c */ +extern void GOMP_add_alloc (void *); +extern bool GOMP_is_alloc (void *); + extern void *GOMP_alloc (size_t, size_t, uintptr_t); extern void GOMP_free (void *, uintptr_t); From 055f08feee3424a29403047a7049af467f2b26cd Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 11 Dec 2023 15:19:02 +0100 Subject: [PATCH 186/311] OpenMP: Support acquires/release in 'omp require atomic_default_mem_order' This is an OpenMP 5.2 feature. 
gcc/c/ChangeLog: * c-parser.cc (c_parser_omp_requires): Handle acquire/release in atomic_default_mem_order clause. (c_parser_omp_atomic): Update. gcc/cp/ChangeLog: * parser.cc (cp_parser_omp_requires): Handle acquire/release in atomic_default_mem_order clause. (cp_parser_omp_atomic): Update. gcc/fortran/ChangeLog: * gfortran.h (enum gfc_omp_requires_kind): Add OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE and OMP_REQ_ATOMIC_MEM_ORDER_RELEASE. (gfc_namespace): Add a 7th bit to omp_requires. * module.cc (enum ab_attribute): Add AB_OMP_REQ_MEM_ORDER_ACQUIRE and AB_OMP_REQ_MEM_ORDER_RELEASE. (mio_symbol_attribute): Handle them. * openmp.cc (gfc_omp_requires_add_clause): Update for acquire/release. (gfc_match_omp_requires): Likewise. (gfc_match_omp_atomic): Handle them for atomic_default_mem_order. * parse.cc: Likewise. gcc/testsuite/ChangeLog: * c-c++-common/gomp/requires-3.c: Update for now-valid code. * gfortran.dg/gomp/requires-3.f90: Likewise. * gfortran.dg/gomp/requires-2.f90: Update dg-error. * gfortran.dg/gomp/requires-5.f90: Likewise. * c-c++-common/gomp/requires-5.c: New test. * c-c++-common/gomp/requires-6.c: New test. * c-c++-common/gomp/requires-7.c: New test. * c-c++-common/gomp/requires-8.c: New test. * gfortran.dg/gomp/requires-10.f90: New test. * gfortran.dg/gomp/requires-11.f90: New test.
--- gcc/c/c-parser.cc | 32 ++++++++++- gcc/cp/parser.cc | 32 ++++++++++- gcc/fortran/gfortran.h | 22 ++++---- gcc/fortran/module.cc | 19 +++++++ gcc/fortran/openmp.cc | 53 +++++++++++++++---- gcc/fortran/parse.cc | 8 +++ gcc/testsuite/c-c++-common/gomp/requires-3.c | 8 +-- gcc/testsuite/c-c++-common/gomp/requires-5.c | 23 ++++++++ gcc/testsuite/c-c++-common/gomp/requires-6.c | 23 ++++++++ gcc/testsuite/c-c++-common/gomp/requires-7.c | 11 ++++ gcc/testsuite/c-c++-common/gomp/requires-8.c | 14 +++++ .../gfortran.dg/gomp/requires-10.f90 | 36 +++++++++++++ .../gfortran.dg/gomp/requires-11.f90 | 31 +++++++++++ gcc/testsuite/gfortran.dg/gomp/requires-2.f90 | 2 +- gcc/testsuite/gfortran.dg/gomp/requires-3.f90 | 7 +-- gcc/testsuite/gfortran.dg/gomp/requires-5.f90 | 2 +- 16 files changed, 291 insertions(+), 32 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/gomp/requires-5.c create mode 100644 gcc/testsuite/c-c++-common/gomp/requires-6.c create mode 100644 gcc/testsuite/c-c++-common/gomp/requires-7.c create mode 100644 gcc/testsuite/c-c++-common/gomp/requires-8.c create mode 100644 gcc/testsuite/gfortran.dg/gomp/requires-10.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/requires-11.f90 diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index df9a07928b56..5700ccccc493 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -20896,6 +20896,28 @@ c_parser_omp_atomic (location_t loc, c_parser *parser, bool openacc) case OMP_MEMORY_ORDER_SEQ_CST: memory_order = OMP_MEMORY_ORDER_SEQ_CST; break; + case OMP_MEMORY_ORDER_ACQUIRE: + if (code == NOP_EXPR) /* atomic write */ + { + error_at (loc, "%<#pragma omp atomic write%> incompatible with " + "% clause implicitly provided by a " + "% directive"); + memory_order = OMP_MEMORY_ORDER_SEQ_CST; + } + else + memory_order = OMP_MEMORY_ORDER_ACQUIRE; + break; + case OMP_MEMORY_ORDER_RELEASE: + if (code == OMP_ATOMIC_READ) + { + error_at (loc, "%<#pragma omp atomic read%> incompatible with " + "% clause implicitly 
provided by a " + "% directive"); + memory_order = OMP_MEMORY_ORDER_SEQ_CST; + } + else + memory_order = OMP_MEMORY_ORDER_RELEASE; + break; case OMP_MEMORY_ORDER_ACQ_REL: switch (code) { @@ -25724,15 +25746,21 @@ c_parser_omp_requires (c_parser *parser) else if (!strcmp (p, "relaxed")) this_req = (enum omp_requires) OMP_MEMORY_ORDER_RELAXED; + else if (!strcmp (p, "release")) + this_req + = (enum omp_requires) OMP_MEMORY_ORDER_RELEASE; else if (!strcmp (p, "acq_rel")) this_req = (enum omp_requires) OMP_MEMORY_ORDER_ACQ_REL; + else if (!strcmp (p, "acquire")) + this_req + = (enum omp_requires) OMP_MEMORY_ORDER_ACQUIRE; } if (this_req == 0) { error_at (c_parser_peek_token (parser)->location, - "expected %, % or " - "%"); + "expected %, %, " + "%, % or %"); switch (c_parser_peek_token (parser)->type) { case CPP_EOF: diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index b987324f6691..fe8845b0fc31 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -42504,6 +42504,28 @@ cp_parser_omp_atomic (cp_parser *parser, cp_token *pragma_tok, bool openacc) case OMP_MEMORY_ORDER_SEQ_CST: memory_order = OMP_MEMORY_ORDER_SEQ_CST; break; + case OMP_MEMORY_ORDER_ACQUIRE: + if (code == NOP_EXPR) /* atomic write */ + { + error_at (loc, "%<#pragma omp atomic write%> incompatible with " + "% clause implicitly provided by a " + "% directive"); + memory_order = OMP_MEMORY_ORDER_SEQ_CST; + } + else + memory_order = OMP_MEMORY_ORDER_ACQUIRE; + break; + case OMP_MEMORY_ORDER_RELEASE: + if (code == OMP_ATOMIC_READ) + { + error_at (loc, "%<#pragma omp atomic read%> incompatible with " + "% clause implicitly provided by a " + "% directive"); + memory_order = OMP_MEMORY_ORDER_SEQ_CST; + } + else + memory_order = OMP_MEMORY_ORDER_RELEASE; + break; case OMP_MEMORY_ORDER_ACQ_REL: switch (code) { @@ -49194,15 +49216,21 @@ cp_parser_omp_requires (cp_parser *parser, cp_token *pragma_tok) else if (!strcmp (p, "relaxed")) this_req = (enum omp_requires) OMP_MEMORY_ORDER_RELAXED; + else if (!strcmp (p, 
"release")) + this_req + = (enum omp_requires) OMP_MEMORY_ORDER_RELEASE; else if (!strcmp (p, "acq_rel")) this_req = (enum omp_requires) OMP_MEMORY_ORDER_ACQ_REL; + else if (!strcmp (p, "acquire")) + this_req + = (enum omp_requires) OMP_MEMORY_ORDER_ACQUIRE; } if (this_req == 0) { error_at (cp_lexer_peek_token (parser->lexer)->location, - "expected %, % or " - "%"); + "expected %, %, " + "%, % or %"); switch (cp_lexer_peek_token (parser->lexer)->type) { case CPP_EOF: diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 28569d07e716..c86a025a3fce 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1496,19 +1496,23 @@ enum gfc_omp_atomic_op enum gfc_omp_requires_kind { /* Keep in sync with gfc_namespace, esp. with omp_req_mem_order. */ - OMP_REQ_ATOMIC_MEM_ORDER_SEQ_CST = 1, /* 01 */ - OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL = 2, /* 10 */ - OMP_REQ_ATOMIC_MEM_ORDER_RELAXED = 3, /* 11 */ - OMP_REQ_REVERSE_OFFLOAD = (1 << 2), - OMP_REQ_UNIFIED_ADDRESS = (1 << 3), - OMP_REQ_UNIFIED_SHARED_MEMORY = (1 << 4), - OMP_REQ_DYNAMIC_ALLOCATORS = (1 << 5), + OMP_REQ_ATOMIC_MEM_ORDER_SEQ_CST = 1, /* 001 */ + OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL = 2, /* 010 */ + OMP_REQ_ATOMIC_MEM_ORDER_RELAXED = 3, /* 011 */ + OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE = 4, /* 100 */ + OMP_REQ_ATOMIC_MEM_ORDER_RELEASE = 5, /* 101 */ + OMP_REQ_REVERSE_OFFLOAD = (1 << 3), + OMP_REQ_UNIFIED_ADDRESS = (1 << 4), + OMP_REQ_UNIFIED_SHARED_MEMORY = (1 << 5), + OMP_REQ_DYNAMIC_ALLOCATORS = (1 << 6), OMP_REQ_TARGET_MASK = (OMP_REQ_REVERSE_OFFLOAD | OMP_REQ_UNIFIED_ADDRESS | OMP_REQ_UNIFIED_SHARED_MEMORY), OMP_REQ_ATOMIC_MEM_ORDER_MASK = (OMP_REQ_ATOMIC_MEM_ORDER_SEQ_CST | OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL - | OMP_REQ_ATOMIC_MEM_ORDER_RELAXED) + | OMP_REQ_ATOMIC_MEM_ORDER_RELAXED + | OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE + | OMP_REQ_ATOMIC_MEM_ORDER_RELEASE) }; enum gfc_omp_memorder @@ -2258,7 +2262,7 @@ typedef struct gfc_namespace unsigned implicit_interface_calls:1; /* OpenMP requires. 
*/ - unsigned omp_requires:6; + unsigned omp_requires:7; unsigned omp_target_seen:1; /* Set to 1 if this is an implicit OMP structured block. */ diff --git a/gcc/fortran/module.cc b/gcc/fortran/module.cc index c07e9dc9ba21..3c07818e2cf9 100644 --- a/gcc/fortran/module.cc +++ b/gcc/fortran/module.cc @@ -2093,6 +2093,7 @@ enum ab_attribute AB_OMP_REQ_REVERSE_OFFLOAD, AB_OMP_REQ_UNIFIED_ADDRESS, AB_OMP_REQ_UNIFIED_SHARED_MEMORY, AB_OMP_REQ_DYNAMIC_ALLOCATORS, AB_OMP_REQ_MEM_ORDER_SEQ_CST, AB_OMP_REQ_MEM_ORDER_ACQ_REL, + AB_OMP_REQ_MEM_ORDER_ACQUIRE, AB_OMP_REQ_MEM_ORDER_RELEASE, AB_OMP_REQ_MEM_ORDER_RELAXED, AB_OMP_DEVICE_TYPE_NOHOST, AB_OMP_DEVICE_TYPE_HOST, AB_OMP_DEVICE_TYPE_ANY }; @@ -2175,7 +2176,9 @@ static const mstring attr_bits[] = minit ("OMP_REQ_DYNAMIC_ALLOCATORS", AB_OMP_REQ_DYNAMIC_ALLOCATORS), minit ("OMP_REQ_MEM_ORDER_SEQ_CST", AB_OMP_REQ_MEM_ORDER_SEQ_CST), minit ("OMP_REQ_MEM_ORDER_ACQ_REL", AB_OMP_REQ_MEM_ORDER_ACQ_REL), + minit ("OMP_REQ_MEM_ORDER_ACQUIRE", AB_OMP_REQ_MEM_ORDER_ACQUIRE), minit ("OMP_REQ_MEM_ORDER_RELAXED", AB_OMP_REQ_MEM_ORDER_RELAXED), + minit ("OMP_REQ_MEM_ORDER_RELEASE", AB_OMP_REQ_MEM_ORDER_RELEASE), minit ("OMP_DEVICE_TYPE_HOST", AB_OMP_DEVICE_TYPE_HOST), minit ("OMP_DEVICE_TYPE_NOHOST", AB_OMP_DEVICE_TYPE_NOHOST), minit ("OMP_DEVICE_TYPE_ANYHOST", AB_OMP_DEVICE_TYPE_ANY), @@ -2442,9 +2445,15 @@ mio_symbol_attribute (symbol_attribute *attr) if ((gfc_current_ns->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_MASK) == OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL) MIO_NAME (ab_attribute) (AB_OMP_REQ_MEM_ORDER_ACQ_REL, attr_bits); + if ((gfc_current_ns->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_MASK) + == OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE) + MIO_NAME (ab_attribute) (AB_OMP_REQ_MEM_ORDER_ACQUIRE, attr_bits); if ((gfc_current_ns->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_MASK) == OMP_REQ_ATOMIC_MEM_ORDER_RELAXED) MIO_NAME (ab_attribute) (AB_OMP_REQ_MEM_ORDER_RELAXED, attr_bits); + if ((gfc_current_ns->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_MASK) + == 
OMP_REQ_ATOMIC_MEM_ORDER_RELEASE) + MIO_NAME (ab_attribute) (AB_OMP_REQ_MEM_ORDER_RELEASE, attr_bits); } switch (attr->omp_device_type) { @@ -2724,11 +2733,21 @@ mio_symbol_attribute (symbol_attribute *attr) "acq_rel", &gfc_current_locus, module_name); break; + case AB_OMP_REQ_MEM_ORDER_ACQUIRE: + gfc_omp_requires_add_clause (OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE, + "acquires", &gfc_current_locus, + module_name); + break; case AB_OMP_REQ_MEM_ORDER_RELAXED: gfc_omp_requires_add_clause (OMP_REQ_ATOMIC_MEM_ORDER_RELAXED, "relaxed", &gfc_current_locus, module_name); break; + case AB_OMP_REQ_MEM_ORDER_RELEASE: + gfc_omp_requires_add_clause (OMP_REQ_ATOMIC_MEM_ORDER_RELEASE, + "release", &gfc_current_locus, + module_name); + break; case AB_OMP_DEVICE_TYPE_HOST: attr->omp_device_type = OMP_DEVICE_TYPE_HOST; break; diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index 251da667236d..8c0e5445ddb8 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -6251,14 +6251,15 @@ gfc_omp_requires_add_clause (gfc_omp_requires_kind clause, != (int) clause) { const char *other; - if (prog_unit->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_SEQ_CST) - other = "seq_cst"; - else if (prog_unit->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL) - other = "acq_rel"; - else if (prog_unit->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_RELAXED) - other = "relaxed"; - else - gcc_unreachable (); + switch (prog_unit->omp_requires & OMP_REQ_ATOMIC_MEM_ORDER_MASK) + { + case OMP_REQ_ATOMIC_MEM_ORDER_SEQ_CST: other = "seq_cst"; break; + case OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL: other = "acq_rel"; break; + case OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE: other = "acquire"; break; + case OMP_REQ_ATOMIC_MEM_ORDER_RELAXED: other = "relaxed"; break; + case OMP_REQ_ATOMIC_MEM_ORDER_RELEASE: other = "release"; break; + default: gcc_unreachable (); + } if (module_name) gfc_error ("!$OMP REQUIRES clause % " @@ -6372,15 +6373,25 @@ gfc_match_omp_requires (void) clause = "acq_rel"; requires_clause = 
OMP_REQ_ATOMIC_MEM_ORDER_ACQ_REL; } + else if (gfc_match (" acquire )") == MATCH_YES) + { + clause = "acquire"; + requires_clause = OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE; + } else if (gfc_match (" relaxed )") == MATCH_YES) { clause = "relaxed"; requires_clause = OMP_REQ_ATOMIC_MEM_ORDER_RELAXED; } + else if (gfc_match (" release )") == MATCH_YES) + { + clause = "release"; + requires_clause = OMP_REQ_ATOMIC_MEM_ORDER_RELEASE; + } else { - gfc_error ("Expected SEQ_CST, ACQ_REL or RELAXED for " - "ATOMIC_DEFAULT_MEM_ORDER clause at %C"); + gfc_error ("Expected ACQ_REL, ACQUIRE, RELAXED, RELEASE or " + "SEQ_CST for ATOMIC_DEFAULT_MEM_ORDER clause at %C"); goto error; } } @@ -6827,6 +6838,28 @@ gfc_match_omp_atomic (void) else c->memorder = OMP_MEMORDER_RELEASE; break; + case OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE: + if (c->atomic_op == GFC_OMP_ATOMIC_WRITE) + { + gfc_error ("!$OMP ATOMIC WRITE at %L incompatible with " + "ACQUIRES clause implicitly provided by a " + "REQUIRES directive", &loc); + c->memorder = OMP_MEMORDER_SEQ_CST; + } + else + c->memorder = OMP_MEMORDER_ACQUIRE; + break; + case OMP_REQ_ATOMIC_MEM_ORDER_RELEASE: + if (c->atomic_op == GFC_OMP_ATOMIC_READ) + { + gfc_error ("!$OMP ATOMIC READ at %L incompatible with " + "RELEASE clause implicitly provided by a " + "REQUIRES directive", &loc); + c->memorder = OMP_MEMORDER_SEQ_CST; + } + else + c->memorder = OMP_MEMORDER_RELEASE; + break; default: gcc_unreachable (); } diff --git a/gcc/fortran/parse.cc b/gcc/fortran/parse.cc index c0eb0575a90f..9b4c39274bea 100644 --- a/gcc/fortran/parse.cc +++ b/gcc/fortran/parse.cc @@ -7274,10 +7274,18 @@ done: omp_requires_mask = (enum omp_requires) (omp_requires_mask | OMP_MEMORY_ORDER_ACQ_REL); break; + case OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE: + omp_requires_mask + = (enum omp_requires) (omp_requires_mask | OMP_MEMORY_ORDER_ACQUIRE); + break; case OMP_REQ_ATOMIC_MEM_ORDER_RELAXED: omp_requires_mask = (enum omp_requires) (omp_requires_mask | OMP_MEMORY_ORDER_RELAXED); break; + case 
OMP_REQ_ATOMIC_MEM_ORDER_RELEASE: + omp_requires_mask + = (enum omp_requires) (omp_requires_mask | OMP_MEMORY_ORDER_RELEASE); + break; } if (omp_target_seen) diff --git a/gcc/testsuite/c-c++-common/gomp/requires-3.c b/gcc/testsuite/c-c++-common/gomp/requires-3.c index bd2479ba8ffa..2fd601acefa4 100644 --- a/gcc/testsuite/c-c++-common/gomp/requires-3.c +++ b/gcc/testsuite/c-c++-common/gomp/requires-3.c @@ -1,6 +1,6 @@ -#pragma omp requires atomic_default_mem_order(acquire) /* { dg-error "expected 'seq_cst', 'relaxed' or 'acq_rel'" } */ -#pragma omp requires atomic_default_mem_order(release) /* { dg-error "expected 'seq_cst', 'relaxed' or 'acq_rel'" } */ -#pragma omp requires atomic_default_mem_order(foobar) /* { dg-error "expected 'seq_cst', 'relaxed' or 'acq_rel'" } */ -#pragma omp requires atomic_default_mem_order ( /* { dg-error "expected 'seq_cst', 'relaxed' or 'acq_rel'" } */ +#pragma omp requires atomic_default_mem_order(foobar) /* { dg-error "expected 'acq_rel', 'acquire', 'relaxed', 'release' or 'seq_cst'" } */ +#pragma omp requires atomic_default_mem_order ( /* { dg-error "expected 'acq_rel', 'acquire', 'relaxed', 'release' or 'seq_cst'" } */ /* { dg-error "expected '\\\)' before end of line" "" { target *-*-* } .-1 } */ #pragma omp requires atomic_default_mem_order(seq_cst), /* { dg-error "expected end of line before ',' token" } */ +/* Valid since since 5.2, but ... 
*/ +#pragma omp requires atomic_default_mem_order(acquire) /* { dg-error "more than one 'atomic_default_mem_order' clause in a single compilation unit" } */ diff --git a/gcc/testsuite/c-c++-common/gomp/requires-5.c b/gcc/testsuite/c-c++-common/gomp/requires-5.c new file mode 100644 index 000000000000..53e0b75f1b70 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/requires-5.c @@ -0,0 +1,23 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +#pragma omp requires atomic_default_mem_order(release) + +int +foo (int x, int y) +{ + int z; + + #pragma omp atomic write + x = y; + + #pragma omp atomic update + x += 1; + + #pragma omp atomic read acquire + z = x; + return z; +} + +/* { dg-final { scan-tree-dump "#pragma omp atomic release" "original" } } */ +/* { dg-final { scan-tree-dump "#pragma omp atomic release" "original" } } */ +/* { dg-final { scan-tree-dump "z = #pragma omp atomic read acquire" "original" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/requires-6.c b/gcc/testsuite/c-c++-common/gomp/requires-6.c new file mode 100644 index 000000000000..4470c8cae1a4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/requires-6.c @@ -0,0 +1,23 @@ +/* { dg-additional-options "-fdump-tree-original" } */ + +#pragma omp requires atomic_default_mem_order(acquire) + +int +bar (int a, int b) +{ + int c; + + #pragma omp atomic write release + a = b; + + #pragma omp atomic update + a += 1; + + #pragma omp atomic read + c = a; + return c; +} + +/* { dg-final { scan-tree-dump "#pragma omp atomic release" "original" } } */ +/* { dg-final { scan-tree-dump "#pragma omp atomic acquire" "original" } } */ +/* { dg-final { scan-tree-dump "c = #pragma omp atomic read acquire" "original" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/requires-7.c b/gcc/testsuite/c-c++-common/gomp/requires-7.c new file mode 100644 index 000000000000..4735ef2a6e00 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/requires-7.c @@ -0,0 +1,11 @@ +#pragma omp requires 
atomic_default_mem_order(release) + +int +foo (int x) +{ + int z; + + #pragma omp atomic read /* { dg-error "'#pragma omp atomic read' incompatible with 'release' clause implicitly provided by a 'requires' directive" } */ + z = x; + return z; +} diff --git a/gcc/testsuite/c-c++-common/gomp/requires-8.c b/gcc/testsuite/c-c++-common/gomp/requires-8.c new file mode 100644 index 000000000000..4d56e7d33e96 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/requires-8.c @@ -0,0 +1,14 @@ +#pragma omp requires atomic_default_mem_order(acquire) + +int +bar (int a, int b) +{ + int c; + + #pragma omp atomic write /* { dg-error "'#pragma omp atomic write' incompatible with 'acquire' clause implicitly provided by a 'requires' directive" } */ + a = b; + + #pragma omp atomic read + c = a; + return c; +} diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-10.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-10.f90 new file mode 100644 index 000000000000..e912e3e867f9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/requires-10.f90 @@ -0,0 +1,36 @@ +! { dg-additional-options "-fdump-tree-original" } + +function foo (x, y) result (z) + !$omp requires atomic_default_mem_order(release) + implicit none + real :: x, y, z + + !$omp atomic write + x = y + + !$omp atomic update + x = x + 1 + + !$omp atomic read acquire + z = x +end + +function bar (a, b) result (c) + !$omp requires atomic_default_mem_order(acquire) + implicit none + real :: a, b, c + + !$omp atomic write release + a = b + + !$omp atomic update + a = a + 1 + + !$omp atomic read + c = a +end + +! { dg-final { scan-tree-dump-times "#pragma omp atomic release" 3 "original" } } */ +! { dg-final { scan-tree-dump-times "#pragma omp atomic acquire" 1 "original" } } */ +! { dg-final { scan-tree-dump-times "z = #pragma omp atomic read acquire" 1 "original" } } */ +! 
{ dg-final { scan-tree-dump-times "c = #pragma omp atomic read acquire" 1 "original" } } */ diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-11.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-11.f90 new file mode 100644 index 000000000000..c55009d5d263 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/requires-11.f90 @@ -0,0 +1,31 @@ +function foo (x, y) result (z) + !$omp requires atomic_default_mem_order(release) + implicit none + real :: x, y, z + + !$omp atomic write + x = y + + !$omp atomic update + x = x + 1 + + !$omp atomic read ! { dg-error "!.OMP ATOMIC READ at .1. incompatible with RELEASE clause implicitly provided by a REQUIRES directive" } + z = x +end + +function bar (a, b) result (c) + !$omp requires atomic_default_mem_order(acquire) + implicit none + real :: a, b, c + + !$omp atomic write ! { dg-error "!.OMP ATOMIC WRITE at .1. incompatible with ACQUIRES clause implicitly provided by a REQUIRES directive" } + a = b + + !$omp atomic update + a = a + 1 + + !$omp atomic read + c = a +end + + diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 index 7b63d4a8b3bd..5f11a7bfb2ad 100644 --- a/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/requires-2.f90 @@ -8,7 +8,7 @@ !$omp requires atomic_default_mem_order (seq_cst) !$omp requires atomic_default_mem_order (seq_cst) !$omp requires atomic_default_mem_order (acq_rel) ! { dg-error "overrides a previous 'atomic_default_mem_order\\(seq_cst\\)'" } -!$omp requires atomic_default_mem_order (foo) ! { dg-error "Expected SEQ_CST, ACQ_REL or RELAXED for ATOMIC_DEFAULT_MEM_ORDER clause" } +!$omp requires atomic_default_mem_order (foo) ! { dg-error "Expected ACQ_REL, ACQUIRE, RELAXED, RELEASE or SEQ_CST for ATOMIC_DEFAULT_MEM_ORDER clause" } end ! 
{ dg-prune-output "not yet supported" } diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-3.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-3.f90 index 4429aab2ee62..8c9d6ed3b210 100644 --- a/gcc/testsuite/gfortran.dg/gomp/requires-3.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/requires-3.f90 @@ -1,4 +1,5 @@ -!$omp requires atomic_default_mem_order(acquire) ! { dg-error "Expected SEQ_CST, ACQ_REL or RELAXED for ATOMIC_DEFAULT_MEM_ORDER clause" } -!$omp requires atomic_default_mem_order(release) ! { dg-error "Expected SEQ_CST, ACQ_REL or RELAXED for ATOMIC_DEFAULT_MEM_ORDER clause" } -!$omp requires atomic_default_mem_order(foobar) ! { dg-error "Expected SEQ_CST, ACQ_REL or RELAXED for ATOMIC_DEFAULT_MEM_ORDER clause" } +!$omp requires atomic_default_mem_order(foobar) ! { dg-error "Expected ACQ_REL, ACQUIRE, RELAXED, RELEASE or SEQ_CST for ATOMIC_DEFAULT_MEM_ORDER clause" } + +!$omp requires atomic_default_mem_order(acquire) ! OK since OpenMP 5.2 +!$omp requires atomic_default_mem_order(release) ! { dg-error "!.OMP REQUIRES clause 'atomic_default_mem_order\\(release\\)' specified at .1. overrides a previous 'atomic_default_mem_order\\(acquire\\)' \\(which might be through using a module\\)" } end diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-5.f90 b/gcc/testsuite/gfortran.dg/gomp/requires-5.f90 index ade2a3613c66..e719e929294c 100644 --- a/gcc/testsuite/gfortran.dg/gomp/requires-5.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/requires-5.f90 @@ -8,7 +8,7 @@ subroutine foo !$omp requires unified_shared_memory !$omp requires atomic_default_mem_order(relaxed) !$omp requires atomic_default_mem_order(relaxed) -!$omp requires atomic_default_mem_order(seq_cst) ! { dg-error "overrides a previous 'atomic_default_mem_order\\(seq_cst\\)'" } +!$omp requires atomic_default_mem_order(seq_cst) ! 
{ dg-error "overrides a previous 'atomic_default_mem_order\\(relaxed\\)'" } !$omp target !$omp end target end From b1474da1bb9cc829cbf597f9a51393795108419f Mon Sep 17 00:00:00 2001 From: Robin Dapp Date: Mon, 11 Dec 2023 14:16:04 +0100 Subject: [PATCH 187/311] RISC-V: testsuite: Fix strcmp-run.c test. This fixes expectations in the strcmp-run test which would sometimes fail with newlib. The test expects libc strcmp return values and asserts the vectorized result is similar to those. Therefore hard-code the expected results instead of relying on a strcmp call. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: Adjust test expectation and target selector. * gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: Adjust target selector. * gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: Ditto. --- .../riscv/rvv/autovec/builtin/strcmp-run.c | 25 ++++++++++--------- .../riscv/rvv/autovec/builtin/strlen-run.c | 2 +- .../riscv/rvv/autovec/builtin/strncmp-run.c | 2 +- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c index 6dec7da91c1b..43c9e0f42edc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c @@ -1,8 +1,6 @@ -/* { dg-do run } */ +/* { dg-do run { target { riscv_v } } } */ /* { dg-additional-options "-O3 -minline-strcmp" } */ -#include - int __attribute__ ((noipa)) foo (const char *s, const char *t) @@ -10,23 +8,26 @@ foo (const char *s, const char *t) return __builtin_strcmp (s, t); } -int -__attribute__ ((noipa, optimize ("0"))) -foo2 (const char *s, const char *t) -{ - return strcmp (s, t); -} - #define SZ 10 -int main () +int +main () { const char *s[SZ] = {"", "asdf", "0", "\0", "!@#$%***m1123fdnmoi43", "a", "z", "1", "9", "12345678901234567889012345678901234567890"}; + const int ref[SZ * SZ] + = {0, 
-97, -48, 0, -33, -97, -122, -49, -57, -49, 97, 0, 49, 97, 64, + 115, -25, 48, 40, 48, 48, -49, 0, 48, 15, -49, -74, -1, -9, -1, + 0, -97, -48, 0, -33, -97, -122, -49, -57, -49, 33, -64, -15, 33, 0, + -64, -89, -16, -24, -16, 97, -115, 49, 97, 64, 0, -25, 48, 40, 48, + 122, 25, 74, 122, 89, 25, 0, 73, 65, 73, 49, -48, 1, 49, 16, + -48, -73, 0, -8, -50, 57, -40, 9, 57, 24, -40, -65, 8, 0, 8, + 49, -48, 1, 49, 16, -48, -73, 50, -8, 0}; + for (int i = 0; i < SZ; i++) for (int j = 0; j < SZ; j++) - if (foo (s[i], s[j]) != foo2 (s[i], s[j])) + if (foo (s[i], s[j]) != ref [i * SZ + j]) __builtin_abort (); } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c index d29297a5f86c..deeb8401ef11 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target { riscv_v } } } */ /* { dg-additional-options "-O3 -minline-strlen" } */ int diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c index 8d1471a3a135..b7cd94254d9d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c @@ -1,4 +1,4 @@ -/* { dg-do run } */ +/* { dg-do run { target { riscv_v } } } */ /* { dg-additional-options "-O3 -minline-strcmp" } */ #include From f5aa23f7f633313039c840ab36695a38efbb1a99 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Mon, 11 Dec 2023 09:48:04 -0500 Subject: [PATCH 188/311] c++: alias CTAD and specializations table A rewritten guide for alias CTAD isn't really a specialization of the original guide, so we shouldn't register it as such. 
This avoids an ICE in the below modules testcase for which we otherwise crash due to the guide's empty DECL_CONTEXT when walking the specializations table. It also preemptively avoids the same ICE in modules/concept-6 in C++23 mode with the inherited CTAD patch. gcc/cp/ChangeLog: * pt.cc (alias_ctad_tweaks): Pass use_spec_table=false to tsubst_decl. gcc/testsuite/ChangeLog: * g++.dg/modules/concept-8.h: New test. * g++.dg/modules/concept-8_a.H: New test. * g++.dg/modules/concept-8_b.C: New test. --- gcc/cp/pt.cc | 3 ++- gcc/testsuite/g++.dg/modules/concept-8.h | 14 ++++++++++++++ gcc/testsuite/g++.dg/modules/concept-8_a.H | 5 +++++ gcc/testsuite/g++.dg/modules/concept-8_b.C | 8 ++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/modules/concept-8.h create mode 100644 gcc/testsuite/g++.dg/modules/concept-8_a.H create mode 100644 gcc/testsuite/g++.dg/modules/concept-8_b.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index e9dcdb32c6cf..208fa21032e3 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -30020,7 +30020,8 @@ alias_ctad_tweaks (tree tmpl, tree uguides) /* Parms are to have DECL_CHAIN tsubsted, which would be skipped if cp_unevaluated_operand. 
*/ cp_evaluated ev; - g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, complain); + g = tsubst_decl (DECL_TEMPLATE_RESULT (f), targs, complain, + /*use_spec_table=*/false); } if (g == error_mark_node) continue; diff --git a/gcc/testsuite/g++.dg/modules/concept-8.h b/gcc/testsuite/g++.dg/modules/concept-8.h new file mode 100644 index 000000000000..a25f9b752fd4 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/concept-8.h @@ -0,0 +1,14 @@ +// A version of concept-6.h using an alias template + alias CTAD + +template +struct Base +{ + Base (const _Callable &) + requires true + {} +}; + +template requires true +using Derived = Base<_Callable>; + +inline Derived all = [] (auto&& __r) {}; diff --git a/gcc/testsuite/g++.dg/modules/concept-8_a.H b/gcc/testsuite/g++.dg/modules/concept-8_a.H new file mode 100644 index 000000000000..da0467781c1c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/concept-8_a.H @@ -0,0 +1,5 @@ +// { dg-require-effective-target c++20 } +// { dg-additional-options "-fmodule-header -fconcepts" } +// { dg-module-cmi {} } + +#include "concept-8.h" diff --git a/gcc/testsuite/g++.dg/modules/concept-8_b.C b/gcc/testsuite/g++.dg/modules/concept-8_b.C new file mode 100644 index 000000000000..9a9f014ee096 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/concept-8_b.C @@ -0,0 +1,8 @@ +// { dg-require-effective-target c++20 } +// { dg-additional-options "-fmodules-ts -fconcepts -fdump-lang-module-alias -fno-module-lazy" } + +#include "concept-8.h" +import "concept-8_a.H"; + +// { dg-final { scan-lang-dump-times {named merge key \(matched\) function_decl:'::Base<::._anon_0>::__ct '} 2 module } } +// { dg-final { scan-lang-dump-not {merge key \(new\)} module } } From f5fc001a84a7dbb942a6252b3162dd38b4aae311 Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Mon, 11 Dec 2023 14:24:41 +0000 Subject: [PATCH 189/311] aarch64: enable mixed-types for aarch64 simdclones This patch enables the use of mixed-types for simd clones for AArch64, adds aarch64 as a 
target_vect_simd_clones and corrects the way the simdlen is chosen for non-specified simdlen clauses according to the 'Vector Function Application Binary Interface Specification for AArch64'. Additionally this patch also restricts combinations of simdlen and return/argument types that map to vectors larger than 128 bits as we currently do not have a way to represent these types in a way that is consistent internally and externally. gcc/ChangeLog: * config/aarch64/aarch64.cc (lane_size): New function. (aarch64_simd_clone_compute_vecsize_and_simdlen): Determine simdlen according to NDS rule and reject combination of simdlen and types that lead to vectors larger than 128bits. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Add aarch64 targets to vect_simd_clones. * c-c++-common/gomp/declare-variant-14.c: Adapt test for aarch64. * c-c++-common/gomp/pr60823-1.c: Likewise. * c-c++-common/gomp/pr60823-2.c: Likewise. * c-c++-common/gomp/pr60823-3.c: Likewise. * g++.dg/gomp/attrs-10.C: Likewise. * g++.dg/gomp/declare-simd-1.C: Likewise. * g++.dg/gomp/declare-simd-3.C: Likewise. * g++.dg/gomp/declare-simd-4.C: Likewise. * g++.dg/gomp/declare-simd-7.C: Likewise. * g++.dg/gomp/declare-simd-8.C: Likewise. * g++.dg/gomp/pr88182.C: Likewise. * gcc.dg/declare-simd.c: Likewise. * gcc.dg/gomp/declare-simd-1.c: Likewise. * gcc.dg/gomp/declare-simd-3.c: Likewise. * gcc.dg/gomp/pr87887-1.c: Likewise. * gcc.dg/gomp/pr87895-1.c: Likewise. * gcc.dg/gomp/pr89246-1.c: Likewise. * gcc.dg/gomp/pr99542.c: Likewise. * gcc.dg/gomp/simd-clones-2.c: Likewise. * gcc.dg/vect/vect-simd-clone-1.c: Likewise. * gcc.dg/vect/vect-simd-clone-2.c: Likewise. * gcc.dg/vect/vect-simd-clone-4.c: Likewise. * gcc.dg/vect/vect-simd-clone-5.c: Likewise. * gcc.dg/vect/vect-simd-clone-6.c: Likewise. * gcc.dg/vect/vect-simd-clone-7.c: Likewise. * gcc.dg/vect/vect-simd-clone-8.c: Likewise. * gfortran.dg/gomp/declare-simd-2.f90: Likewise. * gfortran.dg/gomp/declare-simd-coarray-lib.f90: Likewise. 
* gfortran.dg/gomp/declare-variant-14.f90: Likewise. * gfortran.dg/gomp/pr79154-1.f90: Likewise. * gfortran.dg/gomp/pr83977.f90: Likewise. libgomp/ChangeLog: * testsuite/libgomp.c/declare-variant-1.c: Adapt test for aarch64. * testsuite/libgomp.fortran/declare-simd-1.f90: Likewise. --- gcc/config/aarch64/aarch64.cc | 180 +++++++++++++----- .../c-c++-common/gomp/declare-variant-14.c | 8 +- gcc/testsuite/c-c++-common/gomp/pr60823-1.c | 5 +- gcc/testsuite/c-c++-common/gomp/pr60823-2.c | 4 + gcc/testsuite/c-c++-common/gomp/pr60823-3.c | 6 +- gcc/testsuite/g++.dg/gomp/attrs-10.C | 65 ++++++- gcc/testsuite/g++.dg/gomp/declare-simd-1.C | 137 ++++++++++++- gcc/testsuite/g++.dg/gomp/declare-simd-3.C | 24 ++- gcc/testsuite/g++.dg/gomp/declare-simd-4.C | 19 +- gcc/testsuite/g++.dg/gomp/declare-simd-7.C | 4 - gcc/testsuite/g++.dg/gomp/declare-simd-8.C | 1 - gcc/testsuite/g++.dg/gomp/pr88182.C | 5 +- gcc/testsuite/gcc.dg/declare-simd.c | 1 - gcc/testsuite/gcc.dg/gomp/declare-simd-1.c | 89 ++++++++- gcc/testsuite/gcc.dg/gomp/declare-simd-3.c | 10 +- gcc/testsuite/gcc.dg/gomp/pr87887-1.c | 2 + gcc/testsuite/gcc.dg/gomp/pr87895-1.c | 1 - gcc/testsuite/gcc.dg/gomp/pr89246-1.c | 1 + gcc/testsuite/gcc.dg/gomp/pr99542.c | 4 +- gcc/testsuite/gcc.dg/gomp/simd-clones-2.c | 8 +- gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c | 5 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c | 4 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c | 4 + gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c | 20 +- gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c | 20 +- gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c | 8 + .../gcc.target/aarch64/declare-simd-1.c | 42 ++++ .../gcc.target/aarch64/declare-simd-2.c | 60 ++++++ .../gfortran.dg/gomp/declare-simd-2.f90 | 4 +- .../gomp/declare-simd-coarray-lib.f90 | 2 +- .../gfortran.dg/gomp/declare-variant-14.f90 | 8 +- gcc/testsuite/gfortran.dg/gomp/pr79154-1.f90 | 4 +- gcc/testsuite/gfortran.dg/gomp/pr83977.f90 | 2 +- 
gcc/testsuite/lib/target-supports.exp | 3 +- .../testsuite/libgomp.c/declare-variant-1.c | 6 +- .../libgomp.fortran/declare-simd-1.f90 | 10 +- 37 files changed, 662 insertions(+), 119 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/declare-simd-1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/declare-simd-2.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 0889ceb7db17..a58b02d8421e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -27552,33 +27552,61 @@ supported_simd_type (tree t) return false; } -/* Return true for types that currently are supported as SIMD return - or argument types. */ +/* Determine the lane size for the clone argument/return type. This follows + the LS(P) rule in the VFABIA64. */ -static bool -currently_supported_simd_type (tree t, tree b) +static unsigned +lane_size (cgraph_simd_clone_arg_type clone_arg_type, tree type) { - if (COMPLEX_FLOAT_TYPE_P (t)) - return false; + gcc_assert (clone_arg_type != SIMD_CLONE_ARG_TYPE_MASK); - if (TYPE_SIZE (t) != TYPE_SIZE (b)) - return false; + /* For non map-to-vector types that are pointers we use the element type it + points to. */ + if (POINTER_TYPE_P (type)) + switch (clone_arg_type) + { + default: + break; + case SIMD_CLONE_ARG_TYPE_UNIFORM: + case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP: + case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP: + type = TREE_TYPE (type); + break; + } - return supported_simd_type (t); + /* For types (or pointers of non map-to-vector types point to) that are + integers or floating point, we use their size if they are 1, 2, 4 or 8. + */ + if (INTEGRAL_TYPE_P (type) + || SCALAR_FLOAT_TYPE_P (type)) + switch (TYPE_PRECISION (type) / BITS_PER_UNIT) + { + default: + break; + case 1: + case 2: + case 4: + case 8: + return TYPE_PRECISION (type); + } + /* For any other we use the size of uintptr_t. 
For map-to-vector types that + are pointers, using the size of uintptr_t is the same as using the size of + their type, seeing all pointers are the same size as uintptr_t. */ + return POINTER_SIZE; } + /* Implement TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN. */ static int aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, struct cgraph_simd_clone *clonei, - tree base_type, int num, - bool explicit_p) + tree base_type ATTRIBUTE_UNUSED, + int num, bool explicit_p) { tree t, ret_type; - unsigned int elt_bits, count; + unsigned int nds_elt_bits; unsigned HOST_WIDE_INT const_simdlen; - poly_uint64 vec_bits; if (!TARGET_SIMD) return 0; @@ -27598,80 +27626,132 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, } ret_type = TREE_TYPE (TREE_TYPE (node->decl)); + /* According to AArch64's Vector ABI the type that determines the simdlen is + the narrowest of types, so we ignore base_type for AArch64. */ if (TREE_CODE (ret_type) != VOID_TYPE - && !currently_supported_simd_type (ret_type, base_type)) + && !supported_simd_type (ret_type)) { if (!explicit_p) ; - else if (TYPE_SIZE (ret_type) != TYPE_SIZE (base_type)) - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support mixed size types " - "for % functions"); - else if (supported_simd_type (ret_type)) + else if (COMPLEX_FLOAT_TYPE_P (ret_type)) warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "GCC does not currently support return type %qT " - "for % functions", ret_type); + "for simd", ret_type); else warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "unsupported return type %qT for % functions", + "unsupported return type %qT for simd", ret_type); return 0; } + auto_vec> vec_elts (clonei->nargs + 1); + + /* We are looking for the NDS type here according to the VFABIA64. 
*/ + if (TREE_CODE (ret_type) != VOID_TYPE) + { + nds_elt_bits = lane_size (SIMD_CLONE_ARG_TYPE_VECTOR, ret_type); + vec_elts.safe_push (std::make_pair (ret_type, nds_elt_bits)); + } + else + nds_elt_bits = POINTER_SIZE; + int i; tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl)); bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE); - for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0; t && t != void_list_node; t = TREE_CHAIN (t), i++) { tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t); - if (clonei->args[i].arg_type != SIMD_CLONE_ARG_TYPE_UNIFORM - && !currently_supported_simd_type (arg_type, base_type)) + && !supported_simd_type (arg_type)) { if (!explicit_p) ; - else if (TYPE_SIZE (arg_type) != TYPE_SIZE (base_type)) - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support mixed size types " - "for % functions"); - else + else if (COMPLEX_FLOAT_TYPE_P (ret_type)) warning_at (DECL_SOURCE_LOCATION (node->decl), 0, "GCC does not currently support argument type %qT " - "for % functions", arg_type); + "for simd", arg_type); + else + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "unsupported argument type %qT for simd", + arg_type); return 0; } + unsigned lane_bits = lane_size (clonei->args[i].arg_type, arg_type); + if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + vec_elts.safe_push (std::make_pair (arg_type, lane_bits)); + if (nds_elt_bits > lane_bits) + nds_elt_bits = lane_bits; } clonei->vecsize_mangle = 'n'; clonei->mask_mode = VOIDmode; - elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + poly_uint64 simdlen; + auto_vec simdlens (2); + /* Keep track of the possible simdlens the clones of this function can have, + and check them later to see if we support them. */ if (known_eq (clonei->simdlen, 0U)) { - count = 2; - vec_bits = (num == 0 ? 
64 : 128); - clonei->simdlen = exact_div (vec_bits, elt_bits); + simdlen = exact_div (poly_uint64 (64), nds_elt_bits); + simdlens.safe_push (simdlen); + simdlens.safe_push (simdlen * 2); } else + simdlens.safe_push (clonei->simdlen); + + clonei->vecsize_int = 0; + clonei->vecsize_float = 0; + + /* We currently do not support generating simdclones where vector arguments + do not fit into a single vector register, i.e. vector types that are more + than 128-bits large. This is because of how we currently represent such + types in ACLE, where we use a struct to allow us to pass them as arguments + and return. + Hence why we have to check whether the simdlens available for this + simdclone would cause a vector type to be larger than 128-bits, and reject + such a clone. */ + unsigned j = 0; + while (j < simdlens.length ()) { - count = 1; - vec_bits = clonei->simdlen * elt_bits; - /* For now, SVE simdclones won't produce illegal simdlen, So only check - const simdlens here. */ - if (clonei->simdlen.is_constant (&const_simdlen) - && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U)) - { - if (explicit_p) - warning_at (DECL_SOURCE_LOCATION (node->decl), 0, - "GCC does not currently support simdlen %wd for " - "type %qT", - const_simdlen, base_type); - return 0; - } + bool remove_simdlen = false; + for (auto elt : vec_elts) + if (known_gt (simdlens[j] * elt.second, 128U)) + { + /* Don't issue a warning for every simdclone when there is no + specific simdlen clause. 
*/ + if (explicit_p && maybe_ne (clonei->simdlen, 0U)) + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support simdlen %wd for " + "type %qT", + constant_lower_bound (simdlens[j]), elt.first); + remove_simdlen = true; + break; + } + if (remove_simdlen) + simdlens.ordered_remove (j); + else + j++; } - clonei->vecsize_int = vec_bits; - clonei->vecsize_float = vec_bits; + + + int count = simdlens.length (); + if (count == 0) + { + if (explicit_p && known_eq (clonei->simdlen, 0U)) + { + /* Warn the user if we can't generate any simdclone. */ + simdlen = exact_div (poly_uint64 (64), nds_elt_bits); + warning_at (DECL_SOURCE_LOCATION (node->decl), 0, + "GCC does not currently support a simdclone with simdlens" + " %wd and %wd for these types.", + constant_lower_bound (simdlen), + constant_lower_bound (simdlen*2)); + } + return 0; + } + + gcc_assert (num < count); + clonei->simdlen = simdlens[num]; return count; } diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c index cdb0bb34f505..e3668893afe3 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-14.c @@ -15,13 +15,15 @@ int test1 (int x) { /* At gimplification time, we can't decide yet which function to call. */ - /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */ + /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" { target { !aarch64*-*-* } } } } */ /* After simd clones are created, the original non-clone test1 shall call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones shall call f01 with score 8. 
*/ /* { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } } */ - /* { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" } } */ - /* { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" } } */ + /* { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target { !aarch64*-*-* } } } } */ + /* { dg-final { scan-tree-dump-times "f03 \\\(x" 10 "optimized" { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { !aarch64*-*-* } } } } */ + /* { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { aarch64*-*-* } } } } */ int a = f04 (x); int b = f04 (x); return a + b; diff --git a/gcc/testsuite/c-c++-common/gomp/pr60823-1.c b/gcc/testsuite/c-c++-common/gomp/pr60823-1.c index 2cc44e82c3cf..9408c0f5bba6 100644 --- a/gcc/testsuite/c-c++-common/gomp/pr60823-1.c +++ b/gcc/testsuite/c-c++-common/gomp/pr60823-1.c @@ -2,7 +2,11 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fopenmp-simd" } */ +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(4) notinbranch +#endif int foo (const double c1, const double c2) { @@ -17,4 +21,3 @@ foo (const double c1, const double c2) } return res; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-13 } */ diff --git a/gcc/testsuite/c-c++-common/gomp/pr60823-2.c b/gcc/testsuite/c-c++-common/gomp/pr60823-2.c index 4c87620076a0..e402b5ab06b3 100644 --- a/gcc/testsuite/c-c++-common/gomp/pr60823-2.c +++ b/gcc/testsuite/c-c++-common/gomp/pr60823-2.c @@ -3,7 +3,11 @@ /* { dg-require-effective-target vect_simd_clones } */ /* { dg-options "-O2 -fopenmp-simd" } */ +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(4) notinbranch +#endif __attribute__((noinline)) int foo (double c1, double c2) { diff --git a/gcc/testsuite/c-c++-common/gomp/pr60823-3.c 
b/gcc/testsuite/c-c++-common/gomp/pr60823-3.c index 56ad50c41f6e..44e02b6a2c06 100644 --- a/gcc/testsuite/c-c++-common/gomp/pr60823-3.c +++ b/gcc/testsuite/c-c++-common/gomp/pr60823-3.c @@ -9,8 +9,11 @@ void bar (char *, double *); struct S { char c[sizeof (double)]; }; void baz (struct S, struct S); union U { struct S s; double d; }; - +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(4) notinbranch +#endif __attribute__((noinline)) int foo (double c1, double c2) { @@ -28,6 +31,5 @@ foo (double c1, double c2) baz (*(struct S *)&c1, *(struct S *)&c2); return c1 + c2 + ((struct S *)&c1)->c[1]; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-16 } */ #endif diff --git a/gcc/testsuite/g++.dg/gomp/attrs-10.C b/gcc/testsuite/g++.dg/gomp/attrs-10.C index ed0214a8de23..7a968ba8cc24 100644 --- a/gcc/testsuite/g++.dg/gomp/attrs-10.C +++ b/gcc/testsuite/g++.dg/gomp/attrs-10.C @@ -5,7 +5,11 @@ extern "C" void abort (); [[omp::directive (declare simd, linear (l))]] extern int f1 (int l); extern int f2 (int), f3 [[omp::directive (declare simd, uniform (m))]] (int m), f4 (int), z; +#ifdef __aarch64__ +[[omp::directive (declare simd, linear (l), simdlen(4))]] extern int f5 [[omp::directive (declare simd uniform (l) simdlen (2) notinbranch)]] (int l); +#else [[omp::directive (declare simd, linear (l), simdlen(4))]] extern int f5 [[omp::directive (declare simd uniform (l) simdlen (8) notinbranch)]] (int l); +#endif int f1 (int l) @@ -13,6 +17,11 @@ f1 (int l) return l; } +// { dg-final { scan-assembler-times "_ZGVnN2l__Z2f1i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2l__Z2f1i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4l__Z2f1i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4l__Z2f1i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { 
scan-assembler-times "_ZGVbM4l__Z2f1i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4l__Z2f1i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4l__Z2f1i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -28,7 +37,7 @@ f2 (int l) return l + 1; } -// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f2i:" { target { i?86-*-* x86_64-*-* } } } } +// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f2i:" { target { i?86-*-* x86_64-*-* aarch64*-*-* } } } } int f3 (int l) @@ -36,6 +45,11 @@ f3 (int l) return l + 2; } +// { dg-final { scan-assembler-times "_ZGVnN2u__Z2f3i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2u__Z2f3i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4u__Z2f3i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4u__Z2f3i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4u__Z2f3i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4u__Z2f3i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4u__Z2f3i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -51,14 +65,18 @@ f4 (int l) return l + 3; } -// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f4i:" { target { i?86-*-* x86_64-*-* } } } } +// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f4i:" { target { i?86-*-* x86_64-*-* aarch64*-*-* } } } } int f5 (int l) -{ // { dg-warning "GCC does not currently support simdlen 8 for type 'int'" "" { target aarch64*-*-* } .-1 } +{ return l + 4; } +// { dg-final { scan-assembler-times "_ZGVnN4l__Z2f5i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4l__Z2f5i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN2u__Z2f5i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4l__Z2f5i:" 1 { 
target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4l__Z2f5i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4l__Z2f5i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -76,12 +94,21 @@ f5 (int l) [[omp::directive (declare simd, linear (l), simdlen(4), notinbranch), omp::directive (declare simd, uniform (l), simdlen(4), inbranch)]] int +#ifdef __aarch64__ +f6 [[omp::sequence (directive (declare simd uniform (l) simdlen (2), notinbranch), + omp::directive (declare simd linear (l) simdlen (2) inbranch))]] (int l) +#else f6 [[omp::sequence (directive (declare simd uniform (l) simdlen (8), notinbranch), omp::directive (declare simd linear (l) simdlen (8) inbranch))]] (int l) -{ // { dg-warning "GCC does not currently support simdlen 8 for type 'int'" "" { target aarch64*-*-* } .-2 } +#endif +{ return l + 5; } +// { dg-final { scan-assembler-times "_ZGVnN4l__Z2f6i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4u__Z2f6i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN2u__Z2f6i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4u__Z2f6i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4l__Z2f6i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbM8l__Z2f6i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -109,7 +136,7 @@ f7 (int l) return l + 6; } -// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f7i:" { target { i?86-*-* x86_64-*-* } } } } +// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f7i:" { target { i?86-*-* x86_64-*-* aarch64*-*-* } } } } int f8 (int l) @@ -117,17 +144,26 @@ f8 (int l) return l + 7; } -// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f8i:" { target { i?86-*-* x86_64-*-* } } } } +// { dg-final { scan-assembler-not "_ZGV\[a-zA-Z0-9]__Z2f8i:" { target { i?86-*-* x86_64-*-* aarch64*-*-* } } } } 
[[omp::sequence (omp::directive (declare variant (f7), match (construct={parallel})), directive (declare simd uniform (l), simdlen(4)))]] int +#ifdef __aarch64__ +f9 [[omp::directive (declare simd uniform (l) simdlen (2)), +#else f9 [[omp::directive (declare simd uniform (l) simdlen (8)), +#endif omp::directive (declare variant (f8) match (construct={parallel,for}))]] (int l) -{ // { dg-warning "GCC does not currently support simdlen 8 for type 'int'" "" { target aarch64*-*-* } .-2 } +{ return l + 8; } +// { dg-final { scan-assembler-times "_ZGVnN2u__Z2f9i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2u__Z2f9i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4u__Z2f9i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4u__Z2f9i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4u__Z2f9i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4u__Z2f9i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4u__Z2f9i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -174,6 +210,9 @@ f10 (int x) template [[omp::directive (declare simd, notinbranch)]] int f10<0> (int); +// { dg-final { scan-assembler-times "_ZGVnN2v__Z3f10ILi0EEii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4v__Z3f10ILi0EEii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbN4v__Z3f10ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcN4v__Z3f10ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdN8v__Z3f10ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -181,6 +220,9 @@ template [[omp::directive (declare simd, notinbranch)]] int f10<0> (int); template int f10<1> [[omp::directive (declare simd inbranch linear(x))]] (int x); +// { dg-final { scan-assembler-times 
"_ZGVnM2l__Z3f10ILi1EEii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4l__Z3f10ILi1EEii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4l__Z3f10ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4l__Z3f10ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdM8l__Z3f10ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -195,6 +237,9 @@ f11<0> (int x) return x; } +// { dg-final { scan-assembler-times "_ZGVnM2v__Z3f11ILi0EEii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4v__Z3f11ILi0EEii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4v__Z3f11ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4v__Z3f11ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdM8v__Z3f11ILi0EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -206,6 +251,9 @@ f11<1> [[omp::directive (declare simd, notinbranch, linear (y))]] (int y) return y; } +// { dg-final { scan-assembler-times "_ZGVnN2l__Z3f11ILi1EEii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4l__Z3f11ILi1EEii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbN4l__Z3f11ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcN4l__Z3f11ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdN8l__Z3f11ILi1EEii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -223,6 +271,9 @@ S::f12 (int x) return x; } +// { dg-final { scan-assembler-times "_ZGVnM2uv__ZN1S3f12Ei:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2uv__ZN1S3f12Ei:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4uv__ZN1S3f12Ei:" 1 { target { i?86-*-* x86_64-*-* } 
} } } // { dg-final { scan-assembler-times "_ZGVcM4uv__ZN1S3f12Ei:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdM8uv__ZN1S3f12Ei:" 1 { target { i?86-*-* x86_64-*-* } } } } diff --git a/gcc/testsuite/g++.dg/gomp/declare-simd-1.C b/gcc/testsuite/g++.dg/gomp/declare-simd-1.C index 00996b60e527..83255d9764a9 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-1.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-1.C @@ -2,19 +2,30 @@ // { dg-do compile } // { dg-options "-fopenmp -ffat-lto-objects" } +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) \ + linear (c : 4) simdlen (2) notinbranch +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) \ linear (c : 4) simdlen (8) notinbranch #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a \ : 4) simdlen (4) inbranch +#endif int f1 (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f2 (int a, int *b, int c) { return a + *b + c; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN2uva8l4__Z2f2iPii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2uva8l4__Z2f2iPii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM8uva32l4__Z2f2iPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN8uva32l4__Z2f2iPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM8uva32l4__Z2f2iPii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -24,14 +35,22 @@ int f2 (int a, int *b, int c) // { dg-final { scan-assembler-times "_ZGVeM8uva32l4__Z2f2iPii:" 1 { target { i?86-*-* x86_64-*-* } } 
} } // { dg-final { scan-assembler-times "_ZGVeN8uva32l4__Z2f2iPii:" 1 { target { i?86-*-* x86_64-*-* } } } } +#ifdef __aarch64__ +#pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif template T f3 (int a, int *b, T c); template <> int f3 (int, int *, int); +#ifdef __aarch64__ +#pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) notinbranch simdlen (2) +#else #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) notinbranch simdlen (4) +#endif template int f4 (int a, int *b, T c) { @@ -44,22 +63,38 @@ int f4 (int, int *, int); template int f5 (int a, int *b, T c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif template <> int f5 (int a, int *b, int c); template int f6 (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) inbranch simdlen (2) +#else #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) inbranch simdlen (4) +#endif template <> int f6<3> (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (long long)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (long long)) linear (c : 4) simdlen (8) +#endif __extension__ long long f7 (long long a, long long *b, long long c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) notinbranch simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) notinbranch simdlen (8) +#endif extern "C" int f8 (int a, int *b, int c); @@ -82,6 +117,9 @@ 
namespace N1 } } +// { dg-final { scan-assembler-times "_ZGVnN2va16__ZN2N12N23f10EPx:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2va16__ZN2N12N23f10EPx:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM2va16__ZN2N12N23f10EPx:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN2va16__ZN2N12N23f10EPx:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM2va16__ZN2N12N23f10EPx:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -95,28 +133,48 @@ namespace N1 struct A { +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif int f11 (int a, int *b, int c); #pragma omp declare simd template int f12 (int a, int *b, int c); +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) notinbranch simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) inbranch +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) notinbranch simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) inbranch +#endif static int f13 (int a, int *b, int c); +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof 
(int)) linear (a : 4) simdlen (4) +#endif int f14 (int a, int *b, int c) { return a + *b + c; } #pragma omp declare simd template int f15 (int a, int *b, int c) { return a + *b + c; } +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif static int f16 (int a, int *b, int c) { return a + *b + c; } }; @@ -129,28 +187,48 @@ int A::f15<2> (int, int *, int); template struct B { +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) notinbranch + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) inbranch +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) notinbranch #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) inbranch +#endif int f17 (int a, int *b, int c); #pragma omp declare simd template int f18 (int a, int *b, int c); +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif static int f19 (int a, int *b, int c); +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare 
simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif int f20 (int a, int *b, int c) { return a + *b + c; } #pragma omp declare simd template int f21 (int a, int *b, int c) { return a + *b + c; } +#ifdef __aarch64__ + #pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) + #pragma omp declare simd uniform (c) aligned (b : 2 * sizeof (int)) linear (a : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a : 4) simdlen (4) +#endif static int f22 (int a, int *b, int c) { return a + *b + c; } template @@ -176,25 +254,38 @@ template <> template <> int B::f21<9> (int, int *, int); +#ifdef __aarch64__ +#pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) uniform (a, c) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) uniform (a, c) +#endif template <> template <> int B::f23<7> (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd simdlen (2) aligned (b : 4 * sizeof (int)) linear (a, c : 2) +#else #pragma omp declare simd simdlen (4) aligned (b : 8 * sizeof (int)) linear (a, c : 2) +#endif template <> template <> int B::f24<-1> (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) uniform (a, c) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) uniform (a, c) +#endif template <> template <> int B::f25<7> (int a, int *b, int c) { return a + *b + c; } +// { dg-final { scan-assembler-times "_ZGVnN2vuva8u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vuva8u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { aarch64*-*-* } } } } -// { dg-warning "GCC does not currently support mixed size 
types for 'simd' functions" "" { target aarch64*-*-* } .-5 } // { dg-final { scan-assembler-times "_ZGVbM8vuva32u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN8vuva32u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM8vuva32u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -204,7 +295,11 @@ int B::f25<7> (int a, int *b, int c) // { dg-final { scan-assembler-times "_ZGVeM8vuva32u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVeN8vuva32u__ZN1BIiE3f25ILi7EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } +#ifdef __aarch64__ +#pragma omp declare simd simdlen (2) aligned (b : 4 * sizeof (int)) linear (a, c : 2) +#else #pragma omp declare simd simdlen (4) aligned (b : 8 * sizeof (int)) linear (a, c : 2) +#endif template <> template <> int B::f26<-1> (int a, int *b, int c) @@ -212,7 +307,9 @@ int B::f26<-1> (int a, int *b, int c) return a + *b + c; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN2vl2va16__ZN1BIiE3f26ILin1EEEiiPii:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vl2va16__ZN1BIiE3f26ILin1EEEiiPii:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4vl2va32__ZN1BIiE3f26ILin1EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4vl2va32__ZN1BIiE3f26ILin1EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4vl2va32__ZN1BIiE3f26ILin1EEEiiPii:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -225,26 +322,45 @@ int B::f26<-1> (int a, int *b, int c) int f27 (int x) { +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) +#else #pragma omp declare simd 
simdlen (8) aligned (b : 8 * sizeof (int)) +#endif extern int f28 (int a, int *b, int c); { x++; +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) linear (c) +#else #pragma omp declare simd simdlen (4) linear (c) +#endif extern int f29 (int a, int *b, int c); } return x; } +#ifdef __aarch64__ +#pragma omp declare simd simdlen (4) +#else #pragma omp declare simd simdlen (16) +#endif int f30 (int x) { +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) +#endif + extern int f31 (int a, int *b, int c); return x; } -// { dg-warning "GCC does not currently support simdlen 16 for type 'int'" "" { target aarch64*-*-* } .-7 } +// { dg-final { scan-assembler-times "_ZGVnN4v__Z3f30i:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4v__Z3f30i:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM16v__Z3f30i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN16v__Z3f30i:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM16v__Z3f30i:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -267,10 +383,18 @@ int f33 (int x) { if (x) +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) +#endif extern int f34 (int a, int *b, int c); while (x < 10) +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) +#endif extern int f35 (int a, int *b, int c); return x; } @@ -287,10 +411,13 @@ struct D int f37 (int a); int e; }; -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-3 } void f38 (D &d) { +#ifdef __aarch64__ + d.f37 <2> (6); +#else d.f37 <16> (6); +#endif } diff 
--git a/gcc/testsuite/g++.dg/gomp/declare-simd-3.C b/gcc/testsuite/g++.dg/gomp/declare-simd-3.C index ee4ab6febd0d..d89d9a7cf6b5 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-3.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-3.C @@ -13,7 +13,11 @@ int f1 (int a, int b, int c, int &d, int &e, int &f) return a + b + c + d + e + f; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-11 } +// { dg-final { scan-assembler-times "_ZGVnN2vulLUR4__Z2f1iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vulLUR4__Z2f1iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4vulLUR4__Z2f1iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4vulLUR4__Z2f1iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4vulLUR4__Z2f1iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4vulLUR4__Z2f1iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4vulLUR4__Z2f1iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -42,7 +46,11 @@ int f2 (int a, int b, int c, int &d, int &e, int &f) return a + b + c + d + e + f; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-17 } +// { dg-final { scan-assembler-times "_ZGVnN2vulLUR4__Z2f2iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vulLUR4__Z2f2iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4vulLUR4__Z2f2iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4vulLUR4__Z2f2iiiRiS_S_:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4vulLUR4__Z2f2iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { 
scan-assembler-times "_ZGVbN4vulLUR4__Z2f2iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4vulLUR4__Z2f2iiiRiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -58,7 +66,11 @@ int f3 (const int a, const int b, const int c, const int &d, const int &e, const return a + b + c + d + e + f; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN2vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4vulLUR4__Z2f3iiiRKiS0_S0_:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -80,7 +92,11 @@ int f4 (const int a, const int b, const int c, const int &d, const int &e, const return a + b + c + d + e + f; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-11 } +// { dg-final { scan-assembler-times "_ZGVnN2vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM2vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnN4vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { 
target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4vulLUR4__Z2f4iiiRKiS0_S0_:" 1 { target { i?86-*-* x86_64-*-* } } } } diff --git a/gcc/testsuite/g++.dg/gomp/declare-simd-4.C b/gcc/testsuite/g++.dg/gomp/declare-simd-4.C index d76defbc9331..0b76d922f3e1 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-4.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-4.C @@ -5,7 +5,9 @@ f1 (int *p, int *q, short *s) return *p + *q + *s; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN4l4ln4ln6__Z2f1PiS_Ps:" 1 { target { aarch64*-*-* } } } } +// { dg-final { scan-assembler-times "_ZGVnM4l4ln4ln6__Z2f1PiS_Ps:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbM4l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVbN4l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcM4l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } @@ -13,29 +15,40 @@ f1 (int *p, int *q, short *s) // { dg-final { scan-assembler-times "_ZGVdM8l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdN8l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVeM16l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVeN16l4ln4ln6__Z2f1PiS_Ps:" 1 { target { i?86-*-* x86_64-*-* } } } } +#ifdef __aarch64__ +#pragma omp declare simd linear(p:s) linear(q:t) uniform (s) linear(r:s) notinbranch simdlen(4) uniform(t) +#else #pragma omp declare simd linear(p:s) linear(q:t) uniform (s) linear(r:s) notinbranch simdlen(8) uniform(t) +#endif int f2 (int *p, short *q, int 
s, int r, int &t) { return *p + *q + r; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN4ls2ls4uls2u__Z2f2PiPsiiRi:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbN8ls2ls4uls2u__Z2f2PiPsiiRi:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcN8ls2ls4uls2u__Z2f2PiPsiiRi:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdN8ls2ls4uls2u__Z2f2PiPsiiRi:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVeN8ls2ls4uls2u__Z2f2PiPsiiRi:" 1 { target { i?86-*-* x86_64-*-* } } } } +#ifdef __aarch64__ +#pragma omp declare simd linear(ref(p):s) linear(val(q):t) uniform (s) linear(uval(r):s) notinbranch simdlen(4) uniform(t) +#else #pragma omp declare simd linear(ref(p):s) linear(val(q):t) uniform (s) linear(uval(r):s) notinbranch simdlen(8) uniform(t) +#endif int f3 (int &p, short &q, int s, int &r, int &t) { return p + q + r; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } +// { dg-final { scan-assembler-times "_ZGVnN4Rs2Ls4uUs2u__Z2f3RiRsiS_S_:" 1 { target { aarch64*-*-* } } } } + // { dg-final { scan-assembler-times "_ZGVbN8Rs2Ls4uUs2u__Z2f3RiRsiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVcN8Rs2Ls4uUs2u__Z2f3RiRsiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } // { dg-final { scan-assembler-times "_ZGVdN8Rs2Ls4uUs2u__Z2f3RiRsiS_S_:" 1 { target { i?86-*-* x86_64-*-* } } } } diff --git a/gcc/testsuite/g++.dg/gomp/declare-simd-7.C b/gcc/testsuite/g++.dg/gomp/declare-simd-7.C index 373be28ebd32..52e9f182da35 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-7.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-7.C @@ -18,7 +18,6 @@ foo1 (int a, int b, float c, S d, int *e, int f, int &g, int &h, int &i, int j, { 
return bar1 (a, b, c, d, e, f, g, h, i, j, k); } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-4 } #pragma omp declare simd inbranch uniform (b, c, d, e) aligned (e : 16) \ linear (f : 2) linear (ref (g) : 1) \ @@ -29,7 +28,6 @@ foo2 (int a, int b, float c, S d, int *e, int f, int &g, int &h, int &i, int j, { return bar2 (a, b, c, d, e, f, g, h, i, j, k); } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-4 } #pragma omp declare simd notinbranch uniform (b, c, d, e) aligned (e : 16) \ linear (f : 2) linear (ref (g) : 1) \ @@ -40,7 +38,6 @@ foo3 (int a, int b, float c, S d, int *e, int f, int &g, int &h, int &i, int j, { return bar3 (a, b, c, d, e, f, g, h, i, j, k); } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-4 } #pragma omp declare simd inbranch uniform (b, c, d, e) aligned (e : 16) \ linear (f : 2) linear (ref (g) : 1) \ @@ -51,4 +48,3 @@ foo4 (int a, int b, float c, S d, int *e, int f, int &g, int &h, int &i, int j, { return bar4 (a, b, c, d, e, f, g, h, i, j, k); } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-4 } diff --git a/gcc/testsuite/g++.dg/gomp/declare-simd-8.C b/gcc/testsuite/g++.dg/gomp/declare-simd-8.C index ef183136833a..01c91e890914 100644 --- a/gcc/testsuite/g++.dg/gomp/declare-simd-8.C +++ b/gcc/testsuite/g++.dg/gomp/declare-simd-8.C @@ -4,7 +4,6 @@ template struct S { #pragma omp declare simd aligned(a : N * 2) aligned(b) linear(ref(b): N) float foo (float *a, T *&b) { return *a + *b; } - // { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-1 } }; S<16, float> s; diff --git a/gcc/testsuite/g++.dg/gomp/pr88182.C b/gcc/testsuite/g++.dg/gomp/pr88182.C index 504f50376408..ca29d000036e 100644 --- 
a/gcc/testsuite/g++.dg/gomp/pr88182.C +++ b/gcc/testsuite/g++.dg/gomp/pr88182.C @@ -1,7 +1,11 @@ // { dg-do run } // { dg-options "-O -fopenmp-simd -ftree-loop-if-convert -fno-ssa-phiopt" } +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(4) notinbranch +#endif __attribute__((noinline)) int foo (double c1, double c2) { @@ -18,7 +22,6 @@ foo (double c1, double c2) } return res; } -// { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-15 } __attribute__((noinline, noclone)) void bar (double *x, double *y) diff --git a/gcc/testsuite/gcc.dg/declare-simd.c b/gcc/testsuite/gcc.dg/declare-simd.c index 2c8c1b7da4f3..1c71b60c9742 100644 --- a/gcc/testsuite/gcc.dg/declare-simd.c +++ b/gcc/testsuite/gcc.dg/declare-simd.c @@ -3,7 +3,6 @@ #pragma omp declare simd linear (p2, p3) extern void fn2 (float p1, float *p2, float *p3); -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target { { aarch64*-*-* } && lp64 } } .-1 } */ float *a, *b; void fn1 (float *p1) diff --git a/gcc/testsuite/gcc.dg/gomp/declare-simd-1.c b/gcc/testsuite/gcc.dg/gomp/declare-simd-1.c index add322873e64..c8a99a8c7070 100644 --- a/gcc/testsuite/gcc.dg/gomp/declare-simd-1.c +++ b/gcc/testsuite/gcc.dg/gomp/declare-simd-1.c @@ -1,19 +1,30 @@ /* Test parsing of #pragma omp declare simd */ /* { dg-do compile } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) \ + linear (c : 4) simdlen (2) notinbranch +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) \ linear (c : 4) simdlen (8) notinbranch +#endif #pragma omp declare simd uniform (c) aligned (b : 4 * sizeof (int)) linear (a \ : 4) simdlen (4) inbranch int f1 (int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd 
uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f2 (int a, int *b, int c) { return a + *b + c; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-5 } */ +/* { dg-final { scan-assembler-times "_ZGVnN2uva8l4_f2:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnM2uva8l4_f2:" 1 { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM8uva32l4_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN8uva32l4_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM8uva32l4_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -23,34 +34,56 @@ int f2 (int a, int *b, int c) /* { dg-final { scan-assembler-times "_ZGVeM8uva32l4_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN8uva32l4_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (long long)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (long long)) linear (c : 4) simdlen (8) +#endif __extension__ long long f3 (long long a, long long *b, long long c); int f4 (int x) { +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 4 * sizeof (int)) +#else #pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) +#endif __extension__ __extension__ __extension__ extern int f5 (int a, int *b, int c); { x++; +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) linear (c) +#else #pragma omp declare simd simdlen (4) linear (c) +#endif extern int f6 (int a, int *b, int c); } return x; } +#ifdef __aarch64__ +#pragma omp declare simd simdlen (4) +#else #pragma omp declare simd simdlen (16) +#endif int f7 (int x) { +#ifdef __aarch64__ + #pragma omp declare simd simdlen (2) aligned (b : 2 * sizeof (int)) +#else 
#pragma omp declare simd simdlen (8) aligned (b : 8 * sizeof (int)) +#endif extern int f8 (int a, int *b, int c); return x; } -/* { dg-warning "GCC does not currently support simdlen 16 for type 'int'" "" { target aarch64*-*-* } .-7 } */ +/* { dg-final { scan-assembler-times "_ZGVnM4v_f7:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN4v_f7:" 1 { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM16v_f7:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN16v_f7:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM16v_f7:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -60,17 +93,27 @@ f7 (int x) /* { dg-final { scan-assembler-times "_ZGVeM16v_f7:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN16v_f7:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f12 (int c; int *b; int a; int a, int *b, int c); +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f13 (int c; int *b; int a; int a, int *b, int c) { return a + *b + c; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-5 } */ +/* { dg-final { scan-assembler-times "_ZGVnM2uva8l4_f13:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN2uva8l4_f13:" 1 { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM8uva32l4_f13:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN8uva32l4_f13:" 1 
{ target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM8uva32l4_f13:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -80,7 +123,11 @@ f13 (int c; int *b; int a; int a, int *b, int c) /* { dg-final { scan-assembler-times "_ZGVeM8uva32l4_f13:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN8uva32l4_f13:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f14 (a, b, c) int a, c; @@ -89,7 +136,9 @@ f14 (a, b, c) return a + *b + c; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-7 } */ +/* { dg-final { scan-assembler-times "_ZGVnM2uva8l4_f14:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN2uva8l4_f14:" 1 { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM8uva32l4_f14:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN8uva32l4_f14:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM8uva32l4_f14:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -99,14 +148,20 @@ f14 (a, b, c) /* { dg-final { scan-assembler-times "_ZGVeM8uva32l4_f14:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN8uva32l4_f14:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (a) aligned (b : 2 * sizeof (int)) linear (c : 4) simdlen (2) +#else #pragma omp declare simd uniform (a) aligned (b : 8 * sizeof (int)) linear (c : 4) simdlen (8) +#endif int f15 (int a, int *b, int c) { return a + *b + c; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-5 } */ 
+/* { dg-final { scan-assembler-times "_ZGVnM2uva8l4_f15:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN2uva8l4_f15:" 1 { target { aarch64*-*-* } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM8uva32l4_f15:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN8uva32l4_f15:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM8uva32l4_f15:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -116,19 +171,33 @@ f15 (int a, int *b, int c) /* { dg-final { scan-assembler-times "_ZGVeM8uva32l4_f15:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN8uva32l4_f15:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd uniform (d) aligned (e : 2 * sizeof (int)) linear (f : 2) simdlen (2) +#else #pragma omp declare simd uniform (d) aligned (e : 8 * sizeof (int)) linear (f : 4) simdlen (8) +#endif int f15 (int d, int *e, int f); +#ifdef __aarch64__ +#pragma omp declare simd aligned (g : sizeof (*g)) linear (h : 2 * sizeof (g[0]) + sizeof (h)) simdlen (2) +#else #pragma omp declare simd aligned (g : sizeof (*g)) linear (h : 2 * sizeof (g[0]) + sizeof (h)) simdlen (4) +#endif int f16 (long *g, int h); +#ifdef __aarch64__ +#pragma omp declare simd aligned (h : sizeof (*h)) linear (g : 2 * sizeof (h[0]) + sizeof (g)) simdlen (2) +#else #pragma omp declare simd aligned (h : sizeof (*h)) linear (g : 2 * sizeof (h[0]) + sizeof (g)) simdlen (4) +#endif int f17 (int g, long *h) { return g + h[0]; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-5 } */ +/* { dg-final { scan-assembler-times "_ZGVnM2l20va8_f17:" 1 { target { { aarch64*-*-* } && lp64 } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN2l20va8_f17:" 1 { target { { aarch64*-*-* } && lp64 } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM4l20va8_f17:" 1 { target { 
{ i?86-*-* x86_64-*-* } && lp64 } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN4l20va8_f17:" 1 { target { { i?86-*-* x86_64-*-* } && lp64 } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM4l20va8_f17:" 1 { target { { i?86-*-* x86_64-*-* } && lp64 } } } } */ @@ -146,7 +215,11 @@ int f17 (int g, long *h) /* { dg-final { scan-assembler-times "_ZGVeM4l12va4_f17:" 1 { target { { i?86-*-* x86_64-*-* } && ilp32 } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN4l12va4_f17:" 1 { target { { i?86-*-* x86_64-*-* } && ilp32 } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd aligned (i : sizeof (*i)) linear (j : 2 * sizeof (i[0]) + sizeof (j)) simdlen (2) +#else #pragma omp declare simd aligned (i : sizeof (*i)) linear (j : 2 * sizeof (i[0]) + sizeof (j)) simdlen (4) +#endif int f18 (j, i) long *i; @@ -155,7 +228,9 @@ f18 (j, i) return j + i[0]; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } .-7 } */ +/* { dg-final { scan-assembler-times "_ZGVnM2l20va8_f18:" 1 { target { { aarch64*-*-* } && lp64 } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN2l20va8_f18:" 1 { target { { aarch64*-*-* } && lp64 } } } } */ + /* { dg-final { scan-assembler-times "_ZGVbM4l20va8_f18:" 1 { target { { i?86-*-* x86_64-*-* } && lp64 } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN4l20va8_f18:" 1 { target { { i?86-*-* x86_64-*-* } && lp64 } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM4l20va8_f18:" 1 { target { { i?86-*-* x86_64-*-* } && lp64 } } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/declare-simd-3.c b/gcc/testsuite/gcc.dg/gomp/declare-simd-3.c index bf01c023541a..ba8e71008b5b 100644 --- a/gcc/testsuite/gcc.dg/gomp/declare-simd-3.c +++ b/gcc/testsuite/gcc.dg/gomp/declare-simd-3.c @@ -4,8 +4,8 @@ f1 (int *p, int *q, short *s) { return *p + *q + *s; } - -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } */ +/* { 
dg-final { scan-assembler-times "_ZGVnM4l4ln4ln6_f1:" 1 { target { aarch64*-*-* } } } } */ +/* { dg-final { scan-assembler-times "_ZGVnN4l4ln4ln6_f1:" 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbM4l4ln4ln6_f1:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN4l4ln4ln6_f1:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcM4l4ln4ln6_f1:" 1 { target { i?86-*-* x86_64-*-* } } } } */ @@ -15,14 +15,18 @@ f1 (int *p, int *q, short *s) /* { dg-final { scan-assembler-times "_ZGVeM16l4ln4ln6_f1:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVeN16l4ln4ln6_f1:" 1 { target { i?86-*-* x86_64-*-* } } } } */ +#ifdef __aarch64__ +#pragma omp declare simd linear(p:s) linear(q:t) uniform (s) linear(r:s) notinbranch simdlen(4) uniform(t) +#else #pragma omp declare simd linear(p:s) linear(q:t) uniform (s) linear(r:s) notinbranch simdlen(8) uniform(t) +#endif int f2 (int *p, short *q, int s, int r, int t) { return *p + *q + r; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } */ +/* { dg-final { scan-assembler-times "_ZGVnN4ls2ls4uls2u_f2:" 1 { target { aarch64*-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVbN8ls2ls4uls2u_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVcN8ls2ls4uls2u_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ /* { dg-final { scan-assembler-times "_ZGVdN8ls2ls4uls2u_f2:" 1 { target { i?86-*-* x86_64-*-* } } } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/pr87887-1.c b/gcc/testsuite/gcc.dg/gomp/pr87887-1.c index 8b04ffd0809b..6e143aa0b5aa 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr87887-1.c +++ b/gcc/testsuite/gcc.dg/gomp/pr87887-1.c @@ -10,6 +10,7 @@ foo (int x) { return (struct S) { x }; } +/* { dg-warning "unsupported return type 'struct S' for 'simd' functions" "" { target aarch64*-*-* } .-4
} */ #pragma omp declare simd int @@ -17,6 +18,7 @@ bar (struct S x) { return x.n; } +/* { dg-warning "unsupported argument type 'struct S' for 'simd' functions" "" { target aarch64*-*-* } .-4 } */ #pragma omp declare simd uniform (x) int diff --git a/gcc/testsuite/gcc.dg/gomp/pr87895-1.c b/gcc/testsuite/gcc.dg/gomp/pr87895-1.c index 7338f18d1920..22f5c6914164 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr87895-1.c +++ b/gcc/testsuite/gcc.dg/gomp/pr87895-1.c @@ -17,4 +17,3 @@ bar (int *x, int y) if ((y == 0) ? (*x = 0) : *x) return 0; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-5 } */ diff --git a/gcc/testsuite/gcc.dg/gomp/pr89246-1.c b/gcc/testsuite/gcc.dg/gomp/pr89246-1.c index dfe629c1c6a5..cdaec6b38516 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr89246-1.c +++ b/gcc/testsuite/gcc.dg/gomp/pr89246-1.c @@ -8,6 +8,7 @@ int foo (__int128 x) { return x; } +/* { dg-warning "unsupported argument type '__int128' for 'simd' functions" "" { target aarch64*-*-* } .-4 } */ #pragma omp declare simd extern int bar (int x); diff --git a/gcc/testsuite/gcc.dg/gomp/pr99542.c b/gcc/testsuite/gcc.dg/gomp/pr99542.c index b67ff5a37a20..f38e21da1858 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr99542.c +++ b/gcc/testsuite/gcc.dg/gomp/pr99542.c @@ -3,8 +3,8 @@ /* { dg-options "-O0 -fopenmp-simd" } */ #pragma omp declare simd -extern int foo (__int128 x); /* { dg-warning "GCC does not currently support mixed size types for 'simd' function" "" { target aarch64*-*-* } } */ -/* { dg-warning "unsupported argument type '__int128' for simd" "" { target i?86-*-* x86_64-*-* } .-1 } */ +extern int foo (__int128 x); +/* { dg-warning "unsupported argument type '__int128' for simd" "" { target i?86-*-* x86_64-*-* aarch64*-*-* } .-1 } */ #pragma omp declare simd uniform (x) extern int baz (__int128 x); diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c index 9f7c84dc70ba..5fe4069c01c1 100644 ---
a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c @@ -6,7 +6,6 @@ int addit(int a, int b, int *c) { return a + b; } -/* { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } .-4 } */ /* { dg-final { scan-tree-dump {(?n)^__attribute__\(\(omp declare simd \(notinbranch aligned\(2:32\)\), omp declare simd \(inbranch uniform\(2\) linear\(1:66\)\)\)\)$} "optimized" } } */ #pragma omp declare simd uniform(a) aligned(a:32) linear(k:1) notinbranch @@ -17,6 +16,13 @@ float setArray(float *a, float x, int k) } /* { dg-final { scan-tree-dump {(?n)^__attribute__\(\(omp declare simd \(notinbranch uniform\(0\) aligned\(0:32\) linear\(2:1\)\)\)\)$} "optimized" } } */ +/* { dg-final { scan-tree-dump "_ZGVnN2ua32vl_setArray" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN4ua32vl_setArray" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN2vvva32_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN4vvva32_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnM2vl66u_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnM4vl66u_addit" "optimized" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVbM4vl66u_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c index c44bfe511a57..ec6d2daaa29d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-1.c @@ -12,8 +12,13 @@ int array[N]; #pragma omp declare simd simdlen(4) notinbranch #pragma omp
declare simd simdlen(4) notinbranch uniform(b) linear(c:3) +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#pragma omp declare simd simdlen(2) notinbranch uniform(b) linear(c:3) +#else #pragma omp declare simd simdlen(8) notinbranch #pragma omp declare simd simdlen(8) notinbranch uniform(b) linear(c:3) +#endif __attribute__((noinline)) int foo (int a, int b, int c) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c index feab989cfd59..13fc93614b4d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-2.c @@ -12,8 +12,13 @@ int array[N] __attribute__((aligned (32))); #pragma omp declare simd simdlen(4) notinbranch aligned(a:16) uniform(a) linear(b) #pragma omp declare simd simdlen(4) notinbranch aligned(a:32) uniform(a) linear(b) +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch aligned(a:16) uniform(a) linear(b) +#pragma omp declare simd simdlen(2) notinbranch aligned(a:32) uniform(a) linear(b) +#else #pragma omp declare simd simdlen(8) notinbranch aligned(a:16) uniform(a) linear(b) #pragma omp declare simd simdlen(8) notinbranch aligned(a:32) uniform(a) linear(b) +#endif __attribute__((noinline)) void foo (int *a, int b, int c) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c index 42414671c254..2f9f8306d672 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-4.c @@ -12,7 +12,11 @@ float d[N]; int e[N]; unsigned short f[N]; +#ifdef __aarch64__ +#pragma omp declare simd simdlen(4) notinbranch uniform(b) +#else #pragma omp declare simd simdlen(8) notinbranch uniform(b) +#endif __attribute__((noinline)) float foo (float a, float b, float c) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c index 620cec36e4c0..097081897205 100644 --- 
a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-5.c @@ -10,7 +10,11 @@ int d[N], e[N]; +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch uniform(b) linear(c:3) +#else #pragma omp declare simd simdlen(4) notinbranch uniform(b) linear(c:3) +#endif __attribute__((noinline)) long long int foo (int a, int b, int c) { diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c index 440091d70e83..978cd4faa9bc 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-6.c @@ -8,14 +8,24 @@ #define N 1024 #endif -int a[N]; -long long int b[N]; -short c[N]; +#ifdef __aarch64__ +#define TYPE1 int +#define TYPE2 int +#define TYPE3 short +#else +#define TYPE1 int +#define TYPE2 long long int +#define TYPE3 short +#endif + +TYPE1 a[N]; +TYPE2 b[N]; +TYPE3 c[N]; #pragma omp declare simd #pragma omp declare simd uniform(b) linear(c:3) -__attribute__((noinline)) short -foo (int a, long long int b, short c) +__attribute__((noinline)) TYPE3 +foo (TYPE1 a, TYPE2 b, TYPE3 c) { return a + b + c; } diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c index 62246e288372..68ea4716968e 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-7.c @@ -8,14 +8,24 @@ #define N 1024 #endif -int a[N]; -long long int b[N]; -short c[N]; +#ifdef __aarch64__ +#define TYPE1 int +#define TYPE2 int +#define TYPE3 short +#else +#define TYPE1 int +#define TYPE2 long long int +#define TYPE3 short +#endif + +TYPE1 a[N]; +TYPE2 b[N]; +TYPE3 c[N]; #pragma omp declare simd #pragma omp declare simd uniform(b) linear(c:3) -__attribute__((noinline)) short -foo (int a, long long int b, int c) +__attribute__((noinline)) TYPE3 +foo (TYPE1 a, TYPE2 b, TYPE1 c) { return a + b + c; } diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c 
b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c index 11ea21326891..29842825584f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-8.c @@ -12,14 +12,22 @@ int a[N], b[N]; long int c[N]; unsigned char d[N]; +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(8) notinbranch +#endif __attribute__((noinline)) int foo (long int a, int b, int c) { return a + b + c; } +#ifdef __aarch64__ +#pragma omp declare simd simdlen(2) notinbranch +#else #pragma omp declare simd simdlen(8) notinbranch +#endif __attribute__((noinline)) long int bar (int a, int b, long int c) { diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c new file mode 100644 index 000000000000..aab8c17f0c44 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp-simd" } */ +#ifdef __cplusplus +extern "C" { +#endif +#pragma omp declare simd +int __attribute__ ((const)) f00 (int a , char b) /* { dg-warning {GCC does not currently support a simdclone with simdlens 8 and 16 for these types.} } */ +{ + return a + b; +} + +#pragma omp declare simd +long long __attribute__ ((const)) f01 (int a , short b) /* { dg-warning {GCC does not currently support a simdclone with simdlens 4 and 8 for these types.} } */ +{ + return a + b; +} + +#pragma omp declare simd linear(b) +long long __attribute__ ((const)) f02 (short *b, int a) /* { dg-warning {GCC does not currently support a simdclone with simdlens 4 and 8 for these types.} } */ +{ + return a + *b; +} + +#pragma omp declare simd uniform(b) +void f03 (char b, int a) /* { dg-warning {GCC does not currently support a simdclone with simdlens 8 and 16 for these types.} } */ +{ +} + +#pragma omp declare simd simdlen(4) +double f04 (void) /* { dg-warning {GCC does not currently support simdlen 4 for type 'double'} } */ +{ + 
return 4; +} + +#pragma omp declare simd simdlen(16) +void f05 (short a) /* { dg-warning {GCC does not currently support simdlen 16 for type 'short int'} } */ +{ +} +#ifdef __cplusplus +} +#endif + diff --git a/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c new file mode 100644 index 000000000000..e2e80f0c663d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/declare-simd-2.c @@ -0,0 +1,60 @@ +/* { dg-do compile } */ +/* { dg-options "-fopenmp-simd" } */ +#ifdef __cplusplus +extern "C" { +#endif +#pragma omp declare simd +short __attribute__ ((const)) f00 (short a , char b) +{ + return a + b; +} +/* { dg-final { scan-assembler {_ZGVnN8vv_f00:} } } */ +/* { dg-final { scan-assembler {_ZGVnM8vv_f00:} } } */ + +#pragma omp declare simd notinbranch +short __attribute__ ((const)) f01 (int a , short b) +{ + return a + b; +} +/* { dg-final { scan-assembler {_ZGVnN4vv_f01:} } } */ +/* { dg-final { scan-assembler-not {_ZGVnM4vv_f01:} } } */ + +#pragma omp declare simd linear(b) inbranch +int __attribute__ ((const)) f02 (int a, short *b) +{ + return a + *b; +} +/* { dg-final { scan-assembler {_ZGVnM4vl2_f02:} } } */ +/* { dg-final { scan-assembler-not {_ZGVnN4vl2_f02:} } } */ + +#pragma omp declare simd uniform(a) notinbranch +void f03 (char b, int a) +{ +} +/* { dg-final { scan-assembler {_ZGVnN8vu_f03:} } } */ +/* { dg-final { scan-assembler {_ZGVnN16vu_f03:} } } */ +/* { dg-final { scan-assembler-not {_ZGVnM8vu_f03:} } } */ +/* { dg-final { scan-assembler-not {_ZGVnM16vu_f03:} } } */ + +#pragma omp declare simd simdlen(2) +float f04 (double a) +{ + return (float) a; +} +/* { dg-final { scan-assembler {_ZGVnN2v_f04:} } } */ +/* { dg-final { scan-assembler {_ZGVnM2v_f04:} } } */ + +#pragma omp declare simd uniform(a) linear (b) +void f05 (short a, short *b, short c) +{ + *b += a + c; +} + +/* { dg-final { scan-assembler {_ZGVnN4ul2v_f05:} } } */ +/* { dg-final { scan-assembler {_ZGVnN4ul2v_f05:} } } */ +/* { dg-final 
{ scan-assembler {_ZGVnM8ul2v_f05:} } } */ +/* { dg-final { scan-assembler {_ZGVnM8ul2v_f05:} } } */ +#ifdef __cplusplus +} +#endif + diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-simd-2.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-simd-2.f90 index bbf70d9664a7..8f76774fd6e0 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-simd-2.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-simd-2.f90 @@ -1,6 +1,6 @@ ! { dg-do compile } -function f1 (a, b, c, d, e, f) ! { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +function f1 (a, b, c, d, e, f) integer, value :: a, b, c integer :: d, e, f, f1 !$omp declare simd (f1) uniform(b) linear(c, d) linear(uval(e)) linear(ref(f)) @@ -12,7 +12,7 @@ function f1 (a, b, c, d, e, f) ! { dg-warning "GCC does not currently support mi f = f + 1 f1 = a + b + c + d + e + f end function f1 -integer function f2 (a, b) ! { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +integer function f2 (a, b) integer :: a, b !$omp declare simd uniform(b) linear(ref(a):b) a = a + 1 diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-simd-coarray-lib.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-simd-coarray-lib.f90 index f0c4e39efba1..1f74da76ffe3 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-simd-coarray-lib.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-simd-coarray-lib.f90 @@ -5,7 +5,7 @@ ! Failed as TREE_TYPE(fndecl) did not include the ! hidden caf_token/caf_offset arguments. ! -integer function f(x) ! 
{ dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +integer function f(x) integer :: x[*] !$omp declare simd f = x[1] diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 index 06c9a5d1ed8f..6319df0558f3 100644 --- a/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/declare-variant-14.f90 @@ -35,13 +35,15 @@ contains integer :: a, b ! At gimplification time, we can't decide yet which function to call. - ! { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } + ! { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" { target { !aarch64*-*-* } } } } ! After simd clones are created, the original non-clone test1 shall ! call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones ! shall call f01 with score 8. ! { dg-final { scan-tree-dump-not "f04 \\\(x" "optimized" } } - ! { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" } } - ! { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" } } + ! { dg-final { scan-tree-dump-times "f03 \\\(x" 14 "optimized" { target { !aarch64*-*-* } } } } + ! { dg-final { scan-tree-dump-times "f03 \\\(x" 6 "optimized" { target { aarch64*-*-* } } } } + ! { dg-final { scan-tree-dump-times "f01 \\\(x" 4 "optimized" { target { !aarch64*-*-* } } } } + ! { dg-final { scan-tree-dump-times "f01 \\\(x" 0 "optimized" { target { aarch64*-*-* } } } } a = f04 (x) b = f04 (x) test1 = a + b diff --git a/gcc/testsuite/gfortran.dg/gomp/pr79154-1.f90 b/gcc/testsuite/gfortran.dg/gomp/pr79154-1.f90 index ea147bfa78ec..6376baa6383c 100644 --- a/gcc/testsuite/gfortran.dg/gomp/pr79154-1.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/pr79154-1.f90 @@ -1,7 +1,7 @@ ! PR fortran/79154 ! { dg-do compile } -pure real function foo (a, b) ! 
{ dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +pure real function foo (a, b) !$omp declare simd(foo) ! { dg-bogus "may not appear in PURE" } real, intent(in) :: a, b foo = a + b @@ -20,7 +20,7 @@ pure real function baz (a, b) real, intent(in) :: a, b baz = a + b end function baz -elemental real function fooe (a, b) ! { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +elemental real function fooe (a, b) !$omp declare simd(fooe) ! { dg-bogus "may not appear in PURE" } real, intent(in) :: a, b fooe = a + b diff --git a/gcc/testsuite/gfortran.dg/gomp/pr83977.f90 b/gcc/testsuite/gfortran.dg/gomp/pr83977.f90 index ea8e229fe548..b8ad1a7e39c1 100644 --- a/gcc/testsuite/gfortran.dg/gomp/pr83977.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/pr83977.f90 @@ -1,7 +1,7 @@ ! PR middle-end/83977 ! { dg-do compile } -integer function foo (a, b) ! { dg-warning "GCC does not currently support mixed size types for 'simd' functions" "" { target aarch64*-*-* } } +integer function foo (a, b) integer :: a, b !$omp declare simd uniform(b) linear(ref(a):b) a = a + 1 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 40a60c198cfe..14e3e119792a 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4321,7 +4321,8 @@ proc check_effective_target_vect_simd_clones { } { return [check_cached_effective_target_indexed vect_simd_clones { expr { (([istarget i?86-*-*] || [istarget x86_64-*-*]) && [check_effective_target_avx512f]) - || [istarget amdgcn-*-*] }}] + || [istarget amdgcn-*-*] + || [istarget aarch64*-*-*] }}] } # Return 1 if this is a AArch64 target supporting big endian diff --git a/libgomp/testsuite/libgomp.c/declare-variant-1.c b/libgomp/testsuite/libgomp.c/declare-variant-1.c index d16608f7e6dd..6129f23a0f80 100644 --- a/libgomp/testsuite/libgomp.c/declare-variant-1.c +++ 
b/libgomp/testsuite/libgomp.c/declare-variant-1.c @@ -46,8 +46,10 @@ test1 (int x) call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones shall call f01 with score 8. */ /* { dg-final { scan-ltrans-tree-dump-not "f04 \\\(x" "optimized" } } */ - /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" } } */ - /* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" } } */ + /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" { target { !aarch64*-*-* } } } } } */ + /* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" { target { !aarch64*-*-* } } } } } */ + /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 10 "optimized" { target { aarch64*-*-* } } } } } */ + /* { dg-final { scan-ltrans-tree-dump-not "f01 \\\(x" "optimized" { target { aarch64*-*-* } } } } } */ int a = f04 (x); int b = f04 (x); return a + b; diff --git a/libgomp/testsuite/libgomp.fortran/declare-simd-1.f90 b/libgomp/testsuite/libgomp.fortran/declare-simd-1.f90 index cb8f4df8d62e..9d4452459aa7 100644 --- a/libgomp/testsuite/libgomp.fortran/declare-simd-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/declare-simd-1.f90 @@ -1,5 +1,5 @@ ! { dg-do run { target vect_simd_clones } } -! { dg-options "-fno-inline" } +! { dg-options "-fno-inline -cpp -D__aarch64__" } ! { dg-additional-options "-msse2" { target sse2_runtime } } ! 
{ dg-additional-options "-mavx" { target avx_runtime } } @@ -75,7 +75,11 @@ end module declare_simd_1_mod end do contains function baz (x, y, z) +#ifdef __aarch64__ + !$omp declare simd (baz) simdlen (4) uniform (x, y) +#else !$omp declare simd (baz) simdlen (8) uniform (x, y) +#endif !$omp declare simd (baz) integer, value :: y real, value :: z @@ -90,6 +94,10 @@ function bar (a, b, c) real :: bar double precision, value :: a !$omp declare simd (bar) +#ifdef __aarch64__ + !$omp declare simd (bar) simdlen (2) linear (b : 2) +#else !$omp declare simd (bar) simdlen (4) linear (b : 2) +#endif bar = a + b * c end function bar From eea25179d8d1406685b8b0995ba841605f895283 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Mon, 11 Dec 2023 08:40:41 -0700 Subject: [PATCH 190/311] [PATCH] wrong code on m68k with -mlong-jump-table-offsets and -malign-int (PR target/112413) On m68k the compiler assumes that the PC-relative jump-via-jump-table instruction and the jump table are adjacent with no padding in between. When -mlong-jump-table-offsets is combined with -malign-int, a 2-byte nop may be inserted before the jump table, causing the jump to add the fetched offset to the wrong PC base and thus jump to the wrong address. Fixed by referencing the jump table via its label. On the test case in the PR the object code change is (the moveal at 16 is the nop): a: 6536 bcss 42 c: e588 lsll #2,%d0 e: 203b 0808 movel %pc@(18 ,%d0:l),%d0 - 12: 4efb 0802 jmp %pc@(16 ,%d0:l) + 12: 4efb 0804 jmp %pc@(18 ,%d0:l) 16: 284c moveal %a4,%a4 18: 0000 0020 orib #32,%d0 1c: 0000 002c orib #44,%d0 Bootstrapped and tested on m68k-linux-gnu, no regressions. Note: I don't have commit rights so I would need assistance applying this. PR target/112413 gcc/ * config/m68k/linux.h (ASM_RETURN_CASE_JUMP): For TARGET_LONG_JUMP_TABLE_OFFSETS, reference the jump table via its label. * config/m68k/m68kelf.h (ASM_RETURN_CASE_JUMP): Likewise. * config/m68k/netbsd-elf.h (ASM_RETURN_CASE_JUMP): Likewise. 
--- gcc/config/m68k/linux.h | 4 ++-- gcc/config/m68k/m68kelf.h | 4 ++-- gcc/config/m68k/netbsd-elf.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/config/m68k/linux.h b/gcc/config/m68k/linux.h index 2e1cb5498b86..37069c4d0826 100644 --- a/gcc/config/m68k/linux.h +++ b/gcc/config/m68k/linux.h @@ -102,12 +102,12 @@ along with GCC; see the file COPYING3. If not see if (ADDRESS_REG_P (operands[0])) \ return "jmp %%pc@(2,%0:l)"; \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \ } \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "jmp %%pc@(2,%0:w)"; \ } while (0) diff --git a/gcc/config/m68k/m68kelf.h b/gcc/config/m68k/m68kelf.h index 01ee724ef2bb..f89c9b70455b 100644 --- a/gcc/config/m68k/m68kelf.h +++ b/gcc/config/m68k/m68kelf.h @@ -59,12 +59,12 @@ along with GCC; see the file COPYING3. 
If not see if (ADDRESS_REG_P (operands[0])) \ return "jmp %%pc@(2,%0:l)"; \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \ } \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "jmp %%pc@(2,%0:w)"; \ } while (0) diff --git a/gcc/config/m68k/netbsd-elf.h b/gcc/config/m68k/netbsd-elf.h index 4d4a6d71cc45..6ba581b7b18b 100644 --- a/gcc/config/m68k/netbsd-elf.h +++ b/gcc/config/m68k/netbsd-elf.h @@ -137,12 +137,12 @@ while (0) if (ADDRESS_REG_P (operands[0])) \ return "jmp %%pc@(2,%0:l)"; \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "ext%.l %0\n\tjmp %%pc@(2,%0:l)"; \ } \ else if (TARGET_LONG_JUMP_TABLE_OFFSETS) \ - return "jmp %%pc@(2,%0:l)"; \ + return "jmp %%pc@(%l1,%0:l)"; \ else \ return "jmp %%pc@(2,%0:w)"; \ } while (0) From acbfb8b9495b802e414e6ab94b810ef7b0c8aa1d Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Sat, 9 Dec 2023 13:43:23 -0800 Subject: [PATCH 191/311] expr: catch more `a*bool` while expanding [PR 112935] After r14-1655-g52c92fb3f40050 (and the other commits which touch zero_one_valued_p), we end up with `bool * a` where the bool is an SSA name that might not have non-zero bits set on it (to 0x1) even though the non-zero bits would be 0x1. In the case of coremark, it is only phiopt4 which adds the new ssa name and nothing afterwards updates the nonzero bits on it. This fixes the regression by using gimple_zero_one_valued_p rather than tree_nonzero_bits to match the cases where the SSA_NAME didn't have the non-zero bits set. gimple_zero_one_valued_p handles one level of cast and also an `&`. Bootstrapped and tested on x86_64-linux-gnu. 
gcc/ChangeLog: PR middle-end/112935 * expr.cc (expand_expr_real_2): Use gimple_zero_one_valued_p instead of tree_nonzero_bits to find boolean defined expressions. Signed-off-by: Andrew Pinski --- gcc/expr.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 6da51f2aca29..4686cacd22fe 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -10209,8 +10209,9 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, /* Expand X*Y as X&-Y when Y must be zero or one. */ if (SCALAR_INT_MODE_P (mode)) { - bool bit0_p = tree_nonzero_bits (treeop0) == 1; - bool bit1_p = tree_nonzero_bits (treeop1) == 1; + bool gimple_zero_one_valued_p (tree, tree (*)(tree)); + bool bit0_p = gimple_zero_one_valued_p (treeop0, nullptr); + bool bit1_p = gimple_zero_one_valued_p (treeop1, nullptr); /* Expand X*Y as X&Y when both X and Y must be zero or one. */ if (bit0_p && bit1_p) From 4eaaf7f5a378e81bbcd180cb10bf58726a68f229 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Mon, 10 Jul 2023 21:53:24 -0700 Subject: [PATCH 192/311] analyzer: Remove check of unsigned_char in maybe_undo_optimize_bit_field_compare. The check for the type seems unnecessary and gets in the way sometimes. Also with a patch I am working on for match.pd, it causes a failure to happen. Before my patch the IR was: _1 = BIT_FIELD_REF ; _2 = _1 & 1; _3 = _2 != 0; _4 = (int) _3; __analyzer_eval (_4); Where _2 was an unsigned char type. And After my patch we have: _1 = BIT_FIELD_REF ; _2 = (int) _1; _3 = _2 & 1; __analyzer_eval (_3); But in this case, the BIT_AND_EXPR is in an int type. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/analyzer/ChangeLog: * region-model-manager.cc (maybe_undo_optimize_bit_field_compare): Remove the check for type being unsigned_char_type_node. 
--- gcc/analyzer/region-model-manager.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index b631bcb04d04..26c34e38875d 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -596,9 +596,6 @@ maybe_undo_optimize_bit_field_compare (tree type, tree cst, const svalue *arg1) { - if (type != unsigned_char_type_node) - return NULL; - const binding_map &map = compound_sval->get_map (); unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (cst); /* If "mask" is a contiguous range of set bits, see if the From 85c5efcffed19ca6160eeecc2d4faebd9fee63aa Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Sat, 11 Nov 2023 15:54:10 -0800 Subject: [PATCH 193/311] MATCH: (convert)(zero_one !=/== 0/1) for outer type and zero_one type are the same When I moved two_value to match.pd, I removed the check for the {0,+-1} as I had placed it after the {0,+-1} case for cond in match.pd. In the case of {0,+-1} and non boolean, before we would optimize those cases to just `(convert)a` but after we would get `(convert)(a != 0)` which was not handled anyways to just `(convert)a`. So this adds a pattern to match `(convert)(zeroone != 0)` and simplify to `(convert)zeroone`. Also this optimizes (convert)(zeroone == 0) into (zeroone^1) if the types match. Removing the opposite transformation from fold. The opposite transformation was added with https://gcc.gnu.org/pipermail/gcc-patches/2006-February/190514.html It is no longer considered the canonicalization either, even VRP will transform it back into `(~a) & 1` so removing it is a good idea. Note the testcase pr69270.c needed a slight update due to not matching exactly a scan pattern, this update makes it more robust and will match before and afterwards and if there are other changes in this area too. Note the testcase gcc.target/i386/pr110790-2.c needs a slight update for better code generation in LP64 bit mode. 
Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: PR tree-optimization/111972 PR tree-optimization/110637 * match.pd (`(convert)(zeroone !=/== CST)`): Match and simplify to ((convert)zeroone){,^1}. * fold-const.cc (fold_binary_loc): Remove transformation of `(~a) & 1` and `(a ^ 1) & 1` into `(convert)(a == 0)`. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr110637-1.c: New test. * gcc.dg/tree-ssa/pr110637-2.c: New test. * gcc.dg/tree-ssa/pr110637-3.c: New test. * gcc.dg/tree-ssa/pr111972-1.c: New test. * gcc.dg/tree-ssa/pr69270.c: Update testcase. * gcc.target/i386/pr110790-2.c: Update testcase. * gcc.dg/fold-even-1.c: Removed. Signed-off-by: Andrew Pinski --- gcc/fold-const.cc | 27 ----------------- gcc/match.pd | 16 ++++++++++ gcc/testsuite/gcc.dg/fold-even-1.c | 32 -------------------- gcc/testsuite/gcc.dg/tree-ssa/pr110637-1.c | 10 +++++++ gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c | 13 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr110637-3.c | 14 +++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr111972-1.c | 34 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr69270.c | 4 +-- gcc/testsuite/gcc.target/i386/pr110790-2.c | 16 ++++++++-- 9 files changed, 103 insertions(+), 63 deletions(-) delete mode 100644 gcc/testsuite/gcc.dg/fold-even-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110637-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr110637-3.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr111972-1.c diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 2692b98ceac2..f5d68ac323a1 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -12077,33 +12077,6 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type, goto bit_rotate; case BIT_AND_EXPR: - /* Fold (X ^ 1) & 1 as (X & 1) == 0. 
*/ - if (TREE_CODE (arg0) == BIT_XOR_EXPR - && INTEGRAL_TYPE_P (type) - && integer_onep (TREE_OPERAND (arg0, 1)) - && integer_onep (arg1)) - { - tree tem2; - tem = TREE_OPERAND (arg0, 0); - tem2 = fold_convert_loc (loc, TREE_TYPE (tem), arg1); - tem2 = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (tem), - tem, tem2); - return fold_build2_loc (loc, EQ_EXPR, type, tem2, - build_zero_cst (TREE_TYPE (tem))); - } - /* Fold ~X & 1 as (X & 1) == 0. */ - if (TREE_CODE (arg0) == BIT_NOT_EXPR - && INTEGRAL_TYPE_P (type) - && integer_onep (arg1)) - { - tree tem2; - tem = TREE_OPERAND (arg0, 0); - tem2 = fold_convert_loc (loc, TREE_TYPE (tem), arg1); - tem2 = fold_build2_loc (loc, BIT_AND_EXPR, TREE_TYPE (tem), - tem, tem2); - return fold_build2_loc (loc, EQ_EXPR, type, tem2, - build_zero_cst (TREE_TYPE (tem))); - } /* Fold !X & 1 as X == 0. */ if (TREE_CODE (arg0) == TRUTH_NOT_EXPR && integer_onep (arg1)) diff --git a/gcc/match.pd b/gcc/match.pd index 5deddd57d21e..15bca217aafb 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3332,6 +3332,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) (rcmp @0 @1)))) +/* (type)([0,1]@a != 0) -> (type)a + (type)([0,1]@a == 1) -> (type)a + (type)([0,1]@a == 0) -> a ^ 1 + (type)([0,1]@a != 1) -> a ^ 1. */ +(for eqne (eq ne) + (simplify + (convert (eqne zero_one_valued_p@0 INTEGER_CST@1)) + (if ((integer_zerop (@1) || integer_onep (@1))) + (if ((eqne == EQ_EXPR) ^ integer_zerop (@1)) + (convert @0) + /* Only do this if the types match as (type)(a == 0) is + canonical form normally, while `a ^ 1` is canonical when + there is no type change. */ + (if (types_match (type, TREE_TYPE (@0))) + (bit_xor @0 { build_one_cst (type); } )))))) + /* We can't reassociate at all for saturating types. 
*/ (if (!TYPE_SATURATING (type)) diff --git a/gcc/testsuite/gcc.dg/fold-even-1.c b/gcc/testsuite/gcc.dg/fold-even-1.c deleted file mode 100644 index 94711ab1499e..000000000000 --- a/gcc/testsuite/gcc.dg/fold-even-1.c +++ /dev/null @@ -1,32 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-original" } */ -int test1(int a) -{ - return !(a & 1); -} - -int test2(int b) -{ - return (b & 1) == 0; -} - -int test3(int c) -{ - return (c & 1) ^ 1; -} - -int test4(int d) -{ - return (d ^ 1) & 1; -} - -int test5(int e) -{ - return ~e & 1; -} - -/* { dg-final { scan-tree-dump-times "\\(a \& 1\\) == 0" 1 "original" } } */ -/* { dg-final { scan-tree-dump-times "\\(b \& 1\\) == 0" 1 "original" } } */ -/* { dg-final { scan-tree-dump-times "\\(c \& 1\\) == 0" 1 "original" } } */ -/* { dg-final { scan-tree-dump-times "\\(d \& 1\\) == 0" 1 "original" } } */ -/* { dg-final { scan-tree-dump-times "\\(e \& 1\\) == 0" 1 "original" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-1.c new file mode 100644 index 000000000000..3d03b0992a46 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +int f(int a) +{ + int b = (a & 1)!=0; + return b; +} + +/* This should be optimized to just return (a & 1); */ +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c new file mode 100644 index 000000000000..f1c5b90353a3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +int f(int a) +{ + int b = a & 1; + int c = b == 0; + return c; +} + +/* This should be optimized to just return `(a&1) ^ 1` or `(~a) & 1`. 
*/ +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ +/* { dg-final { scan-tree-dump-times "~a" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times " & 1" 1 "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-3.c new file mode 100644 index 000000000000..ce80146d9df0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +int f(int a) +{ + int b = a & 1; + int c = b == 0; + int d = ~a; + int e = d & 1; + return c == e; +} + +/* This should be optimized to just `return 1` */ +/* { dg-final { scan-tree-dump-not " == " "optimized"} } */ +/* { dg-final { scan-tree-dump-times "return 1" 1 "optimized"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr111972-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr111972-1.c new file mode 100644 index 000000000000..0611808ed50d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr111972-1.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-phiopt" } */ +double +foo() { + long n3 = 3450000, xtra = 7270; + long i,ix; + long j; + double Check; + + /* Section 3, Conditional jumps */ + j = 0; + { + for (ix=0; ix<xtra; ix++) + { + for(i=0; i<n3; i++) + { + if(j==1) j = 2; + else j = 3; + if(j>2) j = 0; + else j = 1; + if(j<1) j = 1; + else j = 0; + } + } + } + Check = Check + (double)j; + return Check; +} + +/* the above if statements in loop should be optimized to just `j ^ 1` + and should not be (type)(j != 1). */ +/* { dg-final { scan-tree-dump-not " != 1" "phiopt2"} } */ +/* { dg-final { scan-tree-dump-times " \\^ 1" 1 "phiopt2"} } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c b/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c index 0d66cc4383f2..b08ec9d6ddb4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr69270.c @@ -7,8 +7,8 @@ /* { dg-final { scan-tree-dump-times "Replaced .bufferstep_\[0-9\]+. with constant .1." 
1 "dom3"} } */ /* And some assignments ought to fold down to constants. */ -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 1;" 1 "dom3"} } */ -/* { dg-final { scan-tree-dump-times "Folded to: _\[0-9\]+ = 0;" 1 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: (?:bufferstep)?_\[0-9\]+ = 1;" 1 "dom3"} } */ +/* { dg-final { scan-tree-dump-times "Folded to: (?:bufferstep)?_\[0-9\]+ = 0;" 1 "dom3"} } */ /* The XOR operations should have been optimized to constants. */ /* { dg-final { scan-tree-dump-not "bit_xor" "dom3"} } */ diff --git a/gcc/testsuite/gcc.target/i386/pr110790-2.c b/gcc/testsuite/gcc.target/i386/pr110790-2.c index 8b9d650c6e9b..16c73cb74659 100644 --- a/gcc/testsuite/gcc.target/i386/pr110790-2.c +++ b/gcc/testsuite/gcc.target/i386/pr110790-2.c @@ -9,5 +9,17 @@ refmpn_tstbit_bad (mp_srcptr ptr, unsigned long bit) return (((ptr)[(bit)/(32 - 0)] & (((mp_limb_t) 1L) << ((bit)%(32 - 0)))) != 0); } -/* { dg-final { scan-assembler "bt\[ql\]" } } */ -/* { dg-final { scan-assembler "setc" } } */ +/* 32bit produces: + btl %eax, %edx + setc %al + movzbl %al, %eax + */ +/* { dg-final { scan-assembler "bt\[ql\]" { target { ! lp64 } } } } */ +/* { dg-final { scan-assembler "setc" { target { ! lp64 } } } } */ + +/* 64bit produces: + shrq %cl, %rax + andl $1, %eax + */ +/* { dg-final { scan-assembler-times "shrq" 2 { target { lp64 } } } } */ +/* { dg-final { scan-assembler-times "andl" 2 { target { lp64 } } } } */ From 35ade856eaafd9c39ce864b25d127e1f98a3bb57 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Sat, 9 Dec 2023 20:02:24 -0800 Subject: [PATCH 194/311] aarch64: Fix wrong code for bfloat when f16 is enabled [PR 111867] The problem here is when f16 is enabled, movbf_aarch64 accepts `Ufc` as a constraint: [ w , Ufc ; fconsts , fp16 ] fmov\t%h0, %1 But that is for fmov values and in this case fmov represents f16 rather than bfloat16 values. This means we would get the wrong value in the register. 
Built and tested for aarch64-linux-gnu with no regressions. Also tested with `-march=armv9-a+sve2, gcc.dg/torture/bfloat16-basic.c and gcc.dg/torture/bfloat16-builtin.c no longer fail. gcc/ChangeLog: PR target/111867 * config/aarch64/aarch64.cc (aarch64_float_const_representable_p): For BFmode, only accept +0.0. Signed-off-by: Andrew Pinski --- gcc/config/aarch64/aarch64.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index a58b02d8421e..21a24d92b025 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -23936,6 +23936,10 @@ aarch64_float_const_representable_p (rtx x) || REAL_VALUE_MINUS_ZERO (r)) return false; + /* For BFmode, only handle 0.0. */ + if (GET_MODE (x) == BFmode) + return real_iszero (&r, false); + /* Extract exponent. */ r = real_value_abs (&r); exponent = REAL_EXP (&r); From 475b7f44c640c03f968d66dae389ea6165b4b5c2 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Mon, 11 Dec 2023 11:59:33 -0500 Subject: [PATCH 195/311] c++: add fixed testcase [PR63378] We accept this testcase since r12-4453-g79802c5dcc043a. PR c++/63378 gcc/testsuite/ChangeLog: * g++.dg/template/fnspec3.C: New test. 
--- gcc/testsuite/g++.dg/template/fnspec3.C | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 gcc/testsuite/g++.dg/template/fnspec3.C diff --git a/gcc/testsuite/g++.dg/template/fnspec3.C b/gcc/testsuite/g++.dg/template/fnspec3.C new file mode 100644 index 000000000000..c36cb17751d3 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/fnspec3.C @@ -0,0 +1,20 @@ +// PR c++/63378 +// { dg-do compile { target c++11 } } + +template +struct B { }; + +template +struct A { +private: + template + static B g(); + +public: + template + auto f() -> decltype(g()); +}; + +template<> +template<> +auto A::f() -> B; From 02f562484c17522d79a482ac702a5fa3c2dfdd10 Mon Sep 17 00:00:00 2001 From: Francois-Xavier Coudert Date: Mon, 11 Dec 2023 09:26:23 +0100 Subject: [PATCH 196/311] Testsuite: restrict test to nonpic targets The test is currently failing on x86_64-apple-darwin. gcc/testsuite/ChangeLog: PR testsuite/112297 * gcc.target/i386/pr100936.c: Require nonpic target. --- gcc/testsuite/gcc.target/i386/pr100936.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/i386/pr100936.c b/gcc/testsuite/gcc.target/i386/pr100936.c index c076cbb2405e..af494946fcd3 100644 --- a/gcc/testsuite/gcc.target/i386/pr100936.c +++ b/gcc/testsuite/gcc.target/i386/pr100936.c @@ -1,6 +1,7 @@ /* PR target/100936 */ /* { dg-do assemble } */ /* { dg-options "-O2" } */ +/* { dg-require-effective-target nonpic } */ __seg_gs int var; From 624e274ca3a4405a55662fa72d1163120df0e03d Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Mon, 11 Dec 2023 17:30:20 +0000 Subject: [PATCH 197/311] PR rtl-optimization/112380: Defend against CLOBBERs in combine.cc This patch addresses PR rtl-optimization/112380, an ICE-on-valid regression where a (clobber (const_int 0)) encounters a sanity checking gcc_assert (at line 7554) in simplify-rtx.cc. These CLOBBERs are used internally by GCC's combine pass much like error_mark_node is used by various language front-ends. 
The solutions are either to handle/accept these CLOBBERs through-out (or in more places in) the middle-end's RTL optimizers, including functions in simplify-rtx.cc that are used by passes other than combine, and/or attempt to prevent these CLOBBERs escaping from try_combine into the RTX/RTL stream. The benefit of the second approach is that it actually allows for better optimization: when try_combine fails to simplify an expression instead of substituting a CLOBBER to avoid the instruction pattern being recognized, noticing the CLOBBER often allows combine to attempt alternate simplifications/transformations looking for those that can be recognized. This first alternative is the minimal fix to address the CLOBBER encountered in the bugzilla PR. 2023-12-11 Roger Sayle gcc/ChangeLog PR rtl-optimization/112380 * combine.cc (expand_field_assignment): Check if gen_lowpart returned a CLOBBER, and avoid calling gen_simplify_binary with it if so. gcc/testsuite/ChangeLog PR rtl-optimization/112380 * gcc.dg/pr112380.c: New test case. --- gcc/combine.cc | 9 ++++++--- gcc/testsuite/gcc.dg/pr112380.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr112380.c diff --git a/gcc/combine.cc b/gcc/combine.cc index 6344cd3c9f24..f2c64a9a979f 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -7466,6 +7466,11 @@ expand_field_assignment (const_rtx x) if (!targetm.scalar_mode_supported_p (compute_mode)) break; + /* gen_lowpart_for_combine returns CLOBBER on failure. */ + rtx lowpart = gen_lowpart (compute_mode, SET_SRC (x)); + if (GET_CODE (lowpart) == CLOBBER) + break; + /* Now compute the equivalent expression. Make a copy of INNER for the SET_DEST in case it is a MEM into which we will substitute; we don't want shared RTL in that case. 
*/ @@ -7480,9 +7485,7 @@ expand_field_assignment (const_rtx x) inner); masked = simplify_gen_binary (ASHIFT, compute_mode, simplify_gen_binary ( - AND, compute_mode, - gen_lowpart (compute_mode, SET_SRC (x)), - mask), + AND, compute_mode, lowpart, mask), pos); x = gen_rtx_SET (copy_rtx (inner), diff --git a/gcc/testsuite/gcc.dg/pr112380.c b/gcc/testsuite/gcc.dg/pr112380.c new file mode 100644 index 000000000000..7dd7a85d363f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112380.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +enum { TGSI_FILE_NULL }; +struct ureg_src { + unsigned File : 4; + unsigned : 2; + unsigned : 2; + unsigned : 2; + unsigned : 1; + unsigned IndirectFile : 4; + unsigned IndirectSwizzle : 2; + int : 16; + int : 6; + int : 16; + int : 16; + unsigned : 10; +} __trans_tmp_1; + +int ureg_src_indirect_addr_1, ntt_emit_texture_instr_sampler_handle_src; + +void ureg_scalar(struct ureg_src); + +void ntt_emit_texture_instr() { + struct ureg_src sampler; + if (ntt_emit_texture_instr_sampler_handle_src) + sampler = __trans_tmp_1; + struct ureg_src reg = sampler; + reg.File != TGSI_FILE_NULL; + reg.IndirectFile = reg.IndirectSwizzle = ureg_src_indirect_addr_1; + sampler = reg; + ureg_scalar(reg); +} From b806c88fab3f9c6833563f9a44b608dd5dd14de9 Mon Sep 17 00:00:00 2001 From: Lipeng Zhu Date: Sat, 9 Dec 2023 10:39:45 -0500 Subject: [PATCH 198/311] libgfortran: Replace mutex with rwlock This patch introduces an rwlock and splits read and write access to the unit_root tree and unit_cache, using the rwlock instead of the mutex to increase CPU efficiency. In the get_gfc_unit function, the percentage of calls that step into the insert_unit function is around 30%; in most instances, we can get the unit while reading the unit_cache or unit_root tree. So splitting the read and write phases with an rwlock is an approach to make it more parallel. BTW, the IPC metrics can gain around 9x in our test server with 220 cores. 
The benchmark we used is https://github.com/rwesson/NEAT libgcc/ChangeLog: * gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro. (__gthrw): New function. (__gthread_rwlock_rdlock): New function. (__gthread_rwlock_tryrdlock): New function. (__gthread_rwlock_wrlock): New function. (__gthread_rwlock_trywrlock): New function. (__gthread_rwlock_unlock): New function. libgfortran/ChangeLog: * io/async.c (DEBUG_LINE): New macro. * io/async.h (RWLOCK_DEBUG_ADD): New macro. (CHECK_RDLOCK): New macro. (CHECK_WRLOCK): New macro. (TAIL_RWLOCK_DEBUG_QUEUE): New macro. (IN_RWLOCK_DEBUG_QUEUE): New macro. (RDLOCK): New macro. (WRLOCK): New macro. (RWUNLOCK): New macro. (RD_TO_WRLOCK): New macro. (INTERN_RDLOCK): New macro. (INTERN_WRLOCK): New macro. (INTERN_RWUNLOCK): New macro. * io/io.h (struct gfc_unit): Change UNIT_LOCK to UNIT_RWLOCK in a comment. (unit_lock): Remove including associated internal_proto. (unit_rwlock): New declarations including associated internal_proto. (dec_waiting_unlocked): Use WRLOCK and RWUNLOCK on unit_rwlock instead of __gthread_mutex_lock and __gthread_mutex_unlock on unit_lock. * io/transfer.c (st_read_done_worker): Use WRLOCK and RWUNLOCK on unit_rwlock instead of LOCK and UNLOCK on unit_lock. (st_write_done_worker): Likewise. * io/unit.c: Change UNIT_LOCK to UNIT_RWLOCK in 'IO locking rules' comment. Use unit_rwlock variable instead of unit_lock variable. (get_gfc_unit_from_unit_root): New function. (get_gfc_unit): Use RDLOCK, WRLOCK and RWUNLOCK on unit_rwlock instead of LOCK and UNLOCK on unit_lock. (close_unit_1): Use WRLOCK and RWUNLOCK on unit_rwlock instead of LOCK and UNLOCK on unit_lock. (close_units): Likewise. (newunit_alloc): Use RWUNLOCK on unit_rwlock instead of UNLOCK on unit_lock. * io/unix.c (find_file): Use RDLOCK and RWUNLOCK on unit_rwlock instead of LOCK and UNLOCK on unit_lock. (flush_all_units): Use WRLOCK and RWUNLOCK on unit_rwlock instead of LOCK and UNLOCK on unit_lock. 
--- libgcc/gthr-posix.h | 60 +++++++ libgfortran/io/async.c | 4 + libgfortran/io/async.h | 151 ++++++++++++++++++ libgfortran/io/io.h | 15 +- libgfortran/io/transfer.c | 8 +- libgfortran/io/unit.c | 117 +++++++++----- libgfortran/io/unix.c | 16 +- .../testsuite/libgomp.fortran/rwlock_1.f90 | 33 ++++ .../testsuite/libgomp.fortran/rwlock_2.f90 | 22 +++ .../testsuite/libgomp.fortran/rwlock_3.f90 | 18 +++ 10 files changed, 386 insertions(+), 58 deletions(-) create mode 100644 libgomp/testsuite/libgomp.fortran/rwlock_1.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/rwlock_2.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/rwlock_3.f90 diff --git a/libgcc/gthr-posix.h b/libgcc/gthr-posix.h index aebcfdd9f4ca..73283082997e 100644 --- a/libgcc/gthr-posix.h +++ b/libgcc/gthr-posix.h @@ -48,6 +48,9 @@ typedef pthread_t __gthread_t; typedef pthread_key_t __gthread_key_t; typedef pthread_once_t __gthread_once_t; typedef pthread_mutex_t __gthread_mutex_t; +#ifndef __cplusplus +typedef pthread_rwlock_t __gthread_rwlock_t; +#endif typedef pthread_mutex_t __gthread_recursive_mutex_t; typedef pthread_cond_t __gthread_cond_t; typedef struct timespec __gthread_time_t; @@ -58,6 +61,9 @@ typedef struct timespec __gthread_time_t; #define __GTHREAD_MUTEX_INIT PTHREAD_MUTEX_INITIALIZER #define __GTHREAD_MUTEX_INIT_FUNCTION __gthread_mutex_init_function +#ifndef __cplusplus +#define __GTHREAD_RWLOCK_INIT PTHREAD_RWLOCK_INITIALIZER +#endif #define __GTHREAD_ONCE_INIT PTHREAD_ONCE_INIT #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER) #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER @@ -135,6 +141,13 @@ __gthrw(pthread_mutexattr_init) __gthrw(pthread_mutexattr_settype) __gthrw(pthread_mutexattr_destroy) +#ifndef __cplusplus +__gthrw(pthread_rwlock_rdlock) +__gthrw(pthread_rwlock_tryrdlock) +__gthrw(pthread_rwlock_wrlock) +__gthrw(pthread_rwlock_trywrlock) +__gthrw(pthread_rwlock_unlock) +#endif #if defined(_LIBOBJC) || defined(_LIBOBJC_WEAK) /* 
Objective-C. */ @@ -885,6 +898,53 @@ __gthread_cond_destroy (__gthread_cond_t* __cond) return __gthrw_(pthread_cond_destroy) (__cond); } +#ifndef __cplusplus +static inline int +__gthread_rwlock_rdlock (__gthread_rwlock_t *__rwlock) +{ + if (__gthread_active_p ()) + return __gthrw_(pthread_rwlock_rdlock) (__rwlock); + else + return 0; +} + +static inline int +__gthread_rwlock_tryrdlock (__gthread_rwlock_t *__rwlock) +{ + if (__gthread_active_p ()) + return __gthrw_(pthread_rwlock_tryrdlock) (__rwlock); + else + return 0; +} + +static inline int +__gthread_rwlock_wrlock (__gthread_rwlock_t *__rwlock) +{ + if (__gthread_active_p ()) + return __gthrw_(pthread_rwlock_wrlock) (__rwlock); + else + return 0; +} + +static inline int +__gthread_rwlock_trywrlock (__gthread_rwlock_t *__rwlock) +{ + if (__gthread_active_p ()) + return __gthrw_(pthread_rwlock_trywrlock) (__rwlock); + else + return 0; +} + +static inline int +__gthread_rwlock_unlock (__gthread_rwlock_t *__rwlock) +{ + if (__gthread_active_p ()) + return __gthrw_(pthread_rwlock_unlock) (__rwlock); + else + return 0; +} +#endif + #endif /* _LIBOBJC */ #endif /* ! GCC_GTHR_POSIX_H */ diff --git a/libgfortran/io/async.c b/libgfortran/io/async.c index 8fa1f0d4ce04..91bf397105dd 100644 --- a/libgfortran/io/async.c +++ b/libgfortran/io/async.c @@ -42,6 +42,10 @@ DEBUG_LINE (__thread const char *aio_prefix = MPREFIX); DEBUG_LINE (__gthread_mutex_t debug_queue_lock = __GTHREAD_MUTEX_INIT;) DEBUG_LINE (aio_lock_debug *aio_debug_head = NULL;) +#ifdef __GTHREAD_RWLOCK_INIT +DEBUG_LINE (aio_rwlock_debug *aio_rwlock_debug_head = NULL;) +DEBUG_LINE (__gthread_rwlock_t debug_queue_rwlock = __GTHREAD_RWLOCK_INIT;) +#endif /* Current unit for asynchronous I/O. Needed for error reporting. 
*/ diff --git a/libgfortran/io/async.h b/libgfortran/io/async.h index ad226c8e8566..f112f6870bb7 100644 --- a/libgfortran/io/async.h +++ b/libgfortran/io/async.h @@ -210,6 +210,128 @@ DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #mutex, mutex); \ } while (0) +#ifdef __GTHREAD_RWLOCK_INIT +#define RWLOCK_DEBUG_ADD(rwlock) do { \ + aio_rwlock_debug *n; \ + n = xmalloc (sizeof (aio_rwlock_debug)); \ + n->prev = TAIL_RWLOCK_DEBUG_QUEUE; \ + if (n->prev) \ + n->prev->next = n; \ + n->next = NULL; \ + n->line = __LINE__; \ + n->func = __FUNCTION__; \ + n->rw = rwlock; \ + if (!aio_rwlock_debug_head) { \ + aio_rwlock_debug_head = n; \ + } \ + } while (0) + +#define CHECK_RDLOCK(rwlock, status) do { \ + aio_rwlock_debug *curr; \ + INTERN_WRLOCK (&debug_queue_rwlock); \ + if (__gthread_rwlock_tryrdlock (rwlock)) { \ + if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) { \ + sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \ + } else \ + sprintf (status, DEBUG_RED "unknown" DEBUG_NORM); \ + } \ + else { \ + __gthread_rwlock_unlock (rwlock); \ + sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM); \ + } \ + INTERN_RWUNLOCK (&debug_queue_rwlock); \ + }while (0) + +#define CHECK_WRLOCK(rwlock, status) do { \ + aio_rwlock_debug *curr; \ + INTERN_WRLOCK (&debug_queue_rwlock); \ + if (__gthread_rwlock_trywrlock (rwlock)) { \ + if ((curr = IN_RWLOCK_DEBUG_QUEUE (rwlock))) { \ + sprintf (status, DEBUG_RED "%s():%d" DEBUG_NORM, curr->func, curr->line); \ + } else \ + sprintf (status, DEBUG_RED "unknown" DEBUG_NORM); \ + } \ + else { \ + __gthread_rwlock_unlock (rwlock); \ + sprintf (status, DEBUG_GREEN "rwunlocked" DEBUG_NORM); \ + } \ + INTERN_RWUNLOCK (&debug_queue_rwlock); \ + }while (0) + +#define TAIL_RWLOCK_DEBUG_QUEUE ({ \ + aio_rwlock_debug *curr = aio_rwlock_debug_head; \ + while (curr && curr->next) { \ + curr = curr->next; \ + } \ + curr; \ + }) + +#define IN_RWLOCK_DEBUG_QUEUE(rwlock) ({ \ + __label__ end; \ + 
aio_rwlock_debug *curr = aio_rwlock_debug_head; \ + while (curr) { \ + if (curr->rw == rwlock) { \ + goto end; \ + } \ + curr = curr->next; \ + } \ + end:; \ + curr; \ + }) + +#define RDLOCK(rwlock) do { \ + char status[200]; \ + CHECK_RDLOCK (rwlock, status); \ + DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix, \ + DEBUG_RED "RDLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \ + INTERN_RDLOCK (rwlock); \ + INTERN_WRLOCK (&debug_queue_rwlock); \ + RWLOCK_DEBUG_ADD (rwlock); \ + INTERN_RWUNLOCK (&debug_queue_rwlock); \ + DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \ + } while (0) + +#define WRLOCK(rwlock) do { \ + char status[200]; \ + CHECK_WRLOCK (rwlock, status); \ + DEBUG_PRINTF ("%s%-42s prev: %-35s %20s():%-5d %18p\n", aio_prefix, \ + DEBUG_RED "WRLOCK: " DEBUG_NORM #rwlock, status, __FUNCTION__, __LINE__, (void *) rwlock); \ + INTERN_WRLOCK (rwlock); \ + INTERN_WRLOCK (&debug_queue_rwlock); \ + RWLOCK_DEBUG_ADD (rwlock); \ + INTERN_RWUNLOCK (&debug_queue_rwlock); \ + DEBUG_PRINTF ("%s" DEBUG_RED "ACQ:" DEBUG_NORM " %-30s %78p\n", aio_prefix, #rwlock, rwlock); \ + } while (0) + +#define RWUNLOCK(rwlock) do { \ + aio_rwlock_debug *curr; \ + DEBUG_PRINTF ("%s%-75s %20s():%-5d %18p\n", aio_prefix, DEBUG_GREEN "RWUNLOCK: " DEBUG_NORM #rwlock, \ + __FUNCTION__, __LINE__, (void *) rwlock); \ + INTERN_WRLOCK (&debug_queue_rwlock); \ + curr = IN_RWLOCK_DEBUG_QUEUE (rwlock); \ + if (curr) \ + { \ + if (curr->prev) \ + curr->prev->next = curr->next; \ + if (curr->next) { \ + curr->next->prev = curr->prev; \ + if (curr == aio_rwlock_debug_head) \ + aio_rwlock_debug_head = curr->next; \ + } else { \ + if (curr == aio_rwlock_debug_head) \ + aio_rwlock_debug_head = NULL; \ + } \ + free (curr); \ + } \ + INTERN_RWUNLOCK (&debug_queue_rwlock); \ + INTERN_RWUNLOCK (rwlock); \ + } while (0) + +#define RD_TO_WRLOCK(rwlock) \ + RWUNLOCK (rwlock); \ + WRLOCK (rwlock); +#endif + 
#define DEBUG_LINE(...) __VA_ARGS__ #else @@ -221,12 +343,31 @@ #define LOCK(mutex) INTERN_LOCK (mutex) #define UNLOCK(mutex) INTERN_UNLOCK (mutex) #define TRYLOCK(mutex) (__gthread_mutex_trylock (mutex)) +#ifdef __GTHREAD_RWLOCK_INIT +#define RDLOCK(rwlock) INTERN_RDLOCK (rwlock) +#define WRLOCK(rwlock) INTERN_WRLOCK (rwlock) +#define RWUNLOCK(rwlock) INTERN_RWUNLOCK (rwlock) +#define RD_TO_WRLOCK(rwlock) \ + RWUNLOCK (rwlock); \ + WRLOCK (rwlock); +#endif +#endif + +#ifndef __GTHREAD_RWLOCK_INIT +#define RDLOCK(rwlock) LOCK (rwlock) +#define WRLOCK(rwlock) LOCK (rwlock) +#define RWUNLOCK(rwlock) UNLOCK (rwlock) +#define RD_TO_WRLOCK(rwlock) do {} while (0) #endif #define INTERN_LOCK(mutex) T_ERROR (__gthread_mutex_lock, mutex); #define INTERN_UNLOCK(mutex) T_ERROR (__gthread_mutex_unlock, mutex); +#define INTERN_RDLOCK(rwlock) T_ERROR (__gthread_rwlock_rdlock, rwlock) +#define INTERN_WRLOCK(rwlock) T_ERROR (__gthread_rwlock_wrlock, rwlock) +#define INTERN_RWUNLOCK(rwlock) T_ERROR (__gthread_rwlock_unlock, rwlock) + #if ASYNC_IO /* au->lock has to be held when calling this macro. */ @@ -288,8 +429,18 @@ DEBUG_LINE (typedef struct aio_lock_debug{ struct aio_lock_debug *prev; } aio_lock_debug;) +DEBUG_LINE (typedef struct aio_rwlock_debug{ + __gthread_rwlock_t *rw; + int line; + const char *func; + struct aio_rwlock_debug *next; + struct aio_rwlock_debug *prev; +} aio_rwlock_debug;) + DEBUG_LINE (extern aio_lock_debug *aio_debug_head;) DEBUG_LINE (extern __gthread_mutex_t debug_queue_lock;) +DEBUG_LINE (extern aio_rwlock_debug *aio_rwlock_debug_head;) +DEBUG_LINE (extern __gthread_rwlock_t debug_queue_rwlock;) /* Thread - local storage of the current unit we are looking at. Needed for error reporting. 
*/ diff --git a/libgfortran/io/io.h b/libgfortran/io/io.h index ecdf1dd3f05d..15daa0995b1e 100644 --- a/libgfortran/io/io.h +++ b/libgfortran/io/io.h @@ -690,7 +690,7 @@ typedef struct gfc_unit from the UNIT_ROOT tree, but doesn't free it and the last of the waiting threads will do that. This must be either atomically increased/decreased, or - always guarded by UNIT_LOCK. */ + always guarded by UNIT_RWLOCK. */ int waiting; /* Flag set by close_unit if the unit as been closed. Must be manipulated under unit's lock. */ @@ -769,8 +769,13 @@ internal_proto(default_recl); extern gfc_unit *unit_root; internal_proto(unit_root); -extern __gthread_mutex_t unit_lock; -internal_proto(unit_lock); +#ifdef __GTHREAD_RWLOCK_INIT +extern __gthread_rwlock_t unit_rwlock; +internal_proto(unit_rwlock); +#else +extern __gthread_mutex_t unit_rwlock; +internal_proto(unit_rwlock); +#endif extern int close_unit (gfc_unit *); internal_proto(close_unit); @@ -1015,9 +1020,9 @@ dec_waiting_unlocked (gfc_unit *u) #ifdef HAVE_ATOMIC_FETCH_ADD (void) __atomic_fetch_add (&u->waiting, -1, __ATOMIC_RELAXED); #else - __gthread_mutex_lock (&unit_lock); + WRLOCK (&unit_rwlock); u->waiting--; - __gthread_mutex_unlock (&unit_lock); + RWUNLOCK (&unit_rwlock); #endif } diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c index 500db90c828c..00d516adcb04 100644 --- a/libgfortran/io/transfer.c +++ b/libgfortran/io/transfer.c @@ -4538,9 +4538,9 @@ st_read_done_worker (st_parameter_dt *dtp, bool unlock) if (free_newunit) { /* Avoid inverse lock issues by placing after unlock_unit. */ - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); newunit_free (dtp->common.unit); - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); } } @@ -4634,9 +4634,9 @@ st_write_done_worker (st_parameter_dt *dtp, bool unlock) if (free_newunit) { /* Avoid inverse lock issues by placing after unlock_unit. 
*/ - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); newunit_free (dtp->common.unit); - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); } } diff --git a/libgfortran/io/unit.c b/libgfortran/io/unit.c index 36d025949c21..0c8c35e464ec 100644 --- a/libgfortran/io/unit.c +++ b/libgfortran/io/unit.c @@ -33,34 +33,36 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* IO locking rules: - UNIT_LOCK is a master lock, protecting UNIT_ROOT tree and UNIT_CACHE. + UNIT_RWLOCK is a master rw lock, protecting UNIT_ROOT tree and UNIT_CACHE. + Using an rwlock improves efficiency by allowing us to separate readers + and writers of both UNIT_ROOT and UNIT_CACHE. Concurrent use of different units should be supported, so each unit has its own lock, LOCK. Open should be atomic with its reopening of units and list_read.c in several places needs find_unit another unit while holding stdin - unit's lock, so it must be possible to acquire UNIT_LOCK while holding + unit's lock, so it must be possible to acquire UNIT_RWLOCK while holding some unit's lock. Therefore to avoid deadlocks, it is forbidden - to acquire unit's private locks while holding UNIT_LOCK, except + to acquire unit's private locks while holding UNIT_RWLOCK, except for freshly created units (where no other thread can get at their address yet) or when using just trylock rather than lock operation. In addition to unit's private lock each unit has a WAITERS counter and CLOSED flag. WAITERS counter must be either only atomically incremented/decremented in all places (if atomic builtins - are supported), or protected by UNIT_LOCK in all places (otherwise). + are supported), or protected by UNIT_RWLOCK in all places (otherwise). CLOSED flag must be always protected by unit's LOCK. 
- After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_LOCK held, + After finding a unit in UNIT_CACHE or UNIT_ROOT with UNIT_RWLOCK held, WAITERS must be incremented to avoid concurrent close from freeing - the unit between unlocking UNIT_LOCK and acquiring unit's LOCK. - Unit freeing is always done under UNIT_LOCK. If close_unit sees any + the unit between unlocking UNIT_RWLOCK and acquiring unit's LOCK. + Unit freeing is always done under UNIT_RWLOCK. If close_unit sees any WAITERS, it doesn't free the unit but instead sets the CLOSED flag and the thread that decrements WAITERS to zero while CLOSED flag is - set is responsible for freeing it (while holding UNIT_LOCK). + set is responsible for freeing it (while holding UNIT_RWLOCK). flush_all_units operation is iterating over the unit tree with - increasing UNIT_NUMBER while holding UNIT_LOCK and attempting to + increasing UNIT_NUMBER while holding UNIT_RWLOCK and attempting to flush each unit (and therefore needs the unit's LOCK held as well). To avoid deadlocks, it just trylocks the LOCK and if unsuccessful, - remembers the current unit's UNIT_NUMBER, unlocks UNIT_LOCK, acquires - unit's LOCK and after flushing reacquires UNIT_LOCK and restarts with + remembers the current unit's UNIT_NUMBER, unlocks UNIT_RWLOCK, acquires + unit's LOCK and after flushing reacquires UNIT_RWLOCK and restarts with the smallest UNIT_NUMBER above the last one flushed. If find_unit/find_or_create_unit/find_file/get_unit routines return @@ -101,10 +103,14 @@ gfc_offset max_offset; gfc_offset default_recl; gfc_unit *unit_root; -#ifdef __GTHREAD_MUTEX_INIT -__gthread_mutex_t unit_lock = __GTHREAD_MUTEX_INIT; +#ifdef __GTHREAD_RWLOCK_INIT +__gthread_rwlock_t unit_rwlock = __GTHREAD_RWLOCK_INIT; #else -__gthread_mutex_t unit_lock; +#ifdef __GTHREAD_MUTEX_INIT +__gthread_mutex_t unit_rwlock = __GTHREAD_MUTEX_INIT; +#else +__gthread_mutex_t unit_rwlock; +#endif #endif /* We use these filenames for error reporting. 
*/ @@ -317,6 +323,28 @@ delete_unit (gfc_unit *old) unit_root = delete_treap (old, unit_root); } +/* get_gfc_unit_from_root()-- Given an integer, return a pointer + to the unit structure. Returns NULL if the unit does not exist, + otherwise returns a locked unit. */ + +static inline gfc_unit * +get_gfc_unit_from_unit_root (int n) +{ + gfc_unit *p; + int c = 0; + p = unit_root; + while (p != NULL) + { + c = compare (n, p->unit_number); + if (c < 0) + p = p->left; + if (c > 0) + p = p->right; + if (c == 0) + break; + } + return p; +} /* get_gfc_unit()-- Given an integer, return a pointer to the unit structure. Returns NULL if the unit does not exist, @@ -329,7 +357,7 @@ get_gfc_unit (int n, int do_create) int c, created = 0; NOTE ("Unit n=%d, do_create = %d", n, do_create); - LOCK (&unit_lock); + RDLOCK (&unit_rwlock); retry: for (c = 0; c < CACHE_SIZE; c++) @@ -339,18 +367,25 @@ retry: goto found; } - p = unit_root; - while (p != NULL) - { - c = compare (n, p->unit_number); - if (c < 0) - p = p->left; - if (c > 0) - p = p->right; - if (c == 0) - break; - } + p = get_gfc_unit_from_unit_root (n); + /* We did not find a unit in the cache nor in the unit list, + create a new (locked) unit and insert into the unit list and + cache. Manipulating either or both the unit list and the unit + cache requires to hold a write-lock [for obvious reasons]: + By separating the read/write lock, we will greatly reduce + the contention on the read part, while the write part is + unlikely once the unit hits the cache. */ + RD_TO_WRLOCK (&unit_rwlock); + + /* In the case of high concurrency, when multiple threads want + to find or create the same unit, the unit number may not + exist in cache nor in the unit list during read phase, then + threads will acquire the write-lock to insert the same unit + number to unit list. To avoid duplicate insert, we need to + find unit list once again to ensure that the unit number + not exist. 
*/ + p = get_gfc_unit_from_unit_root (n); if (p == NULL && do_create) { p = insert_unit (n); @@ -368,8 +403,8 @@ retry: if (created) { /* Newly created units have their lock held already - from insert_unit. Just unlock UNIT_LOCK and return. */ - UNLOCK (&unit_lock); + from insert_unit. Just unlock UNIT_RWLOCK and return. */ + RWUNLOCK (&unit_rwlock); return p; } @@ -380,7 +415,7 @@ found: if (! TRYLOCK (&p->lock)) { /* assert (p->closed == 0); */ - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); return p; } @@ -388,14 +423,14 @@ found: } - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); if (p != NULL && (p->child_dtio == 0)) { LOCK (&p->lock); if (p->closed) { - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); UNLOCK (&p->lock); if (predec_waiting_locked (p) == 0) destroy_unit_mutex (p); @@ -594,8 +629,8 @@ init_units (void) #endif #endif -#ifndef __GTHREAD_MUTEX_INIT - __GTHREAD_MUTEX_INIT_FUNCTION (&unit_lock); +#if (!defined(__GTHREAD_RWLOCK_INIT) && !defined(__GTHREAD_MUTEX_INIT)) + __GTHREAD_MUTEX_INIT_FUNCTION (&unit_rwlock); #endif if (sizeof (max_offset) == 8) @@ -732,7 +767,7 @@ close_unit_1 (gfc_unit *u, int locked) u->closed = 1; if (!locked) - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); for (i = 0; i < CACHE_SIZE; i++) if (unit_cache[i] == u) @@ -759,7 +794,7 @@ close_unit_1 (gfc_unit *u, int locked) destroy_unit_mutex (u); if (!locked) - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); return rc; } @@ -796,10 +831,10 @@ close_unit (gfc_unit *u) void close_units (void) { - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); while (unit_root != NULL) close_unit_1 (unit_root, 1); - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); free (newunits); @@ -906,7 +941,7 @@ finish_last_advance_record (gfc_unit *u) int newunit_alloc (void) { - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); if (!newunits) { newunits = xcalloc (16, 1); @@ -920,7 +955,7 @@ newunit_alloc (void) { newunits[ii] = true; newunit_lwi = ii + 1; - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); return -ii + 
NEWUNIT_START; } } @@ -933,12 +968,12 @@ newunit_alloc (void) memset (newunits + old_size, 0, old_size); newunits[old_size] = true; newunit_lwi = old_size + 1; - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); return -old_size + NEWUNIT_START; } -/* Free a previously allocated newunit= unit number. unit_lock must +/* Free a previously allocated newunit= unit number. unit_rwlock must be held when calling. */ void diff --git a/libgfortran/io/unix.c b/libgfortran/io/unix.c index d466df979dff..dcae051744d9 100644 --- a/libgfortran/io/unix.c +++ b/libgfortran/io/unix.c @@ -1773,7 +1773,7 @@ find_file (const char *file, gfc_charlen_type file_len) id = id_from_path (path); #endif - LOCK (&unit_lock); + RDLOCK (&unit_rwlock); retry: u = find_file0 (unit_root, FIND_FILE0_ARGS); if (u != NULL) @@ -1782,19 +1782,19 @@ retry: if (! __gthread_mutex_trylock (&u->lock)) { /* assert (u->closed == 0); */ - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); goto done; } inc_waiting_locked (u); } - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); if (u != NULL) { LOCK (&u->lock); if (u->closed) { - LOCK (&unit_lock); + RDLOCK (&unit_rwlock); UNLOCK (&u->lock); if (predec_waiting_locked (u) == 0) free (u); @@ -1838,13 +1838,13 @@ flush_all_units (void) gfc_unit *u; int min_unit = 0; - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); do { u = flush_all_units_1 (unit_root, min_unit); if (u != NULL) inc_waiting_locked (u); - UNLOCK (&unit_lock); + RWUNLOCK (&unit_rwlock); if (u == NULL) return; @@ -1855,13 +1855,13 @@ flush_all_units (void) if (u->closed == 0) { sflush (u->s); - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); UNLOCK (&u->lock); (void) predec_waiting_locked (u); } else { - LOCK (&unit_lock); + WRLOCK (&unit_rwlock); UNLOCK (&u->lock); if (predec_waiting_locked (u) == 0) free (u); diff --git a/libgomp/testsuite/libgomp.fortran/rwlock_1.f90 b/libgomp/testsuite/libgomp.fortran/rwlock_1.f90 new file mode 100644 index 000000000000..f90ecbeb00f7 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.fortran/rwlock_1.f90 @@ -0,0 +1,33 @@ +! { dg-do run } +! Multiple threads call open/write/read/close in concurrency with different unit number, +! threads can acquire read lock concurrently, to find unit from cache or unit list very frequently, +! if not found, threads will acquire the write lock exclusively to insert unit to cache and unit list. +! This test case is used to stress both the read and write lock when access unit cache and list. +program main + use omp_lib + implicit none + integer:: unit_number, v1, v2, i + character(11) :: file_name + character(3) :: async = "no" + !$omp parallel private (unit_number, v1, v2, file_name, async, i) + do i = 0, 100 + unit_number = 10 + omp_get_thread_num () + write (file_name, "(I3, A)") unit_number, "_tst.dat" + file_name = adjustl(file_name) + open (unit_number, file=file_name, asynchronous="yes") + ! call inquire with file parameter to test find_file in unix.c + inquire (file=file_name, asynchronous=async) + if (async /= "YES") stop 1 + write (unit_number, *, asynchronous="yes") unit_number + write (unit_number, *, asynchronous="yes") unit_number + 1 + close(unit_number) + + open (unit_number, file = file_name, asynchronous="yes") + read (unit_number, *, asynchronous="yes") v1 + read (unit_number, *, asynchronous="yes") v2 + wait (unit_number) + if ((v1 /= unit_number) .or. (v2 /= unit_number + 1)) stop 2 + close(unit_number, status="delete") + end do + !$omp end parallel +end program diff --git a/libgomp/testsuite/libgomp.fortran/rwlock_2.f90 b/libgomp/testsuite/libgomp.fortran/rwlock_2.f90 new file mode 100644 index 000000000000..08c80d14cfb5 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/rwlock_2.f90 @@ -0,0 +1,22 @@ +! { dg-do run } +! Insert a unit into cache at the beginning, then start multiple +! threads to access the same unit concurrency, unit will be found in unit cache during the read lock phase. +! 
This test case is used to test the read lock when access unit cache and list. +program main + use omp_lib + implicit none + integer:: thread_id, total_threads, i, j + total_threads = omp_get_max_threads () + open (10, file='tst.dat', asynchronous="yes") + !$omp parallel private (thread_id, i, j) + do i = 1, 100 + thread_id = omp_get_thread_num () + do j = 1, 100 + write (10, *, asynchronous="yes") thread_id, i + end do + end do + !$omp end parallel + ! call inquire with file parameter to test find_file in unix.c + call flush () + close (10, status="delete") +end program diff --git a/libgomp/testsuite/libgomp.fortran/rwlock_3.f90 b/libgomp/testsuite/libgomp.fortran/rwlock_3.f90 new file mode 100644 index 000000000000..1906fcd7a0b0 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/rwlock_3.f90 @@ -0,0 +1,18 @@ +! { dg-do run } +! Find or create the same unit number in concurrency, +! at beginning, threads cannot find the unit in cache or unit list, +! then threads will acquire the write lock to insert unit. +! This test case is used to ensure that no duplicate unit number will be +! inserted into cache nor unit list when same unit was accessed in concurrency. +program main + use omp_lib + implicit none + integer:: i + !$omp parallel private (i) + do i = 1, 100 + open (10, file='tst.dat', asynchronous="yes") + ! Delete the unit number from cache and unit list to stress write lock. + close (10, status="delete") + end do + !$omp end parallel +end program From e4fc59e0d50cd3d2bb3a004fff135f5955621d01 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:13 -0300 Subject: [PATCH 199/311] strub: add note on attribute access Document why attribute access doesn't need the same treatment as fn spec, and check that the assumption behind it holds. for gcc/ChangeLog * ipa-strub.cc (pass_ipa_strub::execute): Check that we don't add indirection to pointer parameters, and document attribute access non-interactions. 
--- gcc/ipa-strub.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gcc/ipa-strub.cc b/gcc/ipa-strub.cc index 2afb7a455751..8ec6824e8a80 100644 --- a/gcc/ipa-strub.cc +++ b/gcc/ipa-strub.cc @@ -2889,6 +2889,13 @@ pass_ipa_strub::execute (function *) && (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (nparm))) <= 4 * UNITS_PER_WORD)))) { + /* No point in indirecting pointer types. Presumably they + won't ever pass the size-based test above, but check the + assumption here, because getting this wrong would mess + with attribute access and possibly others. We deal with + fn spec below. */ + gcc_checking_assert (!POINTER_TYPE_P (TREE_TYPE (nparm))); + indirect_nparms.add (nparm); /* ??? Is there any case in which it is not safe to suggest the parms @@ -2976,7 +2983,9 @@ pass_ipa_strub::execute (function *) } } - /* ??? Maybe we could adjust it instead. */ + /* ??? Maybe we could adjust it instead. Note we don't need + to mess with attribute access: pointer-typed parameters are + not modified, so they can remain unchanged. */ if (drop_fnspec) remove_named_attribute_unsharing ("fn spec", &TYPE_ATTRIBUTES (nftype)); From 60decd36cd4b242e7814b5d34412fceae735406c Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:16 -0300 Subject: [PATCH 200/311] strub: disable on rl78 rl78 allocation of virtual registers to physical registers doesn't operate on asm statements, and strub uses asm statements in the runtime and in the generated code, to the point that the runtime won't build. Force strub disabled on that target. for gcc/ChangeLog * config/rl78/rl78.cc (TARGET_HAVE_STRUB_SUPPORT_FOR): Disable. 
--- gcc/config/rl78/rl78.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc index 5d8fddbd905a..f3507280859b 100644 --- a/gcc/config/rl78/rl78.cc +++ b/gcc/config/rl78/rl78.cc @@ -4972,6 +4972,11 @@ rl78_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) } +/* The strub runtime uses asms, and physical register allocation won't + deal with them, so disable it. */ +#undef TARGET_HAVE_STRUB_SUPPORT_FOR +#define TARGET_HAVE_STRUB_SUPPORT_FOR hook_bool_tree_false + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rl78.h" From 07d9f2fce81a3052b5421f10149e7bd5270b5946 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:19 -0300 Subject: [PATCH 201/311] multiflags: fix doc warning Comply with dubious doc warning that after an @xref there must be a comma or a period, not a closing parenthesis. for gcc/ChangeLog * doc/invoke.texi (multiflags): Add period after @xref to silence warning. --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 15f3a86e7688..7d15cf94821e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20589,7 +20589,7 @@ allocation before or after interprocedural optimization. This option enables multilib-aware @code{TFLAGS} to be used to build target libraries with options different from those the compiler is configured to use by default, through the use of specs (@xref{Spec -Files}) set up by compiler internals, by the target, or by builders at +Files}.) set up by compiler internals, by the target, or by builders at configure time. 
Like @code{TFLAGS}, this allows the target libraries to be built for From 76ca5ab4ef95c41c1ed67edfb34a1a455a602192 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:22 -0300 Subject: [PATCH 202/311] -finline-stringops: don't assume ptr_mode ptr in memset [PR112804] On aarch64 -milp32, and presumably on other such targets, ptr can be in a different mode than ptr_mode in the testcase. Cope with it. for gcc/ChangeLog PR target/112804 * builtins.cc (try_store_by_multiple_pieces): Use ptr's mode for the increment. for gcc/testsuite/ChangeLog PR target/112804 * gcc.target/aarch64/inline-mem-set-pr112804.c: New. --- gcc/builtins.cc | 2 +- gcc/testsuite/gcc.target/aarch64/inline-mem-set-pr112804.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/inline-mem-set-pr112804.c diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 38b0acff1312..12a535d313f1 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -4519,7 +4519,7 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, to = change_address (to, QImode, 0); emit_move_insn (to, val); if (update_needed) - next_ptr = plus_constant (ptr_mode, ptr, blksize); + next_ptr = plus_constant (GET_MODE (ptr), ptr, blksize); } else { diff --git a/gcc/testsuite/gcc.target/aarch64/inline-mem-set-pr112804.c b/gcc/testsuite/gcc.target/aarch64/inline-mem-set-pr112804.c new file mode 100644 index 000000000000..fe8414559864 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/inline-mem-set-pr112804.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-finline-stringops -mabi=ilp32 -ftrivial-auto-var-init=zero" } */ + +short m(unsigned k) { + const unsigned short *n[65]; + return 0; +} From 1e2ea685bdea9aa65da2bf4137264d14f38a6f0b Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:25 -0300 Subject: [PATCH 203/311] -finline-stringops: check base blksize for memset [PR112778] The recently-added logic for 
-finline-stringops=memset introduced an assumption that doesn't necessarily hold, namely, that can_store_by_pieces of a larger size implies can_store_by_pieces by smaller sizes. Checks for all sizes the by-multiple-pieces machinery might use before committing to an expansion pattern. for gcc/ChangeLog PR target/112778 * builtins.cc (can_store_by_multiple_pieces): New. (try_store_by_multiple_pieces): Call it. for gcc/testsuite/ChangeLog PR target/112778 * gcc.dg/inline-mem-cmp-pr112778.c: New. --- gcc/builtins.cc | 57 ++++++++++++++++--- .../gcc.dg/inline-mem-cmp-pr112778.c | 10 ++++ 2 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 12a535d313f1..f6c96498f078 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -4284,6 +4284,40 @@ expand_builtin_memset (tree exp, rtx target, machine_mode mode) return expand_builtin_memset_args (dest, val, len, target, mode, exp); } +/* Check that store_by_pieces allows BITS + LEN (so that we don't + expand something too unreasonably long), and every power of 2 in + BITS. It is assumed that LEN has already been tested by + itself. */ +static bool +can_store_by_multiple_pieces (unsigned HOST_WIDE_INT bits, + by_pieces_constfn constfun, + void *constfundata, unsigned int align, + bool memsetp, + unsigned HOST_WIDE_INT len) +{ + if (bits + && !can_store_by_pieces (bits + len, constfun, constfundata, + align, memsetp)) + return false; + + /* BITS set are expected to be generally in the low range and + contiguous. We do NOT want to repeat the test above in case BITS + has a single bit set, so we terminate the loop when BITS == BIT. + In the unlikely case that BITS has the MSB set, also terminate in + case BIT gets shifted out. 
*/ + for (unsigned HOST_WIDE_INT bit = 1; bit < bits && bit; bit <<= 1) + { + if ((bits & bit) == 0) + continue; + + if (!can_store_by_pieces (bit, constfun, constfundata, + align, memsetp)) + return false; + } + + return true; +} + /* Try to store VAL (or, if NULL_RTX, VALC) in LEN bytes starting at TO. Return TRUE if successful, FALSE otherwise. TO is assumed to be aligned at an ALIGN-bits boundary. LEN must be a multiple of @@ -4341,7 +4375,11 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, else /* Huh, max_len < min_len? Punt. See pr100843.c. */ return false; - if (min_len >= blksize) + if (min_len >= blksize + /* ??? Maybe try smaller fixed-prefix blksizes before + punting? */ + && can_store_by_pieces (blksize, builtin_memset_read_str, + &valc, align, true)) { min_len -= blksize; min_bits = floor_log2 (min_len); @@ -4367,8 +4405,9 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, happen because of the way max_bits and blksize are related, but it doesn't hurt to test. */ if (blksize > xlenest - || !can_store_by_pieces (xlenest, builtin_memset_read_str, - &valc, align, true)) + || !can_store_by_multiple_pieces (xlenest - blksize, + builtin_memset_read_str, + &valc, align, true, blksize)) { if (!(flag_inline_stringops & ILSOP_MEMSET)) return false; @@ -4386,17 +4425,17 @@ try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, of overflow. 
*/ if (max_bits < orig_max_bits && xlenest + blksize >= xlenest - && can_store_by_pieces (xlenest + blksize, - builtin_memset_read_str, - &valc, align, true)) + && can_store_by_multiple_pieces (xlenest, + builtin_memset_read_str, + &valc, align, true, blksize)) { max_loop = true; break; } if (blksize - && can_store_by_pieces (xlenest, - builtin_memset_read_str, - &valc, align, true)) + && can_store_by_multiple_pieces (xlenest, + builtin_memset_read_str, + &valc, align, true, 0)) { max_len += blksize; min_len += blksize; diff --git a/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c b/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c new file mode 100644 index 000000000000..fdfc5b6f28c8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/inline-mem-cmp-pr112778.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-finline-stringops" } */ + +char buf[3]; + +int +f () +{ + __builtin_memset (buf, 'v', 3); +} From a8a3d832e609501002dee54150abfd96a28fe532 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Mon, 11 Dec 2023 15:09:28 -0300 Subject: [PATCH 204/311] -finline-stringops: avoid too-wide smallest_int_mode_for_size [PR112784] smallest_int_mode_for_size may abort when the requested mode is not available. Call int_mode_for_size instead, that signals the unsatisfiable request in a more graceful way. for gcc/ChangeLog PR middle-end/112784 * expr.cc (emit_block_move_via_loop): Call int_mode_for_size for maybe-too-wide sizes. (emit_block_cmp_via_loop): Likewise. for gcc/testsuite/ChangeLog PR middle-end/112784 * gcc.target/i386/avx512cd-inline-stringops-pr112784.c: New. 
--- gcc/expr.cc | 20 +++++++++---------- .../i386/avx512cd-inline-stringops-pr112784.c | 12 +++++++++++ 2 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512cd-inline-stringops-pr112784.c diff --git a/gcc/expr.cc b/gcc/expr.cc index 4686cacd22fe..9fef2bf65853 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -2449,15 +2449,14 @@ emit_block_move_via_loop (rtx x, rtx y, rtx size, } emit_move_insn (iter, iter_init); - scalar_int_mode int_move_mode - = smallest_int_mode_for_size (incr * BITS_PER_UNIT); - if (GET_MODE_BITSIZE (int_move_mode) != incr * BITS_PER_UNIT) + opt_scalar_int_mode int_move_mode + = int_mode_for_size (incr * BITS_PER_UNIT, 1); + if (!int_move_mode.exists (&move_mode) + || GET_MODE_BITSIZE (int_move_mode.require ()) != incr * BITS_PER_UNIT) { move_mode = BLKmode; gcc_checking_assert (can_move_by_pieces (incr, align)); } - else - move_mode = int_move_mode; x_addr = force_operand (XEXP (x, 0), NULL_RTX); y_addr = force_operand (XEXP (y, 0), NULL_RTX); @@ -2701,16 +2700,15 @@ emit_block_cmp_via_loop (rtx x, rtx y, rtx len, tree len_type, rtx target, iter = gen_reg_rtx (iter_mode); emit_move_insn (iter, iter_init); - scalar_int_mode int_cmp_mode - = smallest_int_mode_for_size (incr * BITS_PER_UNIT); - if (GET_MODE_BITSIZE (int_cmp_mode) != incr * BITS_PER_UNIT - || !can_compare_p (NE, int_cmp_mode, ccp_jump)) + opt_scalar_int_mode int_cmp_mode + = int_mode_for_size (incr * BITS_PER_UNIT, 1); + if (!int_cmp_mode.exists (&cmp_mode) + || GET_MODE_BITSIZE (int_cmp_mode.require ()) != incr * BITS_PER_UNIT + || !can_compare_p (NE, cmp_mode, ccp_jump)) { cmp_mode = BLKmode; gcc_checking_assert (incr != 1); } - else - cmp_mode = int_cmp_mode; /* Save the base addresses. 
*/ x_addr = force_operand (XEXP (x, 0), NULL_RTX); diff --git a/gcc/testsuite/gcc.target/i386/avx512cd-inline-stringops-pr112784.c b/gcc/testsuite/gcc.target/i386/avx512cd-inline-stringops-pr112784.c new file mode 100644 index 000000000000..c81f99c693c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512cd-inline-stringops-pr112784.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512cd -finline-stringops" } */ + +struct S { + int e; +} __attribute__((aligned(128))); + +int main() { + struct S s1; + struct S s2; + int v = __builtin_memcmp(&s1, &s2, sizeof(s1)); +} From 074c6f15f7a28c620c756f18c2a310961de00539 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Mon, 11 Dec 2023 14:05:48 -0500 Subject: [PATCH 205/311] testsuite: update mangling Since r14-6064-gc3f281a0c1ca50 this test was checking for the wrong mangling, but it still passed on targets that support ABI compatibility aliases. Let's avoid generating those aliases when checking mangling. gcc/ChangeLog: * common.opt: Add comment. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-explicit-inst1.C: Specify ABI v18. * g++.dg/cpp2a/concepts-explicit-inst1a.C: New test. --- gcc/common.opt | 1 + .../g++.dg/cpp2a/concepts-explicit-inst1.C | 1 + .../g++.dg/cpp2a/concepts-explicit-inst1a.C | 24 +++++++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1a.C diff --git a/gcc/common.opt b/gcc/common.opt index 5eb5ecff04bd..d263a959df38 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1020,6 +1020,7 @@ Driver Undocumented ; ; 19: Emits ABI tags if needed in structured binding mangled names. ; Ignores cv-quals on [[no_unique_object]] members. +; Mangles constraints on function templates. ; Default in G++ 14. 
; ; Additional positive integers will be assigned as new versions of diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1.C b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1.C index 5cbf64a8cd3d..b66e919e880c 100644 --- a/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1.C +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1.C @@ -1,4 +1,5 @@ // { dg-do compile { target c++20 } } +// { dg-additional-options "-fabi-version=18 -fabi-compat-version=18" } // { dg-final { scan-assembler "_Z1gI1XEvT_" } } // { dg-final { scan-assembler "_Z1gI1YEvT_" } } // { dg-final { scan-assembler "_Z1gIiEvT_" } } diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1a.C b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1a.C new file mode 100644 index 000000000000..feb31f9e24c8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-explicit-inst1a.C @@ -0,0 +1,24 @@ +// { dg-do compile { target c++20 } } +// { dg-additional-options "-fabi-version=0 -fabi-compat-version=0" } +// { dg-final { scan-assembler "_Z1gITk1C1YEvT_" } } +// { dg-final { scan-assembler "_Z1gITk1D1XEvT_" } } +// { dg-final { scan-assembler "_Z1gIiEvT_" } } + +template + concept C = __is_class(T); + +template + concept D = C && __is_empty(T); + +struct X { }; +struct Y { int n; }; + +template void g(T) { } // #1 +template void g(T) { } // #2 +template void g(T) { } // #3 + +template void g(int); // Instantiate #1 +template void g(X); // Instantitae #3 +template void g(Y); // Instantiate #2 + +int main() { } From a14d247f339454ef9068d24e64eeaeef282fec95 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 11 Dec 2023 19:43:38 +0000 Subject: [PATCH 206/311] Treat "p" in asms as addressing VOIDmode check_asm_operands was inconsistent about how it handled "p" after RA compared to before RA. Before RA it tested the address with a void (unknown) memory mode: case CT_ADDRESS: /* Every address operand can be reloaded to fit. 
*/ result = result || address_operand (op, VOIDmode); break; After RA it deferred to constrain_operands, which used the mode of the operand: if ((GET_MODE (op) == VOIDmode || SCALAR_INT_MODE_P (GET_MODE (op))) && (strict <= 0 || (strict_memory_address_p (recog_data.operand_mode[opno], op)))) win = true; Using the mode of the operand is necessary for special predicates, where it is used to give the memory mode. But for asms, the operand mode is simply the mode of the address itself (so DImode on 64-bit targets), which doesn't say anything about the addressed memory. This patch uses VOIDmode for asms but continues to use the operand mode for .md insns. It's needed to avoid a regression in the testcase with the late-combine pass. Fixing this made me realise that recog_level2 was doing duplicate work for asms after RA. gcc/ * recog.cc (constrain_operands): Pass VOIDmode to strict_memory_address_p for 'p' constraints in asms. * rtl-ssa/changes.cc (recog_level2): Skip redundant constrain_operands for asms. gcc/testsuite/ * gcc.target/aarch64/prfm_imm_offset_2.c: New test. --- gcc/recog.cc | 18 +++++++++++------- gcc/rtl-ssa/changes.cc | 4 +++- .../gcc.target/aarch64/prfm_imm_offset_2.c | 1 + 3 files changed, 15 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c diff --git a/gcc/recog.cc b/gcc/recog.cc index ed084fa88086..1138a7934bd3 100644 --- a/gcc/recog.cc +++ b/gcc/recog.cc @@ -3199,13 +3199,17 @@ constrain_operands (int strict, alternative_mask alternatives) strictly valid, i.e., that all pseudos requiring hard regs have gotten them. We also want to make sure we have a valid mode. */ - if ((GET_MODE (op) == VOIDmode - || SCALAR_INT_MODE_P (GET_MODE (op))) - && (strict <= 0 - || (strict_memory_address_p - (recog_data.operand_mode[opno], op)))) - win = true; - break; + { + auto mem_mode = (recog_data.is_asm + ? 
VOIDmode + : recog_data.operand_mode[opno]); + if ((GET_MODE (op) == VOIDmode + || SCALAR_INT_MODE_P (GET_MODE (op))) + && (strict <= 0 + || strict_memory_address_p (mem_mode, op))) + win = true; + break; + } /* No need to check general_operand again; it was done in insn-recog.cc. Well, except that reload diff --git a/gcc/rtl-ssa/changes.cc b/gcc/rtl-ssa/changes.cc index 2f2d12d5f30b..443d0575df5b 100644 --- a/gcc/rtl-ssa/changes.cc +++ b/gcc/rtl-ssa/changes.cc @@ -986,8 +986,10 @@ recog_level2 (insn_change &change, add_regno_clobber_fn add_regno_clobber) pat = newpat; } + // check_asm_operands checks the constraints after RA, so we don't + // need to do it again. INSN_CODE (rtl) = icode; - if (reload_completed) + if (reload_completed && !asm_p) { extract_insn (rtl); if (!constrain_operands (1, get_preferred_alternatives (rtl))) diff --git a/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c b/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c new file mode 100644 index 000000000000..2dd695157f20 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c @@ -0,0 +1 @@ +void f(char *p) { asm("prfm pldl1keep, %a0\n" :: "p" (p + 6)); } From 6008b80b25d71827fb26ce49f49aae02b645bb12 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 11 Dec 2023 16:18:56 -0500 Subject: [PATCH 207/311] analyzer: fix uninitialized bitmap [PR112955] In r14-5566-g841008d3966c0f I added a new ctor for feasibility_state, but failed to call bitmap_clear on m_snodes_visited. Fixed thusly. gcc/analyzer/ChangeLog: PR analyzer/112955 * engine.cc (feasibility_state::feasibility_state): Initialize m_snodes_visited. 
Signed-off-by: David Malcolm --- gcc/analyzer/engine.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index d2524e34f586..ed1e923743e5 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -4875,6 +4875,7 @@ feasibility_state::feasibility_state (const region_model &model, : m_model (model), m_snodes_visited (sg.m_nodes.length ()) { + bitmap_clear (m_snodes_visited); } feasibility_state & From 453e0f45a49f425992bc47ff8909ed8affc29d2e Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 11 Dec 2023 22:52:54 +0100 Subject: [PATCH 208/311] Resolve ICE in 'gcc/fortran/trans-openmp.cc:gfc_omp_call_is_alloc' Fix-up for recent commit 2505a8b41d3b74a545755a278f3750a29c1340b6 "OpenMP: Minor '!$omp allocators' cleanup", which caused: {+FAIL: gfortran.dg/gomp/allocate-5.f90 -O (internal compiler error: tree check: expected class 'type', have 'declaration' (function_decl) in gfc_omp_call_is_alloc, at fortran/trans-openmp.cc:8386)+} [-PASS:-]{+FAIL:+} gfortran.dg/gomp/allocate-5.f90 -O (test for excess errors) ..., and similarly in 'libgomp.fortran/allocators-1.f90', 'libgomp.fortran/allocators-2.f90', 'libgomp.fortran/allocators-3.f90', 'libgomp.fortran/allocators-4.f90', 'libgomp.fortran/allocators-5.f90'. gcc/fortran/ * trans-openmp.cc (gfc_omp_call_is_alloc): Resolve ICE. --- gcc/fortran/trans-openmp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 95184920cf70..f7c73a5d2734 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -8381,10 +8381,10 @@ gfc_omp_call_is_alloc (tree ptr) { fn = build_function_type_list (boolean_type_node, ptr_type_node, NULL_TREE); - fn = build_fn_decl ("GOMP_is_alloc", fn); tree att = build_tree_list (NULL_TREE, build_string (4, ". 
R ")); att = tree_cons (get_identifier ("fn spec"), att, TYPE_ATTRIBUTES (fn)); fn = build_type_attribute_variant (fn, att); + fn = build_fn_decl ("GOMP_is_alloc", fn); } return build_call_expr_loc (input_location, fn, 1, ptr); } From 6cf9654c3b06c076502a39a3bf2222dd6e43b73b Mon Sep 17 00:00:00 2001 From: Martin Uecker Date: Wed, 15 Nov 2023 09:22:55 +0100 Subject: [PATCH 209/311] Fix regression causing ICE for structs with VLAs [PR 112488] A previous patch that fixed several ICEs related to size expressions of VM types (PR c/70418, ...) caused a regression for structs where a DECL_EXPR is not generated anymore although required. We now call add_decl_expr introduced by the previous patch from finish_struct. The function is revised with a new argument to not set the TYPE_NAME for the type to the DECL_EXPR in this specific case. PR c/112488 gcc/c * c-decl.cc (add_decl_expr): Revise. (finish_struct): Create DECL_EXPR. * c-parser.cc (c_parser_struct_or_union_specifier): Call finish_struct with expression for VLA sizes. * c-tree.h (finish_struct): Add argument. gcc/testsuite * gcc.dg/pr112488-1.c: New test. * gcc.dg/pr112488-2.c: New test. * gcc.dg/pr112898.c: New test. * gcc.misc-tests/gcov-pr85350.c: Adapt. --- gcc/c/c-decl.cc | 33 ++++++++++++++++----- gcc/c/c-parser.cc | 2 +- gcc/c/c-tree.h | 3 +- gcc/testsuite/gcc.dg/pr112488-1.c | 14 +++++++++ gcc/testsuite/gcc.dg/pr112488-2.c | 13 ++++++++ gcc/testsuite/gcc.dg/pr112898.c | 9 ++++++ gcc/testsuite/gcc.misc-tests/gcov-pr85350.c | 2 +- 7 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr112488-1.c create mode 100644 gcc/testsuite/gcc.dg/pr112488-2.c create mode 100644 gcc/testsuite/gcc.dg/pr112898.c diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 92c83e1bf10d..039a66fef09c 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -6618,12 +6618,10 @@ smallest_type_quals_location (const location_t *locations, the size evaluation prior to the side effects. 
We therefore use BIND_EXPRs in TYPENAME contexts too. */ static void -add_decl_expr (location_t loc, enum decl_context decl_context, tree type, - tree *expr) +add_decl_expr (location_t loc, tree type, tree *expr, bool set_name_p) { tree bind = NULL_TREE; - if (decl_context == TYPENAME || decl_context == PARM - || decl_context == FIELD) + if (expr) { bind = build3 (BIND_EXPR, void_type_node, NULL_TREE, NULL_TREE, NULL_TREE); @@ -6636,7 +6634,8 @@ add_decl_expr (location_t loc, enum decl_context decl_context, tree type, pushdecl (decl); DECL_ARTIFICIAL (decl) = 1; add_stmt (build_stmt (DECL_SOURCE_LOCATION (decl), DECL_EXPR, decl)); - TYPE_NAME (type) = decl; + if (set_name_p) + TYPE_NAME (type) = decl; if (bind) { @@ -7635,7 +7634,12 @@ grokdeclarator (const struct c_declarator *declarator, type has a name/declaration of it's own, but special attention is required if the type is anonymous. */ if (!TYPE_NAME (type) && c_type_variably_modified_p (type)) - add_decl_expr (loc, decl_context, type, expr); + { + bool bind_p = decl_context == TYPENAME + || decl_context == FIELD + || decl_context == PARM; + add_decl_expr (loc, type, bind_p ? expr : NULL, true); + } type = c_build_pointer_type (type); @@ -7900,7 +7904,12 @@ grokdeclarator (const struct c_declarator *declarator, /* The pointed-to type may need a decl expr (see above). */ if (!TYPE_NAME (type) && c_type_variably_modified_p (type)) - add_decl_expr (loc, decl_context, type, expr); + { + bool bind_p = decl_context == TYPENAME + || decl_context == FIELD + || decl_context == PARM; + add_decl_expr (loc, type, bind_p ? 
expr : NULL, true); + } type = c_build_pointer_type (type); type_quals = array_ptr_quals; @@ -9257,7 +9266,8 @@ is_flexible_array_member_p (bool is_last_field, tree finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, - class c_struct_parse_info *enclosing_struct_parse_info) + class c_struct_parse_info *enclosing_struct_parse_info, + tree *expr) { tree x; bool toplevel = file_scope == current_scope; @@ -9595,6 +9605,13 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, finish_incomplete_vars (incomplete_vars, toplevel); + /* Make sure a DECL_EXPR is created for structs with VLA members. + Because we do not know the context, we always pass expr + to force creation of a BIND_EXPR which is required in some + contexts. */ + if (c_type_variably_modified_p (t)) + add_decl_expr (loc, t, expr, false); + if (warn_cxx_compat) warn_cxx_compat_finish_struct (fieldlist, TREE_CODE (t), loc); diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 5700ccccc493..0c301015d880 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -4087,7 +4087,7 @@ c_parser_struct_or_union_specifier (c_parser *parser) ret.spec = finish_struct (struct_loc, type, nreverse (contents), chainon (std_attrs, chainon (attrs, postfix_attrs)), - struct_info); + struct_info, &expr); ret.kind = ctsk_tagdef; ret.expr = expr; ret.expr_const_operands = true; diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index d0bdc3df2c2c..b325723a7345 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -656,7 +656,8 @@ extern void finish_decl (tree, location_t, tree, tree, tree); extern tree finish_enum (tree, tree, tree); extern void finish_function (location_t = input_location); extern tree finish_struct (location_t, tree, tree, tree, - class c_struct_parse_info *); + class c_struct_parse_info *, + tree *expr = NULL); extern tree c_simulate_enum_decl (location_t, const char *, vec *); extern tree c_simulate_record_decl (location_t, const char *, diff --git 
a/gcc/testsuite/gcc.dg/pr112488-1.c b/gcc/testsuite/gcc.dg/pr112488-1.c new file mode 100644 index 000000000000..b53295c4366d --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112488-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } + * { dg-options "-O1" } */ + +extern void abort(void); + +int test(int *n) { + struct T { char a[*n], b[*n]; }; + return sizeof(struct T) - sizeof(struct T); +} + +void f1(int *p) { + if (!test(p)) abort(); +} + diff --git a/gcc/testsuite/gcc.dg/pr112488-2.c b/gcc/testsuite/gcc.dg/pr112488-2.c new file mode 100644 index 000000000000..3f0fc43eeb63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112488-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } + * { dg-options "-std=gnu23 -O1" } */ + +extern void abort(void); + +int test(int *n, struct T { char a[*n], b[*n]; }*) { /* { dg-warning "declared inside parameter list" } */ + return sizeof(struct T) - sizeof(struct T); +} + +void f1(int *p) { + if (test(p, 0)) abort(); +} + diff --git a/gcc/testsuite/gcc.dg/pr112898.c b/gcc/testsuite/gcc.dg/pr112898.c new file mode 100644 index 000000000000..395f3afaf2e4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112898.c @@ -0,0 +1,9 @@ +/* { dg-do compile } + { dg-options "-O2 -finstrument-functions-once" } */ + +void func(int n) +{ + struct T { int x[n]; }; + struct T *t = __builtin_malloc(sizeof *t); +} + diff --git a/gcc/testsuite/gcc.misc-tests/gcov-pr85350.c b/gcc/testsuite/gcc.misc-tests/gcov-pr85350.c index a42bf1282b2f..0383b81fdfb6 100644 --- a/gcc/testsuite/gcc.misc-tests/gcov-pr85350.c +++ b/gcc/testsuite/gcc.misc-tests/gcov-pr85350.c @@ -4,7 +4,7 @@ int main (void) { const int t = 2; /* count(1) */ - struct s1 { /* count(-) */ + struct s1 { /* count(1) */ int x; int g[t]; }; From 639776f260144b874c29ede0d9a6613192667094 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 11 Dec 2023 23:52:46 +0100 Subject: [PATCH 210/311] testsuite: Disable -fstack-protector* for some strub tests In our distro builds, we test with 
RUNTESTFLAGS='--target_board=unix\{,-fstack-protector-strong\}' because SSP is something we use widely in the distribution. 4 new strub test FAIL with that option though, as can be seen with a simple make check-gcc check-g++ RUNTESTFLAGS='--target_board=unix\{,-fstack-protector-strong\} dg.exp=strub-O*' - in particular, the expand dump \[(\]call\[^\n\]*strub_leave.*\n\[(\]code_label regexps see code_labels in there introduced for stack protector. The following patch fixes it by using -fno-stack-protector for these explicitly. 2023-12-11 Jakub Jelinek * c-c++-common/strub-O2fni.c: Add -fno-stack-protector to dg-options. * c-c++-common/strub-O3fni.c: Likewise. * c-c++-common/strub-Os.c: Likewise. * c-c++-common/strub-Og.c: Likewise. --- gcc/testsuite/c-c++-common/strub-O2fni.c | 2 +- gcc/testsuite/c-c++-common/strub-O3fni.c | 2 +- gcc/testsuite/c-c++-common/strub-Og.c | 2 +- gcc/testsuite/c-c++-common/strub-Os.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/c-c++-common/strub-O2fni.c b/gcc/testsuite/c-c++-common/strub-O2fni.c index 905e2c6b2ffc..acb8ceed1c59 100644 --- a/gcc/testsuite/c-c++-common/strub-O2fni.c +++ b/gcc/testsuite/c-c++-common/strub-O2fni.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fstrub=strict -fdump-rtl-expand -fno-inline" } */ +/* { dg-options "-O2 -fstrub=strict -fdump-rtl-expand -fno-inline -fno-stack-protector" } */ /* { dg-require-effective-target strub } */ /* With -fno-inline, none of the strub builtins are inlined. 
*/ diff --git a/gcc/testsuite/c-c++-common/strub-O3fni.c b/gcc/testsuite/c-c++-common/strub-O3fni.c index c46fce38e5c9..454c706c3208 100644 --- a/gcc/testsuite/c-c++-common/strub-O3fni.c +++ b/gcc/testsuite/c-c++-common/strub-O3fni.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fstrub=strict -fdump-rtl-expand -fno-inline" } */ +/* { dg-options "-O3 -fstrub=strict -fdump-rtl-expand -fno-inline -fno-stack-protector" } */ /* { dg-require-effective-target strub } */ /* With -fno-inline, none of the strub builtins are inlined. */ diff --git a/gcc/testsuite/c-c++-common/strub-Og.c b/gcc/testsuite/c-c++-common/strub-Og.c index 3b8eb19765cd..a81f220c08f2 100644 --- a/gcc/testsuite/c-c++-common/strub-Og.c +++ b/gcc/testsuite/c-c++-common/strub-Og.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-Og -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-options "-Og -fstrub=strict -fdump-rtl-expand -fno-stack-protector" } */ /* { dg-require-effective-target strub } */ /* At -Og, without -fno-inline, we fully expand enter, but neither update nor diff --git a/gcc/testsuite/c-c++-common/strub-Os.c b/gcc/testsuite/c-c++-common/strub-Os.c index 8cfb253d6764..5a146f00e1ea 100644 --- a/gcc/testsuite/c-c++-common/strub-Os.c +++ b/gcc/testsuite/c-c++-common/strub-Os.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-Os -fstrub=strict -fdump-rtl-expand" } */ +/* { dg-options "-Os -fstrub=strict -fdump-rtl-expand -fno-stack-protector" } */ /* { dg-require-effective-target strub } */ /* At -Os, without -fno-inline, we fully expand enter, and also update. The From d5c96225b4a13d0783b41660a4ccc7f452216290 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 12 Dec 2023 00:17:22 +0000 Subject: [PATCH 211/311] Daily bump. 
--- gcc/ChangeLog | 221 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 5 + gcc/analyzer/ChangeLog | 11 ++ gcc/c/ChangeLog | 15 +++ gcc/cp/ChangeLog | 28 +++++ gcc/d/ChangeLog | 10 ++ gcc/fortran/ChangeLog | 22 ++++ gcc/testsuite/ChangeLog | 213 ++++++++++++++++++++++++++++++++++++++ libgcc/ChangeLog | 10 ++ libgfortran/ChangeLog | 40 ++++++++ libgomp/ChangeLog | 15 +++ libphobos/ChangeLog | 5 + 13 files changed, 596 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6724048f5172..323e6ad3d070 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,224 @@ +2023-12-11 Richard Sandiford + + * recog.cc (constrain_operands): Pass VOIDmode to + strict_memory_address_p for 'p' constraints in asms. + * rtl-ssa/changes.cc (recog_level2): Skip redundant constrain_operands + for asms. + +2023-12-11 Jason Merrill + + * common.opt: Add comment. + +2023-12-11 Alexandre Oliva + + PR middle-end/112784 + * expr.cc (emit_block_move_via_loop): Call int_mode_for_size + for maybe-too-wide sizes. + (emit_block_cmp_via_loop): Likewise. + +2023-12-11 Alexandre Oliva + + PR target/112778 + * builtins.cc (can_store_by_multiple_pieces): New. + (try_store_by_multiple_pieces): Call it. + +2023-12-11 Alexandre Oliva + + PR target/112804 + * builtins.cc (try_store_by_multiple_pieces): Use ptr's mode + for the increment. + +2023-12-11 Alexandre Oliva + + * doc/invoke.texi (multiflags): Add period after @xref to + silence warning. + +2023-12-11 Alexandre Oliva + + * config/rl78/rl78.cc (TARGET_HAVE_STRUB_SUPPORT_FOR): Disable. + +2023-12-11 Alexandre Oliva + + * ipa-strub.cc (pass_ipa_strub::execute): Check that we don't + add indirection to pointer parameters, and document attribute + access non-interactions. + +2023-12-11 Roger Sayle + + PR rtl-optimization/112380 + * combine.cc (expand_field_assignment): Check if gen_lowpart + returned a CLOBBER, and avoid calling gen_simplify_binary with + it if so. 
+ +2023-12-11 Andrew Pinski + + PR target/111867 + * config/aarch64/aarch64.cc (aarch64_float_const_representable_p): For BFmode, + only accept +0.0. + +2023-12-11 Andrew Pinski + + PR tree-optimization/111972 + PR tree-optimization/110637 + * match.pd (`(convert)(zeroone !=/== CST)`): Match + and simplify to ((convert)zeroone){,^1}. + * fold-const.cc (fold_binary_loc): Remove + transformation of `(~a) & 1` and `(a ^ 1) & 1` + into `(convert)(a == 0)`. + +2023-12-11 Andrew Pinski + + PR middle-end/112935 + * expr.cc (expand_expr_real_2): Use + gimple_zero_one_valued_p instead of tree_nonzero_bits + to find boolean defined expressions. + +2023-12-11 Mikael Pettersson + + PR target/112413 + * config/m68k/linux.h (ASM_RETURN_CASE_JUMP): For + TARGET_LONG_JUMP_TABLE_OFFSETS, reference the jump table + via its label. + * config/m68k/m68kelf.h (ASM_RETURN_CASE_JUMP): Likewise. + * config/m68k/netbsd-elf.h (ASM_RETURN_CASE_JUMP): Likewise. + +2023-12-11 Andre Vieira + + * config/aarch64/aarch64.cc (lane_size): New function. + (aarch64_simd_clone_compute_vecsize_and_simdlen): Determine simdlen according to NDS rule + and reject combination of simdlen and types that lead to vectors larger than 128bits. + +2023-12-11 Juzhe-Zhong + + * rtl-ssa/insns.cc (function_info::record_use): Add !ordered_p case. + +2023-12-11 Juzhe-Zhong + + * config/riscv/riscv-v.cc (get_gather_index_mode): New function. + (shuffle_series_patterns): Robostify shuffle index. + (shuffle_generic_patterns): Ditto. + +2023-12-11 Victor Do Nascimento + + * config/aarch64/arm_neon.h (vldap1_lane_u64): Add + `const' to `__builtin_aarch64_simd_di *' cast. + (vldap1q_lane_u64): Likewise. + (vldap1_lane_s64): Cast __src to `const __builtin_aarch64_simd_di *'. + (vldap1q_lane_s64): Likewise. + (vldap1_lane_f64): Cast __src to `const __builtin_aarch64_simd_df *'. + (vldap1q_lane_f64): Cast __src to `const __builtin_aarch64_simd_df *'. + (vldap1_lane_p64): Add `const' to `__builtin_aarch64_simd_di *' cast. 
+ (vldap1q_lane_p64): Add `const' to `__builtin_aarch64_simd_di *' cast. + (vstl1_lane_u64): remove stray `const'. + (vstl1_lane_s64): Cast __src to `__builtin_aarch64_simd_di *'. + (vstl1q_lane_s64): Likewise. + (vstl1_lane_f64): Cast __src to `const __builtin_aarch64_simd_df *'. + (vstl1q_lane_f64): Likewise. + +2023-12-11 Robin Dapp + + PR target/112853 + * config/riscv/riscv-v.cc (expand_const_vector): Fix step + calculation. + (modulo_sel_indices): Also perform modulo for variable-length + constants. + (shuffle_series): Recognize series permutations. + (expand_vec_perm_const_1): Add shuffle_series. + +2023-12-11 liuhongt + + * match.pd (VCE (a cmp b ? -1 : 0) < 0) ? c : d ---> (VCE ((a + cmp b) ? (VCE:c) : (VCE:d))): New gimple simplication. + +2023-12-11 Juzhe-Zhong + + PR target/112431 + * config/riscv/vector.md: Support highest overlap for wv instructions. + +2023-12-11 Juzhe-Zhong + + * config/riscv/riscv-vsetvl.cc (extract_single_source): Fix ICE. + +2023-12-11 Jakub Jelinek + + * doc/extend.texi (__sync_fetch_and_add, __sync_fetch_and_sub, + __sync_fetch_and_or, __sync_fetch_and_and, __sync_fetch_and_xor, + __sync_fetch_and_nand, __sync_add_and_fetch, __sync_sub_and_fetch, + __sync_or_and_fetch, __sync_and_and_fetch, __sync_xor_and_fetch, + __sync_nand_and_fetch, __sync_bool_compare_and_swap, + __sync_val_compare_and_swap, __sync_lock_test_and_set, + __sync_lock_release, __atomic_load_n, __atomic_load, __atomic_store_n, + __atomic_store, __atomic_exchange_n, __atomic_exchange, + __atomic_compare_exchange_n, __atomic_compare_exchange, + __atomic_add_fetch, __atomic_sub_fetch, __atomic_and_fetch, + __atomic_xor_fetch, __atomic_or_fetch, __atomic_nand_fetch, + __atomic_fetch_add, __atomic_fetch_sub, __atomic_fetch_and, + __atomic_fetch_xor, __atomic_fetch_or, __atomic_fetch_nand, + __atomic_test_and_set, __atomic_clear, __atomic_thread_fence, + __atomic_signal_fence, __atomic_always_lock_free, + __atomic_is_lock_free, __builtin_add_overflow, + 
__builtin_sadd_overflow, __builtin_saddl_overflow, + __builtin_saddll_overflow, __builtin_uadd_overflow, + __builtin_uaddl_overflow, __builtin_uaddll_overflow, + __builtin_sub_overflow, __builtin_ssub_overflow, + __builtin_ssubl_overflow, __builtin_ssubll_overflow, + __builtin_usub_overflow, __builtin_usubl_overflow, + __builtin_usubll_overflow, __builtin_mul_overflow, + __builtin_smul_overflow, __builtin_smull_overflow, + __builtin_smulll_overflow, __builtin_umul_overflow, + __builtin_umull_overflow, __builtin_umulll_overflow, + __builtin_add_overflow_p, __builtin_sub_overflow_p, + __builtin_mul_overflow_p, __builtin_addc, __builtin_addcl, + __builtin_addcll, __builtin_subc, __builtin_subcl, __builtin_subcll, + __builtin_alloca, __builtin_alloca_with_align, + __builtin_alloca_with_align_and_max, __builtin_speculation_safe_value, + __builtin_nan, __builtin_nand32, __builtin_nand64, __builtin_nand128, + __builtin_nanf, __builtin_nanl, __builtin_nanf@var{n}, + __builtin_nanf@var{n}x, __builtin_nans, __builtin_nansd32, + __builtin_nansd64, __builtin_nansd128, __builtin_nansf, + __builtin_nansl, __builtin_nansf@var{n}, __builtin_nansf@var{n}x, + __builtin_ffs, __builtin_clz, __builtin_ctz, __builtin_clrsb, + __builtin_popcount, __builtin_parity, __builtin_bswap16, + __builtin_bswap32, __builtin_bswap64, __builtin_bswap128, + __builtin_extend_pointer, __builtin_goacc_parlevel_id, + __builtin_goacc_parlevel_size, vec_clrl, vec_clrr, vec_mulh, vec_mul, + vec_div, vec_dive, vec_mod, __builtin_rx_mvtc): Use @var{...} around + parameter names. + (vec_rl, vec_sl, vec_sr, vec_sra): Likewise. Use @var{...} also + around A, B and R in description. + +2023-12-11 Juzhe-Zhong + + * config/riscv/riscv-selftests.cc (riscv_run_selftests): + Remove poly self test when FIXED-VLMAX. + +2023-12-11 Fei Gao + Xiao Zeng + + * ifcvt.cc (noce_cond_zero_binary_op_supported): Add support for AND. + (noce_bbs_ok_for_cond_zero_arith): Likewise. + (noce_try_cond_zero_arith): Likewise. 
+ +2023-12-11 liuhongt + + PR target/112904 + * config/i386/mmx.md (*xop_pcmov_): New define_insn. + +2023-12-11 Haochen Gui + + PR target/112707 + * config/rs6000/rs6000.h (TARGET_FCTID): Define. + * config/rs6000/rs6000.md (lrintdi2): Add guard TARGET_FCTID. + * (lrounddi2): Replace TARGET_FPRND with TARGET_FCTID. + +2023-12-11 Haochen Gui + + PR target/112707 + * config/rs6000/rs6000.md (expand lrintsi2): New. + (insn lrintsi2): Rename to... + (*lrintsi): ...this. + (lrintsi_di): New. + 2023-12-10 Fei Gao Xiao Zeng diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 6a5191bd9774..190b92f716be 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231211 +20231212 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 48a5bb742536..22d24615ed84 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,8 @@ +2023-12-11 Rainer Orth + + * terminals.c [__FreeBSD__]: Include . + (TABDLY): Only define if missing. + 2023-12-06 Alexandre Oliva * gcc-interface/trans.cc: Include ipa-strub.h. diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index cf056decf722..7f6296539536 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,14 @@ +2023-12-11 David Malcolm + + PR analyzer/112955 + * engine.cc (feasibility_state::feasibility_state): Initialize + m_snodes_visited. + +2023-12-11 Andrew Pinski + + * region-model-manager.cc (maybe_undo_optimize_bit_field_compare): Remove + the check for type being unsigned_char_type_node. + 2023-12-08 David Malcolm * sm-taint.cc (taint_state_machine::alt_get_inherited_state): Fix diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index f6175775c20d..a96e65b6abb6 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,18 @@ +2023-12-11 Martin Uecker + + PR c/112488 + * c-decl.cc (add_decl_expr): Revise. + (finish_struct): Create DECL_EXPR. + * c-parser.cc (c_parser_struct_or_union_specifier): Call + finish_struct with expression for VLA sizes. + * c-tree.h (finish_struct): Add argument. 
+ +2023-12-11 Tobias Burnus + + * c-parser.cc (c_parser_omp_requires): Handle acquires/release + in atomic_default_mem_order clause. + (c_parser_omp_atomic): Update. + 2023-12-05 Richard Sandiford * c-decl.cc (std_attribute_table): Add extra braces to work diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index d493135edca9..56089138a0b4 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,31 @@ +2023-12-11 Patrick Palka + + * pt.cc (alias_ctad_tweaks): Pass use_spec_table=false to + tsubst_decl. + +2023-12-11 Tobias Burnus + + * parser.cc (cp_parser_omp_requires): Handle acquires/release + in atomic_default_mem_order clause. + (cp_parser_omp_atomic): Update. + +2023-12-11 Nathaniel Shead + + PR c++/96090 + PR c++/100470 + * call.cc (build_over_call): Prevent folding of trivial special + members when checking for noexcept. + * method.cc (constructible_expr): Perform value-initialisation + for empty parameter lists. + (is_nothrow_xible): Treat as noexcept operator. + +2023-12-11 Nathaniel Shead + + PR c++/104234 + PR c++/112580 + * pt.cc (tsubst_template_decl): Clear + DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P. + 2023-12-10 Ken Matsui * cp-trait.def: Define __remove_pointer. diff --git a/gcc/d/ChangeLog b/gcc/d/ChangeLog index b002b459adc4..f07ec4677277 100644 --- a/gcc/d/ChangeLog +++ b/gcc/d/ChangeLog @@ -1,3 +1,13 @@ +2023-12-11 Iain Buclaw + + * Make-lang.in (D_FRONTEND_OBJS): Rename d/common-string.o to + d/common-smallbuffer.o. + * dmd/MERGE: Merge upstream dmd 2bbf64907c. + * dmd/VERSION: Bump version to v2.106.0. + * modules.cc (layout_moduleinfo_fields): Update for new front-end + interface. + (layout_moduleinfo): Likewise. 
+ 2023-12-05 Richard Sandiford * d-attribs.cc (d_langhook_common_attribute_table): Add extra braces diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index daab6e223b46..0a1eae419bd6 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,25 @@ +2023-12-11 Thomas Schwinge + + * trans-openmp.cc (gfc_omp_call_is_alloc): Resolve ICE. + +2023-12-11 Tobias Burnus + + * gfortran.h (enum gfc_omp_requires_kind): Add + OMP_REQ_ATOMIC_MEM_ORDER_ACQUIRE and OMP_REQ_ATOMIC_MEM_ORDER_RELEASE. + (gfc_namespace): Add a 7th bit to omp_requires. + * module.cc (enum ab_attribute): Add AB_OMP_REQ_MEM_ORDER_ACQUIRE + and AB_OMP_REQ_MEM_ORDER_RELEASE + (mio_symbol_attribute): Handle it. + * openmp.cc (gfc_omp_requires_add_clause): Update for acquire/release. + (gfc_match_omp_requires): Likewise. + (gfc_match_omp_atomic): Handle them for atomic_default_mem_order. + * parse.cc: Likewise. + +2023-12-11 Tobias Burnus + + * trans-openmp.cc (gfc_omp_call_add_alloc, + gfc_omp_call_is_alloc): Set 'fn spec'. + 2023-12-10 Harald Anlauf PR fortran/111503 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 36db275f8669..d76225408932 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,216 @@ +2023-12-11 Jakub Jelinek + + * c-c++-common/strub-O2fni.c: Add -fno-stack-protector to dg-options. + * c-c++-common/strub-O3fni.c: Likewise. + * c-c++-common/strub-Os.c: Likewise. + * c-c++-common/strub-Og.c: Likewise. + +2023-12-11 Martin Uecker + + PR c/112488 + * gcc.dg/pr112488-1.c: New test. + * gcc.dg/pr112488-2.c: New test. + * gcc.dg/pr112898.c: New test. + * gcc.misc-tests/gcov-pr85350.c: Adapt. + +2023-12-11 Richard Sandiford + + * gcc.target/aarch64/prfm_imm_offset_2.c: New test. + +2023-12-11 Jason Merrill + + * g++.dg/cpp2a/concepts-explicit-inst1.C: Specify ABI v18. + * g++.dg/cpp2a/concepts-explicit-inst1a.C: New test. 
+ +2023-12-11 Alexandre Oliva + + PR middle-end/112784 + * gcc.target/i386/avx512cd-inline-stringops-pr112784.c: New. + +2023-12-11 Alexandre Oliva + + PR target/112778 + * gcc.dg/inline-mem-cmp-pr112778.c: New. + +2023-12-11 Alexandre Oliva + + PR target/112804 + * gcc.target/aarch64/inline-mem-set-pr112804.c: New. + +2023-12-11 Roger Sayle + + PR rtl-optimization/112380 + * gcc.dg/pr112380.c: New test case. + +2023-12-11 Francois-Xavier Coudert + + PR testsuite/112297 + * gcc.target/i386/pr100936.c: Require nonpic target. + +2023-12-11 Patrick Palka + + PR c++/63378 + * g++.dg/template/fnspec3.C: New test. + +2023-12-11 Andrew Pinski + + * gcc.dg/tree-ssa/pr110637-1.c: New test. + * gcc.dg/tree-ssa/pr110637-2.c: New test. + * gcc.dg/tree-ssa/pr110637-3.c: New test. + * gcc.dg/tree-ssa/pr111972-1.c: New test. + * gcc.dg/tree-ssa/pr69270.c: Update testcase. + * gcc.target/i386/pr110790-2.c: Update testcase. + * gcc.dg/fold-even-1.c: Removed. + +2023-12-11 Andre Vieira + + * lib/target-supports.exp: Add aarch64 targets to vect_simd_clones. + * c-c++-common/gomp/declare-variant-14.c: Adapt test for aarch64. + * c-c++-common/gomp/pr60823-1.c: Likewise. + * c-c++-common/gomp/pr60823-2.c: Likewise. + * c-c++-common/gomp/pr60823-3.c: Likewise. + * g++.dg/gomp/attrs-10.C: Likewise. + * g++.dg/gomp/declare-simd-1.C: Likewise. + * g++.dg/gomp/declare-simd-3.C: Likewise. + * g++.dg/gomp/declare-simd-4.C: Likewise. + * g++.dg/gomp/declare-simd-7.C: Likewise. + * g++.dg/gomp/declare-simd-8.C: Likewise. + * g++.dg/gomp/pr88182.C: Likewise. + * gcc.dg/declare-simd.c: Likewise. + * gcc.dg/gomp/declare-simd-1.c: Likewise. + * gcc.dg/gomp/declare-simd-3.c: Likewise. + * gcc.dg/gomp/pr87887-1.c: Likewise. + * gcc.dg/gomp/pr87895-1.c: Likewise. + * gcc.dg/gomp/pr89246-1.c: Likewise. + * gcc.dg/gomp/pr99542.c: Likewise. + * gcc.dg/gomp/simd-clones-2.c: Likewise. + * gcc.dg/vect/vect-simd-clone-1.c: Likewise. + * gcc.dg/vect/vect-simd-clone-2.c: Likewise. 
+ * gcc.dg/vect/vect-simd-clone-4.c: Likewise. + * gcc.dg/vect/vect-simd-clone-5.c: Likewise. + * gcc.dg/vect/vect-simd-clone-6.c: Likewise. + * gcc.dg/vect/vect-simd-clone-7.c: Likewise. + * gcc.dg/vect/vect-simd-clone-8.c: Likewise. + * gfortran.dg/gomp/declare-simd-2.f90: Likewise. + * gfortran.dg/gomp/declare-simd-coarray-lib.f90: Likewise. + * gfortran.dg/gomp/declare-variant-14.f90: Likewise. + * gfortran.dg/gomp/pr79154-1.f90: Likewise. + * gfortran.dg/gomp/pr83977.f90: Likewise. + * gcc.target/aarch64/declare-simd-1.c: New file. + * gcc.target/aarch64/declare-simd-2.c: New file. + +2023-12-11 Patrick Palka + + * g++.dg/modules/concept-8.h: New test. + * g++.dg/modules/concept-8_a.H: New test. + * g++.dg/modules/concept-8_b.C: New test. + +2023-12-11 Robin Dapp + + * gcc.target/riscv/rvv/autovec/builtin/strcmp-run.c: Adjust test + expectation and target selector. + * gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: Adjust + target selector. + * gcc.target/riscv/rvv/autovec/builtin/strncmp-run.c: Ditto. + +2023-12-11 Tobias Burnus + + * c-c++-common/gomp/requires-3.c: Update for now valid code. + * gfortran.dg/gomp/requires-3.f90: Likewise. + * gfortran.dg/gomp/requires-2.f90: Update dg-error. + * gfortran.dg/gomp/requires-5.f90: Likewise. + * c-c++-common/gomp/requires-5.c: New test. + * c-c++-common/gomp/requires-6.c: New test. + * c-c++-common/gomp/requires-7.c: New test. + * c-c++-common/gomp/requires-8.c: New test. + * gfortran.dg/gomp/requires-10.f90: New test. + * gfortran.dg/gomp/requires-11.f90: New test. + +2023-12-11 Juzhe-Zhong + + * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-2.c: New test. + +2023-12-11 Juzhe-Zhong + + * gcc.target/riscv/rvv/autovec/pr110950.c: Adapt test. + +2023-12-11 Francois-Xavier Coudert + + * c-c++-common/asan/memcmp-1.c: Adjust pattern on darwin. + +2023-12-11 Juzhe-Zhong + + * gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c: Moved to... + * gcc.target/riscv/rvv/vsetvl/vsetvl_bug-1.c: ...here. 
+ +2023-12-11 Francois-Xavier Coudert + + * gcc.target/i386/pr112445.c: Require dfp. + +2023-12-11 liuhongt + + * gcc.target/i386/avx512vl-blendv-3.c: New test. + * gcc.target/i386/blendv-3.c: New test. + +2023-12-11 Francois-Xavier Coudert + + * gcc.target/i386/libcall-1.c: Skip on darwin. + +2023-12-11 Juzhe-Zhong + + PR target/112431 + * gcc.target/riscv/rvv/base/pr112431-39.c: New test. + * gcc.target/riscv/rvv/base/pr112431-40.c: New test. + * gcc.target/riscv/rvv/base/pr112431-41.c: New test. + +2023-12-11 Juzhe-Zhong + + * gcc.target/riscv/rvv/vsetvl/avl_use_bug-1.c: New test. + +2023-12-11 Juzhe-Zhong + + * gcc.target/riscv/rvv/base/poly-selftest-1.c: New test. + +2023-12-11 Fei Gao + Xiao Zeng + + * gcc.target/riscv/zicond_ifcvt_opt.c: Add TCs for AND. + +2023-12-11 Nathaniel Shead + + PR c++/96090 + PR c++/100470 + * g++.dg/cpp0x/noexcept81.C: New test. + * g++.dg/ext/is_nothrow_constructible7.C: New test. + * g++.dg/ext/is_nothrow_constructible8.C: New test. + +2023-12-11 Nathaniel Shead + + PR c++/104234 + PR c++/112580 + * g++.dg/modules/pr104234.C: New test. + +2023-12-11 liuhongt + + * g++.target/i386/pr112904.C: New test. + +2023-12-11 Haochen Gui + + PR target/112707 + * gcc.target/powerpc/pr112707.h: New. + * gcc.target/powerpc/pr112707-2.c: New. + * gcc.target/powerpc/pr112707-3.c: New. + * gcc.target/powerpc/pr88558-p7.c: Check fctid on ilp32 and + has_arch_ppc64 as it's now guarded by powerpc64. + * gcc.target/powerpc/pr88558-p8.c: Likewise. + * gfortran.dg/nint_p7.f90: Add powerpc64 target requirement as + lrounddi2 is now guarded by powerpc64. + +2023-12-11 Haochen Gui + + PR target/112707 + * gcc.target/powerpc/pr112707-1.c: New. + 2023-12-10 Fei Gao Xiao Zeng diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 279d71802645..0248ac380489 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,13 @@ +2023-12-11 Lipeng Zhu + + * gthr-posix.h (__GTHREAD_RWLOCK_INIT): New macro. + (__gthrw): New function. 
+ (__gthread_rwlock_rdlock): New function. + (__gthread_rwlock_tryrdlock): New function. + (__gthread_rwlock_wrlock): New function. + (__gthread_rwlock_trywrlock): New function. + (__gthread_rwlock_unlock): New function. + 2023-12-08 Florian Weimer * libgcov-interface.c (__gcov_fork): Use __builtin_fork instead diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog index 2393305a8649..50c5fe84278f 100644 --- a/libgfortran/ChangeLog +++ b/libgfortran/ChangeLog @@ -1,3 +1,43 @@ +2023-12-11 Lipeng Zhu + + * io/async.c (DEBUG_LINE): New macro. + * io/async.h (RWLOCK_DEBUG_ADD): New macro. + (CHECK_RDLOCK): New macro. + (CHECK_WRLOCK): New macro. + (TAIL_RWLOCK_DEBUG_QUEUE): New macro. + (IN_RWLOCK_DEBUG_QUEUE): New macro. + (RDLOCK): New macro. + (WRLOCK): New macro. + (RWUNLOCK): New macro. + (RD_TO_WRLOCK): New macro. + (INTERN_RDLOCK): New macro. + (INTERN_WRLOCK): New macro. + (INTERN_RWUNLOCK): New macro. + * io/io.h (struct gfc_unit): Change UNIT_LOCK to UNIT_RWLOCK in + a comment. + (unit_lock): Remove including associated internal_proto. + (unit_rwlock): New declarations including associated internal_proto. + (dec_waiting_unlocked): Use WRLOCK and RWUNLOCK on unit_rwlock + instead of __gthread_mutex_lock and __gthread_mutex_unlock on + unit_lock. + * io/transfer.c (st_read_done_worker): Use WRLOCK and RWUNLOCK on + unit_rwlock instead of LOCK and UNLOCK on unit_lock. + (st_write_done_worker): Likewise. + * io/unit.c: Change UNIT_LOCK to UNIT_RWLOCK in 'IO locking rules' + comment. Use unit_rwlock variable instead of unit_lock variable. + (get_gfc_unit_from_unit_root): New function. + (get_gfc_unit): Use RDLOCK, WRLOCK and RWUNLOCK on unit_rwlock + instead of LOCK and UNLOCK on unit_lock. + (close_unit_1): Use WRLOCK and RWUNLOCK on unit_rwlock instead of + LOCK and UNLOCK on unit_lock. + (close_units): Likewise. + (newunit_alloc): Use RWUNLOCK on unit_rwlock instead of UNLOCK on + unit_lock. 
+ * io/unix.c (find_file): Use RDLOCK and RWUNLOCK on unit_rwlock + instead of LOCK and UNLOCK on unit_lock. + (flush_all_units): Use WRLOCK and RWUNLOCK on unit_rwlock instead + of LOCK and UNLOCK on unit_lock. + 2023-12-05 Florian Weimer Jakub Jelinek diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 77280e09e073..b22bd98498fe 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,18 @@ +2023-12-11 Lipeng Zhu + + * testsuite/libgomp.fortran/rwlock_1.f90: New file. + * testsuite/libgomp.fortran/rwlock_2.f90: New file. + * testsuite/libgomp.fortran/rwlock_3.f90: New file. + +2023-12-11 Andre Vieira + + * testsuite/libgomp.c/declare-variant-1.c: Adapt test for aarch64. + * testsuite/libgomp.fortran/declare-simd-1.f90: Likewise. + +2023-12-11 Tobias Burnus + + * libgomp_g.h (GOMP_add_alloc, GOMP_is_alloc): Add. + 2023-12-08 Tobias Burnus * allocator.c (struct fort_alloc_splay_tree_key_s, diff --git a/libphobos/ChangeLog b/libphobos/ChangeLog index 6f437e49179d..a98bd43dc9cd 100644 --- a/libphobos/ChangeLog +++ b/libphobos/ChangeLog @@ -1,3 +1,8 @@ +2023-12-11 Iain Buclaw + + * libdruntime/MERGE: Merge upstream druntime 2bbf64907c. + * src/MERGE: Merge upstream phobos b64bfbf91. + 2023-11-21 Iain Buclaw * libdruntime/MERGE: Merge upstream druntime ff57fec515. From 46cfafad9b3e64e9cd060b7a26bfe1890c74135f Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Tue, 12 Dec 2023 01:58:07 +0000 Subject: [PATCH 212/311] MAINTAINERS: Add myself to write after approval and DCO ChangeLog: * MAINTAINERS: Add myself to write after approval --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0dbcbadcfd7d..f3683ff03ecc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -705,6 +705,7 @@ Marcel Vollweiler Ville Voutilainen Nenad Vukicevic Feng Wang +Feng Wang s Hongyu Wang Jiong Wang Stephen M. 
Webb From 4b421728289e6f1caa0dfaa953a11698ab95d37d Mon Sep 17 00:00:00 2001 From: Yang Yujie Date: Fri, 8 Dec 2023 18:01:18 +0800 Subject: [PATCH 213/311] LoongArch: Fix eh_return epilogue for normal returns. On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved and restored in the function prologue and epilogue if the given function calls __builtin_eh_return. This causes the return value to be overwritten on normal return paths and breaks a rare case of libgcc's _Unwind_RaiseException. gcc/ChangeLog: * config/loongarch/loongarch.cc: Do not restore the saved eh_return data registers ($r4-$r7) for a normal return of a function that calls __builtin_eh_return elsewhere. * config/loongarch/loongarch-protos.h: Same. * config/loongarch/loongarch.md: Same. gcc/testsuite/ChangeLog: * gcc.target/loongarch/eh_return-normal-return.c: New test. --- gcc/config/loongarch/loongarch-protos.h | 2 +- gcc/config/loongarch/loongarch.cc | 34 ++++++++++++----- gcc/config/loongarch/loongarch.md | 23 ++++++++++- .../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++ 4 files changed, 84 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index f2ff93b5e10e..c66ab932d67c 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -60,7 +60,7 @@ enum loongarch_symbol_type { extern rtx loongarch_emit_move (rtx, rtx); extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int); extern void loongarch_expand_prologue (void); -extern void loongarch_expand_epilogue (bool); +extern void loongarch_expand_epilogue (int); extern bool loongarch_can_use_return_insn (void); extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *); diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 8630db44e897..3ec31c5d1054 100644
--- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1015,7 +1015,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, static void loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, - loongarch_save_restore_fn fn) + loongarch_save_restore_fn fn, + bool skip_eh_data_regs_p) { HOST_WIDE_INT offset; @@ -1024,7 +1025,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) { - if (!cfun->machine->reg_is_wrapped_separately[regno]) + /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO) + when returning normally from a function that calls + __builtin_eh_return. In this case, these registers are saved but + should not be restored, or the return value may be clobbered. */ + + if (!(cfun->machine->reg_is_wrapped_separately[regno] + || (skip_eh_data_regs_p + && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4))) loongarch_save_restore_reg (word_mode, regno, offset, fn); offset -= UNITS_PER_WORD; @@ -1297,7 +1305,7 @@ loongarch_expand_prologue (void) GEN_INT (-step1)); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; size -= step1; - loongarch_for_each_saved_reg (size, loongarch_save_reg); + loongarch_for_each_saved_reg (size, loongarch_save_reg, false); } /* Set up the frame pointer, if we're using one. */ @@ -1382,11 +1390,13 @@ loongarch_can_use_return_insn (void) return reload_completed && cfun->machine->frame.total_size == 0; } -/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P - says which. */ +/* Expand function epilogue using the following insn patterns: + "epilogue" (style == NORMAL_RETURN) + "sibcall_epilogue" (style == SIBCALL_RETURN) + "eh_return" (style == EXCEPTION_RETURN) */ void -loongarch_expand_epilogue (bool sibcall_p) +loongarch_expand_epilogue (int style) { /* Split the frame into two. 
STEP1 is the amount of stack we should deallocate before restoring the registers. STEP2 is the amount we @@ -1403,7 +1413,8 @@ loongarch_expand_epilogue (bool sibcall_p) bool need_barrier_p = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; - if (!sibcall_p && loongarch_can_use_return_insn ()) + /* Handle simple returns. */ + if (style == NORMAL_RETURN && loongarch_can_use_return_insn ()) { emit_jump_insn (gen_return ()); return; @@ -1479,7 +1490,9 @@ loongarch_expand_epilogue (bool sibcall_p) /* Restore the registers. */ loongarch_for_each_saved_reg (frame->total_size - step2, - loongarch_restore_reg); + loongarch_restore_reg, + crtl->calls_eh_return + && style != EXCEPTION_RETURN); if (need_barrier_p) loongarch_emit_stack_tie (); @@ -1500,11 +1513,12 @@ loongarch_expand_epilogue (bool sibcall_p) } /* Add in the __builtin_eh_return stack adjustment. */ - if (crtl->calls_eh_return) + if (crtl->calls_eh_return && style == EXCEPTION_RETURN) emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); - if (!sibcall_p) + /* Emit return unless doing sibcall. */ + if (style != SIBCALL_RETURN) emit_jump_insn (gen_simple_return_internal (ra)); } diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index afbf201d4d06..f7ec435cba74 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -125,6 +125,11 @@ (T1_REGNUM 13) (S0_REGNUM 23) + ;; Return path styles + (NORMAL_RETURN 0) + (SIBCALL_RETURN 1) + (EXCEPTION_RETURN 2) + ;; PIC long branch sequences are never longer than 100 bytes. 
(MAX_PIC_BRANCH_LENGTH 100) ]) @@ -3276,7 +3281,7 @@ [(const_int 2)] "" { - loongarch_expand_epilogue (false); + loongarch_expand_epilogue (NORMAL_RETURN); DONE; }) @@ -3284,7 +3289,7 @@ [(const_int 2)] "" { - loongarch_expand_epilogue (true); + loongarch_expand_epilogue (SIBCALL_RETURN); DONE; }) @@ -3341,6 +3346,20 @@ emit_insn (gen_eh_set_ra_di (operands[0])); else emit_insn (gen_eh_set_ra_si (operands[0])); + + emit_jump_insn (gen_eh_return_internal ()); + emit_barrier (); + DONE; +}) + +(define_insn_and_split "eh_return_internal" + [(eh_return)] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + loongarch_expand_epilogue (EXCEPTION_RETURN); DONE; }) diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c new file mode 100644 index 000000000000..f8f3965f894f --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include <stdlib.h> + +int foo () __attribute__((noinline)); +int main (); + +int +foo () { + + int t; + + /* prevent optimization using asm */ + asm ("" : "=r" (t) : "0" (-1)); + asm ("" : "=r" (t) : "0" (t ? 1 : 0)); + + if (t == 0) + /* never reached */ + __builtin_eh_return (0, __builtin_return_address (0)); + + else if (t == 1) + /* return here */ + return 202312; + + else + /* never reached: prevent vrp optimization in main */ + return 0; +} + +int +main () +{ + if (foo() == 202312) + return 0; + else + abort (); +} From fda2e1ab60377ca1a6aff093355b29115cff5880 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Tue, 12 Dec 2023 10:06:38 +0800 Subject: [PATCH 214/311] RISC-V: Move RVV POLY VALUE estimation from riscv.cc to riscv-v.cc[NFC] This patch moves RVV POLY VALUE estimation from riscv.cc to riscv-v.cc to ease future maintenance, like other target hook implementations. Committed as it is obviously a code refinement.
gcc/ChangeLog: * config/riscv/riscv-protos.h (estimated_poly_value): New function. * config/riscv/riscv-v.cc (estimated_poly_value): Ditto. * config/riscv/riscv.cc (riscv_estimated_poly_value): Move RVV POLY VALUE estimation to riscv-v.cc --- gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/riscv-v.cc | 47 +++++++++++++++++++++++++++++++++ gcc/config/riscv/riscv.cc | 44 +++--------------------------- 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 20bbb5b859ce..85ab1db2088e 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -608,6 +608,7 @@ int count_regno_occurrences (rtx_insn *, unsigned int); bool imm_avl_p (machine_mode); bool can_be_broadcasted_p (rtx); bool gather_scatter_valid_offset_p (machine_mode); +HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int); } /* We classify builtin types into two classes: diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 944b37b5df7f..01898cb4b8d0 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -4927,4 +4927,51 @@ gather_scatter_valid_offset_p (machine_mode mode) return true; } +/* Implement TARGET_ESTIMATED_POLY_VALUE. + Look into the tuning structure for an estimate. + KIND specifies the type of requested estimate: min, max or likely. + For cores with a known VLA width all three estimates are the same. + For generic VLA tuning we want to distinguish the maximum estimate from + the minimum and likely ones. + The likely estimate is the same as the minimum in that case to give a + conservative behavior of auto-vectorizing with VLA when it is a win + even for VLA vectorization. + When VLA width information is available VAL.coeffs[1] is multiplied by + the number of VLA chunks over the initial VLS bits. 
*/ +HOST_WIDE_INT +estimated_poly_value (poly_int64 val, unsigned int kind) +{ + unsigned int width_source + = BITS_PER_RISCV_VECTOR.is_constant () + ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant () + : (unsigned int) RVV_SCALABLE; + + /* If there is no core-specific information then the minimum and likely + values are based on TARGET_MIN_VLEN vectors and the maximum is based on + the architectural maximum of 65536 bits. */ + unsigned int min_vlen_bytes = TARGET_MIN_VLEN / 8 - 1; + if (width_source == RVV_SCALABLE) + switch (kind) + { + case POLY_VALUE_MIN: + case POLY_VALUE_LIKELY: + return val.coeffs[0]; + + case POLY_VALUE_MAX: + return val.coeffs[0] + val.coeffs[1] * min_vlen_bytes; + } + + /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL, treating the + lowest as likely. This could be made more general if future -mtune + options need it to be. */ + if (kind == POLY_VALUE_MAX) + width_source = 1 << floor_log2 (width_source); + else + width_source = least_bit_hwi (width_source); + + /* If the core provides width information, use that. */ + HOST_WIDE_INT over_min_vlen = width_source - TARGET_MIN_VLEN; + return val.coeffs[0] + val.coeffs[1] * over_min_vlen / TARGET_MIN_VLEN; +} + } // namespace riscv_vector diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 3f111fa0393a..69a8a503f303 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -9604,51 +9604,15 @@ riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor, return RISCV_DWARF_VLENB; } -/* Implement TARGET_ESTIMATED_POLY_VALUE. - Look into the tuning structure for an estimate. - KIND specifies the type of requested estimate: min, max or likely. - For cores with a known RVV width all three estimates are the same. - For generic RVV tuning we want to distinguish the maximum estimate from - the minimum and likely ones. 
- The likely estimate is the same as the minimum in that case to give a - conservative behavior of auto-vectorizing with RVV when it is a win - even for 128-bit RVV. - When RVV width information is available VAL.coeffs[1] is multiplied by - the number of VQ chunks over the initial Advanced SIMD 128 bits. */ +/* Implement TARGET_ESTIMATED_POLY_VALUE. */ static HOST_WIDE_INT riscv_estimated_poly_value (poly_int64 val, poly_value_estimate_kind kind = POLY_VALUE_LIKELY) { - unsigned int width_source = BITS_PER_RISCV_VECTOR.is_constant () - ? (unsigned int) BITS_PER_RISCV_VECTOR.to_constant () - : (unsigned int) RVV_SCALABLE; - - /* If there is no core-specific information then the minimum and likely - values are based on 128-bit vectors and the maximum is based on - the architectural maximum of 65536 bits. */ - if (width_source == RVV_SCALABLE) - switch (kind) - { - case POLY_VALUE_MIN: - case POLY_VALUE_LIKELY: - return val.coeffs[0]; - - case POLY_VALUE_MAX: - return val.coeffs[0] + val.coeffs[1] * 15; - } - - /* Allow BITS_PER_RISCV_VECTOR to be a bitmask of different VL, treating the - lowest as likely. This could be made more general if future -mtune - options need it to be. */ - if (kind == POLY_VALUE_MAX) - width_source = 1 << floor_log2 (width_source); - else - width_source = least_bit_hwi (width_source); - - /* If the core provides width information, use that. */ - HOST_WIDE_INT over_128 = width_source - 128; - return val.coeffs[0] + val.coeffs[1] * over_128 / 128; + if (TARGET_VECTOR) + return riscv_vector::estimated_poly_value (val, kind); + return default_estimated_poly_value (val, kind); } /* Return true if the vector misalignment factor is supported by the From ce7e66787b5b4ad385b21756da5a89171d233ddc Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Mon, 11 Dec 2023 01:14:17 +0000 Subject: [PATCH 215/311] RISC-V: Add avail interface into function_group_info Patch v3: Fix typo and remove the modification of rvv.exp. 
Patch v2: Use a variadic macro and add the dependency to t-riscv. In order to support other vector-related extensions, this patch adds unsigned int (*avail) (void) to function_group_info to determine whether to register the intrinsic based on ISA info. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-functions.def (DEF_RVV_FUNCTION): Add AVAIL argument. (read_vl): Using AVAIL argument default value. (vlenb): Ditto. (vsetvl): Ditto. (vsetvlmax): Ditto. (vle): Ditto. (vse): Ditto. (vlm): Ditto. (vsm): Ditto. (vlse): Ditto. (vsse): Ditto. (vluxei8): Ditto. (vluxei16): Ditto. (vluxei32): Ditto. (vluxei64): Ditto. (vloxei8): Ditto. (vloxei16): Ditto. (vloxei32): Ditto. (vloxei64): Ditto. (vsuxei8): Ditto. (vsuxei16): Ditto. (vsuxei32): Ditto. (vsuxei64): Ditto. (vsoxei8): Ditto. (vsoxei16): Ditto. (vsoxei32): Ditto. (vsoxei64): Ditto. (vleff): Ditto. (vadd): Ditto. (vsub): Ditto. (vrsub): Ditto. (vneg): Ditto. (vwaddu): Ditto. (vwsubu): Ditto. (vwadd): Ditto. (vwsub): Ditto. (vwcvt_x): Ditto. (vwcvtu_x): Ditto. (vzext): Ditto. (vsext): Ditto. (vadc): Ditto. (vmadc): Ditto. (vsbc): Ditto. (vmsbc): Ditto. (vand): Ditto. (vor): Ditto. (vxor): Ditto. (vnot): Ditto. (vsll): Ditto. (vsra): Ditto. (vsrl): Ditto. (vnsrl): Ditto. (vnsra): Ditto. (vncvt_x): Ditto. (vmseq): Ditto. (vmsne): Ditto. (vmsltu): Ditto. (vmslt): Ditto. (vmsleu): Ditto. (vmsle): Ditto. (vmsgtu): Ditto. (vmsgt): Ditto. (vmsgeu): Ditto. (vmsge): Ditto. (vminu): Ditto. (vmin): Ditto. (vmaxu): Ditto. (vmax): Ditto. (vmul): Ditto. (vmulh): Ditto. (vmulhu): Ditto. (vmulhsu): Ditto. (vdivu): Ditto. (vdiv): Ditto. (vremu): Ditto. (vrem): Ditto. (vwmul): Ditto. (vwmulu): Ditto. (vwmulsu): Ditto. (vmacc): Ditto. (vnmsac): Ditto. (vmadd): Ditto. (vnmsub): Ditto. (vwmaccu): Ditto. (vwmacc): Ditto. (vwmaccsu): Ditto. (vwmaccus): Ditto. (vmerge): Ditto. (vmv_v): Ditto. (vsaddu): Ditto. (vsadd): Ditto. (vssubu): Ditto. (vssub): Ditto. (vaaddu): Ditto. (vaadd): Ditto. (vasubu): Ditto. (vasub): Ditto. (vsmul): Ditto.
(vssrl): Ditto. (vssra): Ditto. (vnclipu): Ditto. (vnclip): Ditto. (vfadd): Ditto. (vfsub): Ditto. (vfrsub): Ditto. (vfadd_frm): Ditto. (vfsub_frm): Ditto. (vfrsub_frm): Ditto. (vfwadd): Ditto. (vfwsub): Ditto. (vfwadd_frm): Ditto. (vfwsub_frm): Ditto. (vfmul): Ditto. (vfdiv): Ditto. (vfrdiv): Ditto. (vfmul_frm): Ditto. (vfdiv_frm): Ditto. (vfrdiv_frm): Ditto. (vfwmul): Ditto. (vfwmul_frm): Ditto. (vfmacc): Ditto. (vfnmsac): Ditto. (vfmadd): Ditto. (vfnmsub): Ditto. (vfnmacc): Ditto. (vfmsac): Ditto. (vfnmadd): Ditto. (vfmsub): Ditto. (vfmacc_frm): Ditto. (vfnmacc_frm): Ditto. (vfmsac_frm): Ditto. (vfnmsac_frm): Ditto. (vfmadd_frm): Ditto. (vfnmadd_frm): Ditto. (vfmsub_frm): Ditto. (vfnmsub_frm): Ditto. (vfwmacc): Ditto. (vfwnmacc): Ditto. (vfwmsac): Ditto. (vfwnmsac): Ditto. (vfwmacc_frm): Ditto. (vfwnmacc_frm): Ditto. (vfwmsac_frm): Ditto. (vfwnmsac_frm): Ditto. (vfsqrt): Ditto. (vfsqrt_frm): Ditto. (vfrsqrt7): Ditto. (vfrec7): Ditto. (vfrec7_frm): Ditto. (vfmin): Ditto. (vfmax): Ditto. (vfsgnj): Ditto. (vfsgnjn): Ditto. (vfsgnjx): Ditto. (vfneg): Ditto. (vfabs): Ditto. (vmfeq): Ditto. (vmfne): Ditto. (vmflt): Ditto. (vmfle): Ditto. (vmfgt): Ditto. (vmfge): Ditto. (vfclass): Ditto. (vfmerge): Ditto. (vfmv_v): Ditto. (vfcvt_x): Ditto. (vfcvt_xu): Ditto. (vfcvt_rtz_x): Ditto. (vfcvt_rtz_xu): Ditto. (vfcvt_f): Ditto. (vfcvt_x_frm): Ditto. (vfcvt_xu_frm): Ditto. (vfcvt_f_frm): Ditto. (vfwcvt_x): Ditto. (vfwcvt_xu): Ditto. (vfwcvt_rtz_x): Ditto. (vfwcvt_rtz_xu): Ditto. (vfwcvt_f): Ditto. (vfwcvt_x_frm): Ditto. (vfwcvt_xu_frm): Ditto. (vfncvt_x): Ditto. (vfncvt_xu): Ditto. (vfncvt_rtz_x): Ditto. (vfncvt_rtz_xu): Ditto. (vfncvt_f): Ditto. (vfncvt_rod_f): Ditto. (vfncvt_x_frm): Ditto. (vfncvt_xu_frm): Ditto. (vfncvt_f_frm): Ditto. (vredsum): Ditto. (vredmaxu): Ditto. (vredmax): Ditto. (vredminu): Ditto. (vredmin): Ditto. (vredand): Ditto. (vredor): Ditto. (vredxor): Ditto. (vwredsum): Ditto. (vwredsumu): Ditto. (vfredusum): Ditto. (vfredosum): Ditto. (vfredmax): Ditto.
(vfredmin): Ditto. (vfredusum_frm): Ditto. (vfredosum_frm): Ditto. (vfwredosum): Ditto. (vfwredusum): Ditto. (vfwredosum_frm): Ditto. (vfwredusum_frm): Ditto. (vmand): Ditto. (vmnand): Ditto. (vmandn): Ditto. (vmxor): Ditto. (vmor): Ditto. (vmnor): Ditto. (vmorn): Ditto. (vmxnor): Ditto. (vmmv): Ditto. (vmclr): Ditto. (vmset): Ditto. (vmnot): Ditto. (vcpop): Ditto. (vfirst): Ditto. (vmsbf): Ditto. (vmsif): Ditto. (vmsof): Ditto. (viota): Ditto. (vid): Ditto. (vmv_x): Ditto. (vmv_s): Ditto. (vfmv_f): Ditto. (vfmv_s): Ditto. (vslideup): Ditto. (vslidedown): Ditto. (vslide1up): Ditto. (vslide1down): Ditto. (vfslide1up): Ditto. (vfslide1down): Ditto. (vrgather): Ditto. (vrgatherei16): Ditto. (vcompress): Ditto. (vundefined): Ditto. (vreinterpret): Ditto. (vlmul_ext): Ditto. (vlmul_trunc): Ditto. (vset): Ditto. (vget): Ditto. (vcreate): Ditto. (vlseg): Ditto. (vsseg): Ditto. (vlsseg): Ditto. (vssseg): Ditto. (vluxseg): Ditto. (vloxseg): Ditto. (vsuxseg): Ditto. (vsoxseg): Ditto. (vlsegff): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_FUNCTION): Use a variadic macro. * config/riscv/riscv-vector-builtins.h (struct function_group_info): Add avail function interface into struct. * config/riscv/t-riscv: Add dependency. * config/riscv/riscv-vector-builtins-avail.h: New file. The definition of the AVAIL macro.
--- .../riscv/riscv-vector-builtins-avail.h | 12 + .../riscv/riscv-vector-builtins-functions.def | 941 +++++++++--------- gcc/config/riscv/riscv-vector-builtins.cc | 6 +- gcc/config/riscv/riscv-vector-builtins.h | 10 + gcc/config/riscv/t-riscv | 1 + 5 files changed, 499 insertions(+), 471 deletions(-) create mode 100644 gcc/config/riscv/riscv-vector-builtins-avail.h diff --git a/gcc/config/riscv/riscv-vector-builtins-avail.h b/gcc/config/riscv/riscv-vector-builtins-avail.h new file mode 100644 index 000000000000..b11a8bcbc7fd --- /dev/null +++ b/gcc/config/riscv/riscv-vector-builtins-avail.h @@ -0,0 +1,12 @@ +#ifndef GCC_RISCV_VECTOR_BUILTINS_AVAIL_H +#define GCC_RISCV_VECTOR_BUILTINS_AVAIL_H + +#include "insn-codes.h" +namespace riscv_vector { + +/* Declare an availability predicate for built-in functions. */ +#define AVAIL(NAME, COND) \ + static unsigned int riscv_vector_avail_##NAME(void) { return (COND); } + +} // namespace riscv_vector +#endif diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index 1c37fd5fffe3..bedd5397d3dd 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -31,624 +31,627 @@ along with GCC; see the file COPYING3. If not see - OPS_INFO describes all information of return type and each argument type. + - AVAIL this argument is the optional for AVAIL.Determin the enable + of the intrinsic function. + */ #ifndef DEF_RVV_FUNCTION -#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO) +#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, AVAIL) #endif /* Internal helper functions for gimple fold use. */ -DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops) -DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops) +DEF_RVV_FUNCTION (read_vl, read_vl, none_preds, p_none_void_ops, ) +DEF_RVV_FUNCTION (vlenb, vlenb, none_preds, ul_none_void_ops, ) /* 6. 
Configuration-Setting Instructions. */ -DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops) -DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops) +DEF_RVV_FUNCTION (vsetvl, vsetvl, none_preds, i_none_size_size_ops, ) +DEF_RVV_FUNCTION (vsetvlmax, vsetvlmax, none_preds, i_none_size_void_ops, ) /* 7. Vector Loads and Stores. */ // 7.4. Vector Unit-Stride Instructions -DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops) -DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops) -DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops) -DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops) +DEF_RVV_FUNCTION (vle, loadstore, full_preds, all_v_scalar_const_ptr_ops, ) +DEF_RVV_FUNCTION (vse, loadstore, none_m_preds, all_v_scalar_ptr_ops, ) +DEF_RVV_FUNCTION (vlm, loadstore, none_preds, b_v_scalar_const_ptr_ops, ) +DEF_RVV_FUNCTION (vsm, loadstore, none_preds, b_v_scalar_ptr_ops, ) // 7.5. Vector Strided Instructions -DEF_RVV_FUNCTION (vlse, loadstore, full_preds, all_v_scalar_const_ptr_ptrdiff_ops) -DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, all_v_scalar_ptr_ptrdiff_ops) +DEF_RVV_FUNCTION (vlse, loadstore, full_preds, all_v_scalar_const_ptr_ptrdiff_ops, ) +DEF_RVV_FUNCTION (vsse, loadstore, none_m_preds, all_v_scalar_ptr_ptrdiff_ops, ) // 7.6. 
Vector Indexed Instructions -DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vluxei64, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vloxei8, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vloxei16, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vloxei32, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vloxei64, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vsuxei8, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vsuxei16, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vsuxei32, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vsuxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vsoxei8, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vsoxei16, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vsoxei32, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew64_index_ops) +DEF_RVV_FUNCTION (vluxei8, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vluxei16, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vluxei32, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vluxei64, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew64_index_ops, ) 
+DEF_RVV_FUNCTION (vloxei8, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vloxei16, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vloxei32, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vloxei64, indexed_loadstore, full_preds, all_v_scalar_const_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vsuxei8, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vsuxei16, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vsuxei32, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vsuxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vsoxei8, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vsoxei16, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vsoxei32, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_eew64_index_ops, ) // 7.7. Unit-stride Fault-Only-First Loads -DEF_RVV_FUNCTION (vleff, fault_load, full_preds, all_v_scalar_const_ptr_size_ptr_ops) +DEF_RVV_FUNCTION (vleff, fault_load, full_preds, all_v_scalar_const_ptr_size_ptr_ops, ) // TODO: 7.8. Vector Load/Store Segment Instructions /* 11. Vector Integer Arithmetic Instructions. */ // 11.1. 
Vector Single-Width Integer Add and Subtract -DEF_RVV_FUNCTION (vadd, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vadd, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vsub, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vsub, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vrsub, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vneg, alu, full_preds, iu_v_ops) +DEF_RVV_FUNCTION (vadd, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vadd, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vsub, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vsub, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vrsub, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vneg, alu, full_preds, iu_v_ops, ) // 11.2. Vector Widening Integer Add/Subtract -DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wvv_ops) -DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wvx_ops) -DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wvv_ops) -DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wvx_ops) -DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wvv_ops) -DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wvx_ops) -DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wvv_ops) -DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wvx_ops) -DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wwv_ops) -DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wwx_ops) -DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wwv_ops) -DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wwx_ops) -DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wwv_ops) -DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wwx_ops) -DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wwv_ops) -DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wwx_ops) -DEF_RVV_FUNCTION (vwcvt_x, alu, full_preds, i_x_x_v_ops) -DEF_RVV_FUNCTION (vwcvtu_x, alu, full_preds, u_x_x_v_ops) +DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wvv_ops, ) +DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wvx_ops, ) +DEF_RVV_FUNCTION (vwsubu, widen_alu, 
full_preds, u_wvv_ops, ) +DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wvx_ops, ) +DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wvv_ops, ) +DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wvx_ops, ) +DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wvv_ops, ) +DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wvx_ops, ) +DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wwv_ops, ) +DEF_RVV_FUNCTION (vwaddu, widen_alu, full_preds, u_wwx_ops, ) +DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wwv_ops, ) +DEF_RVV_FUNCTION (vwsubu, widen_alu, full_preds, u_wwx_ops, ) +DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wwv_ops, ) +DEF_RVV_FUNCTION (vwadd, widen_alu, full_preds, i_wwx_ops, ) +DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wwv_ops, ) +DEF_RVV_FUNCTION (vwsub, widen_alu, full_preds, i_wwx_ops, ) +DEF_RVV_FUNCTION (vwcvt_x, alu, full_preds, i_x_x_v_ops, ) +DEF_RVV_FUNCTION (vwcvtu_x, alu, full_preds, u_x_x_v_ops, ) // 11.3. Vector Integer Extension -DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf2_ops) -DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf4_ops) -DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf8_ops) -DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf2_ops) -DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf4_ops) -DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf8_ops) +DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf2_ops, ) +DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf4_ops, ) +DEF_RVV_FUNCTION (vzext, widen_alu, full_preds, u_vf8_ops, ) +DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf2_ops, ) +DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf4_ops, ) +DEF_RVV_FUNCTION (vsext, widen_alu, full_preds, i_vf8_ops, ) // 11.4. 
Vector Integer Add-with-Carry/Subtract-with-Borrow Instructions -DEF_RVV_FUNCTION (vadc, no_mask_policy, none_tu_preds, iu_vvvm_ops) -DEF_RVV_FUNCTION (vadc, no_mask_policy, none_tu_preds, iu_vvxm_ops) -DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvvm_ops) -DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvxm_ops) -DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvv_ops) -DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvx_ops) -DEF_RVV_FUNCTION (vsbc, no_mask_policy, none_tu_preds, iu_vvvm_ops) -DEF_RVV_FUNCTION (vsbc, no_mask_policy, none_tu_preds, iu_vvxm_ops) -DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvvm_ops) -DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvxm_ops) -DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvv_ops) -DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvx_ops) +DEF_RVV_FUNCTION (vadc, no_mask_policy, none_tu_preds, iu_vvvm_ops, ) +DEF_RVV_FUNCTION (vadc, no_mask_policy, none_tu_preds, iu_vvxm_ops, ) +DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvvm_ops, ) +DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvxm_ops, ) +DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvv_ops, ) +DEF_RVV_FUNCTION (vmadc, return_mask, none_preds, iu_mvx_ops, ) +DEF_RVV_FUNCTION (vsbc, no_mask_policy, none_tu_preds, iu_vvvm_ops, ) +DEF_RVV_FUNCTION (vsbc, no_mask_policy, none_tu_preds, iu_vvxm_ops, ) +DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvvm_ops, ) +DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvxm_ops, ) +DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvv_ops, ) +DEF_RVV_FUNCTION (vmsbc, return_mask, none_preds, iu_mvx_ops, ) // 11.5. 
Vector Bitwise Logical Instructions -DEF_RVV_FUNCTION (vand, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vand, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vor, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vor, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vxor, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vxor, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vnot, alu, full_preds, iu_v_ops) +DEF_RVV_FUNCTION (vand, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vand, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vor, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vor, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vxor, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vxor, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vnot, alu, full_preds, iu_v_ops, ) // 11.6. Vector Single-Width Shift Instructions -DEF_RVV_FUNCTION (vsll, alu, full_preds, iu_shift_vvv_ops) -DEF_RVV_FUNCTION (vsll, alu, full_preds, iu_shift_vvx_ops) -DEF_RVV_FUNCTION (vsra, alu, full_preds, i_shift_vvv_ops) -DEF_RVV_FUNCTION (vsra, alu, full_preds, i_shift_vvx_ops) -DEF_RVV_FUNCTION (vsrl, alu, full_preds, u_shift_vvv_ops) -DEF_RVV_FUNCTION (vsrl, alu, full_preds, u_shift_vvx_ops) +DEF_RVV_FUNCTION (vsll, alu, full_preds, iu_shift_vvv_ops, ) +DEF_RVV_FUNCTION (vsll, alu, full_preds, iu_shift_vvx_ops, ) +DEF_RVV_FUNCTION (vsra, alu, full_preds, i_shift_vvv_ops, ) +DEF_RVV_FUNCTION (vsra, alu, full_preds, i_shift_vvx_ops, ) +DEF_RVV_FUNCTION (vsrl, alu, full_preds, u_shift_vvv_ops, ) +DEF_RVV_FUNCTION (vsrl, alu, full_preds, u_shift_vvx_ops, ) // 11.7. 
Vector Narrowing Integer Right Shift Instructions -DEF_RVV_FUNCTION (vnsrl, narrow_alu, full_preds, u_narrow_shift_vwv_ops) -DEF_RVV_FUNCTION (vnsrl, narrow_alu, full_preds, u_narrow_shift_vwx_ops) -DEF_RVV_FUNCTION (vnsra, narrow_alu, full_preds, i_narrow_shift_vwv_ops) -DEF_RVV_FUNCTION (vnsra, narrow_alu, full_preds, i_narrow_shift_vwx_ops) -DEF_RVV_FUNCTION (vncvt_x, narrow_alu, full_preds, iu_trunc_ops) +DEF_RVV_FUNCTION (vnsrl, narrow_alu, full_preds, u_narrow_shift_vwv_ops, ) +DEF_RVV_FUNCTION (vnsrl, narrow_alu, full_preds, u_narrow_shift_vwx_ops, ) +DEF_RVV_FUNCTION (vnsra, narrow_alu, full_preds, i_narrow_shift_vwv_ops, ) +DEF_RVV_FUNCTION (vnsra, narrow_alu, full_preds, i_narrow_shift_vwx_ops, ) +DEF_RVV_FUNCTION (vncvt_x, narrow_alu, full_preds, iu_trunc_ops, ) // 11.8. Vector Integer Compare Instructions -DEF_RVV_FUNCTION (vmseq, return_mask, none_m_mu_preds, iu_mvv_ops) -DEF_RVV_FUNCTION (vmseq, return_mask, none_m_mu_preds, iu_mvx_ops) -DEF_RVV_FUNCTION (vmsne, return_mask, none_m_mu_preds, iu_mvv_ops) -DEF_RVV_FUNCTION (vmsne, return_mask, none_m_mu_preds, iu_mvx_ops) -DEF_RVV_FUNCTION (vmsltu, return_mask, none_m_mu_preds, u_mvv_ops) -DEF_RVV_FUNCTION (vmsltu, return_mask, none_m_mu_preds, u_mvx_ops) -DEF_RVV_FUNCTION (vmslt, return_mask, none_m_mu_preds, i_mvv_ops) -DEF_RVV_FUNCTION (vmslt, return_mask, none_m_mu_preds, i_mvx_ops) -DEF_RVV_FUNCTION (vmsleu, return_mask, none_m_mu_preds, u_mvv_ops) -DEF_RVV_FUNCTION (vmsleu, return_mask, none_m_mu_preds, u_mvx_ops) -DEF_RVV_FUNCTION (vmsle, return_mask, none_m_mu_preds, i_mvv_ops) -DEF_RVV_FUNCTION (vmsle, return_mask, none_m_mu_preds, i_mvx_ops) -DEF_RVV_FUNCTION (vmsgtu, return_mask, none_m_mu_preds, u_mvv_ops) -DEF_RVV_FUNCTION (vmsgtu, return_mask, none_m_mu_preds, u_mvx_ops) -DEF_RVV_FUNCTION (vmsgt, return_mask, none_m_mu_preds, i_mvv_ops) -DEF_RVV_FUNCTION (vmsgt, return_mask, none_m_mu_preds, i_mvx_ops) -DEF_RVV_FUNCTION (vmsgeu, return_mask, none_m_mu_preds, u_mvv_ops) -DEF_RVV_FUNCTION 
(vmsgeu, return_mask, none_m_mu_preds, u_mvx_ops) -DEF_RVV_FUNCTION (vmsge, return_mask, none_m_mu_preds, i_mvv_ops) -DEF_RVV_FUNCTION (vmsge, return_mask, none_m_mu_preds, i_mvx_ops) +DEF_RVV_FUNCTION (vmseq, return_mask, none_m_mu_preds, iu_mvv_ops, ) +DEF_RVV_FUNCTION (vmseq, return_mask, none_m_mu_preds, iu_mvx_ops, ) +DEF_RVV_FUNCTION (vmsne, return_mask, none_m_mu_preds, iu_mvv_ops, ) +DEF_RVV_FUNCTION (vmsne, return_mask, none_m_mu_preds, iu_mvx_ops, ) +DEF_RVV_FUNCTION (vmsltu, return_mask, none_m_mu_preds, u_mvv_ops, ) +DEF_RVV_FUNCTION (vmsltu, return_mask, none_m_mu_preds, u_mvx_ops, ) +DEF_RVV_FUNCTION (vmslt, return_mask, none_m_mu_preds, i_mvv_ops, ) +DEF_RVV_FUNCTION (vmslt, return_mask, none_m_mu_preds, i_mvx_ops, ) +DEF_RVV_FUNCTION (vmsleu, return_mask, none_m_mu_preds, u_mvv_ops, ) +DEF_RVV_FUNCTION (vmsleu, return_mask, none_m_mu_preds, u_mvx_ops, ) +DEF_RVV_FUNCTION (vmsle, return_mask, none_m_mu_preds, i_mvv_ops, ) +DEF_RVV_FUNCTION (vmsle, return_mask, none_m_mu_preds, i_mvx_ops, ) +DEF_RVV_FUNCTION (vmsgtu, return_mask, none_m_mu_preds, u_mvv_ops, ) +DEF_RVV_FUNCTION (vmsgtu, return_mask, none_m_mu_preds, u_mvx_ops, ) +DEF_RVV_FUNCTION (vmsgt, return_mask, none_m_mu_preds, i_mvv_ops, ) +DEF_RVV_FUNCTION (vmsgt, return_mask, none_m_mu_preds, i_mvx_ops, ) +DEF_RVV_FUNCTION (vmsgeu, return_mask, none_m_mu_preds, u_mvv_ops, ) +DEF_RVV_FUNCTION (vmsgeu, return_mask, none_m_mu_preds, u_mvx_ops, ) +DEF_RVV_FUNCTION (vmsge, return_mask, none_m_mu_preds, i_mvv_ops, ) +DEF_RVV_FUNCTION (vmsge, return_mask, none_m_mu_preds, i_mvx_ops, ) // 11.9. 
Vector Integer Min/Max Instructions -DEF_RVV_FUNCTION (vminu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vminu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vmin, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vmin, alu, full_preds, i_vvx_ops) -DEF_RVV_FUNCTION (vmaxu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vmaxu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vmax, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vmax, alu, full_preds, i_vvx_ops) +DEF_RVV_FUNCTION (vminu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vminu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vmin, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vmin, alu, full_preds, i_vvx_ops, ) +DEF_RVV_FUNCTION (vmaxu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vmaxu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vmax, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vmax, alu, full_preds, i_vvx_ops, ) // 11.10. Vector Single-Width Integer Multiply Instructions -DEF_RVV_FUNCTION (vmul, alu, full_preds, iu_vvv_ops) -DEF_RVV_FUNCTION (vmul, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vmulh, alu, full_preds, full_v_i_vvv_ops) -DEF_RVV_FUNCTION (vmulh, alu, full_preds, full_v_i_vvx_ops) -DEF_RVV_FUNCTION (vmulhu, alu, full_preds, full_v_u_vvv_ops) -DEF_RVV_FUNCTION (vmulhu, alu, full_preds, full_v_u_vvx_ops) -DEF_RVV_FUNCTION (vmulhsu, alu, full_preds, full_v_i_su_vvv_ops) -DEF_RVV_FUNCTION (vmulhsu, alu, full_preds, full_v_i_su_vvx_ops) +DEF_RVV_FUNCTION (vmul, alu, full_preds, iu_vvv_ops, ) +DEF_RVV_FUNCTION (vmul, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vmulh, alu, full_preds, full_v_i_vvv_ops, ) +DEF_RVV_FUNCTION (vmulh, alu, full_preds, full_v_i_vvx_ops, ) +DEF_RVV_FUNCTION (vmulhu, alu, full_preds, full_v_u_vvv_ops, ) +DEF_RVV_FUNCTION (vmulhu, alu, full_preds, full_v_u_vvx_ops, ) +DEF_RVV_FUNCTION (vmulhsu, alu, full_preds, full_v_i_su_vvv_ops, ) +DEF_RVV_FUNCTION (vmulhsu, alu, full_preds, full_v_i_su_vvx_ops, ) // 11.11. 
Vector Integer Divide Instructions -DEF_RVV_FUNCTION (vdivu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vdivu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vdiv, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vdiv, alu, full_preds, i_vvx_ops) -DEF_RVV_FUNCTION (vremu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vremu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vrem, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vrem, alu, full_preds, i_vvx_ops) +DEF_RVV_FUNCTION (vdivu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vdivu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vdiv, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vdiv, alu, full_preds, i_vvx_ops, ) +DEF_RVV_FUNCTION (vremu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vremu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vrem, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vrem, alu, full_preds, i_vvx_ops, ) // 11.12. Vector Widening Integer Multiply Instructions -DEF_RVV_FUNCTION (vwmul, alu, full_preds, i_wvv_ops) -DEF_RVV_FUNCTION (vwmul, alu, full_preds, i_wvx_ops) -DEF_RVV_FUNCTION (vwmulu, alu, full_preds, u_wvv_ops) -DEF_RVV_FUNCTION (vwmulu, alu, full_preds, u_wvx_ops) -DEF_RVV_FUNCTION (vwmulsu, alu, full_preds, i_su_wvv_ops) -DEF_RVV_FUNCTION (vwmulsu, alu, full_preds, i_su_wvx_ops) +DEF_RVV_FUNCTION (vwmul, alu, full_preds, i_wvv_ops, ) +DEF_RVV_FUNCTION (vwmul, alu, full_preds, i_wvx_ops, ) +DEF_RVV_FUNCTION (vwmulu, alu, full_preds, u_wvv_ops, ) +DEF_RVV_FUNCTION (vwmulu, alu, full_preds, u_wvx_ops, ) +DEF_RVV_FUNCTION (vwmulsu, alu, full_preds, i_su_wvv_ops, ) +DEF_RVV_FUNCTION (vwmulsu, alu, full_preds, i_su_wvx_ops, ) // 11.13. 
Vector Single-Width Integer Multiply-Add Instructions -DEF_RVV_FUNCTION (vmacc, alu, full_preds, iu_vvvv_ops) -DEF_RVV_FUNCTION (vmacc, alu, full_preds, iu_vvxv_ops) -DEF_RVV_FUNCTION (vnmsac, alu, full_preds, iu_vvvv_ops) -DEF_RVV_FUNCTION (vnmsac, alu, full_preds, iu_vvxv_ops) -DEF_RVV_FUNCTION (vmadd, alu, full_preds, iu_vvvv_ops) -DEF_RVV_FUNCTION (vmadd, alu, full_preds, iu_vvxv_ops) -DEF_RVV_FUNCTION (vnmsub, alu, full_preds, iu_vvvv_ops) -DEF_RVV_FUNCTION (vnmsub, alu, full_preds, iu_vvxv_ops) +DEF_RVV_FUNCTION (vmacc, alu, full_preds, iu_vvvv_ops, ) +DEF_RVV_FUNCTION (vmacc, alu, full_preds, iu_vvxv_ops, ) +DEF_RVV_FUNCTION (vnmsac, alu, full_preds, iu_vvvv_ops, ) +DEF_RVV_FUNCTION (vnmsac, alu, full_preds, iu_vvxv_ops, ) +DEF_RVV_FUNCTION (vmadd, alu, full_preds, iu_vvvv_ops, ) +DEF_RVV_FUNCTION (vmadd, alu, full_preds, iu_vvxv_ops, ) +DEF_RVV_FUNCTION (vnmsub, alu, full_preds, iu_vvvv_ops, ) +DEF_RVV_FUNCTION (vnmsub, alu, full_preds, iu_vvxv_ops, ) // 11.14. Vector Widening Integer Multiply-Add Instructions -DEF_RVV_FUNCTION (vwmaccu, alu, full_preds, u_wwvv_ops) -DEF_RVV_FUNCTION (vwmaccu, alu, full_preds, u_wwxv_ops) -DEF_RVV_FUNCTION (vwmacc, alu, full_preds, i_wwvv_ops) -DEF_RVV_FUNCTION (vwmacc, alu, full_preds, i_wwxv_ops) -DEF_RVV_FUNCTION (vwmaccsu, alu, full_preds, i_su_wwvv_ops) -DEF_RVV_FUNCTION (vwmaccsu, alu, full_preds, i_su_wwxv_ops) -DEF_RVV_FUNCTION (vwmaccus, alu, full_preds, i_us_wwxv_ops) +DEF_RVV_FUNCTION (vwmaccu, alu, full_preds, u_wwvv_ops, ) +DEF_RVV_FUNCTION (vwmaccu, alu, full_preds, u_wwxv_ops, ) +DEF_RVV_FUNCTION (vwmacc, alu, full_preds, i_wwvv_ops, ) +DEF_RVV_FUNCTION (vwmacc, alu, full_preds, i_wwxv_ops, ) +DEF_RVV_FUNCTION (vwmaccsu, alu, full_preds, i_su_wwvv_ops, ) +DEF_RVV_FUNCTION (vwmaccsu, alu, full_preds, i_su_wwxv_ops, ) +DEF_RVV_FUNCTION (vwmaccus, alu, full_preds, i_us_wwxv_ops, ) // 11.15. 
Vector Integer Merge Instructions -DEF_RVV_FUNCTION (vmerge, no_mask_policy, none_tu_preds, all_vvvm_ops) -DEF_RVV_FUNCTION (vmerge, no_mask_policy, none_tu_preds, iu_vvxm_ops) +DEF_RVV_FUNCTION (vmerge, no_mask_policy, none_tu_preds, all_vvvm_ops, ) +DEF_RVV_FUNCTION (vmerge, no_mask_policy, none_tu_preds, iu_vvxm_ops, ) // 11.16 Vector Integer Move Instructions -DEF_RVV_FUNCTION (vmv_v, move, none_tu_preds, all_v_ops) -DEF_RVV_FUNCTION (vmv_v, move, none_tu_preds, iu_x_ops) +DEF_RVV_FUNCTION (vmv_v, move, none_tu_preds, all_v_ops, ) +DEF_RVV_FUNCTION (vmv_v, move, none_tu_preds, iu_x_ops, ) /* 12. Vector Fixed-Point Arithmetic Instructions. */ // 12.1. Vector Single-Width Saturating Add and Subtract -DEF_RVV_FUNCTION (vsaddu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vsaddu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vsadd, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vsadd, alu, full_preds, i_vvx_ops) -DEF_RVV_FUNCTION (vssubu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vssubu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vssub, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vssub, alu, full_preds, i_vvx_ops) +DEF_RVV_FUNCTION (vsaddu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vsaddu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vsadd, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vsadd, alu, full_preds, i_vvx_ops, ) +DEF_RVV_FUNCTION (vssubu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vssubu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vssub, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vssub, alu, full_preds, i_vvx_ops, ) // 12.2. 
Vector Single-Width Averaging Add and Subtract -DEF_RVV_FUNCTION (vaaddu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vaaddu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vaadd, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vaadd, alu, full_preds, i_vvx_ops) -DEF_RVV_FUNCTION (vasubu, alu, full_preds, u_vvv_ops) -DEF_RVV_FUNCTION (vasubu, alu, full_preds, u_vvx_ops) -DEF_RVV_FUNCTION (vasub, alu, full_preds, i_vvv_ops) -DEF_RVV_FUNCTION (vasub, alu, full_preds, i_vvx_ops) +DEF_RVV_FUNCTION (vaaddu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vaaddu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vaadd, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vaadd, alu, full_preds, i_vvx_ops, ) +DEF_RVV_FUNCTION (vasubu, alu, full_preds, u_vvv_ops, ) +DEF_RVV_FUNCTION (vasubu, alu, full_preds, u_vvx_ops, ) +DEF_RVV_FUNCTION (vasub, alu, full_preds, i_vvv_ops, ) +DEF_RVV_FUNCTION (vasub, alu, full_preds, i_vvx_ops, ) // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation -DEF_RVV_FUNCTION (vsmul, alu, full_preds, full_v_i_vvv_ops) -DEF_RVV_FUNCTION (vsmul, alu, full_preds, full_v_i_vvx_ops) +DEF_RVV_FUNCTION (vsmul, alu, full_preds, full_v_i_vvv_ops, ) +DEF_RVV_FUNCTION (vsmul, alu, full_preds, full_v_i_vvx_ops, ) // 12.4. Vector Single-Width Scaling Shift Instructions -DEF_RVV_FUNCTION (vssrl, alu, full_preds, u_shift_vvv_ops) -DEF_RVV_FUNCTION (vssrl, alu, full_preds, u_shift_vvx_ops) -DEF_RVV_FUNCTION (vssra, alu, full_preds, i_shift_vvv_ops) -DEF_RVV_FUNCTION (vssra, alu, full_preds, i_shift_vvx_ops) +DEF_RVV_FUNCTION (vssrl, alu, full_preds, u_shift_vvv_ops, ) +DEF_RVV_FUNCTION (vssrl, alu, full_preds, u_shift_vvx_ops, ) +DEF_RVV_FUNCTION (vssra, alu, full_preds, i_shift_vvv_ops, ) +DEF_RVV_FUNCTION (vssra, alu, full_preds, i_shift_vvx_ops, ) // 12.5. 
Vector Narrowing Fixed-Point Clip Instructions -DEF_RVV_FUNCTION (vnclipu, narrow_alu, full_preds, u_narrow_shift_vwv_ops) -DEF_RVV_FUNCTION (vnclipu, narrow_alu, full_preds, u_narrow_shift_vwx_ops) -DEF_RVV_FUNCTION (vnclip, narrow_alu, full_preds, i_narrow_shift_vwv_ops) -DEF_RVV_FUNCTION (vnclip, narrow_alu, full_preds, i_narrow_shift_vwx_ops) +DEF_RVV_FUNCTION (vnclipu, narrow_alu, full_preds, u_narrow_shift_vwv_ops, ) +DEF_RVV_FUNCTION (vnclipu, narrow_alu, full_preds, u_narrow_shift_vwx_ops, ) +DEF_RVV_FUNCTION (vnclip, narrow_alu, full_preds, i_narrow_shift_vwv_ops, ) +DEF_RVV_FUNCTION (vnclip, narrow_alu, full_preds, i_narrow_shift_vwx_ops, ) /* 13. Vector Floating-Point Instructions. */ // 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions -DEF_RVV_FUNCTION (vfadd, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfadd, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfsub, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfsub, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfrsub, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfadd_frm, alu_frm, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfadd_frm, alu_frm, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfsub_frm, alu_frm, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfsub_frm, alu_frm, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfrsub_frm, alu_frm, full_preds, f_vvf_ops) +DEF_RVV_FUNCTION (vfadd, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfadd, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfsub, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfsub, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfrsub, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfadd_frm, alu_frm, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfadd_frm, alu_frm, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfsub_frm, alu_frm, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfsub_frm, alu_frm, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfrsub_frm, alu_frm, full_preds, f_vvf_ops, ) // 13.3. 
Vector Widening Floating-Point Add/Subtract Instructions -DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wvf_ops) -DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wvf_ops) -DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wwv_ops) -DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wwf_ops) -DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wwv_ops) -DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wwf_ops) -DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wvf_ops) -DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wvf_ops) -DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wwv_ops) -DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wwf_ops) -DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wwv_ops) -DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wwf_ops) +DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wvf_ops, ) +DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wvf_ops, ) +DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wwv_ops, ) +DEF_RVV_FUNCTION (vfwadd, widen_alu, full_preds, f_wwf_ops, ) +DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wwv_ops, ) +DEF_RVV_FUNCTION (vfwsub, widen_alu, full_preds, f_wwf_ops, ) +DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wvf_ops, ) +DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wvf_ops, ) +DEF_RVV_FUNCTION (vfwadd_frm, widen_alu_frm, full_preds, f_wwv_ops, ) +DEF_RVV_FUNCTION (vfwadd_frm, 
widen_alu_frm, full_preds, f_wwf_ops, ) +DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wwv_ops, ) +DEF_RVV_FUNCTION (vfwsub_frm, widen_alu_frm, full_preds, f_wwf_ops, ) // 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions -DEF_RVV_FUNCTION (vfmul, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfmul, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfdiv, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfdiv, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfrdiv, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfmul_frm, alu_frm, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfmul_frm, alu_frm, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfdiv_frm, alu_frm, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfdiv_frm, alu_frm, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfrdiv_frm, alu_frm, full_preds, f_vvf_ops) +DEF_RVV_FUNCTION (vfmul, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfmul, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfdiv, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfdiv, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfrdiv, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfmul_frm, alu_frm, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfmul_frm, alu_frm, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfdiv_frm, alu_frm, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfdiv_frm, alu_frm, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfrdiv_frm, alu_frm, full_preds, f_vvf_ops, ) // 13.5. Vector Widening Floating-Point Multiply -DEF_RVV_FUNCTION (vfwmul, alu, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwmul, alu, full_preds, f_wvf_ops) -DEF_RVV_FUNCTION (vfwmul_frm, alu_frm, full_preds, f_wvv_ops) -DEF_RVV_FUNCTION (vfwmul_frm, alu_frm, full_preds, f_wvf_ops) +DEF_RVV_FUNCTION (vfwmul, alu, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwmul, alu, full_preds, f_wvf_ops, ) +DEF_RVV_FUNCTION (vfwmul_frm, alu_frm, full_preds, f_wvv_ops, ) +DEF_RVV_FUNCTION (vfwmul_frm, alu_frm, full_preds, f_wvf_ops, ) // 13.6. 
Vector Single-Width Floating-Point Fused Multiply-Add Instructions -DEF_RVV_FUNCTION (vfmacc, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmacc, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmsac, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmsac, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmadd, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmadd, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmsub, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmsub, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmacc, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmacc, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmsac, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmsac, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmadd, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmadd, alu, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmsub, alu, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmsub, alu, full_preds, f_vvfv_ops) +DEF_RVV_FUNCTION (vfmacc, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmacc, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmsac, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmsac, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmadd, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmadd, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmsub, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmsub, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmacc, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmacc, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmsac, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmsac, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmadd, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmadd, alu, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmsub, alu, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmsub, alu, full_preds, f_vvfv_ops, ) -DEF_RVV_FUNCTION (vfmacc_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION 
(vfmacc_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmacc_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmacc_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmsac_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmsac_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmsac_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmsac_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmadd_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmadd_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvfv_ops) -DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f_vvvv_ops) -DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f_vvfv_ops) +DEF_RVV_FUNCTION (vfmacc_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmacc_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmacc_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmacc_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmsac_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmsac_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmsac_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmsac_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmadd_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmadd_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvfv_ops, ) +DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f_vvvv_ops, ) +DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, 
f_vvfv_ops, ) // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions -DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwnmacc, alu, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwnmacc, alu, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops) +DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwnmacc, alu, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwnmacc, alu, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops, ) -DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops) -DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwvv_ops) -DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwfv_ops) +DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops, ) +DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, 
full_preds, f_wwvv_ops, ) +DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwfv_ops, ) // 13.8. Vector Floating-Point Square-Root Instruction -DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops, ) -DEF_RVV_FUNCTION (vfsqrt_frm, alu_frm, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfsqrt_frm, alu_frm, full_preds, f_v_ops, ) // 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction -DEF_RVV_FUNCTION (vfrsqrt7, alu, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfrsqrt7, alu, full_preds, f_v_ops, ) // 13.10. Vector Floating-Point Reciprocal Estimate Instruction -DEF_RVV_FUNCTION (vfrec7, alu, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfrec7, alu, full_preds, f_v_ops, ) -DEF_RVV_FUNCTION (vfrec7_frm, alu_frm, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfrec7_frm, alu_frm, full_preds, f_v_ops, ) // 13.11. Vector Floating-Point MIN/MAX Instructions -DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfmax, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfmax, alu, full_preds, f_vvf_ops) +DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfmin, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfmax, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfmax, alu, full_preds, f_vvf_ops, ) // 13.12. 
Vector Floating-Point Sign-Injection Instructions -DEF_RVV_FUNCTION (vfsgnj, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfsgnj, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfsgnjn, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfsgnjn, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfsgnjx, alu, full_preds, f_vvv_ops) -DEF_RVV_FUNCTION (vfsgnjx, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfneg, alu, full_preds, f_v_ops) -DEF_RVV_FUNCTION (vfabs, alu, full_preds, f_v_ops) +DEF_RVV_FUNCTION (vfsgnj, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfsgnj, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfsgnjn, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfsgnjn, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfsgnjx, alu, full_preds, f_vvv_ops, ) +DEF_RVV_FUNCTION (vfsgnjx, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfneg, alu, full_preds, f_v_ops, ) +DEF_RVV_FUNCTION (vfabs, alu, full_preds, f_v_ops, ) // 13.13. Vector Floating-Point Compare Instructions -DEF_RVV_FUNCTION (vmfeq, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmfeq, return_mask, none_m_mu_preds, f_mvf_ops) -DEF_RVV_FUNCTION (vmfne, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmfne, return_mask, none_m_mu_preds, f_mvf_ops) -DEF_RVV_FUNCTION (vmflt, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmflt, return_mask, none_m_mu_preds, f_mvf_ops) -DEF_RVV_FUNCTION (vmfle, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmfle, return_mask, none_m_mu_preds, f_mvf_ops) -DEF_RVV_FUNCTION (vmfgt, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmfgt, return_mask, none_m_mu_preds, f_mvf_ops) -DEF_RVV_FUNCTION (vmfge, return_mask, none_m_mu_preds, f_mvv_ops) -DEF_RVV_FUNCTION (vmfge, return_mask, none_m_mu_preds, f_mvf_ops) +DEF_RVV_FUNCTION (vmfeq, return_mask, none_m_mu_preds, f_mvv_ops, ) +DEF_RVV_FUNCTION (vmfeq, return_mask, none_m_mu_preds, f_mvf_ops, ) +DEF_RVV_FUNCTION (vmfne, return_mask, none_m_mu_preds, 
f_mvv_ops, ) +DEF_RVV_FUNCTION (vmfne, return_mask, none_m_mu_preds, f_mvf_ops, ) +DEF_RVV_FUNCTION (vmflt, return_mask, none_m_mu_preds, f_mvv_ops, ) +DEF_RVV_FUNCTION (vmflt, return_mask, none_m_mu_preds, f_mvf_ops, ) +DEF_RVV_FUNCTION (vmfle, return_mask, none_m_mu_preds, f_mvv_ops, ) +DEF_RVV_FUNCTION (vmfle, return_mask, none_m_mu_preds, f_mvf_ops, ) +DEF_RVV_FUNCTION (vmfgt, return_mask, none_m_mu_preds, f_mvv_ops, ) +DEF_RVV_FUNCTION (vmfgt, return_mask, none_m_mu_preds, f_mvf_ops, ) +DEF_RVV_FUNCTION (vmfge, return_mask, none_m_mu_preds, f_mvv_ops, ) +DEF_RVV_FUNCTION (vmfge, return_mask, none_m_mu_preds, f_mvf_ops, ) // 13.14. Vector Floating-Point Classify Instruction -DEF_RVV_FUNCTION (vfclass, alu, full_preds, f_to_u_v_ops) +DEF_RVV_FUNCTION (vfclass, alu, full_preds, f_to_u_v_ops, ) // 13.15. Vector Floating-Point Merge Instruction -DEF_RVV_FUNCTION (vfmerge, no_mask_policy, none_tu_preds, f_vvfm_ops) +DEF_RVV_FUNCTION (vfmerge, no_mask_policy, none_tu_preds, f_vvfm_ops, ) // 13.16. Vector Floating-Point Move Instruction -DEF_RVV_FUNCTION (vfmv_v, move, none_tu_preds, f_f_ops) +DEF_RVV_FUNCTION (vfmv_v, move, none_tu_preds, f_f_ops, ) // 13.17. 
Single-Width Floating-Point/Integer Type-Convert Instructions -DEF_RVV_FUNCTION (vfcvt_x, alu, full_preds, f_to_i_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_xu, alu, full_preds, f_to_u_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_rtz_x, alu, full_preds, f_to_i_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_rtz_xu, alu, full_preds, f_to_u_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, i_to_f_x_v_ops) -DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, u_to_f_xu_v_ops) +DEF_RVV_FUNCTION (vfcvt_x, alu, full_preds, f_to_i_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_xu, alu, full_preds, f_to_u_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_rtz_x, alu, full_preds, f_to_i_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_rtz_xu, alu, full_preds, f_to_u_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, i_to_f_x_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_f, alu, full_preds, u_to_f_xu_v_ops, ) -DEF_RVV_FUNCTION (vfcvt_x_frm, alu_frm, full_preds, f_to_i_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_xu_frm, alu_frm, full_preds, f_to_u_f_v_ops) -DEF_RVV_FUNCTION (vfcvt_f_frm, alu_frm, full_preds, i_to_f_x_v_ops) -DEF_RVV_FUNCTION (vfcvt_f_frm, alu_frm, full_preds, u_to_f_xu_v_ops) +DEF_RVV_FUNCTION (vfcvt_x_frm, alu_frm, full_preds, f_to_i_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_xu_frm, alu_frm, full_preds, f_to_u_f_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_f_frm, alu_frm, full_preds, i_to_f_x_v_ops, ) +DEF_RVV_FUNCTION (vfcvt_f_frm, alu_frm, full_preds, u_to_f_xu_v_ops, ) // 13.18. 
Widening Floating-Point/Integer Type-Convert Instructions -DEF_RVV_FUNCTION (vfwcvt_x, alu, full_preds, f_to_wi_f_v_ops) -DEF_RVV_FUNCTION (vfwcvt_xu, alu, full_preds, f_to_wu_f_v_ops) -DEF_RVV_FUNCTION (vfwcvt_rtz_x, alu, full_preds, f_to_wi_f_v_ops) -DEF_RVV_FUNCTION (vfwcvt_rtz_xu, alu, full_preds, f_to_wu_f_v_ops) -DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, i_to_wf_x_v_ops) -DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, u_to_wf_xu_v_ops) -DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops) +DEF_RVV_FUNCTION (vfwcvt_x, alu, full_preds, f_to_wi_f_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_xu, alu, full_preds, f_to_wu_f_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_rtz_x, alu, full_preds, f_to_wi_f_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_rtz_xu, alu, full_preds, f_to_wu_f_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, i_to_wf_x_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, u_to_wf_xu_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_f, alu, full_preds, f_to_wf_f_v_ops, ) -DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops) -DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops) +DEF_RVV_FUNCTION (vfwcvt_x_frm, alu_frm, full_preds, f_to_wi_f_v_ops, ) +DEF_RVV_FUNCTION (vfwcvt_xu_frm, alu_frm, full_preds, f_to_wu_f_v_ops, ) // 13.19. 
Narrowing Floating-Point/Integer Type-Convert Instructions -DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_xu, narrow_alu, full_preds, f_to_nu_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_rtz_x, narrow_alu, full_preds, f_to_ni_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_rtz_xu, narrow_alu, full_preds, f_to_nu_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, i_to_nf_x_w_ops) -DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, u_to_nf_xu_w_ops) -DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, f_to_nf_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops) +DEF_RVV_FUNCTION (vfncvt_x, narrow_alu, full_preds, f_to_ni_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_xu, narrow_alu, full_preds, f_to_nu_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_rtz_x, narrow_alu, full_preds, f_to_ni_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_rtz_xu, narrow_alu, full_preds, f_to_nu_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, i_to_nf_x_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, u_to_nf_xu_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f, narrow_alu, full_preds, f_to_nf_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_rod_f, narrow_alu, full_preds, f_to_nf_f_w_ops, ) -DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops) -DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, i_to_nf_x_w_ops) -DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, u_to_nf_xu_w_ops) -DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, f_to_nf_f_w_ops) +DEF_RVV_FUNCTION (vfncvt_x_frm, narrow_alu_frm, full_preds, f_to_ni_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_xu_frm, narrow_alu_frm, full_preds, f_to_nu_f_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, i_to_nf_x_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, u_to_nf_xu_w_ops, ) +DEF_RVV_FUNCTION (vfncvt_f_frm, narrow_alu_frm, full_preds, 
f_to_nf_f_w_ops, ) /* 14. Vector Reduction Operations. */ // 14.1. Vector Single-Width Integer Reduction Instructions -DEF_RVV_FUNCTION (vredsum, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredmaxu, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredmax, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredminu, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredmin, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredand, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredor, reduc_alu, no_mu_preds, iu_vs_ops) -DEF_RVV_FUNCTION (vredxor, reduc_alu, no_mu_preds, iu_vs_ops) +DEF_RVV_FUNCTION (vredsum, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredmaxu, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredmax, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredminu, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredmin, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredand, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredor, reduc_alu, no_mu_preds, iu_vs_ops, ) +DEF_RVV_FUNCTION (vredxor, reduc_alu, no_mu_preds, iu_vs_ops, ) // 14.2. Vector Widening Integer Reduction Instructions -DEF_RVV_FUNCTION (vwredsum, reduc_alu, no_mu_preds, wi_vs_ops) -DEF_RVV_FUNCTION (vwredsumu, reduc_alu, no_mu_preds, wu_vs_ops) +DEF_RVV_FUNCTION (vwredsum, reduc_alu, no_mu_preds, wi_vs_ops, ) +DEF_RVV_FUNCTION (vwredsumu, reduc_alu, no_mu_preds, wu_vs_ops, ) // 14.3. 
Vector Single-Width Floating-Point Reduction Instructions -DEF_RVV_FUNCTION (vfredusum, reduc_alu, no_mu_preds, f_vs_ops) -DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds, f_vs_ops) -DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops) -DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops) +DEF_RVV_FUNCTION (vfredusum, reduc_alu, no_mu_preds, f_vs_ops, ) +DEF_RVV_FUNCTION (vfredosum, reduc_alu, no_mu_preds, f_vs_ops, ) +DEF_RVV_FUNCTION (vfredmax, reduc_alu, no_mu_preds, f_vs_ops, ) +DEF_RVV_FUNCTION (vfredmin, reduc_alu, no_mu_preds, f_vs_ops, ) -DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops) -DEF_RVV_FUNCTION (vfredosum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops) +DEF_RVV_FUNCTION (vfredusum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops, ) +DEF_RVV_FUNCTION (vfredosum_frm, reduc_alu_frm, no_mu_preds, f_vs_ops, ) // 14.4. Vector Widening Floating-Point Reduction Instructions -DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops) -DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops) +DEF_RVV_FUNCTION (vfwredosum, reduc_alu, no_mu_preds, wf_vs_ops, ) +DEF_RVV_FUNCTION (vfwredusum, reduc_alu, no_mu_preds, wf_vs_ops, ) -DEF_RVV_FUNCTION (vfwredosum_frm, reduc_alu_frm, no_mu_preds, wf_vs_ops) -DEF_RVV_FUNCTION (vfwredusum_frm, reduc_alu_frm, no_mu_preds, wf_vs_ops) +DEF_RVV_FUNCTION (vfwredosum_frm, reduc_alu_frm, no_mu_preds, wf_vs_ops, ) +DEF_RVV_FUNCTION (vfwredusum_frm, reduc_alu_frm, no_mu_preds, wf_vs_ops, ) /* 15. Vector Mask Instructions. */ // 15.1. 
Vector Mask-Register Logical Instructions -DEF_RVV_FUNCTION (vmand, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmnand, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmandn, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmxor, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmor, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmnor, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmorn, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmxnor, mask_alu, none_preds, b_mmm_ops) -DEF_RVV_FUNCTION (vmmv, mask_alu, none_preds, b_mm_ops) -DEF_RVV_FUNCTION (vmclr, mask_alu, none_preds, b_m_ops) -DEF_RVV_FUNCTION (vmset, mask_alu, none_preds, b_m_ops) -DEF_RVV_FUNCTION (vmnot, mask_alu, none_preds, b_mm_ops) +DEF_RVV_FUNCTION (vmand, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmnand, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmandn, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmxor, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmor, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmnor, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmorn, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmxnor, mask_alu, none_preds, b_mmm_ops, ) +DEF_RVV_FUNCTION (vmmv, mask_alu, none_preds, b_mm_ops, ) +DEF_RVV_FUNCTION (vmclr, mask_alu, none_preds, b_m_ops, ) +DEF_RVV_FUNCTION (vmset, mask_alu, none_preds, b_m_ops, ) +DEF_RVV_FUNCTION (vmnot, mask_alu, none_preds, b_mm_ops, ) // 15.2. Vector count population in mask vcpop.m -DEF_RVV_FUNCTION (vcpop, mask_alu, none_m_preds, b_ulong_m_ops) +DEF_RVV_FUNCTION (vcpop, mask_alu, none_m_preds, b_ulong_m_ops, ) // 15.3. vfirst find-first-set mask bit -DEF_RVV_FUNCTION (vfirst, mask_alu, none_m_preds, b_long_m_ops) +DEF_RVV_FUNCTION (vfirst, mask_alu, none_m_preds, b_long_m_ops, ) // 15.4. 
vmsbf.m set-before-first mask bit -DEF_RVV_FUNCTION (vmsbf, mask_alu, none_m_mu_preds, b_mm_ops) +DEF_RVV_FUNCTION (vmsbf, mask_alu, none_m_mu_preds, b_mm_ops, ) // 15.5. vmsif.m set-including-first mask bit -DEF_RVV_FUNCTION (vmsif, mask_alu, none_m_mu_preds, b_mm_ops) +DEF_RVV_FUNCTION (vmsif, mask_alu, none_m_mu_preds, b_mm_ops, ) // 15.6. vmsof.m set-only-first mask bit -DEF_RVV_FUNCTION (vmsof, mask_alu, none_m_mu_preds, b_mm_ops) +DEF_RVV_FUNCTION (vmsof, mask_alu, none_m_mu_preds, b_mm_ops, ) // 15.8. Vector Iota Instruction -DEF_RVV_FUNCTION (viota, mask_alu, full_preds, u_vm_ops) +DEF_RVV_FUNCTION (viota, mask_alu, full_preds, u_vm_ops, ) // 15.9. Vector Element Index Instruction -DEF_RVV_FUNCTION (vid, alu, full_preds, u_v_ops) +DEF_RVV_FUNCTION (vid, alu, full_preds, u_v_ops, ) /* 16. Vector Permutation Instructions. */ // 16.1. Integer Scalar Move Instructions -DEF_RVV_FUNCTION (vmv_x, scalar_move, none_preds, iu_x_s_ops) -DEF_RVV_FUNCTION (vmv_s, move, none_tu_preds, iu_s_x_ops) +DEF_RVV_FUNCTION (vmv_x, scalar_move, none_preds, iu_x_s_ops, ) +DEF_RVV_FUNCTION (vmv_s, move, none_tu_preds, iu_s_x_ops, ) // 16.2. Floating-Point Scalar Move Instructions -DEF_RVV_FUNCTION (vfmv_f, scalar_move, none_preds, f_f_s_ops) -DEF_RVV_FUNCTION (vfmv_s, move, none_tu_preds, f_s_f_ops) +DEF_RVV_FUNCTION (vfmv_f, scalar_move, none_preds, f_f_s_ops, ) +DEF_RVV_FUNCTION (vfmv_s, move, none_tu_preds, f_s_f_ops, ) // 16.3. 
Vector Slide Instructions -DEF_RVV_FUNCTION (vslideup, alu, full_preds, all_vvvx_ops) -DEF_RVV_FUNCTION (vslidedown, alu, full_preds, all_vvx_ops) -DEF_RVV_FUNCTION (vslide1up, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vslide1down, alu, full_preds, iu_vvx_ops) -DEF_RVV_FUNCTION (vfslide1up, alu, full_preds, f_vvf_ops) -DEF_RVV_FUNCTION (vfslide1down, alu, full_preds, f_vvf_ops) +DEF_RVV_FUNCTION (vslideup, alu, full_preds, all_vvvx_ops, ) +DEF_RVV_FUNCTION (vslidedown, alu, full_preds, all_vvx_ops, ) +DEF_RVV_FUNCTION (vslide1up, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vslide1down, alu, full_preds, iu_vvx_ops, ) +DEF_RVV_FUNCTION (vfslide1up, alu, full_preds, f_vvf_ops, ) +DEF_RVV_FUNCTION (vfslide1down, alu, full_preds, f_vvf_ops, ) // 16.4. Vector Register Gather Instructions -DEF_RVV_FUNCTION (vrgather, alu, full_preds, all_gather_vvv_ops) -DEF_RVV_FUNCTION (vrgather, alu, full_preds, all_gather_vvx_ops) -DEF_RVV_FUNCTION (vrgatherei16, alu, full_preds, all_gatherei16_vvv_ops) +DEF_RVV_FUNCTION (vrgather, alu, full_preds, all_gather_vvv_ops, ) +DEF_RVV_FUNCTION (vrgather, alu, full_preds, all_gather_vvx_ops, ) +DEF_RVV_FUNCTION (vrgatherei16, alu, full_preds, all_gatherei16_vvv_ops, ) // 16.5. Vector Compress Instruction -DEF_RVV_FUNCTION (vcompress, alu, none_tu_preds, all_vvm_ops) +DEF_RVV_FUNCTION (vcompress, alu, none_tu_preds, all_vvm_ops, ) /* Miscellaneous Vector Functions. 
*/ -DEF_RVV_FUNCTION (vundefined, vundefined, none_preds, all_none_void_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, i_v_u_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, u_v_i_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, f_v_i_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, f_v_u_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, i_v_f_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, u_v_f_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew8_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew16_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew32_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew64_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool2_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool4_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool8_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool16_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool32_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool64_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew8_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew16_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew32_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew64_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew8_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew16_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew32_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, 
b_v_unsigned_eew64_lmul1_interpret_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x2_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x4_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x8_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x16_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x32_ops) -DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x64_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x2_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x4_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x8_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x16_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x32_ops) -DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x64_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x2_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x4_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x8_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul2_x2_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul2_x4_ops) -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul4_x2_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x2_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x4_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x8_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul2_x2_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul2_x4_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul4_x2_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x2_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x4_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x8_ops) -DEF_RVV_FUNCTION 
(vcreate, vcreate, none_preds, all_v_vcreate_lmul2_x2_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul2_x4_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul4_x2_ops) +DEF_RVV_FUNCTION (vundefined, vundefined, none_preds, all_none_void_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, i_v_u_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, u_v_i_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, f_v_i_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, f_v_u_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, i_v_f_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, u_v_f_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew8_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew16_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew32_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_eew64_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool2_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool4_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool8_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool16_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool32_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, iu_v_bool64_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew8_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew16_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew32_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_signed_eew64_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew8_lmul1_interpret_ops, ) 
+DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew16_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew32_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vreinterpret, misc, none_preds, b_v_unsigned_eew64_lmul1_interpret_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x2_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x4_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x8_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x16_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x32_ops, ) +DEF_RVV_FUNCTION (vlmul_ext, misc, none_preds, all_v_vlmul_ext_x64_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x2_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x4_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x8_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x16_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x32_ops, ) +DEF_RVV_FUNCTION (vlmul_trunc, misc, none_preds, all_v_vlmul_trunc_x64_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x2_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x4_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul1_x8_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul2_x2_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul2_x4_ops, ) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_lmul4_x2_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x2_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x4_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul1_x8_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul2_x2_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_lmul2_x4_ops, ) +DEF_RVV_FUNCTION (vget, vget, 
none_preds, all_v_vget_lmul4_x2_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x2_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x4_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul1_x8_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul2_x2_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul2_x4_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_lmul4_x2_ops, ) // Tuple types -DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_tuple_ops) -DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_tuple_ops) -DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_tuple_ops) -DEF_RVV_FUNCTION (vundefined, vundefined, none_preds, all_none_void_tuple_ops) -DEF_RVV_FUNCTION (vlseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ops) -DEF_RVV_FUNCTION (vsseg, seg_loadstore, none_m_preds, tuple_v_scalar_ptr_ops) -DEF_RVV_FUNCTION (vlsseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ptrdiff_ops) -DEF_RVV_FUNCTION (vssseg, seg_loadstore, none_m_preds, tuple_v_scalar_ptr_ptrdiff_ops) -DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew64_index_ops) 
-DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew8_index_ops) -DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew16_index_ops) -DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew32_index_ops) -DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew64_index_ops) -DEF_RVV_FUNCTION (vlsegff, seg_fault_load, full_preds, tuple_v_scalar_const_ptr_size_ptr_ops) +DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_tuple_ops, ) +DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_tuple_ops, ) +DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_tuple_ops, ) +DEF_RVV_FUNCTION (vundefined, vundefined, none_preds, all_none_void_tuple_ops, ) +DEF_RVV_FUNCTION (vlseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ops, ) +DEF_RVV_FUNCTION (vsseg, seg_loadstore, none_m_preds, tuple_v_scalar_ptr_ops, ) +DEF_RVV_FUNCTION (vlsseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ptrdiff_ops, ) +DEF_RVV_FUNCTION (vssseg, seg_loadstore, none_m_preds, tuple_v_scalar_ptr_ptrdiff_ops, ) +DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vluxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vloxseg, 
seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vloxseg, seg_indexed_loadstore, full_preds, tuple_v_scalar_const_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vsuxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew8_index_ops, ) +DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew16_index_ops, ) +DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew32_index_ops, ) +DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_ptr_eew64_index_ops, ) +DEF_RVV_FUNCTION (vlsegff, seg_fault_load, full_preds, tuple_v_scalar_const_ptr_size_ptr_ops, ) #undef DEF_RVV_FUNCTION diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 6330a3a41c34..5a164943df9b 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -51,6 +51,7 @@ #include "riscv-vector-builtins.h" #include "riscv-vector-builtins-shapes.h" #include "riscv-vector-builtins-bases.h" +#include "riscv-vector-builtins-avail.h" using namespace riscv_vector; @@ -2684,8 +2685,9 @@ static CONSTEXPR const function_type_info function_types[] = { /* A list of all RVV intrinsic functions. 
*/ static function_group_info function_groups[] = { -#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO) \ - {#NAME, &bases::NAME, &shapes::SHAPE, PREDS, OPS_INFO}, +#define DEF_RVV_FUNCTION(NAME, SHAPE, PREDS, OPS_INFO, ...) \ + {#NAME, &bases::NAME, &shapes::SHAPE, PREDS, OPS_INFO,\ + __VA_ARGS__}, #include "riscv-vector-builtins-functions.def" }; diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index cd8ccab17248..6c3e61074a28 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -212,6 +212,14 @@ class function_shape; /* Static information about a set of functions. */ struct function_group_info { + constexpr function_group_info (const char *base_name,\ + const function_base *const *base,\ + const function_shape *const *shape,\ + const predication_type_index *preds,\ + const rvv_op_info ops_infos,\ + unsigned int (*avail)() = nullptr) + : base_name (base_name), base (base), shape (shape), preds (preds),\ + ops_infos (ops_infos), avail (avail){}; /* The base name, as a string. */ const char *base_name; @@ -232,6 +240,8 @@ struct function_group_info on the index value. */ const predication_type_index *preds; const rvv_op_info ops_infos; + /* Whether the function is available. 
*/ + unsigned int (*avail) (void); }; class GTY ((user)) function_instance diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 372bb77c7264..1aac8b58bb52 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -1,4 +1,5 @@ RISCV_BUILTINS_H = $(srcdir)/config/riscv/riscv-vector-builtins.h \ + $(srcdir)/config/riscv/riscv-vector-builtins-avail.h \ $(srcdir)/config/riscv/riscv-vector-builtins.def \ $(srcdir)/config/riscv/riscv-vector-builtins-functions.def \ riscv-vector-type-indexer.gen.def From 3e93a3f09dc9af7155cc4760bc65d5be4571addf Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Tue, 12 Dec 2023 02:16:59 +0000 Subject: [PATCH 216/311] MAINTAINERS: Update my email address ChangeLog: * MAINTAINERS: Update my email address --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f3683ff03ecc..bc47e30325b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -705,7 +705,7 @@ Marcel Vollweiler Ville Voutilainen Nenad Vukicevic Feng Wang -Feng Wang s +Feng Wang Hongyu Wang Jiong Wang Stephen M. Webb From 2900a77fe4e7d2211a785d427794544fe3d01960 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 11 Dec 2023 21:29:39 -0500 Subject: [PATCH 217/311] analyzer: add more test coverage for tainted modulus Add more test coverage for r14-6349-g0bef72539e585d. gcc/testsuite/ChangeLog: * gcc.dg/plugin/plugin.exp: Add taint-modulus.c to analyzer_kernel_plugin.c tests. * gcc.dg/plugin/taint-modulus.c: New test. 
Signed-off-by: David Malcolm --- gcc/testsuite/gcc.dg/plugin/plugin.exp | 1 + gcc/testsuite/gcc.dg/plugin/taint-modulus.c | 75 +++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/plugin/taint-modulus.c diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index d6cccb269df2..eebf96116ef8 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -165,6 +165,7 @@ set plugin_test_list [list \ taint-CVE-2011-0521-5-fixed.c \ taint-CVE-2011-0521-6.c \ taint-antipatterns-1.c \ + taint-modulus.c \ taint-pr112850.c \ taint-pr112850-precise.c \ taint-pr112850-too-complex.c \ diff --git a/gcc/testsuite/gcc.dg/plugin/taint-modulus.c b/gcc/testsuite/gcc.dg/plugin/taint-modulus.c new file mode 100644 index 000000000000..81d968864e6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/taint-modulus.c @@ -0,0 +1,75 @@ +/* { dg-do compile } */ +/* { dg-options "-fanalyzer" } */ +/* { dg-require-effective-target analyzer } */ + +/* Reduced from a -Wanalyzer-tainted-array-index false +ve + seen in the Linux kernel's sound/drivers/opl3/opl3_synth.c. 
*/ + +extern unsigned long +copy_from_user(void* to, const void* from, unsigned long n); + +struct sbi_patch +{ + unsigned char prog; + unsigned char bank; +}; +struct fm_patch +{ + unsigned char prog; + unsigned char bank; + struct fm_patch* next; +}; +struct snd_opl3 +{ + struct fm_patch* patch_table[32]; +}; +int +snd_opl3_load_patch(struct snd_opl3* opl3, + int prog, + int bank); +struct fm_patch* +snd_opl3_find_patch(struct snd_opl3* opl3, + int prog, + int bank, + int create_patch); +long +snd_opl3_write(struct snd_opl3* opl3, + const char* buf, + long count) +{ + long result = 0; + int err = 0; + struct sbi_patch inst; + while (count >= sizeof(inst)) { + if (copy_from_user(&inst, buf, sizeof(inst))) + return -14; + err = snd_opl3_load_patch(opl3, inst.prog, inst.bank); + if (err < 0) + break; + result += sizeof(inst); + count -= sizeof(inst); + } + return result > 0 ? result : err; +} +int +snd_opl3_load_patch(struct snd_opl3* opl3, + int prog, + int bank) +{ + struct fm_patch* patch; + patch = snd_opl3_find_patch(opl3, prog, bank, 1); + if (!patch) + return -12; + return 0; +} +struct fm_patch* +snd_opl3_find_patch(struct snd_opl3* opl3, int prog, int bank, int create_patch) +{ + unsigned int key = (prog + bank) % 32; + struct fm_patch* patch; + for (patch = opl3->patch_table[key]; patch; patch = patch->next) { /* { dg-bogus "use of attacker-controlled value in array lookup" } */ + if (patch->prog == prog && patch->bank == bank) + return patch; + } + return ((void*)0); +} From 07dcb39e08aa52f166e8d74420364757002ad756 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Mon, 11 Dec 2023 19:30:42 +0800 Subject: [PATCH 218/311] i386: Fix missed APX_NDD check for shift/rotate expanders [PR 112943] The ashl/lshr/ashr expanders calls ix86_expand_binary_operator, while they will be called for some post-reload split, and TARGET_APX_NDD is required for these calls to avoid force-load to memory at postreload stage. 
gcc/ChangeLog: PR target/112943 * config/i386/i386.md (ashl3): Add TARGET_APX_NDD to ix86_expand_binary_operator call. (3): Likewise for rshift. (di3): Likewise for DImode rotate. (3): Likewise for SWI124 rotate. gcc/testsuite/ChangeLog: PR target/112943 * gcc.target/i386/pr112943.c: New test. --- gcc/config/i386/i386.md | 12 +++-- gcc/testsuite/gcc.target/i386/pr112943.c | 63 ++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112943.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b4db50f61cdd..f83064ec3357 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14308,7 +14308,8 @@ (ashift:SDWIM (match_operand:SDWIM 1 "") (match_operand:QI 2 "nonmemory_operand")))] "" - "ix86_expand_binary_operator (ASHIFT, mode, operands); DONE;") + "ix86_expand_binary_operator (ASHIFT, mode, operands, + TARGET_APX_NDD); DONE;") (define_insn_and_split "*ashl3_doubleword_mask" [(set (match_operand: 0 "register_operand") @@ -15564,7 +15565,8 @@ (any_shiftrt:SDWIM (match_operand:SDWIM 1 "") (match_operand:QI 2 "nonmemory_operand")))] "" - "ix86_expand_binary_operator (, mode, operands); DONE;") + "ix86_expand_binary_operator (, mode, operands, + TARGET_APX_NDD); DONE;") ;; Avoid useless masking of count operand. (define_insn_and_split "*3_mask" @@ -16791,7 +16793,8 @@ "" { if (TARGET_64BIT) - ix86_expand_binary_operator (, DImode, operands); + ix86_expand_binary_operator (, DImode, operands, + TARGET_APX_NDD); else if (const_1_to_31_operand (operands[2], VOIDmode)) emit_insn (gen_ix86_di3_doubleword (operands[0], operands[1], operands[2])); @@ -16811,7 +16814,8 @@ (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand") (match_operand:QI 2 "nonmemory_operand")))] "" - "ix86_expand_binary_operator (, mode, operands); DONE;") + "ix86_expand_binary_operator (, mode, operands, + TARGET_APX_NDD); DONE;") ;; Avoid useless masking of count operand. 
(define_insn_and_split "*3_mask" diff --git a/gcc/testsuite/gcc.target/i386/pr112943.c b/gcc/testsuite/gcc.target/i386/pr112943.c new file mode 100644 index 000000000000..7e299bd4b5e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112943.c @@ -0,0 +1,63 @@ +/* PR target/112943 */ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=westmere -mapxf" } */ + +typedef unsigned char __attribute__((__vector_size__(1))) v8u8; +typedef char __attribute__((__vector_size__(2))) v16u8; +typedef char __attribute__((__vector_size__(4))) v32u8; +typedef char __attribute__((__vector_size__(8))) v64u8; +typedef char __attribute__((__vector_size__(16))) v128u8; +typedef _Float16 __attribute__((__vector_size__(2))) v16f16; +typedef _Float16 __attribute__((__vector_size__(16))) v128f16; +typedef _Float64x __attribute__((__vector_size__(16))) v128f128; +typedef _Decimal64 d64; +char foo0_u8_0; +v8u8 foo0_v8u8_0; +__attribute__((__vector_size__(sizeof(char)))) char foo0_v8s8_0; +__attribute__((__vector_size__(sizeof(long)))) unsigned long v64u64_0; +_Float16 foo0_f16_0; +v128f16 foo0_v128f16_0; +double foo0_f64_0; +int foo0_f128_0, foo0_v32d32_0, foo0__0; +d64 foo0_d64_0; +v8u8 *foo0_ret; +unsigned __int128 foo0_u128_3; +v8u8 d; +void foo0() { + v64u64_0 -= foo0_u8_0; + v8u8 v8u8_1 = foo0_v8u8_0 % d; + v128f128 v128f128_1 = __builtin_convertvector(v64u64_0, v128f128); + __int128 u128_2 = ((9223372036854775807 + (__int128) 1) << 4) * foo0_u8_0, + u128_r = u128_2 + foo0_u128_3 + foo0_f128_0 + (__int128)foo0_d64_0; + v16f16 v16f16_1 = __builtin_convertvector(foo0_v8s8_0, v16f16); + v128f16 v128f16_1 = 0 > foo0_v128f16_0; + v128u8 v128u8_r = (v128u8)v128f16_1 + (v128u8)v128f128_1; + v64u8 v64u8_r = ((union { + v128u8 a; + v64u8 b; + })v128u8_r) + .b + + (v64u8)v64u64_0; + v32u8 v32u8_r = ((union { + v64u8 a; + v32u8 b; + })v64u8_r) + .b + + (v32u8)foo0_v32d32_0; + v16u8 v16u8_r = ((union { + v32u8 a; + v16u8 b; + })v32u8_r) + .b + + (v16u8)v16f16_1; + v8u8 
v8u8_r = ((union { + v16u8 a; + v8u8 b; + })v16u8_r) + .b + + foo0_v8u8_0 + v8u8_1 + foo0_v8s8_0; + long u64_r = u128_r + foo0_f64_0 + (unsigned long)foo0__0; + short u16_r = u64_r + foo0_f16_0; + char u8_r = u16_r + foo0_u8_0; + *foo0_ret = v8u8_r + u8_r; +} From d96533559e26dd0c86f0708fa46eef65c35f7b90 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Tue, 12 Dec 2023 01:12:04 -0300 Subject: [PATCH 219/311] untyped calls: enable target switching [PR112334] The computation of apply_args_size and apply_result_size is saved in a static variable, so that the corresponding _mode arrays are initialized only once. That is not compatible with switchable targets, and ARM's arm_set_current_function, by saving and restoring target globals, exercises this problem with a testcase such as that in the PR, in which more than one function in the translation unit calls __builtin_apply or __builtin_return, respectively. This patch moves the _size statics into the target_builtins array, with a bit of ugliness over _plus_one so that zero initialization of the struct does the right thing. for gcc/ChangeLog PR target/112334 * builtins.h (target_builtins): Add fields for apply_args_size and apply_result_size. * builtins.cc (apply_args_size, apply_result_size): Cache results in fields rather than in static variables. (get_apply_args_size, set_apply_args_size): New. (get_apply_result_size, set_apply_result_size): New. --- gcc/builtins.cc | 16 ++++++++++++++-- gcc/builtins.h | 7 +++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/builtins.cc b/gcc/builtins.cc index f6c96498f078..7c2732ab79e6 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -1403,8 +1403,16 @@ get_memory_rtx (tree exp, tree len) /* Built-in functions to perform an untyped call and return. 
*/ +#define set_apply_args_size(x) \ + (this_target_builtins->x_apply_args_size_plus_one = 1 + (x)) +#define get_apply_args_size() \ + (this_target_builtins->x_apply_args_size_plus_one - 1) #define apply_args_mode \ (this_target_builtins->x_apply_args_mode) +#define set_apply_result_size(x) \ + (this_target_builtins->x_apply_result_size_plus_one = 1 + (x)) +#define get_apply_result_size() \ + (this_target_builtins->x_apply_result_size_plus_one - 1) #define apply_result_mode \ (this_target_builtins->x_apply_result_mode) @@ -1414,7 +1422,7 @@ get_memory_rtx (tree exp, tree len) static int apply_args_size (void) { - static int size = -1; + int size = get_apply_args_size (); int align; unsigned int regno; @@ -1447,6 +1455,8 @@ apply_args_size (void) } else apply_args_mode[regno] = as_a (VOIDmode); + + set_apply_args_size (size); } return size; } @@ -1457,7 +1467,7 @@ apply_args_size (void) static int apply_result_size (void) { - static int size = -1; + int size = get_apply_result_size (); int align, regno; /* The values computed by this function never change. */ @@ -1489,6 +1499,8 @@ apply_result_size (void) #ifdef APPLY_RESULT_SIZE size = APPLY_RESULT_SIZE; #endif + + set_apply_result_size (size); } return size; } diff --git a/gcc/builtins.h b/gcc/builtins.h index 88a26d70cd5a..1a26fc63a6d1 100644 --- a/gcc/builtins.h +++ b/gcc/builtins.h @@ -37,6 +37,13 @@ struct target_builtins { register windows, this gives only the outbound registers. INCOMING_REGNO gives the corresponding inbound register. */ fixed_size_mode_pod x_apply_result_mode[FIRST_PSEUDO_REGISTER]; + + /* Nonzero iff the arrays above have been initialized. The _plus_one suffix + is for zero initialization to make it an unreasonable size, used to signal + that the size and the corresponding mode array has not been + initialized. 
*/ + int x_apply_args_size_plus_one; + int x_apply_result_size_plus_one; }; extern struct target_builtins default_target_builtins; From fc62716fe8d1d60a9f1c6906e5a4845b3331b828 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Thu, 7 Dec 2023 09:17:27 +0800 Subject: [PATCH 220/311] Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) doesn't contain all SSE_REGS. If the function doesn't clobber any SSE registers, or only clobbers their 128-bit part, then vzeroupper isn't issued before the function exits, so the status after the call is not CLEAN but ANY. Also for sibling_call, it's safe to issue a vzeroupper. There could also be a missing vzeroupper, since there's no mode_exit for sibling_call_p. gcc/ChangeLog: PR target/112891 * config/i386/i386.cc (ix86_avx_u128_mode_after): Return AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to align with ix86_avx_u128_mode_needed. (ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for sibling_call. gcc/testsuite/ChangeLog: * gcc.target/i386/pr112891.c: New test. * gcc.target/i386/pr112891-2.c: New test. --- gcc/config/i386/i386.cc | 22 +++++++++++++--- gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr112891.c | 29 +++++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112891-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr112891.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4b6bad37c8fc..4706f0d40594 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) vzeroupper if all SSE registers are clobbered. */ const function_abi &abi = insn_callee_abi (insn); if (vzeroupper_pattern (PATTERN (insn), VOIDmode) - || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS], - abi.mode_clobbers (V4DImode))) + /* Should be safe to issue an vzeroupper before sibling_call_p. 
+ Also there not mode_exit for sibling_call, so there could be + missing vzeroupper for that. */ + || !(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) return AVX_U128_ANY; return AVX_U128_CLEAN; @@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) bool avx_upper_reg_found = false; note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + if (avx_upper_reg_found) + return AVX_U128_DIRTY; + + /* If the function desn't clobber any sse registers or only clobber + 128-bit part, Then vzeroupper isn't issued before the function exit. + the status not CLEAN but ANY after the function. */ + const function_abi &abi = insn_callee_abi (insn); + if (!(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) + return AVX_U128_ANY; + + return AVX_U128_CLEAN; } /* Otherwise, return current mode. 
Remember that if insn diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c new file mode 100644 index 000000000000..164c3985d508 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +double +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); + return a[5] + b[5]; +} + +double +foo1 (double* __restrict a, double* b) +{ + double c = foo (a, b); + return __builtin_exp (c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c new file mode 100644 index 000000000000..dbf6c67948a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +void +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); +} + +double +foo1 (double* __restrict a, double* b) +{ + foo (a, b); + return __builtin_exp (b[1]); +} From f5f33b44b5dd4c41ae335445ae3f06b1ca3cfbcb Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 11 Dec 2023 13:00:18 +0100 Subject: [PATCH 221/311] tree-optimization/112939 - VN PHI visiting and -ftrivial-auto-var-init The following builds upon the last fix, making sure we only value-number to visited (un-)defs, otherwise prefer .VN_TOP. PR tree-optimization/112939 * tree-ssa-sccvn.cc (visit_phi): When all args are undefined make sure we end up with a value that was visited, otherwise fall back to .VN_TOP. * gcc.dg/pr112939.c: New testcase. 
--- gcc/testsuite/gcc.dg/pr112939.c | 23 +++++++++++++++++++++++ gcc/tree-ssa-sccvn.cc | 4 +++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr112939.c diff --git a/gcc/testsuite/gcc.dg/pr112939.c b/gcc/testsuite/gcc.dg/pr112939.c new file mode 100644 index 000000000000..7017beff30a2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr112939.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftrivial-auto-var-init=zero" } */ + +int i; + +void f (void) +{ + for (;;) + { + if (0) + for (;;) + { + int *a; + int *b = a; + + l1: + *b = (*b != 0) ? 0 : 2; + } + + if (i != 0) + goto l1; + } +} diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 11537fa3e0be..a178b768459b 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -5946,6 +5946,8 @@ visit_phi (gimple *phi, bool *inserted, bool backedges_varying_p) if (TREE_CODE (def) == SSA_NAME) { tree val = SSA_VAL (def, &visited); + if (SSA_NAME_IS_DEFAULT_DEF (def)) + visited = true; if (!backedges_varying_p || !(e->flags & EDGE_DFS_BACK)) def = val; if (e->flags & EDGE_DFS_BACK) @@ -6091,7 +6093,7 @@ visit_phi (gimple *phi, bool *inserted, bool backedges_varying_p) /* If we saw only undefined values and VN_TOP use one of the undefined values. */ else if (sameval == VN_TOP) - result = seen_undef ? seen_undef : sameval; + result = (seen_undef && seen_undef_visited) ? seen_undef : sameval; /* First see if it is equivalent to a phi node in this block. We prefer this as it allows IV elimination - see PRs 66502 and 67167. */ else if ((result = vn_phi_lookup (phi, backedges_varying_p))) From 8c8b40e1c8bb4fbfba850bca47c768d99d12bef4 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Thu, 7 Dec 2023 15:45:30 +0800 Subject: [PATCH 222/311] LoongArch: Allow -mcmodel=extreme and model attribute with -mexplicit-relocs=auto There seems no real reason to require -mexplicit-relocs=always for -mcmodel=extreme or model attribute. 
As the linker does not know how to relax a 3-operand la.local or la.global pseudo instruction, just emit explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also SYMBOL_GOT_DISP. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP if TARGET_CMODEL_EXTREME. (loongarch_split_symbol): Check for la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. (loongarch_print_operand_reloc): Likewise. (loongarch_option_override_internal): Likewise. (loongarch_handle_model_attribute): Likewise. * doc/invoke.texi (-mcmodel=extreme): Update the compatibility between it and -mexplicit-relocs=. gcc/testsuite/ChangeLog: * gcc.target/loongarch/attr-model-3.c: New test. * gcc.target/loongarch/attr-model-4.c: New test. * gcc.target/loongarch/func-call-extreme-3.c: New test. * gcc.target/loongarch/func-call-extreme-4.c: New test. --- gcc/config/loongarch/loongarch.cc | 25 ++++++++++++------- gcc/doc/invoke.texi | 4 +-- .../gcc.target/loongarch/attr-model-3.c | 6 +++++ .../gcc.target/loongarch/attr-model-4.c | 6 +++++ .../loongarch/func-call-extreme-3.c | 7 ++++++ .../loongarch/func-call-extreme-4.c | 7 ++++++ 6 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 3ec31c5d1054..860e6e8229f6 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1972,9 +1972,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type) case SYMBOL_TLS_LE: case SYMBOL_TLSGD: case SYMBOL_TLSLDM: - /* The linker don't know how to relax TLS accesses. 
*/ + case SYMBOL_PCREL64: + /* The linker don't know how to relax TLS accesses or 64-bit + pc-relative accesses. */ return true; case SYMBOL_GOT_DISP: + /* The linker don't know how to relax GOT accesses in extreme + code model. */ + if (TARGET_CMODEL_EXTREME) + return true; + /* If we are performing LTO for a final link, and we have the linker plugin so we know the resolution of the symbols, then all GOT references are binding to external symbols or @@ -3138,7 +3145,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) { - gcc_assert (TARGET_EXPLICIT_RELOCS); + gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); temp1 = gen_reg_rtx (Pmode); emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), @@ -5937,7 +5944,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, loongarch_classify_symbolic_expression (op); if (loongarch_symbol_extreme_p (symbol_type)) - gcc_assert (TARGET_EXPLICIT_RELOCS); + gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE); switch (symbol_type) { @@ -7544,9 +7551,9 @@ loongarch_option_override_internal (struct gcc_options *opts, switch (la_target.cmodel) { case CMODEL_EXTREME: - if (!TARGET_EXPLICIT_RELOCS) - error ("code model %qs needs %s", - "extreme", "-mexplicit-relocs=always"); + if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) + error ("code model %qs is not compatible with %s", + "extreme", "-mexplicit-relocs=none"); if (opts->x_flag_plt) { @@ -7912,11 +7919,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int, *no_add_attrs = true; return NULL_TREE; } - if (!TARGET_EXPLICIT_RELOCS) + if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE) { error_at (DECL_SOURCE_LOCATION (decl), - "%qE attribute requires %s", name, - "-mexplicit-relocs=always"); + "%qE attribute is not compatible with %s", name, + "-mexplicit-relocs=none"); *no_add_attrs = true; return NULL_TREE; 
} diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7d15cf94821e..1f26f80d26c8 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -26691,8 +26691,8 @@ The text segment and data segment must be within 2GB addressing space. @item extreme This mode does not limit the size of the code segment and data segment. -The @option{-mcmodel=extreme} option is incompatible with @option{-fplt}, -and it requires @option{-mexplicit-relocs=always}. +The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} +and/or @option{-mexplicit-relocs=none}. @end table The default code model is @code{normal}. diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c new file mode 100644 index 000000000000..5622d5086780 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */ +/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */ + +#define ATTR_MODEL_TEST +#include "attr-model-test.c" diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c new file mode 100644 index 000000000000..482724bb9743 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */ +/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */ + +#define ATTR_MODEL_TEST +#include "attr-model-test.c" diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c new file mode 100644 index 000000000000..a4da44b4a3d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ +/* { dg-final { scan-assembler 
"test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ +/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ +/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + +#include "func-call-extreme-1.c" diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c new file mode 100644 index 000000000000..16b00f4c5f2c --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */ +/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ +/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */ +/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */ + +#include "func-call-extreme-1.c" From d3247bee39e7b339d3c64f327b543a63e1cce19f Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 9 Dec 2023 22:08:37 +0800 Subject: [PATCH 223/311] LoongArch: Fix warnings building libgcc We are excluding loongarch-opts.h from target libraries, but now struct loongarch_target and gcc_options are not declared in the target libraries, causing: In file included from ../.././gcc/options.h:8, from ../.././gcc/tm.h:49, from ../../../gcc/libgcc/fixed-bit.c:48: ../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41: warning: 'struct gcc_options' declared inside parameter 
list will not be visible outside of this definition or declaration 57 | struct gcc_options *opts, | ^~~~~~~~~~~ So exclude the declarations referring to the C++ structs as well. gcc/ChangeLog: * config/loongarch/loongarch-opts.h (la_target): Move into #if for loongarch-def.h. (loongarch_init_target): Likewise. (loongarch_config_target): Likewise. (loongarch_update_gcc_opt_status): Likewise. --- gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h index 651c1c18ca84..d091359300a0 100644 --- a/gcc/config/loongarch/loongarch-opts.h +++ b/gcc/config/loongarch/loongarch-opts.h @@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see #ifndef LOONGARCH_OPTS_H #define LOONGARCH_OPTS_H -/* This is a C++ header and it shouldn't be used by target libraries. */ +/* The loongarch-def.h file is a C++ header and it shouldn't be used by + target libraries. Exclude it and everything using the C++ structs + (struct loongarch_target and gcc_options) from target libraries. */ #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS) #include "loongarch-def.h" -#endif /* Target configuration */ extern struct loongarch_target la_target; -/* Flag status */ -struct loongarch_flags { - int flt; const char* flt_str; -#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x)) - int sx[2]; -}; - - /* Initialize loongarch_target from separate option variables. */ void loongarch_init_target (struct loongarch_target *target, @@ -56,7 +49,14 @@ void loongarch_update_gcc_opt_status (struct loongarch_target *target, struct gcc_options *opts, struct gcc_options *opts_set); +#endif +/* Flag status */ +struct loongarch_flags { + int flt; const char* flt_str; +#define SX_FLAG_TYPE(x) ((x) < 0 ? 
-(x) : (x)) + int sx[2]; +}; /* Macros for common conditional expressions used in loongarch.{c,h,md} */ #define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL) From fdce86c9f07eb4f95ba438491c2b151e94be7ef2 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 12 Dec 2023 08:57:02 +0100 Subject: [PATCH 224/311] libquadmath: Restore linking against -lm on most targets [PR112963] The r14-4825 change added AC_CHECK_LIBM to libquadmath configure.ac and replaced unconditional linking with -lm with linking with $(LIBM) determined by that. Unfortunately that broke bare metal targets because AC_CHECK_LIBM attempts to link against -lm and this was after (unconditional) GCC_NO_EXECUTABLES. Then r14-4863 partially reverted that change (no longer AC_CHECK_LIBM), but didn't revert the Makefile.am change of -lm to $(LIBM), which had the effect that libquadmath is not linked against -lm on any arch. That is a serious problem though e.g. on Linux, because libquadmath calls a few libm entrypoints and e.g. on powerpc64le the underlinking can cause crashes in IFUNC resolvers of libm. Instead of adding further reversion of the r14-4825 commit and use -lm unconditionally again, this patch adds an AC_CHECK_LIBM like substitutions with the *-ncr-sysv4.3* target handling removed (I think we don't support such targets, especially not in libquadmath) and with the default case replaced by simple using -lm. That is something in between using -lm unconditionally and what AC_CHECK_LIBM does if it would work on bare metal - we know from GCC 13 and earlier that we can link -lm on all targets libquadmath is built for, and just white list a couple of targets which we know don't have separate -lm and don't want to link against that (like Darwin, Cygwin, ...). 2023-12-12 Jakub Jelinek PR libquadmath/112963 * configure.ac (LIBM): Readd AC_CHECK_LIBM-like check without doing AC_CHECK_LIB in it. * configure: Regenerated. * Makefile.in: Regenerated. 
--- libquadmath/Makefile.in | 1 + libquadmath/configure | 19 +++++++++++++++++-- libquadmath/configure.ac | 14 ++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in index dbcafb57e5b4..068af559457e 100644 --- a/libquadmath/Makefile.in +++ b/libquadmath/Makefile.in @@ -355,6 +355,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ +LIBM = @LIBM@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ diff --git a/libquadmath/configure b/libquadmath/configure index fd5274582854..49d70809218c 100755 --- a/libquadmath/configure +++ b/libquadmath/configure @@ -644,6 +644,7 @@ LIBQUAD_USE_SYMVER_GNU_FALSE LIBQUAD_USE_SYMVER_GNU_TRUE LIBQUAD_USE_SYMVER_FALSE LIBQUAD_USE_SYMVER_TRUE +LIBM toolexeclibdir toolexecdir MAINT @@ -10921,7 +10922,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 10924 "configure" +#line 10925 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11027,7 +11028,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11030 "configure" +#line 11031 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12260,6 +12261,20 @@ esac +# AC_CHECK_LIBM variant which avoids AC_CHECK_LIB (that doesn't work +# on bare metal). In the past we've used -lm in Makefile.am unconditionally, +# let's use it there unless target knows it doesn't need that. 
+LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*) + LIBM=-lm + ;; +esac + + for ac_header in fenv.h langinfo.h locale.h wchar.h wctype.h limits.h ctype.h printf.h errno.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac index 23ac3bae6e5c..349be2607c64 100644 --- a/libquadmath/configure.ac +++ b/libquadmath/configure.ac @@ -122,6 +122,20 @@ esac AC_SUBST(toolexecdir) AC_SUBST(toolexeclibdir) +# AC_CHECK_LIBM variant which avoids AC_CHECK_LIB (that doesn't work +# on bare metal). In the past we've used -lm in Makefile.am unconditionally, +# let's use it there unless target knows it doesn't need that. +LIBM= +case $host in +*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*) + # These system don't have libm, or don't need it + ;; +*) + LIBM=-lm + ;; +esac +AC_SUBST([LIBM]) + AC_CHECK_HEADERS(fenv.h langinfo.h locale.h wchar.h wctype.h limits.h ctype.h printf.h errno.h) LIBQUAD_CHECK_MATH_H_SIGNGAM From 8b832bfb4f9e50c2793463d9756499124de2f3e6 Mon Sep 17 00:00:00 2001 From: Pan Li Date: Tue, 12 Dec 2023 16:19:12 +0800 Subject: [PATCH 225/311] RISC-V: Disable RVV VCOMPRESS avl propagation This patch would like to disable the avl propagation for the following reasons. According to the ISA, the first vl elements of vector register group vs2 should be extracted and packed for vcompress. And the highest element of vs2 vector may be touched by the mask, which may be eliminated by avl propagation. For example, given original vl = 4 here. We have: v0 = 0b1000 v1 = {0x1, 0x2, 0x3, 0x4} v2 = {0x5, 0x6, 0x7, 0x8} Then: vcompress v1, v2, v0 (avl = 4), v1 = {0x8, 0x2, 0x3, 0x4}. <== Correct. vcompress v1, v2, v0 (avl = 2), v1 will be unchanged. <== Wrong. Finally, we cannot propagate avl of vcompress because it may change the semantics of the result.
This patch also fixes the failure of gcc.c-torture/execute/990128-1.c for the following configurations. riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1 riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2 riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4 riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8 riscv-sim/-march=rv64gcv/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1 riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2 riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4 riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8 riscv-sim/-march=rv64gcv_zvl256b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1
riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m1/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2 riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m2/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4 riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m4/--param=riscv-autovec-preference=fixed-vlmax riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8 riscv-sim/-march=rv64gcv_zvl512b/-mabi=lp64d/-mcmodel=medlow/--param=riscv-autovec-lmul=m8/--param=riscv-autovec-preference=fixed-vlmax gcc/ChangeLog: * config/riscv/riscv-avlprop.cc (avl_can_be_propagated_p): Disable the avl propagation for the vcompress. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c: New test. Signed-off-by: Pan Li --- gcc/config/riscv/riscv-avlprop.cc | 35 ++++++++++++------ .../rvv/autovec/binop/vcompress-avlprop-1.c | 36 +++++++++++++++++++ 2 files changed, 61 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index 02f006742f18..a6159816cf71 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -113,19 +113,34 @@ avl_can_be_propagated_p (rtx_insn *rinsn) touching the element with i > AVL. So, we don't do AVL propagation on these following situations: - - The index of "vrgather dest, source, index" may pick up the - element which has index >= AVL, so we can't strip the elements - that has index >= AVL of source register.
- - The last element of vslide1down is AVL + 1 according to RVV ISA: - vstart <= i < vl-1 vd[i] = vs2[i+1] if v0.mask[i] enabled - - The last multiple elements of vslidedown can be the element - has index >= AVL according to RVV ISA: - 0 <= i+OFFSET < VLMAX src[i] = vs2[i+OFFSET] - vstart <= i < vl vd[i] = src[i] if v0.mask[i] enabled. */ + vgather: + - The index of "vrgather dest, source, index" may pick up the + element which has index >= AVL, so we can't strip the elements + that has index >= AVL of source register. + vslide1down: + - The last element of vslide1down is AVL + 1 according to RVV ISA: + vstart <= i < vl-1 vd[i] = vs2[i+1] if v0.mask[i] enabled + - The last multiple elements of vslidedown can be the element + has index >= AVL according to RVV ISA: + 0 <= i+OFFSET < VLMAX src[i] = vs2[i+OFFSET] + vstart <= i < vl vd[i] = src[i] if v0.mask[i] enabled. + vcompress: + - According to the ISA, the first vl elements of vector register + group vs2 should be extracted and packed for vcompress. And the + highest element of vs2 vector may be touched by the mask. For + example, given vlmax = 4 here. + v0 = 0b1000 + v1 = {0x1, 0x2, 0x3, 0x4} + v2 = {0x5, 0x6, 0x7, 0x8} + vcompress v1, v2, v0 with avl = 4, v1 = {0x8, 0x2, 0x3, 0x4}. + vcompress v1, v2, v0 with avl = 2, v1 will be unchanged. + Thus, we cannot propagate avl of vcompress because it may has + senmatics change to the result. 
*/ return get_attr_type (rinsn) != TYPE_VGATHER && get_attr_type (rinsn) != TYPE_VSLIDEDOWN && get_attr_type (rinsn) != TYPE_VISLIDE1DOWN - && get_attr_type (rinsn) != TYPE_VFSLIDE1DOWN; + && get_attr_type (rinsn) != TYPE_VFSLIDE1DOWN + && get_attr_type (rinsn) != TYPE_VCOMPRESS; } static bool diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c new file mode 100644 index 000000000000..43f79fe3b7bf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl512b -mabi=lp64d -O3 --param=riscv-autovec-preference=fixed-vlmax -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ +#define MAX 10 + +struct s { struct s *n; } *p; +struct s ss; +struct s sss[MAX]; + +/* +** build_linked_list: +** ... +** vsetivli\s+zero,\s*8,\s*e64,\s*m1,\s*ta,\s*ma +** ... +** vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0 +** ... +** vcompress\.vm\s+v[0-9]+,\s*v[0-9]+,\s*v0 +** vsetivli\s+zero,\s*2,\s*e64,\s*m1,\s*ta,\s*ma +** ... 
+*/ +void +build_linked_list () +{ + int i; + struct s *next; + + p = &ss; + next = p; + + for (i = 0; i < MAX; i++) { + next->n = &sss[i]; + next = next->n; + } + + next->n = 0; +} From 1ee4ad6e9248563e735a70ff625c689285645710 Mon Sep 17 00:00:00 2001 From: Paul Iannetta Date: Mon, 11 Dec 2023 11:36:13 +0100 Subject: [PATCH 226/311] Add myself to write after approval ChangeLog: * MAINTAINERS: Add myself to write after approval Signed-off-by: Paul Iannetta --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bc47e30325b0..e877396dc0e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -471,6 +471,7 @@ Dominique d'Humieres Andy Hutchinson Joel Hutton Lewis Hyatt +Paul Iannetta Roland Illig Meador Inge Bernardo Innocenti From 0640bc76cd0937069bdeea71aa9e1076d633ed97 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 12 Dec 2023 10:09:53 +0000 Subject: [PATCH 227/311] aarch64: Add dg-options to prfm_imm_offset_2.c gcc/testsuite/ * gcc.target/aarch64/prfm_imm_offset_2.c: Add dg-options. --- gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c b/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c index 2dd695157f20..04e3fb72c455 100644 --- a/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c +++ b/gcc/testsuite/gcc.target/aarch64/prfm_imm_offset_2.c @@ -1 +1,2 @@ +/* { dg-options "-O2" } */ void f(char *p) { asm("prfm pldl1keep, %a0\n" :: "p" (p + 6)); } From 99182ea09f10beca8445396cbab491899536f5c3 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 24 Nov 2023 11:08:19 +0800 Subject: [PATCH 228/311] Only allow (int)trunc(x) to (int)x simplification with -ffp-int-builtin-inexact [PR107723] With -fno-fp-int-builtin-inexact, trunc is not allowed to raise FE_INEXACT and it should produce an integral result (if the input is not NaN or Inf). Thus FE_INEXACT should not be raised. 
But (int)x may raise FE_INEXACT when x is a non-integer, non-NaN, and non-Inf value. C23 recommends to do so in a footnote. Thus we should not simplify (int)trunc(x) to (int)x if -fno-fp-int-builtin-inexact is in-effect. gcc/ChangeLog: PR middle-end/107723 * convert.cc (convert_to_integer_1) [case BUILT_IN_TRUNC]: Break early if !flag_fp_int_builtin_inexact and flag_trapping_math. gcc/testsuite/ChangeLog: PR middle-end/107723 * gcc.dg/torture/builtin-fp-int-inexact-trunc.c: New test. --- gcc/convert.cc | 3 ++- .../gcc.dg/torture/builtin-fp-int-inexact-trunc.c | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/torture/builtin-fp-int-inexact-trunc.c diff --git a/gcc/convert.cc b/gcc/convert.cc index 46c8bcb31f82..f214b750188d 100644 --- a/gcc/convert.cc +++ b/gcc/convert.cc @@ -591,7 +591,8 @@ convert_to_integer_1 (tree type, tree expr, bool dofold) CASE_FLT_FN (BUILT_IN_TRUNC): CASE_FLT_FN_FLOATN_NX (BUILT_IN_TRUNC): if (call_expr_nargs (s_expr) != 1 - || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (CALL_EXPR_ARG (s_expr, 0)))) + || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (CALL_EXPR_ARG (s_expr, 0))) + || (!flag_fp_int_builtin_inexact && flag_trapping_math)) break; return convert_to_integer_1 (type, CALL_EXPR_ARG (s_expr, 0), dofold); diff --git a/gcc/testsuite/gcc.dg/torture/builtin-fp-int-inexact-trunc.c b/gcc/testsuite/gcc.dg/torture/builtin-fp-int-inexact-trunc.c new file mode 100644 index 000000000000..09731183621b --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/builtin-fp-int-inexact-trunc.c @@ -0,0 +1,12 @@ +/* Test -fno-fp-int-builtin-inexact. */ +/* { dg-do compile } */ +/* { dg-options "-fno-fp-int-builtin-inexact -fdump-tree-original" } */ + +long +x (double y) +{ + return __builtin_trunc (y); +} + +/* Optimization should not discard the __builtin_trunc call. 
*/ +/* { dg-final { scan-tree-dump "__builtin_trunc" "original" } } */ From dabd94da0c78f4beb0c2c25f38ab6de2d8417497 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 12 Dec 2023 13:07:35 +0100 Subject: [PATCH 229/311] testsuite: Fix up test directive syntax errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've noticed +ERROR: gcc.dg/gomp/pr87887-1.c: syntax error in target selector ".-4" for " dg-warning 13 "unsupported return type ‘struct S’ for ‘simd’ functions" { target aarch64*-*-* } .-4 " +ERROR: gcc.dg/gomp/pr87887-1.c: syntax error in target selector ".-4" for " dg-warning 13 "unsupported return type ‘struct S’ for ‘simd’ functions" { target aarch64*-*-* } .-4 " +ERROR: gcc.dg/gomp/pr89246-1.c: syntax error in target selector ".-4" for " dg-warning 11 "unsupported argument type ‘__int128’ for ‘simd’ functions" { target aarch64*-*-* } .-4 " +ERROR: gcc.dg/gomp/pr89246-1.c: syntax error in target selector ".-4" for " dg-warning 11 "unsupported argument type ‘__int128’ for ‘simd’ functions" { target aarch64*-*-* } .-4 " +ERROR: gcc.dg/gomp/simd-clones-2.c: unmatched open quote in list for " dg-final 19 { scan-tree-dump "_ZGVnN2ua32vl_setArray" "optimized { target aarch64*-*-* } } " +ERROR: gcc.dg/gomp/simd-clones-2.c: unmatched open quote in list for " dg-final 19 { scan-tree-dump "_ZGVnN2ua32vl_setArray" "optimized { target aarch64*-*-* } } " regressions. The following patch fixes those. 2023-12-12 Jakub Jelinek * gcc.dg/gomp/pr87887-1.c: Add missing comment argument to dg-warning. * gcc.dg/gomp/pr89246-1.c: Likewise. * gcc.dg/gomp/simd-clones-2.c: Add missing " after dump name. 
--- gcc/testsuite/gcc.dg/gomp/pr87887-1.c | 4 ++-- gcc/testsuite/gcc.dg/gomp/pr89246-1.c | 2 +- gcc/testsuite/gcc.dg/gomp/simd-clones-2.c | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/gcc/testsuite/gcc.dg/gomp/pr87887-1.c b/gcc/testsuite/gcc.dg/gomp/pr87887-1.c index 6e143aa0b5aa..281898300c77 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr87887-1.c +++ b/gcc/testsuite/gcc.dg/gomp/pr87887-1.c @@ -10,7 +10,7 @@ foo (int x) { return (struct S) { x }; } -/* { dg-warning "unsupported return type ‘struct S’ for ‘simd’ functions" { target aarch64*-*-* } .-4 } */ +/* { dg-warning "unsupported return type ‘struct S’ for ‘simd’ functions" "" { target aarch64*-*-* } .-4 } */ #pragma omp declare simd int @@ -18,7 +18,7 @@ bar (struct S x) { return x.n; } -/* { dg-warning "unsupported argument type ‘struct S’ for ‘simd’ functions" { target aarch64*-*-* } .-4 } */ +/* { dg-warning "unsupported argument type ‘struct S’ for ‘simd’ functions" "" { target aarch64*-*-* } .-4 } */ #pragma omp declare simd uniform (x) int diff --git a/gcc/testsuite/gcc.dg/gomp/pr89246-1.c b/gcc/testsuite/gcc.dg/gomp/pr89246-1.c index cdaec6b38516..4a0fd74f0639 100644 --- a/gcc/testsuite/gcc.dg/gomp/pr89246-1.c +++ b/gcc/testsuite/gcc.dg/gomp/pr89246-1.c @@ -8,7 +8,7 @@ int foo (__int128 x) { return x; } -/* { dg-warning "unsupported argument type ‘__int128’ for ‘simd’ functions" { target aarch64*-*-* } .-4 } */ +/* { dg-warning "unsupported argument type ‘__int128’ for ‘simd’ functions" "" { target aarch64*-*-* } .-4 } */ #pragma omp declare simd extern int bar (int x); diff --git a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c index 5fe4069c01c1..f12244054bd4 100644 --- a/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c +++ b/gcc/testsuite/gcc.dg/gomp/simd-clones-2.c @@ -16,12 +16,12 @@ float setArray(float *a, float x, int k) } /* { dg-final { scan-tree-dump {(?n)^__attribute__\(\(omp declare simd \(notinbranch uniform\(0\) aligned\(0:32\) 
linear\(2:1\)\)\)\)$} "optimized" } } */ -/* { dg-final { scan-tree-dump "_ZGVnN2ua32vl_setArray" "optimized { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "_ZGVnN4ua32vl_setArray" "optimized { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "_ZGVnN2vvva32_addit" "optimized { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "_ZGVnN4vvva32_addit" "optimized { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "_ZGVnM2vl66u_addit" "optimized { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "_ZGVnM4vl66u_addit" "optimized { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN2ua32vl_setArray" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN4ua32vl_setArray" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN2vvva32_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnN4vvva32_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnM2vl66u_addit" "optimized" { target aarch64*-*-* } } } */ +/* { dg-final { scan-tree-dump "_ZGVnM4vl66u_addit" "optimized" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVbN4ua32vl_setArray" "optimized" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump "_ZGVbN4vvva32_addit" "optimized" { target i?86-*-* x86_64-*-* } } } */ From 878cb5acf0c499702ffd315e273f55e8bd0970b8 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 12 Dec 2023 14:01:47 +0100 Subject: [PATCH 230/311] tree-optimization/112961 - include latch in if-conversion CSE The following makes sure to also process the (empty) latch when performing CSE on the if-converted loop body. That's important to get all uses of copies propagated out on the backedge as well. To avoid CSE on the PHI nodes itself which is prohibitive (see PR90402) this temporarily adds a fake entry edge to the loop. 
PR tree-optimization/112961 * tree-if-conv.cc (tree_if_conversion): Instead of excluding the latch block from VN, add a fake entry edge. * g++.dg/vect/pr112961.cc: New testcase. --- gcc/testsuite/g++.dg/vect/pr112961.cc | 17 +++++++++++++++++ gcc/tree-if-conv.cc | 9 +++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/vect/pr112961.cc diff --git a/gcc/testsuite/g++.dg/vect/pr112961.cc b/gcc/testsuite/g++.dg/vect/pr112961.cc new file mode 100644 index 000000000000..52759e180fbe --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr112961.cc @@ -0,0 +1,17 @@ +// { dg-do compile } +// { dg-require-effective-target vect_int } + +inline const int& maxx (const int& a, const int &b) +{ + return a > b ? a : b; +} + +int foo(int *a) +{ + int max = 0; + for (int i = 0; i < 1024; ++i) + max = maxx(max, a[i]); + return max; +} + +// { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail vect_no_int_min_max } } } diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 0bde281c2468..f9fd01499374 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3734,7 +3734,7 @@ tree_if_conversion (class loop *loop, vec *preds) auto_vec reads_to_lower; auto_vec writes_to_lower; bitmap exit_bbs; - edge pe; + edge pe, e; auto_vec refs; bool loop_versioned; @@ -3894,11 +3894,13 @@ tree_if_conversion (class loop *loop, vec *preds) /* Perform local CSE, this esp. helps the vectorizer analysis if loads and stores are involved. CSE only the loop body, not the entry PHIs, those are to be kept in sync with the non-if-converted copy. + Do this by adding a fake entry edge - we do want to include the + latch as otherwise copies on a reduction path cannot be propagated out. ??? We'll still keep dead stores though. 
*/ + e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), loop->header, EDGE_FAKE); exit_bbs = BITMAP_ALLOC (NULL); for (edge exit : get_loop_exit_edges (loop)) bitmap_set_bit (exit_bbs, exit->dest->index); - bitmap_set_bit (exit_bbs, loop->latch->index); std::pair *name_pair; unsigned ssa_names_idx; @@ -3908,6 +3910,9 @@ tree_if_conversion (class loop *loop, vec *preds) todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); + /* Remove the fake edge again. */ + remove_edge (e); + /* Delete dead predicate computations. */ ifcvt_local_dce (loop); BITMAP_FREE (exit_bbs); From eee13a3730bd1d7aa7b40687b1ee49c17d95159f Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 11 Dec 2023 10:08:24 +0100 Subject: [PATCH 231/311] ipa/92606 - properly handle no_icf attribute for variables The following adds no_icf handling for variables where the attribute was rejected. It also fixes the check for no_icf by checking both the source and the targets decl. PR ipa/92606 gcc/c-family/ * c-attribs.cc (handle_noicf_attribute): Also allow the attribute on global variables. gcc/ * ipa-icf.cc (sem_item_optimizer::merge_classes): Check both source and alias for the no_icf attribute. * doc/extend.texi (no_icf): Document variable attribute. 
--- gcc/c-family/c-attribs.cc | 3 ++- gcc/doc/extend.texi | 5 +++++ gcc/ipa-icf.cc | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index 854e987dc79b..a3671fe3a572 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -1649,7 +1649,8 @@ handle_noicf_attribute (tree *node, tree name, tree ARG_UNUSED (args), int ARG_UNUSED (flags), bool *no_add_attrs) { - if (TREE_CODE (*node) != FUNCTION_DECL) + if (TREE_CODE (*node) != FUNCTION_DECL + && (TREE_CODE (*node) != VAR_DECL || !is_global_var (*node))) { warning (OPT_Wattributes, "%qE attribute ignored", name); *no_add_attrs = true; diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e8b5e771f7a0..f0c789f6cb4a 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -8152,6 +8152,11 @@ script to place the sections with the @code{.persistent} prefix in the right location. Specifically, some type of non-volatile, writeable memory is required. +@cindex @code{no_icf} variable attribute +@item no_icf +This variable attribute prevents a variable from being merged with another +equivalent variable. + @cindex @code{objc_nullability} variable attribute @item objc_nullability (@var{nullability kind}) @r{(Objective-C and Objective-C++ only)} This attribute applies to pointer variables only. 
It allows marking the diff --git a/gcc/ipa-icf.cc b/gcc/ipa-icf.cc index 81232d5706e1..e27536d73a96 100644 --- a/gcc/ipa-icf.cc +++ b/gcc/ipa-icf.cc @@ -3422,7 +3422,8 @@ sem_item_optimizer::merge_classes (unsigned int prev_class_count, alias->node->dump_asm_name ()); } - if (lookup_attribute ("no_icf", DECL_ATTRIBUTES (alias->decl))) + if (lookup_attribute ("no_icf", DECL_ATTRIBUTES (alias->decl)) + || lookup_attribute ("no_icf", DECL_ATTRIBUTES (source->decl))) { if (dump_enabled_p ()) dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, From 6d0b0806eb638447c3184c59d996c2f178553d45 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 11 Dec 2023 14:39:48 +0100 Subject: [PATCH 232/311] tree-optimization/112736 - avoid overread with non-grouped SLP load The following avoids over/under-read of storage when vectorizing a non-grouped load with SLP. Instead of forcing peeling for gaps use a smaller load for the last vector which might access excess elements. This builds upon the existing optimization avoiding peeling for gaps, generalizing it to all gap widths leaving a power-of-two remaining number of elements (but it doesn't replace or improve that particular case at this point). I wonder if the poly relational compares I set up are good enough to guarantee /* remain should now be > 0 and < nunits. */. There is existing test coverage that runs into /* DR will be unused. */ always when the gap is wider than nunits. Compared to the existing gap == nunits/2 case this only adjusts the load that will cause the overrun at the end, not every load. Apart from the poly relational compares it should reliably cover these cases but I'll leave it for stage1 to remove. PR tree-optimization/112736 * tree-vect-stmts.cc (vectorizable_load): Extend optimization to avoid peeling for gaps to handle single-element non-groups we now allow with SLP. * gcc.dg/torture/pr112736.c: New testcase. 
--- gcc/testsuite/gcc.dg/torture/pr112736.c | 27 ++++++++ gcc/tree-vect-stmts.cc | 92 ++++++++++++++++++++----- 2 files changed, 100 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr112736.c diff --git a/gcc/testsuite/gcc.dg/torture/pr112736.c b/gcc/testsuite/gcc.dg/torture/pr112736.c new file mode 100644 index 000000000000..6abb56edba31 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr112736.c @@ -0,0 +1,27 @@ +/* { dg-do run { target *-*-linux* *-*-gnu* *-*-uclinux* } } */ + +#include <unistd.h> +#include <sys/mman.h> + +int a, c[3][5]; + +void __attribute__((noipa)) +fn1 (int * __restrict b) +{ + int e; + for (a = 2; a >= 0; a--) + for (e = 0; e < 4; e++) + c[a][e] = b[a]; +} + +int main() +{ + long pgsz = sysconf (_SC_PAGESIZE); + void *p = mmap (NULL, pgsz * 2, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); + if (p == MAP_FAILED) + return 0; + mprotect (p, pgsz, PROT_NONE); + fn1 (p + pgsz); + return 0; +} diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 390c8472fd6c..fc6923cf68a4 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11465,26 +11465,70 @@ vectorizable_load (vec_info *vinfo, if (new_vtype != NULL_TREE) ltype = half_vtype; } + /* Try to use a single smaller load when we are about + to load excess elements compared to the unrolled + scalar loop. + ??? This should cover the above case as well. */ + else if (known_gt ((vec_num * j + i + 1) * nunits, + (group_size * vf - gap))) + { + if (known_ge ((vec_num * j + i + 1) * nunits + - (group_size * vf - gap), nunits)) + /* DR will be unused. */ + ltype = NULL_TREE; + else if (alignment_support_scheme == dr_aligned) + /* Aligned access to excess elements is OK if + at least one element is accessed in the + scalar loop. */ + ; + else + { + auto remain + = ((group_size * vf - gap) + - (vec_num * j + i) * nunits); + /* remain should now be > 0 and < nunits. 
*/ + unsigned num; + if (constant_multiple_p (nunits, remain, &num)) + { + tree ptype; + new_vtype + = vector_vector_composition_type (vectype, + num, + &ptype); + if (new_vtype) + ltype = ptype; + } + /* Else use multiple loads or a masked load? */ + } + } tree offset = (dataref_offset ? dataref_offset : build_int_cst (ref_type, 0)); - if (ltype != vectype - && memory_access_type == VMAT_CONTIGUOUS_REVERSE) + if (!ltype) + ; + else if (ltype != vectype + && memory_access_type == VMAT_CONTIGUOUS_REVERSE) { - unsigned HOST_WIDE_INT gap_offset - = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type)); - tree gapcst = build_int_cst (ref_type, gap_offset); + poly_uint64 gap_offset + = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype)) + - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype))); + tree gapcst = build_int_cstu (ref_type, gap_offset); offset = size_binop (PLUS_EXPR, offset, gapcst); } - data_ref - = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); - if (alignment_support_scheme == dr_aligned) - ; - else - TREE_TYPE (data_ref) - = build_aligned_type (TREE_TYPE (data_ref), - align * BITS_PER_UNIT); - if (ltype != vectype) + if (ltype) + { + data_ref + = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); + if (alignment_support_scheme == dr_aligned) + ; + else + TREE_TYPE (data_ref) + = build_aligned_type (TREE_TYPE (data_ref), + align * BITS_PER_UNIT); + } + if (!ltype) + data_ref = build_constructor (vectype, NULL); + else if (ltype != vectype) { vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); @@ -11494,18 +11538,28 @@ vectorizable_load (vec_info *vinfo, gsi); data_ref = NULL; vec *v; - vec_alloc (v, 2); + /* We've computed 'num' above to statically two + or via constant_multiple_p. 
*/ + unsigned num + = (exact_div (tree_to_poly_uint64 + (TYPE_SIZE_UNIT (vectype)), + tree_to_poly_uint64 + (TYPE_SIZE_UNIT (ltype))) + .to_constant ()); + vec_alloc (v, num); if (memory_access_type == VMAT_CONTIGUOUS_REVERSE) { - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - build_zero_cst (ltype)); + while (--num) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); } else { CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); - CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, - build_zero_cst (ltype)); + while (--num) + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, + build_zero_cst (ltype)); } gcc_assert (new_vtype != NULL_TREE); if (new_vtype == vectype) From d83acace704927ee351968258c8a8cd39e305d03 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Mon, 12 Jun 2023 13:02:08 +0100 Subject: [PATCH 233/311] aarch64,arm: Remove accepted_branch_protection_string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On aarch64 this caused ICE with pragma push_options since commit ae54c1b09963779c5c3914782324ff48af32e2f1 Author: Wilco Dijkstra CommitDate: 2022-06-01 18:13:57 +0100 AArch64: Cleanup option processing code The failure is at pop_options: internal compiler error: ‘global_options’ are modified in local context On arm the variable was unused. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_override_options_after_change_1): Do not override branch_protection options. (aarch64_override_options): Remove accepted_branch_protection_string. * config/arm/aarch-common.cc (BRANCH_PROTECT_STR_MAX): Remove. (aarch_parse_branch_protection): Remove accepted_branch_protection_string. * config/arm/arm.cc: Likewise. 
--- gcc/config/aarch64/aarch64.cc | 10 +--------- gcc/config/arm/aarch-common.cc | 16 ---------------- gcc/config/arm/arm.cc | 2 -- 3 files changed, 1 insertion(+), 27 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 21a24d92b025..164ca4babbe3 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -351,8 +351,6 @@ bool aarch64_pcrelative_literal_loads; /* Global flag for whether frame pointer is enabled. */ bool aarch64_use_frame_pointer; -char *accepted_branch_protection_string = NULL; - /* Support for command line parsing of boolean flags in the tuning structures. */ struct aarch64_flag_desc @@ -17999,12 +17997,6 @@ aarch64_adjust_generic_arch_tuning (struct tune_params &current_tune) static void aarch64_override_options_after_change_1 (struct gcc_options *opts) { - if (accepted_branch_protection_string) - { - opts->x_aarch64_branch_protection_string - = xstrdup (accepted_branch_protection_string); - } - /* PR 70044: We have to be careful about being called multiple times for the same function. This means all changes should be repeatable. */ @@ -18647,7 +18639,7 @@ aarch64_override_options (void) /* Return address signing is currently not supported for ILP32 targets. For LP64 targets use the configured option in the absence of a command-line option for -mbranch-protection. */ - if (!TARGET_ILP32 && accepted_branch_protection_string == NULL) + if (!TARGET_ILP32 && aarch64_branch_protection_string == NULL) { #ifdef TARGET_ENABLE_PAC_RET aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF; diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc index d68b7047c9b9..3dca9d93b0ea 100644 --- a/gcc/config/arm/aarch-common.cc +++ b/gcc/config/arm/aarch-common.cc @@ -660,9 +660,6 @@ arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, return saw_asm_flag ? 
seq : NULL; } -#define BRANCH_PROTECT_STR_MAX 255 -extern char *accepted_branch_protection_string; - static enum aarch_parse_opt_result aarch_handle_no_branch_protection (char* str, char* rest) { @@ -813,19 +810,6 @@ aarch_parse_branch_protection (const char *const_str, char** last_str) else *last_str = NULL; } - - if (res == AARCH_PARSE_OK) - { - /* If needed, alloc the accepted string then copy in const_str. - Used by override_option_after_change_1. */ - if (!accepted_branch_protection_string) - accepted_branch_protection_string - = (char *) xmalloc (BRANCH_PROTECT_STR_MAX + 1); - strncpy (accepted_branch_protection_string, const_str, - BRANCH_PROTECT_STR_MAX + 1); - /* Forcibly null-terminate. */ - accepted_branch_protection_string[BRANCH_PROTECT_STR_MAX] = '\0'; - } return res; } diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 6e3e2e8fb1bf..4292d3d524e5 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -2433,8 +2433,6 @@ const struct tune_params arm_fa726te_tune = tune_params::SCHED_AUTOPREF_OFF }; -char *accepted_branch_protection_string = NULL; - /* Auto-generated CPU, FPU and architecture tables. */ #include "arm-cpu-data.h" From 321477fc3a0f8de18c4452f431309f896ae3a854 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Thu, 15 Jun 2023 17:15:09 +0100 Subject: [PATCH 234/311] aarch64,arm: Fix branch-protection= parsing Refactor the parsing to have a single API and fix a few parsing issues: - Different handling of "bti+none" and "none+bti": these should be rejected because "none" can only appear alone. - Accepted empty strings such as "bti++pac-ret" or "bti+", this bug was caused by using strtok_r. - Memory got leaked (str_root was never freed). And two buffers got allocated when one is enough. The callbacks now have no failure mode, only parsing can fail and all failures are handled locally. 
The "-mbranch-protection=" vs "target("branch-protection=")" difference in the error message is handled by a separate argument to aarch_validate_mbranch_protection. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_override_options): Update. (aarch64_handle_attr_branch_protection): Update. * config/arm/aarch-common-protos.h (aarch_parse_branch_protection): Remove. (aarch_validate_mbranch_protection): Add new argument. * config/arm/aarch-common.cc (aarch_handle_no_branch_protection): Update. (aarch_handle_standard_branch_protection): Update. (aarch_handle_pac_ret_protection): Update. (aarch_handle_pac_ret_leaf): Update. (aarch_handle_pac_ret_b_key): Update. (aarch_handle_bti_protection): Update. (aarch_parse_branch_protection): Remove. (next_tok): New. (aarch_validate_mbranch_protection): Rewrite. * config/arm/aarch-common.h (struct aarch_branch_protect_type): Add field "alone". * config/arm/arm.cc (arm_configure_build_target): Update. gcc/testsuite/ChangeLog: * gcc.target/aarch64/branch-protection-attr.c: Update. * gcc.target/aarch64/branch-protection-option.c: Update. 
--- gcc/config/aarch64/aarch64.cc | 37 +-- gcc/config/arm/aarch-common-protos.h | 5 +- gcc/config/arm/aarch-common.cc | 217 ++++++++---------- gcc/config/arm/aarch-common.h | 14 +- gcc/config/arm/arm.cc | 3 +- .../aarch64/branch-protection-attr.c | 6 +- .../aarch64/branch-protection-option.c | 2 +- 7 files changed, 116 insertions(+), 168 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 164ca4babbe3..51673e9a847b 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -18563,7 +18563,8 @@ aarch64_override_options (void) aarch64_validate_sls_mitigation (aarch64_harden_sls_string); if (aarch64_branch_protection_string) - aarch_validate_mbranch_protection (aarch64_branch_protection_string); + aarch_validate_mbranch_protection (aarch64_branch_protection_string, + "-mbranch-protection="); /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU. If either of -march or -mtune is given, they override their @@ -18997,34 +18998,12 @@ aarch64_handle_attr_cpu (const char *str) /* Handle the argument STR to the branch-protection= attribute. */ - static bool - aarch64_handle_attr_branch_protection (const char* str) - { - char *err_str = (char *) xmalloc (strlen (str) + 1); - enum aarch_parse_opt_result res = aarch_parse_branch_protection (str, - &err_str); - bool success = false; - switch (res) - { - case AARCH_PARSE_MISSING_ARG: - error ("missing argument to % pragma or" - " attribute"); - break; - case AARCH_PARSE_INVALID_ARG: - error ("invalid protection type %qs in % pragma or attribute", err_str); - break; - case AARCH_PARSE_OK: - success = true; - /* Fall through. 
*/ - case AARCH_PARSE_INVALID_FEATURE: - break; - default: - gcc_unreachable (); - } - free (err_str); - return success; - } +static bool +aarch64_handle_attr_branch_protection (const char* str) +{ + return aarch_validate_mbranch_protection (str, + "target(\"branch-protection=\")"); +} /* Handle the argument STR to the tune= target attribute. */ diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 6e44d29b4333..61cdca2b002a 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -159,10 +159,7 @@ rtx_insn *arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, vec &clobbers, HARD_REG_SET &clobbered_regs, location_t loc); -/* Parsing routine for branch-protection common to AArch64 and Arm. */ -enum aarch_parse_opt_result aarch_parse_branch_protection (const char*, char**); - /* Validation routine for branch-protection common to AArch64 and Arm. */ -bool aarch_validate_mbranch_protection (const char *); +bool aarch_validate_mbranch_protection (const char *, const char *); #endif /* GCC_AARCH_COMMON_PROTOS_H */ diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc index 3dca9d93b0ea..ab4e680f4cc3 100644 --- a/gcc/config/arm/aarch-common.cc +++ b/gcc/config/arm/aarch-common.cc @@ -660,169 +660,146 @@ arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, return saw_asm_flag ? 
seq : NULL; } -static enum aarch_parse_opt_result -aarch_handle_no_branch_protection (char* str, char* rest) +static void +aarch_handle_no_branch_protection (void) { aarch_ra_sign_scope = AARCH_FUNCTION_NONE; aarch_enable_bti = 0; - if (rest) - { - error ("unexpected %<%s%> after %<%s%>", rest, str); - return AARCH_PARSE_INVALID_FEATURE; - } - return AARCH_PARSE_OK; } -static enum aarch_parse_opt_result -aarch_handle_standard_branch_protection (char* str, char* rest) +static void +aarch_handle_standard_branch_protection (void) { aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF; aarch_ra_sign_key = AARCH_KEY_A; aarch_enable_bti = 1; - if (rest) - { - error ("unexpected %<%s%> after %<%s%>", rest, str); - return AARCH_PARSE_INVALID_FEATURE; - } - return AARCH_PARSE_OK; } -static enum aarch_parse_opt_result -aarch_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) +static void +aarch_handle_pac_ret_protection (void) { aarch_ra_sign_scope = AARCH_FUNCTION_NON_LEAF; aarch_ra_sign_key = AARCH_KEY_A; - return AARCH_PARSE_OK; } -static enum aarch_parse_opt_result -aarch_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) +static void +aarch_handle_pac_ret_leaf (void) { aarch_ra_sign_scope = AARCH_FUNCTION_ALL; - return AARCH_PARSE_OK; } -static enum aarch_parse_opt_result -aarch_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) +static void +aarch_handle_pac_ret_b_key (void) { aarch_ra_sign_key = AARCH_KEY_B; - return AARCH_PARSE_OK; } -static enum aarch_parse_opt_result -aarch_handle_bti_protection (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) +static void +aarch_handle_bti_protection (void) { aarch_enable_bti = 1; - return AARCH_PARSE_OK; } static const struct aarch_branch_protect_type aarch_pac_ret_subtypes[] = { - { "leaf", aarch_handle_pac_ret_leaf, NULL, 0 }, - { "b-key", aarch_handle_pac_ret_b_key, NULL, 0 }, - { NULL, NULL, NULL, 0 } + { "leaf", false, 
aarch_handle_pac_ret_leaf, NULL, 0 }, + { "b-key", false, aarch_handle_pac_ret_b_key, NULL, 0 }, + { NULL, false, NULL, NULL, 0 } }; static const struct aarch_branch_protect_type aarch_branch_protect_types[] = { - { "none", aarch_handle_no_branch_protection, NULL, 0 }, - { "standard", aarch_handle_standard_branch_protection, NULL, 0 }, - { "pac-ret", aarch_handle_pac_ret_protection, aarch_pac_ret_subtypes, + { "none", true, aarch_handle_no_branch_protection, NULL, 0 }, + { "standard", true, aarch_handle_standard_branch_protection, NULL, 0 }, + { "pac-ret", false, aarch_handle_pac_ret_protection, aarch_pac_ret_subtypes, ARRAY_SIZE (aarch_pac_ret_subtypes) }, - { "bti", aarch_handle_bti_protection, NULL, 0 }, - { NULL, NULL, NULL, 0 } + { "bti", false, aarch_handle_bti_protection, NULL, 0 }, + { NULL, false, NULL, NULL, 0 } }; -/* Parses CONST_STR for branch protection features specified in - aarch64_branch_protect_types, and set any global variables required. Returns - the parsing result and assigns LAST_STR to the last processed token from - CONST_STR so that it can be used for error reporting. */ +/* In-place split *str at delim, return *str and set *str to the tail + of the string or NULL if the end is reached. */ -enum aarch_parse_opt_result -aarch_parse_branch_protection (const char *const_str, char** last_str) +static char * +next_tok (char **str, int delim) { - char *str_root = xstrdup (const_str); - char* token_save = NULL; - char *str = strtok_r (str_root, "+", &token_save); - enum aarch_parse_opt_result res = AARCH_PARSE_OK; - if (!str) - res = AARCH_PARSE_MISSING_ARG; - else + char *tok = *str; + for (char *p = tok; p && *p != '\0'; p++) { - char *next_str = strtok_r (NULL, "+", &token_save); - /* Reset the branch protection features to their defaults. 
*/ - aarch_handle_no_branch_protection (NULL, NULL); - - while (str && res == AARCH_PARSE_OK) + if (*p == delim) { - const aarch_branch_protect_type* type = aarch_branch_protect_types; - bool found = false; - /* Search for this type. */ - while (type && type->name && !found && res == AARCH_PARSE_OK) - { - if (strcmp (str, type->name) == 0) - { - found = true; - res = type->handler (str, next_str); - str = next_str; - next_str = strtok_r (NULL, "+", &token_save); - } - else - type++; - } - if (found && res == AARCH_PARSE_OK) - { - bool found_subtype = true; - /* Loop through each token until we find one that isn't a - subtype. */ - while (found_subtype) - { - found_subtype = false; - const aarch_branch_protect_type *subtype = type->subtypes; - /* Search for the subtype. */ - while (str && subtype && subtype->name && !found_subtype - && res == AARCH_PARSE_OK) - { - if (strcmp (str, subtype->name) == 0) - { - found_subtype = true; - res = subtype->handler (str, next_str); - str = next_str; - next_str = strtok_r (NULL, "+", &token_save); - } - else - subtype++; - } - } - } - else if (!found) - res = AARCH_PARSE_INVALID_ARG; + *p = '\0'; + *str = p + 1; + return tok; } } - /* Copy the last processed token into the argument to pass it back. - Used by option and attribute validation to print the offending token. */ - if (last_str) - { - if (str) - strcpy (*last_str, str); - else - *last_str = NULL; - } - return res; + *str = NULL; + return tok; } +/* Parses CONST_STR for branch protection features specified in + aarch64_branch_protect_types, and set any global variables required. + Returns true on success. 
*/ + bool -aarch_validate_mbranch_protection (const char *const_str) +aarch_validate_mbranch_protection (const char *const_str, const char *opt) { - char *str = (char *) xmalloc (strlen (const_str)); - enum aarch_parse_opt_result res = - aarch_parse_branch_protection (const_str, &str); - if (res == AARCH_PARSE_INVALID_ARG) - error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str); - else if (res == AARCH_PARSE_MISSING_ARG) - error ("missing argument for %<-mbranch-protection=%>"); - free (str); - return res == AARCH_PARSE_OK; + char *str_root = xstrdup (const_str); + char *next_str = str_root; + char *str = next_tok (&next_str, '+'); + char *alone_str = NULL; + bool reject_alone = false; + bool res = true; + + /* First entry is "none" and it is used to reset the state. */ + aarch_branch_protect_types[0].handler (); + + while (str) + { + const aarch_branch_protect_type *type = aarch_branch_protect_types; + for (; type->name; type++) + if (strcmp (str, type->name) == 0) + break; + if (type->name == NULL) + { + res = false; + if (strcmp (str, "") == 0) + error ("missing feature or flag for %<%s%>", opt); + else + error ("invalid argument %<%s%> for %<%s%>", str, opt); + break; + } + + if (type->alone && alone_str == NULL) + alone_str = str; + else + reject_alone = true; + if (reject_alone && alone_str != NULL) + { + res = false; + error ("argument %<%s%> can only appear alone in %<%s%>", + alone_str, opt); + break; + } + + type->handler (); + str = next_tok (&next_str, '+'); + if (type->subtypes == NULL) + continue; + + /* Loop through tokens until we find one that isn't a subtype. 
*/ + while (str) + { + const aarch_branch_protect_type *subtype = type->subtypes; + for (; subtype->name; subtype++) + if (strcmp (str, subtype->name) == 0) + break; + if (subtype->name == NULL) + break; + + subtype->handler (); + str = next_tok (&next_str, '+'); + } + } + + free (str_root); + return res; } diff --git a/gcc/config/arm/aarch-common.h b/gcc/config/arm/aarch-common.h index c6a67f0d05cc..f72e21127fc8 100644 --- a/gcc/config/arm/aarch-common.h +++ b/gcc/config/arm/aarch-common.h @@ -55,16 +55,10 @@ struct aarch_branch_protect_type /* The type's name that the user passes to the branch-protection option string. */ const char* name; - /* Function to handle the protection type and set global variables. - First argument is the string token corresponding with this type and the - second argument is the next token in the option string. - Return values: - * AARCH_PARSE_OK: Handling was sucessful. - * AARCH_INVALID_ARG: The type is invalid in this context and the caller - should print an error. - * AARCH_INVALID_FEATURE: The type is invalid and the handler prints its - own error. */ - enum aarch_parse_opt_result (*handler)(char*, char*); + /* The type can only appear alone, other types should be rejected. */ + int alone; + /* Function to handle the protection type and set global variables. */ + void (*handler)(void); /* A list of types that can follow this type in the option string. 
*/ const struct aarch_branch_protect_type* subtypes; unsigned int num_subtypes; diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 4292d3d524e5..0c0cb14a8a4f 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -3306,7 +3306,8 @@ arm_configure_build_target (struct arm_build_target *target, if (opts->x_arm_branch_protection_string) { - aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string); + aarch_validate_mbranch_protection (opts->x_arm_branch_protection_string, + "-mbranch-protection="); if (aarch_ra_sign_key != AARCH_KEY_A) { diff --git a/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c b/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c index 272000c27474..cc6820a7b14d 100644 --- a/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c +++ b/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c @@ -4,19 +4,19 @@ void __attribute__ ((target("branch-protection=leaf"))) foo1 () { } -/* { dg-error {invalid protection type 'leaf' in 'target\("branch-protection="\)' pragma or attribute} "" { target *-*-* } 5 } */ +/* { dg-error {invalid argument 'leaf' for 'target\("branch-protection="\)'} "" { target *-*-* } 5 } */ /* { dg-error {pragma or attribute 'target\("branch-protection=leaf"\)' is not valid} "" { target *-*-* } 5 } */ void __attribute__ ((target("branch-protection=none+pac-ret"))) foo2 () { } -/* { dg-error "unexpected 'pac-ret' after 'none'" "" { target *-*-* } 12 } */ +/* { dg-error {argument 'none' can only appear alone in 'target\("branch-protection="\)'} "" { target *-*-* } 12 } */ /* { dg-error {pragma or attribute 'target\("branch-protection=none\+pac-ret"\)' is not valid} "" { target *-*-* } 12 } */ void __attribute__ ((target("branch-protection="))) foo3 () { } -/* { dg-error {missing argument to 'target\("branch-protection="\)' pragma or attribute} "" { target *-*-* } 19 } */ +/* { dg-error {missing feature or flag for 'target\("branch-protection="\)'} "" { target *-*-* } 19 } */ 
/* { dg-error {pragma or attribute 'target\("branch-protection="\)' is not valid} "" { target *-*-* } 19 } */ diff --git a/gcc/testsuite/gcc.target/aarch64/branch-protection-option.c b/gcc/testsuite/gcc.target/aarch64/branch-protection-option.c index 1b3bf4ee2b88..e2f847a31c44 100644 --- a/gcc/testsuite/gcc.target/aarch64/branch-protection-option.c +++ b/gcc/testsuite/gcc.target/aarch64/branch-protection-option.c @@ -1,4 +1,4 @@ /* { dg-do "compile" } */ /* { dg-options "-mbranch-protection=leaf -mbranch-protection=none+pac-ret" } */ -/* { dg-error "unexpected 'pac-ret' after 'none'" "" { target *-*-* } 0 } */ +/* { dg-error "argument 'none' can only appear alone in '-mbranch-protection='" "" { target *-*-* } 0 } */ From de072b52292dc5218845904eee709c35a1418c3b Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Mon, 11 Dec 2023 11:35:31 -0500 Subject: [PATCH 235/311] tree: add to clobber_kind In discussion of PR71093 it came up that more clobber_kind options would be useful within the C++ front-end. gcc/ChangeLog: * tree-core.h (enum clobber_kind): Rename CLOBBER_EOL to CLOBBER_STORAGE_END. Add CLOBBER_STORAGE_BEGIN, CLOBBER_OBJECT_BEGIN, CLOBBER_OBJECT_END. * gimple-lower-bitint.cc * gimple-ssa-warn-access.cc * gimplify.cc * tree-inline.cc * tree-ssa-ccp.cc: Adjust for rename. * tree-pretty-print.cc: And handle new values. gcc/cp/ChangeLog: * call.cc (build_trivial_dtor_call): Use CLOBBER_OBJECT_END. * decl.cc (build_clobber_this): Take clobber_kind argument. (start_preparsed_function): Pass CLOBBER_OBJECT_BEGIN. (begin_destructor_body): Pass CLOBBER_OBJECT_END. gcc/testsuite/ChangeLog: * gcc.dg/pr87052.c: Adjust expected CLOBBER output. 
Co-authored-by: Nathaniel Shead --- gcc/cp/call.cc | 2 +- gcc/cp/decl.cc | 9 +++++---- gcc/gimple-lower-bitint.cc | 10 ++++++---- gcc/gimple-ssa-warn-access.cc | 2 +- gcc/gimplify.cc | 9 +++++---- gcc/testsuite/gcc.dg/pr87052.c | 4 ++-- gcc/tree-core.h | 13 ++++++++++--- gcc/tree-inline.cc | 6 ++++-- gcc/tree-pretty-print.cc | 19 +++++++++++++++++-- gcc/tree-ssa-ccp.cc | 2 +- 10 files changed, 52 insertions(+), 24 deletions(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 4f0abf8e93fa..aaee34f35b04 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9716,7 +9716,7 @@ build_trivial_dtor_call (tree instance, bool no_ptr_deref) } /* A trivial destructor should still clobber the object. */ - tree clobber = build_clobber (TREE_TYPE (instance)); + tree clobber = build_clobber (TREE_TYPE (instance), CLOBBER_OBJECT_END); return build2 (MODIFY_EXPR, void_type_node, instance, clobber); } diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index b1ada1d52152..4d17ead123ac 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -17401,7 +17401,7 @@ implicit_default_ctor_p (tree fn) storage is dead when we enter the constructor or leave the destructor. */ static tree -build_clobber_this () +build_clobber_this (clobber_kind kind) { /* Clobbering an empty base is pointless, and harmful if its one byte TYPE_SIZE overlays real data. */ @@ -17417,7 +17417,7 @@ build_clobber_this () if (!vbases) ctype = CLASSTYPE_AS_BASE (ctype); - tree clobber = build_clobber (ctype); + tree clobber = build_clobber (ctype, kind); tree thisref = current_class_ref; if (ctype != current_class_type) @@ -17836,7 +17836,7 @@ start_preparsed_function (tree decl1, tree attrs, int flags) because part of the initialization might happen before we enter the constructor, via AGGR_INIT_ZERO_FIRST (c++/68006). 
*/ && !implicit_default_ctor_p (decl1)) - finish_expr_stmt (build_clobber_this ()); + finish_expr_stmt (build_clobber_this (CLOBBER_OBJECT_BEGIN)); if (!processing_template_decl && DECL_CONSTRUCTOR_P (decl1) @@ -18074,7 +18074,8 @@ begin_destructor_body (void) finish_decl_cleanup (NULL_TREE, stmt); } else - finish_decl_cleanup (NULL_TREE, build_clobber_this ()); + finish_decl_cleanup (NULL_TREE, + build_clobber_this (CLOBBER_OBJECT_END)); } /* And insert cleanups for our bases and members so that they diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index c55c32fb40d6..65a7bbe3fa9a 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -806,7 +806,8 @@ bitint_large_huge::handle_operand (tree op, tree idx) && m_after_stmt && bitmap_bit_p (m_single_use_names, SSA_NAME_VERSION (op))) { - tree clobber = build_clobber (TREE_TYPE (m_vars[p]), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (m_vars[p]), + CLOBBER_STORAGE_END); g = gimple_build_assign (m_vars[p], clobber); gimple_stmt_iterator gsi = gsi_for_stmt (m_after_stmt); gsi_insert_after (&gsi, g, GSI_SAME_STMT); @@ -2063,7 +2064,7 @@ bitint_large_huge::handle_operand_addr (tree op, gimple *stmt, tree ret = build_fold_addr_expr (var); if (!stmt_ends_bb_p (gsi_stmt (m_gsi))) { - tree clobber = build_clobber (atype, CLOBBER_EOL); + tree clobber = build_clobber (atype, CLOBBER_STORAGE_END); g = gimple_build_assign (var, clobber); gsi_insert_after (&m_gsi, g, GSI_SAME_STMT); } @@ -2100,7 +2101,8 @@ bitint_large_huge::handle_operand_addr (tree op, gimple *stmt, ret = build_fold_addr_expr (var); if (!stmt_ends_bb_p (gsi_stmt (m_gsi))) { - tree clobber = build_clobber (m_limb_type, CLOBBER_EOL); + tree clobber = build_clobber (m_limb_type, + CLOBBER_STORAGE_END); g = gimple_build_assign (var, clobber); gsi_insert_after (&m_gsi, g, GSI_SAME_STMT); } @@ -3707,7 +3709,7 @@ bitint_large_huge::finish_arith_overflow (tree var, tree obj, tree type, } if (var) { - tree clobber = 
build_clobber (TREE_TYPE (var), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (var), CLOBBER_STORAGE_END); g = gimple_build_assign (var, clobber); gsi_insert_after (&m_gsi, g, GSI_SAME_STMT); } diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc index 1646bd1be14c..f04c25308693 100644 --- a/gcc/gimple-ssa-warn-access.cc +++ b/gcc/gimple-ssa-warn-access.cc @@ -4364,7 +4364,7 @@ void pass_waccess::check_stmt (gimple *stmt) { if (m_check_dangling_p - && gimple_clobber_p (stmt, CLOBBER_EOL)) + && gimple_clobber_p (stmt, CLOBBER_STORAGE_END)) { /* Ignore clobber statements in blocks with exceptional edges. */ basic_block bb = gimple_bb (stmt); diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 342e43a7f255..afeaea873c08 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -1518,7 +1518,7 @@ gimplify_bind_expr (tree *expr_p, gimple_seq *pre_p) tmp = build_call_expr_loc (EXPR_LOCATION (*e), tmp, 2, v, build_zero_cst (ptr_type_node)); tsi_link_after (&e, tmp, TSI_SAME_STMT); - tmp = build_clobber (TREE_TYPE (v), CLOBBER_EOL); + tmp = build_clobber (TREE_TYPE (v), CLOBBER_STORAGE_END); tmp = fold_build2_loc (loc, MODIFY_EXPR, TREE_TYPE (v), v, fold_convert (TREE_TYPE (v), tmp)); ++e; @@ -1651,7 +1651,7 @@ gimplify_bind_expr (tree *expr_p, gimple_seq *pre_p) build_zero_cst (ptr_type_node)); gimplify_and_add (tmp, &cleanup); gimple *clobber_stmt; - tmp = build_clobber (TREE_TYPE (v), CLOBBER_EOL); + tmp = build_clobber (TREE_TYPE (v), CLOBBER_STORAGE_END); clobber_stmt = gimple_build_assign (v, tmp); gimple_set_location (clobber_stmt, end_locus); gimplify_seq_add_stmt (&cleanup, clobber_stmt); @@ -1665,7 +1665,7 @@ gimplify_bind_expr (tree *expr_p, gimple_seq *pre_p) && !is_gimple_reg (t) && flag_stack_reuse != SR_NONE) { - tree clobber = build_clobber (TREE_TYPE (t), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (t), CLOBBER_STORAGE_END); gimple *clobber_stmt; clobber_stmt = gimple_build_assign (t, clobber); gimple_set_location 
(clobber_stmt, end_locus); @@ -7417,7 +7417,8 @@ gimplify_target_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p) { if (flag_stack_reuse == SR_ALL) { - tree clobber = build_clobber (TREE_TYPE (temp), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (temp), + CLOBBER_STORAGE_END); clobber = build2 (MODIFY_EXPR, TREE_TYPE (temp), temp, clobber); gimple_push_cleanup (temp, clobber, false, pre_p, true); } diff --git a/gcc/testsuite/gcc.dg/pr87052.c b/gcc/testsuite/gcc.dg/pr87052.c index 796fe6440c17..90f3c3b14a80 100644 --- a/gcc/testsuite/gcc.dg/pr87052.c +++ b/gcc/testsuite/gcc.dg/pr87052.c @@ -36,6 +36,6 @@ void test (void) { dg-final { scan-tree-dump-times "b = \"a\\\\x00bc\";" 1 "gimple" } } { dg-final { scan-tree-dump-times "c = \"\";" 1 "gimple" } } { dg-final { scan-tree-dump-times "d = " 1 "gimple" } } - { dg-final { scan-tree-dump-times "d = {CLOBBER\\(eol\\)}" 1 "gimple" } } + { dg-final { scan-tree-dump-times "d = {CLOBBER\\(eos\\)}" 1 "gimple" } } { dg-final { scan-tree-dump-times "e = " 1 "gimple" } } - { dg-final { scan-tree-dump-times "e = {CLOBBER\\(eol\\)}" 1 "gimple" } } */ + { dg-final { scan-tree-dump-times "e = {CLOBBER\\(eos\\)}" 1 "gimple" } } */ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 04c04cf2f375..58aa598f3bba 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -986,12 +986,19 @@ enum annot_expr_kind { annot_expr_kind_last }; -/* The kind of a TREE_CLOBBER_P CONSTRUCTOR node. */ +/* The kind of a TREE_CLOBBER_P CONSTRUCTOR node. Other than _UNDEF, these are + in roughly sequential order. */ enum clobber_kind { /* Unspecified, this clobber acts as a store of an undefined value. */ CLOBBER_UNDEF, - /* This clobber ends the lifetime of the storage. */ - CLOBBER_EOL, + /* Beginning of storage duration, e.g. malloc. */ + CLOBBER_STORAGE_BEGIN, + /* Beginning of object lifetime, e.g. C++ constructor. */ + CLOBBER_OBJECT_BEGIN, + /* End of object lifetime, e.g. C++ destructor. 
*/ + CLOBBER_OBJECT_END, + /* End of storage duration, e.g. free. */ + CLOBBER_STORAGE_END, CLOBBER_LAST }; diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc index a4fc839a22d5..137b83b7c83a 100644 --- a/gcc/tree-inline.cc +++ b/gcc/tree-inline.cc @@ -5136,7 +5136,8 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, && !is_gimple_reg (*varp) && !(id->debug_map && id->debug_map->get (p))) { - tree clobber = build_clobber (TREE_TYPE (*varp), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (*varp), + CLOBBER_STORAGE_END); gimple *clobber_stmt; clobber_stmt = gimple_build_assign (*varp, clobber); gimple_set_location (clobber_stmt, gimple_location (stmt)); @@ -5208,7 +5209,8 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, && !is_gimple_reg (id->retvar) && !stmt_ends_bb_p (stmt)) { - tree clobber = build_clobber (TREE_TYPE (id->retvar), CLOBBER_EOL); + tree clobber = build_clobber (TREE_TYPE (id->retvar), + CLOBBER_STORAGE_END); gimple *clobber_stmt; clobber_stmt = gimple_build_assign (id->retvar, clobber); gimple_set_location (clobber_stmt, gimple_location (old_stmt)); diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc index 0dabb6d1580d..cab99f9dfb6d 100644 --- a/gcc/tree-pretty-print.cc +++ b/gcc/tree-pretty-print.cc @@ -2624,8 +2624,23 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, if (TREE_CLOBBER_P (node)) { pp_string (pp, "CLOBBER"); - if (CLOBBER_KIND (node) == CLOBBER_EOL) - pp_string (pp, "(eol)"); + switch (CLOBBER_KIND (node)) + { + case CLOBBER_STORAGE_BEGIN: + pp_string (pp, "(bos)"); + break; + case CLOBBER_STORAGE_END: + pp_string (pp, "(eos)"); + break; + case CLOBBER_OBJECT_BEGIN: + pp_string (pp, "(bob)"); + break; + case CLOBBER_OBJECT_END: + pp_string (pp, "(eob)"); + break; + default: + break; + } } else if (TREE_CODE (TREE_TYPE (node)) == RECORD_TYPE || TREE_CODE (TREE_TYPE (node)) == UNION_TYPE) diff --git a/gcc/tree-ssa-ccp.cc 
b/gcc/tree-ssa-ccp.cc index ddcbaaaa417d..fab2a9b248a6 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -2525,7 +2525,7 @@ insert_clobber_before_stack_restore (tree saved_val, tree var, FOR_EACH_IMM_USE_STMT (stmt, iter, saved_val) if (gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE)) { - clobber = build_clobber (TREE_TYPE (var), CLOBBER_EOL); + clobber = build_clobber (TREE_TYPE (var), CLOBBER_STORAGE_END); clobber_stmt = gimple_build_assign (var, clobber); i = gsi_for_stmt (stmt); From 6cc3231b9e72c55c93827232420031a050e4dbe2 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 10:20:40 -0500 Subject: [PATCH 236/311] testsuite: fix is_nothrow_constructible8.C This testcase uses variable templates, a C++14 feature. gcc/testsuite/ChangeLog: * g++.dg/ext/is_nothrow_constructible8.C: Require C++14. --- gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C index c2a0b93ae971..996f6d895ffb 100644 --- a/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C +++ b/gcc/testsuite/g++.dg/ext/is_nothrow_constructible8.C @@ -1,4 +1,4 @@ -// { dg-do compile { target c++11 } } +// { dg-do compile { target c++14 } } // PR c++/96090 template From f4d8ab192d8c9f540cf19a0906d68a0f52fdef60 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Wed, 13 Dec 2023 02:39:35 +0800 Subject: [PATCH 237/311] LoongArch: testsuite: Remove XFAIL in vect-ftint-no-inexact.c After r14-6455 this no longer fails. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove.
--- gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c index 83d268099ac9..61918beef5c6 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c @@ -39,6 +39,5 @@ /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */ /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */ -/* trunc: XFAIL due to PR 107723 */ -/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */ +/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */ /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */ From 01cca857aa3e86a750f5df77ca6c36c0739f10f0 Mon Sep 17 00:00:00 2001 From: Gaius Mulley Date: Tue, 12 Dec 2023 19:29:06 +0000 Subject: [PATCH 238/311] PR modula2/112984 Compiling program with -Wpedantic shows warning in libraries This patch tidies up the library modules so that -Wpedantic does not generate any warnings (apart from two procedures with legitimate infinite loops). gcc/m2/ChangeLog: PR modula2/112984 * gm2-libs-coroutines/SYSTEM.mod: Remove redundant import of memcpy. * gm2-libs-iso/ClientSocket.mod: Remove redundant import of IOConsts. * gm2-libs-iso/IOChan.mod: Remove redundant import of IOConsts. * gm2-libs-iso/IOLink.mod: Remove redundant import of IOChan and SYSTEM. * gm2-libs-iso/IOResult.mod: Remove redundant import of IOChan. * gm2-libs-iso/LongIO.mod: Remove redundant import of writeString. * gm2-libs-iso/LongWholeIO.mod: Remove redundant import of IOChan. * gm2-libs-iso/M2RTS.mod: Remove redundant import of ADDRESS. * gm2-libs-iso/MemStream.mod: Remove redundant import of ADDRESS. * gm2-libs-iso/RTdata.mod: Remove redundant import of DeviceTablePtr. * gm2-libs-iso/RTfio.mod: Remove redundant import of DeviceTablePtr. 
* gm2-libs-iso/RTgen.mod: Remove redundant import of DeviceTablePtr. * gm2-libs-iso/RealIO.mod: Remove redundant import of writeString. * gm2-libs-iso/RndFile.mod: Remove redundant import of SYSTEM. * gm2-libs-iso/SYSTEM.mod: Remove redundant import of memcpy. * gm2-libs-iso/ShortWholeIO.mod: Remove redundant import of IOConsts. * gm2-libs-iso/TextIO.mod: Remove redundant import of IOChan. * gm2-libs-iso/TextUtil.mod: Remove redundant import of IOChan. * gm2-libs-iso/WholeIO.mod: Remove redundant import of IOChan. * gm2-libs-log/BitByteOps.mod: Remove redundant import of BYTE. * gm2-libs-log/FileSystem.mod: Remove redundant import of BYTE and ADDRESS. * gm2-libs-log/InOut.mod: Remove redundant import of String. * gm2-libs-log/RealConversions.mod: Remove redundant import of StringToLongreal. * gm2-libs/FIO.mod: Remove redundant import of SIZE. * gm2-libs/FormatStrings.mod: Remove redundant import of String and ConCatChar. * gm2-libs/IO.mod: Remove redundant import of SIZE. * gm2-libs/Indexing.mod: Remove redundant import of ADDRESS. * gm2-libs/M2Dependent.mod: Remove redundant import of SIZE. * gm2-libs/M2RTS.mod: Remove redundant import of ADDRESS. * gm2-libs/OptLib.mod: Remove redundant import of DynamicStrings. * gm2-libs/SYSTEM.mod: Remove redundant import of memcpy. * gm2-libs/StringConvert.mod: Remove redundant import of String. libgm2/ChangeLog: * libm2iso/Makefile.am (libm2iso_la_M2FLAGS): Added line breaks. * libm2iso/Makefile.in: Regenerate. * libm2log/Makefile.am (libm2log_la_M2FLAGS): Added line breaks. * libm2log/Makefile.in: Regenerate. * libm2pim/Makefile.am (libm2pim_la_M2FLAGS): Added line breaks. * libm2pim/Makefile.in: Regenerate. gcc/testsuite/ChangeLog: PR modula2/112984 * gm2/switches/pedantic/pass/hello.mod: New test. * gm2/switches/pedantic/pass/switches-pedantic-pass.exp: New test. 
Signed-off-by: Gaius Mulley --- gcc/m2/gm2-libs-coroutines/SYSTEM.mod | 4 +- gcc/m2/gm2-libs-iso/ClientSocket.mod | 2 +- gcc/m2/gm2-libs-iso/IOChan.mod | 2 +- gcc/m2/gm2-libs-iso/IOLink.mod | 2 +- gcc/m2/gm2-libs-iso/IOResult.mod | 1 - gcc/m2/gm2-libs-iso/LongIO.mod | 4 +- gcc/m2/gm2-libs-iso/LongWholeIO.mod | 2 - gcc/m2/gm2-libs-iso/M2RTS.mod | 2 +- gcc/m2/gm2-libs-iso/MemStream.mod | 2 +- gcc/m2/gm2-libs-iso/RTdata.mod | 2 +- gcc/m2/gm2-libs-iso/RTfio.mod | 6 +-- gcc/m2/gm2-libs-iso/RTgen.mod | 7 ++-- gcc/m2/gm2-libs-iso/RealIO.mod | 4 +- gcc/m2/gm2-libs-iso/RndFile.mod | 2 +- gcc/m2/gm2-libs-iso/SYSTEM.mod | 2 +- gcc/m2/gm2-libs-iso/ShortWholeIO.mod | 2 - gcc/m2/gm2-libs-iso/TextIO.mod | 2 +- gcc/m2/gm2-libs-iso/TextUtil.mod | 2 +- gcc/m2/gm2-libs-iso/WholeIO.mod | 1 - gcc/m2/gm2-libs-log/BitByteOps.mod | 2 +- gcc/m2/gm2-libs-log/FileSystem.mod | 6 +-- gcc/m2/gm2-libs-log/InOut.mod | 2 +- gcc/m2/gm2-libs-log/RealConversions.mod | 2 +- gcc/m2/gm2-libs/FIO.mod | 2 +- gcc/m2/gm2-libs/FormatStrings.mod | 4 +- gcc/m2/gm2-libs/IO.mod | 2 +- gcc/m2/gm2-libs/Indexing.mod | 2 +- gcc/m2/gm2-libs/M2Dependent.mod | 2 +- gcc/m2/gm2-libs/M2RTS.mod | 2 +- gcc/m2/gm2-libs/OptLib.mod | 1 - gcc/m2/gm2-libs/SYSTEM.mod | 2 +- gcc/m2/gm2-libs/StringConvert.mod | 2 +- .../gm2/switches/pedantic/pass/hello.mod | 8 ++++ .../pedantic/pass/switches-pedantic-pass.exp | 38 +++++++++++++++++++ libgm2/libm2iso/Makefile.am | 3 +- libgm2/libm2iso/Makefile.in | 3 +- libgm2/libm2log/Makefile.am | 3 +- libgm2/libm2log/Makefile.in | 3 +- libgm2/libm2pim/Makefile.am | 3 +- libgm2/libm2pim/Makefile.in | 3 +- 40 files changed, 93 insertions(+), 53 deletions(-) create mode 100644 gcc/testsuite/gm2/switches/pedantic/pass/hello.mod create mode 100644 gcc/testsuite/gm2/switches/pedantic/pass/switches-pedantic-pass.exp diff --git a/gcc/m2/gm2-libs-coroutines/SYSTEM.mod b/gcc/m2/gm2-libs-coroutines/SYSTEM.mod index 3652dec78f16..f8ec6d725a18 100644 --- a/gcc/m2/gm2-libs-coroutines/SYSTEM.mod +++ 
b/gcc/m2/gm2-libs-coroutines/SYSTEM.mod @@ -35,7 +35,7 @@ IMPORT RTint ; FROM Storage IMPORT ALLOCATE ; FROM M2RTS IMPORT Halt ; -FROM libc IMPORT printf, memcpy, memcpy, memset ; +FROM libc IMPORT printf, memcpy, memset ; CONST @@ -51,7 +51,7 @@ TYPE VAR initMain, - initGTh : BOOLEAN ; + initGTh : BOOLEAN ; (* diff --git a/gcc/m2/gm2-libs-iso/ClientSocket.mod b/gcc/m2/gm2-libs-iso/ClientSocket.mod index 15a194da587c..56145c3b5e23 100644 --- a/gcc/m2/gm2-libs-iso/ClientSocket.mod +++ b/gcc/m2/gm2-libs-iso/ClientSocket.mod @@ -28,7 +28,7 @@ IMPLEMENTATION MODULE ClientSocket ; FROM ASCII IMPORT nul, lf, cr ; -FROM ChanConsts IMPORT OpenResults, ChanFlags ; +FROM ChanConsts IMPORT ChanFlags ; FROM RTio IMPORT GetDeviceId ; FROM RTgenif IMPORT GenDevIF, InitGenDevIF ; FROM RTdata IMPORT ModuleId, MakeModuleId, InitData, GetData, KillData ; diff --git a/gcc/m2/gm2-libs-iso/IOChan.mod b/gcc/m2/gm2-libs-iso/IOChan.mod index 1376e067d94e..83040c6fddf0 100644 --- a/gcc/m2/gm2-libs-iso/IOChan.mod +++ b/gcc/m2/gm2-libs-iso/IOChan.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see IMPLEMENTATION MODULE IOChan ; -IMPORT FIO, EXCEPTIONS, M2EXCEPTION, RTio, IOConsts, +IMPORT FIO, EXCEPTIONS, M2EXCEPTION, RTio, RTentity, errno, ErrnoCategory, IOLink, StdChans, M2RTS ; FROM EXCEPTIONS IMPORT ExceptionSource, RAISE, AllocateSource, diff --git a/gcc/m2/gm2-libs-iso/IOLink.mod b/gcc/m2/gm2-libs-iso/IOLink.mod index 1e10f29e7e04..0ac9a4453259 100644 --- a/gcc/m2/gm2-libs-iso/IOLink.mod +++ b/gcc/m2/gm2-libs-iso/IOLink.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see IMPLEMENTATION MODULE IOLink ; -IMPORT RTio, RTentity, EXCEPTIONS, IOChan, M2RTS, SYSTEM, ASCII ; +IMPORT RTio, RTentity, EXCEPTIONS, M2RTS, ASCII ; FROM Storage IMPORT ALLOCATE, DEALLOCATE ; diff --git a/gcc/m2/gm2-libs-iso/IOResult.mod b/gcc/m2/gm2-libs-iso/IOResult.mod index 4b46cdcb1ead..d61a3cdbfaa6 100644 --- a/gcc/m2/gm2-libs-iso/IOResult.mod +++ b/gcc/m2/gm2-libs-iso/IOResult.mod @@ -26,7 +26,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see IMPLEMENTATION MODULE IOResult ; -IMPORT IOChan ; PROCEDURE ReadResult (cid: IOChan.ChanId): ReadResults; (* Returns the result for the last read operation on the channel cid. *) diff --git a/gcc/m2/gm2-libs-iso/LongIO.mod b/gcc/m2/gm2-libs-iso/LongIO.mod index 40a2a601b63d..06feccaaed2c 100644 --- a/gcc/m2/gm2-libs-iso/LongIO.mod +++ b/gcc/m2/gm2-libs-iso/LongIO.mod @@ -17,7 +17,7 @@ IMPLEMENTATION MODULE LongIO; *) FROM TextIO IMPORT WriteChar, ReadChar ; -FROM StringChan IMPORT writeString ; +FROM StringChan IMPORT writeString, writeFieldWidth ; FROM IOChan IMPORT SetReadResult ; FROM IOConsts IMPORT ReadResults ; @@ -25,10 +25,8 @@ FROM ConvStringLong IMPORT RealToFixedString, RealToFloatString, RealToEngString ; FROM ConvTypes IMPORT ScanClass, ScanState ; -FROM TextIO IMPORT WriteChar, ReadChar ; FROM DynamicStrings IMPORT String, char, KillString, Length, InitString, ConCatChar, string ; FROM LongConv IMPORT ScanReal ; -FROM StringChan IMPORT writeString, writeFieldWidth ; FROM ldtoa IMPORT strtold ; FROM TextUtil IMPORT SkipSpaces ; diff --git a/gcc/m2/gm2-libs-iso/LongWholeIO.mod b/gcc/m2/gm2-libs-iso/LongWholeIO.mod index 666e109fabc1..01f410c88a15 100644 --- a/gcc/m2/gm2-libs-iso/LongWholeIO.mod +++ b/gcc/m2/gm2-libs-iso/LongWholeIO.mod @@ -41,8 +41,6 @@ FROM TextUtil IMPORT SkipSpaces ; type IOConsts.ReadResults. 
*) -IMPORT IOChan; - (* The text form of a signed whole number is ["+" | "-"], decimal digit, {decimal digit} diff --git a/gcc/m2/gm2-libs-iso/M2RTS.mod b/gcc/m2/gm2-libs-iso/M2RTS.mod index 42e5c328fc26..3c813123a639 100644 --- a/gcc/m2/gm2-libs-iso/M2RTS.mod +++ b/gcc/m2/gm2-libs-iso/M2RTS.mod @@ -31,7 +31,7 @@ FROM libc IMPORT abort, exit, write, getenv, printf, strlen ; (* FROM Builtins IMPORT strncmp, strcmp ; not available during bootstrap. *) FROM NumberIO IMPORT CardToStr ; FROM StrLib IMPORT StrCopy, StrLen, StrEqual ; -FROM SYSTEM IMPORT ADDRESS, ADR ; +FROM SYSTEM IMPORT ADR ; FROM ASCII IMPORT nl, nul ; FROM Storage IMPORT ALLOCATE ; diff --git a/gcc/m2/gm2-libs-iso/MemStream.mod b/gcc/m2/gm2-libs-iso/MemStream.mod index 38435de25442..573b8d32ff99 100644 --- a/gcc/m2/gm2-libs-iso/MemStream.mod +++ b/gcc/m2/gm2-libs-iso/MemStream.mod @@ -45,7 +45,7 @@ FROM RTgenif IMPORT GenDevIF, InitGenDevIF ; FROM FIO IMPORT File ; FROM IOConsts IMPORT ReadResults ; FROM ChanConsts IMPORT readFlag, writeFlag ; -FROM SYSTEM IMPORT ADDRESS, ADR ; +FROM SYSTEM IMPORT ADR ; FROM ASCII IMPORT nl, nul ; FROM Storage IMPORT ALLOCATE, DEALLOCATE, REALLOCATE ; FROM libc IMPORT printf ; diff --git a/gcc/m2/gm2-libs-iso/RTdata.mod b/gcc/m2/gm2-libs-iso/RTdata.mod index 1c03f973655e..82ecec87675c 100644 --- a/gcc/m2/gm2-libs-iso/RTdata.mod +++ b/gcc/m2/gm2-libs-iso/RTdata.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see IMPLEMENTATION MODULE RTdata ; -FROM IOLink IMPORT DeviceTablePtr, RAISEdevException ; +FROM IOLink IMPORT RAISEdevException ; FROM RTentity IMPORT Group, InitGroup, PutKey, IsIn, DelKey ; FROM Storage IMPORT ALLOCATE, DEALLOCATE ; diff --git a/gcc/m2/gm2-libs-iso/RTfio.mod b/gcc/m2/gm2-libs-iso/RTfio.mod index 34ac00cb28fd..945ac3cad336 100644 --- a/gcc/m2/gm2-libs-iso/RTfio.mod +++ b/gcc/m2/gm2-libs-iso/RTfio.mod @@ -26,11 +26,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see IMPLEMENTATION MODULE RTfio ; -FROM IOLink IMPORT DeviceTablePtr ; FROM RTio IMPORT GetFile ; FROM errno IMPORT geterrno ; -FROM FIO IMPORT File, ReadChar, UnReadChar, WriteChar, ReadNBytes, WriteNBytes, IsActive, +FROM FIO IMPORT File, ReadChar, UnReadChar, WriteChar, ReadNBytes, + WriteNBytes, IsActive, WriteLine, EOF, WasEOLN, IsNoError ; @@ -97,7 +97,7 @@ END dorbytes ; (* - dowbytes - + dowbytes - *) PROCEDURE dowbytes (g: GenDevIF; d: DeviceTablePtr; diff --git a/gcc/m2/gm2-libs-iso/RTgen.mod b/gcc/m2/gm2-libs-iso/RTgen.mod index edf640d0dc55..c4706c58a387 100644 --- a/gcc/m2/gm2-libs-iso/RTgen.mod +++ b/gcc/m2/gm2-libs-iso/RTgen.mod @@ -29,7 +29,7 @@ IMPLEMENTATION MODULE RTgen ; FROM IOChan IMPORT ChanId, InvalidChan, ChanExceptions ; -FROM IOLink IMPORT DeviceTablePtr, DeviceTablePtrValue, +FROM IOLink IMPORT DeviceTablePtrValue, RAISEdevException ; IMPORT ChanConsts ; @@ -37,10 +37,9 @@ IMPORT IOConsts ; IMPORT ErrnoCategory ; IMPORT RTgen ; -FROM RTgenif IMPORT GenDevIF, getDID, +FROM RTgenif IMPORT getDID, doReadChar, doUnReadChar, doGetErrno, - doRBytes, doWBytes, - doWBytes, doWrLn, + doRBytes, doWBytes, doWrLn, isEOF, isError, isEOLN ; FROM ChanConsts IMPORT FlagSet, readFlag, writeFlag, rawFlag, diff --git a/gcc/m2/gm2-libs-iso/RealIO.mod b/gcc/m2/gm2-libs-iso/RealIO.mod index ec2cc5b5fe52..ab73065efadc 100644 --- a/gcc/m2/gm2-libs-iso/RealIO.mod +++ b/gcc/m2/gm2-libs-iso/RealIO.mod @@ -17,7 +17,7 @@ IMPLEMENTATION MODULE RealIO; *) FROM TextIO IMPORT WriteChar, ReadChar ; -FROM StringChan IMPORT writeString ; +FROM StringChan IMPORT writeString, writeFieldWidth ; FROM IOChan IMPORT SetReadResult ; FROM IOConsts IMPORT ReadResults ; @@ -25,10 +25,8 @@ FROM ConvStringReal IMPORT RealToFixedString, RealToFloatString, RealToEngString ; FROM ConvTypes IMPORT ScanClass, ScanState ; -FROM TextIO IMPORT WriteChar, ReadChar ; FROM DynamicStrings IMPORT String, char, KillString, Length, InitString, ConCatChar, string ; FROM RealConv IMPORT 
ScanReal ; -FROM StringChan IMPORT writeString, writeFieldWidth ; FROM dtoa IMPORT strtod ; FROM TextUtil IMPORT SkipSpaces ; diff --git a/gcc/m2/gm2-libs-iso/RndFile.mod b/gcc/m2/gm2-libs-iso/RndFile.mod index d4b072452bda..46cf6389ebf9 100644 --- a/gcc/m2/gm2-libs-iso/RndFile.mod +++ b/gcc/m2/gm2-libs-iso/RndFile.mod @@ -51,7 +51,7 @@ FROM EXCEPTIONS IMPORT ExceptionNumber, RAISE, AllocateSource, ExceptionSource, IsCurrentSource, IsExceptionalExecution ; -IMPORT FIO, SYSTEM, RTio, errno, ErrnoCategory ; +IMPORT FIO, RTio, errno, ErrnoCategory ; VAR diff --git a/gcc/m2/gm2-libs-iso/SYSTEM.mod b/gcc/m2/gm2-libs-iso/SYSTEM.mod index 2de3437a855a..b4a943f3776e 100644 --- a/gcc/m2/gm2-libs-iso/SYSTEM.mod +++ b/gcc/m2/gm2-libs-iso/SYSTEM.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see IMPLEMENTATION MODULE SYSTEM ; -FROM libc IMPORT memcpy, memcpy, memset ; +FROM libc IMPORT memcpy, memset ; CONST BitsPerBitset = MAX(BITSET)+1 ; diff --git a/gcc/m2/gm2-libs-iso/ShortWholeIO.mod b/gcc/m2/gm2-libs-iso/ShortWholeIO.mod index 0c7286c3162d..fcd806568bc4 100644 --- a/gcc/m2/gm2-libs-iso/ShortWholeIO.mod +++ b/gcc/m2/gm2-libs-iso/ShortWholeIO.mod @@ -41,8 +41,6 @@ FROM TextUtil IMPORT SkipSpaces ; type IOConsts.ReadResults. *) -IMPORT IOChan; - (* The text form of a signed whole number is ["+" | "-"], decimal digit, {decimal digit} diff --git a/gcc/m2/gm2-libs-iso/TextIO.mod b/gcc/m2/gm2-libs-iso/TextIO.mod index eab156d20762..940f5d97217f 100644 --- a/gcc/m2/gm2-libs-iso/TextIO.mod +++ b/gcc/m2/gm2-libs-iso/TextIO.mod @@ -27,7 +27,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see IMPLEMENTATION MODULE TextIO ; -IMPORT IOChan, IOConsts, CharClass, ASCII ; +IMPORT IOConsts, CharClass, ASCII ; FROM SYSTEM IMPORT ADR ; FROM FIO IMPORT FlushOutErr ; FROM libc IMPORT printf ; diff --git a/gcc/m2/gm2-libs-iso/TextUtil.mod b/gcc/m2/gm2-libs-iso/TextUtil.mod index 6f6c02e68b12..44dbd1c69f8b 100644 --- a/gcc/m2/gm2-libs-iso/TextUtil.mod +++ b/gcc/m2/gm2-libs-iso/TextUtil.mod @@ -1,6 +1,6 @@ IMPLEMENTATION MODULE TextUtil ; -IMPORT IOChan, CharClass, IOConsts ; +IMPORT CharClass, IOConsts ; (* SkipSpaces - skips any spaces. diff --git a/gcc/m2/gm2-libs-iso/WholeIO.mod b/gcc/m2/gm2-libs-iso/WholeIO.mod index b8ed37797395..4e6b75e7171c 100644 --- a/gcc/m2/gm2-libs-iso/WholeIO.mod +++ b/gcc/m2/gm2-libs-iso/WholeIO.mod @@ -41,7 +41,6 @@ FROM TextUtil IMPORT SkipSpaces ; type IOConsts.ReadResults. *) -IMPORT IOChan ; (* The text form of a signed whole number is ["+" | "-"], decimal digit, {decimal digit} diff --git a/gcc/m2/gm2-libs-log/BitByteOps.mod b/gcc/m2/gm2-libs-log/BitByteOps.mod index 318a2e3b53d3..746bb62de3fc 100644 --- a/gcc/m2/gm2-libs-log/BitByteOps.mod +++ b/gcc/m2/gm2-libs-log/BitByteOps.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see IMPLEMENTATION MODULE BitByteOps ; -FROM SYSTEM IMPORT BYTE, ADR, SHIFT, ROTATE, TSIZE, BITSET8, CARDINAL8 ; +FROM SYSTEM IMPORT ADR, SHIFT, ROTATE, TSIZE, BITSET8, CARDINAL8 ; (* diff --git a/gcc/m2/gm2-libs-log/FileSystem.mod b/gcc/m2/gm2-libs-log/FileSystem.mod index 9f08819d5866..99612fac3f6a 100644 --- a/gcc/m2/gm2-libs-log/FileSystem.mod +++ b/gcc/m2/gm2-libs-log/FileSystem.mod @@ -28,9 +28,9 @@ IMPLEMENTATION MODULE FileSystem ; FROM M2RTS IMPORT InstallTerminationProcedure ; FROM Storage IMPORT ALLOCATE ; -FROM SYSTEM IMPORT WORD, BYTE, ADDRESS, ADR ; -IMPORT FIO, SFIO, libc, wrapc ; -FROM DynamicStrings IMPORT String, InitString, ConCat, ConCatChar, KillString, string ; +FROM SYSTEM IMPORT ADR ; +IMPORT SFIO, libc, wrapc ; +FROM DynamicStrings IMPORT InitString, ConCat, ConCatChar, KillString, string ; FROM FormatStrings IMPORT Sprintf2 ; CONST diff --git a/gcc/m2/gm2-libs-log/InOut.mod b/gcc/m2/gm2-libs-log/InOut.mod index 517aca2c7b43..669d67970031 100644 --- a/gcc/m2/gm2-libs-log/InOut.mod +++ b/gcc/m2/gm2-libs-log/InOut.mod @@ -29,7 +29,7 @@ IMPLEMENTATION MODULE InOut ; IMPORT FIO, SFIO, Terminal ; FROM FIO IMPORT File, StdIn, StdOut ; -FROM DynamicStrings IMPORT String, InitString, Mark, KillString, ConCat, +FROM DynamicStrings IMPORT InitString, Mark, KillString, ConCat, RemoveWhitePrefix, char, ConCatChar, Length ; FROM StringConvert IMPORT CardinalToString, stoc, stoi, ctos, itos ; diff --git a/gcc/m2/gm2-libs-log/RealConversions.mod b/gcc/m2/gm2-libs-log/RealConversions.mod index 02e0f9241d5d..4e662fa01e2a 100644 --- a/gcc/m2/gm2-libs-log/RealConversions.mod +++ b/gcc/m2/gm2-libs-log/RealConversions.mod @@ -33,7 +33,7 @@ FROM DynamicStrings IMPORT String, InitString, KillString, CopyOut, Length, InitStringCharDB, MultDB, DupDB, SliceDB ; FROM StringConvert IMPORT LongrealToString, StringToLongreal, - StringToLongreal, StringToInteger, itos ; + StringToInteger, itos ; FROM ASCII IMPORT nul ; FROM Builtins IMPORT logl, log10l ; diff 
--git a/gcc/m2/gm2-libs/FIO.mod b/gcc/m2/gm2-libs/FIO.mod index b46d505d30c9..711ac47649bd 100644 --- a/gcc/m2/gm2-libs/FIO.mod +++ b/gcc/m2/gm2-libs/FIO.mod @@ -36,7 +36,7 @@ IMPLEMENTATION MODULE FIO ; provides a simple buffered file input/output library. *) -FROM SYSTEM IMPORT ADR, TSIZE, SIZE, WORD ; +FROM SYSTEM IMPORT ADR, TSIZE, WORD ; FROM ASCII IMPORT nl, nul, tab ; FROM StrLib IMPORT StrLen, StrConCat, StrCopy ; FROM Storage IMPORT ALLOCATE, DEALLOCATE ; diff --git a/gcc/m2/gm2-libs/FormatStrings.mod b/gcc/m2/gm2-libs/FormatStrings.mod index 9c3a56228f94..a5ab3b29f615 100644 --- a/gcc/m2/gm2-libs/FormatStrings.mod +++ b/gcc/m2/gm2-libs/FormatStrings.mod @@ -26,13 +26,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see IMPLEMENTATION MODULE FormatStrings ; -FROM DynamicStrings IMPORT String, InitString, InitStringChar, Mark, +FROM DynamicStrings IMPORT InitString, InitStringChar, Mark, ConCat, Slice, Index, char, string, Assign, Length, Mult, Dup, ConCatChar, PushAllocation, PopAllocationExemption, InitStringDB, InitStringCharStarDB, InitStringCharDB, MultDB, DupDB, SliceDB, - KillString, ConCatChar ; + KillString ; FROM StringConvert IMPORT IntegerToString, CardinalToString, hstoc ; FROM SYSTEM IMPORT ADDRESS ; diff --git a/gcc/m2/gm2-libs/IO.mod b/gcc/m2/gm2-libs/IO.mod index bd6d539634a0..3c74f288c800 100644 --- a/gcc/m2/gm2-libs/IO.mod +++ b/gcc/m2/gm2-libs/IO.mod @@ -28,7 +28,7 @@ IMPLEMENTATION MODULE IO ; FROM StrLib IMPORT StrCopy ; -FROM SYSTEM IMPORT ADR, SIZE ; +FROM SYSTEM IMPORT ADR ; FROM libc IMPORT read, write, system, isatty ; FROM FIO IMPORT File, StdIn, StdOut, StdErr, WriteChar, ReadChar, diff --git a/gcc/m2/gm2-libs/Indexing.mod b/gcc/m2/gm2-libs/Indexing.mod index 65c293cf26db..f23778c644ee 100644 --- a/gcc/m2/gm2-libs/Indexing.mod +++ b/gcc/m2/gm2-libs/Indexing.mod @@ -28,7 +28,7 @@ IMPLEMENTATION MODULE Indexing ; FROM libc IMPORT memset, memmove ; FROM Storage IMPORT ALLOCATE, REALLOCATE, DEALLOCATE ; -FROM 
SYSTEM IMPORT TSIZE, ADDRESS, WORD, BYTE ; +FROM SYSTEM IMPORT TSIZE, WORD, BYTE ; CONST MinSize = 128 ; diff --git a/gcc/m2/gm2-libs/M2Dependent.mod b/gcc/m2/gm2-libs/M2Dependent.mod index 19bdab1bc508..f8a116cb19ce 100644 --- a/gcc/m2/gm2-libs/M2Dependent.mod +++ b/gcc/m2/gm2-libs/M2Dependent.mod @@ -29,7 +29,7 @@ IMPLEMENTATION MODULE M2Dependent ; FROM libc IMPORT abort, exit, write, getenv, printf, snprintf, strncpy ; FROM ASCII IMPORT nul, nl ; -FROM SYSTEM IMPORT ADR, SIZE ; +FROM SYSTEM IMPORT ADR ; FROM Storage IMPORT ALLOCATE ; FROM StrLib IMPORT StrCopy, StrLen, StrEqual ; diff --git a/gcc/m2/gm2-libs/M2RTS.mod b/gcc/m2/gm2-libs/M2RTS.mod index dec1d5f0c29b..5cc87094457a 100644 --- a/gcc/m2/gm2-libs/M2RTS.mod +++ b/gcc/m2/gm2-libs/M2RTS.mod @@ -31,7 +31,7 @@ FROM libc IMPORT abort, exit, write, getenv, printf, strlen ; (* FROM Builtins IMPORT strncmp, strcmp ; not available during bootstrap. *) FROM NumberIO IMPORT CardToStr ; FROM StrLib IMPORT StrCopy, StrLen, StrEqual ; -FROM SYSTEM IMPORT ADDRESS, ADR ; +FROM SYSTEM IMPORT ADR ; FROM ASCII IMPORT nl, nul ; FROM Storage IMPORT ALLOCATE ; diff --git a/gcc/m2/gm2-libs/OptLib.mod b/gcc/m2/gm2-libs/OptLib.mod index 873ab6e4ffbc..e4870eefebaa 100644 --- a/gcc/m2/gm2-libs/OptLib.mod +++ b/gcc/m2/gm2-libs/OptLib.mod @@ -28,7 +28,6 @@ IMPLEMENTATION MODULE OptLib ; FROM Storage IMPORT ALLOCATE, DEALLOCATE ; FROM libc IMPORT memcpy ; -FROM DynamicStrings IMPORT String ; IMPORT DynamicStrings ; diff --git a/gcc/m2/gm2-libs/SYSTEM.mod b/gcc/m2/gm2-libs/SYSTEM.mod index 8d2a00583ce8..8337105029a3 100644 --- a/gcc/m2/gm2-libs/SYSTEM.mod +++ b/gcc/m2/gm2-libs/SYSTEM.mod @@ -26,7 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see IMPLEMENTATION MODULE SYSTEM ; -FROM libc IMPORT memcpy, memcpy, memset ; +FROM libc IMPORT memcpy, memset ; CONST BitsPerBitset = MAX(BITSET)+1 ; diff --git a/gcc/m2/gm2-libs/StringConvert.mod b/gcc/m2/gm2-libs/StringConvert.mod index e2208ca6ad03..c1c8c395c16f 100644 --- a/gcc/m2/gm2-libs/StringConvert.mod +++ b/gcc/m2/gm2-libs/StringConvert.mod @@ -33,7 +33,7 @@ FROM M2RTS IMPORT ErrorMessage ; IMPORT DynamicStrings ; -FROM DynamicStrings IMPORT String, InitString, +FROM DynamicStrings IMPORT InitString, InitStringChar, InitStringCharStar, Mark, ConCat, Dup, string, Slice, Index, char, Assign, Length, Mult, diff --git a/gcc/testsuite/gm2/switches/pedantic/pass/hello.mod b/gcc/testsuite/gm2/switches/pedantic/pass/hello.mod new file mode 100644 index 000000000000..74a15e5ab3da --- /dev/null +++ b/gcc/testsuite/gm2/switches/pedantic/pass/hello.mod @@ -0,0 +1,8 @@ +MODULE hello; + +FROM STextIO IMPORT WriteString, WriteLn ; + +BEGIN + WriteString ("Hello world") ; + WriteLn +END hello. diff --git a/gcc/testsuite/gm2/switches/pedantic/pass/switches-pedantic-pass.exp b/gcc/testsuite/gm2/switches/pedantic/pass/switches-pedantic-pass.exp new file mode 100644 index 000000000000..ce5fd363b3f8 --- /dev/null +++ b/gcc/testsuite/gm2/switches/pedantic/pass/switches-pedantic-pass.exp @@ -0,0 +1,38 @@ +# Copyright (C) 2003-2023 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. 
If not see +# . + +# This file was written by Gaius Mulley (gaius.mulley@southwales.ac.uk) +# for GNU Modula-2. + +if $tracelevel then { + strace $tracelevel +} + +# load support procs +load_lib gm2-torture.exp + +set gm2src ${srcdir}/../m2 + +gm2_init_iso "${srcdir}/gm2/switches/pedantic/pass" -Wpedantic -Werror + +foreach testcase [lsort [glob -nocomplain $srcdir/$subdir/*.mod]] { + # If we're only testing specific files and this isn't one of them, skip it. + if ![runtest_file_p $runtests $testcase] then { + continue + } + + gm2-torture-execute $testcase "" "pass" +} diff --git a/libgm2/libm2iso/Makefile.am b/libgm2/libm2iso/Makefile.am index ddd24629849f..8e774c5ea195 100644 --- a/libgm2/libm2iso/Makefile.am +++ b/libgm2/libm2iso/Makefile.am @@ -196,7 +196,8 @@ libm2iso_la_CFLAGS = \ libm2iso_la_M2FLAGS = \ -fm2-pathname=m2iso -I. -Ilibm2iso -I$(GM2_SRC)/gm2-libs-iso \ -fm2-pathname=m2pim -I$(GM2_SRC)/gm2-libs \ - -fiso -fextended-opaque -fm2-g -g -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2iso \ + -fiso -fextended-opaque -fm2-g -g -Wcase-enum \ + -Wreturn-type -fcase -fm2-prefix=m2iso \ $(TARGET_LONGDOUBLE_ABI) if TARGET_DARWIN libm2iso_la_link_flags = -Wl,-undefined,dynamic_lookup diff --git a/libgm2/libm2iso/Makefile.in b/libgm2/libm2iso/Makefile.in index 97de40c034b8..a82aa0c408bc 100644 --- a/libgm2/libm2iso/Makefile.in +++ b/libgm2/libm2iso/Makefile.in @@ -583,7 +583,8 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_ISOLIB_TRUE@libm2iso_la_M2FLAGS = \ @BUILD_ISOLIB_TRUE@ -fm2-pathname=m2iso -I. 
-Ilibm2iso -I$(GM2_SRC)/gm2-libs-iso \ @BUILD_ISOLIB_TRUE@ -fm2-pathname=m2pim -I$(GM2_SRC)/gm2-libs \ -@BUILD_ISOLIB_TRUE@ -fiso -fextended-opaque -fm2-g -g -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2iso \ +@BUILD_ISOLIB_TRUE@ -fiso -fextended-opaque -fm2-g -g -Wcase-enum \ +@BUILD_ISOLIB_TRUE@ -Wreturn-type -fcase -fm2-prefix=m2iso \ @BUILD_ISOLIB_TRUE@ $(TARGET_LONGDOUBLE_ABI) @BUILD_ISOLIB_TRUE@@TARGET_DARWIN_FALSE@libm2iso_la_link_flags = \ diff --git a/libgm2/libm2log/Makefile.am b/libgm2/libm2log/Makefile.am index 9571d50585c4..8dadaae95388 100644 --- a/libgm2/libm2log/Makefile.am +++ b/libgm2/libm2log/Makefile.am @@ -137,7 +137,8 @@ libm2log_la_M2FLAGS = \ -fm2-pathname=m2log -I$(GM2_SRC)/gm2-libs-log \ -fm2-pathname=m2pim -I$(GM2_SRC)/gm2-libs \ -fm2-pathname=m2iso -I$(GM2_SRC)/gm2-libs-iso \ - -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2log \ + -Wcase-enum -Wreturn-type \ + -fcase -fm2-prefix=m2log \ $(TARGET_LONGDOUBLE_ABI) if TARGET_DARWIN libm2log_la_link_flags = -Wl,-undefined,dynamic_lookup diff --git a/libgm2/libm2log/Makefile.in b/libgm2/libm2log/Makefile.in index bc64692c0fa8..4c371350ce2f 100644 --- a/libgm2/libm2log/Makefile.in +++ b/libgm2/libm2log/Makefile.in @@ -486,7 +486,8 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_LOGLIB_TRUE@ -fm2-pathname=m2log -I$(GM2_SRC)/gm2-libs-log \ @BUILD_LOGLIB_TRUE@ -fm2-pathname=m2pim -I$(GM2_SRC)/gm2-libs \ @BUILD_LOGLIB_TRUE@ -fm2-pathname=m2iso -I$(GM2_SRC)/gm2-libs-iso \ -@BUILD_LOGLIB_TRUE@ -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2log \ +@BUILD_LOGLIB_TRUE@ -Wcase-enum -Wreturn-type \ +@BUILD_LOGLIB_TRUE@ -fcase -fm2-prefix=m2log \ @BUILD_LOGLIB_TRUE@ $(TARGET_LONGDOUBLE_ABI) @BUILD_LOGLIB_TRUE@@TARGET_DARWIN_FALSE@libm2log_la_link_flags = \ diff --git a/libgm2/libm2pim/Makefile.am b/libgm2/libm2pim/Makefile.am index dd1fd08e90e0..4c5e046a1c98 100644 --- a/libgm2/libm2pim/Makefile.am +++ b/libgm2/libm2pim/Makefile.am @@ -173,7 +173,8 @@ libm2pim_la_CFLAGS = \ libm2pim_la_M2FLAGS = \ 
-fm2-pathname=m2pim -I. -I$(GM2_SRC)/gm2-libs \ -fm2-pathname=m2iso -I$(GM2_SRC)/gm2-libs-iso \ - -fm2-g -g -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2pim \ + -fm2-g -g -Wcase-enum -Wreturn-type \ + -fcase -fm2-prefix=m2pim \ $(TARGET_LONGDOUBLE_ABI) if TARGET_DARWIN libm2pim_la_link_flags = -Wl,-undefined,dynamic_lookup diff --git a/libgm2/libm2pim/Makefile.in b/libgm2/libm2pim/Makefile.in index c9817bc4746d..eca7e8501adb 100644 --- a/libgm2/libm2pim/Makefile.in +++ b/libgm2/libm2pim/Makefile.in @@ -551,7 +551,8 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_PIMLIB_TRUE@libm2pim_la_M2FLAGS = \ @BUILD_PIMLIB_TRUE@ -fm2-pathname=m2pim -I. -I$(GM2_SRC)/gm2-libs \ @BUILD_PIMLIB_TRUE@ -fm2-pathname=m2iso -I$(GM2_SRC)/gm2-libs-iso \ -@BUILD_PIMLIB_TRUE@ -fm2-g -g -Wcase-enum -Wreturn-type -fcase -fm2-prefix=m2pim \ +@BUILD_PIMLIB_TRUE@ -fm2-g -g -Wcase-enum -Wreturn-type \ +@BUILD_PIMLIB_TRUE@ -fcase -fm2-prefix=m2pim \ @BUILD_PIMLIB_TRUE@ $(TARGET_LONGDOUBLE_ABI) @BUILD_PIMLIB_TRUE@@TARGET_DARWIN_FALSE@libm2pim_la_link_flags = \ From cd7d0b4cf789264cd75ab7df5df232dc58061ed7 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Tue, 12 Dec 2023 21:19:21 +0100 Subject: [PATCH 239/311] SRA: Force gimple operand in an additional corner case (PR 112822) PR 112822 revealed a corner case in load_assign_lhs_subreplacements where it creates invalid gimple: an assignment where on the LHS there is a complex variable which however is not a gimple register because it has partial defs and on the right hand side there is a VIEW_CONVERT_EXPR. This patch invokes force_gimple_operand_gsi on such statements (like it already does when both sides of a generated assignment have partial definitions). gcc/ChangeLog: 2023-12-12 Martin Jambor PR tree-optimization/112822 * tree-sra.cc (load_assign_lhs_subreplacements): Invoke force_gimple_operand_gsi also when LHS has partial stores and RHS is a VIEW_CONVERT_EXPR.
--- gcc/tree-sra.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc index 3bd0c7a9af0a..99a1b0a6d179 100644 --- a/gcc/tree-sra.cc +++ b/gcc/tree-sra.cc @@ -4219,11 +4219,15 @@ load_assign_lhs_subreplacements (struct access *lacc, if (racc && racc->grp_to_be_replaced) { rhs = get_access_replacement (racc); + bool vce = false; if (!useless_type_conversion_p (lacc->type, racc->type)) - rhs = fold_build1_loc (sad->loc, VIEW_CONVERT_EXPR, - lacc->type, rhs); + { + rhs = fold_build1_loc (sad->loc, VIEW_CONVERT_EXPR, + lacc->type, rhs); + vce = true; + } - if (racc->grp_partial_lhs && lacc->grp_partial_lhs) + if (lacc->grp_partial_lhs && (vce || racc->grp_partial_lhs)) rhs = force_gimple_operand_gsi (&sad->old_gsi, rhs, true, NULL_TREE, true, GSI_SAME_STMT); } From 988dd6384c88a110952833dfe7c8344b9af95fa4 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Tue, 12 Dec 2023 14:54:36 +0000 Subject: [PATCH 240/311] libstdc++: Remove redundant -std flags from Makefile In r14-4060-gc4baeaecbbf7d0 I moved some files from src/c++98 to src/c++11 but I didn't remove the redundant -std=gnu++11 flags for those files. The flags aren't needed now, because AM_CXXFLAGS for that directory already uses -std=gnu++11. This removes them. libstdc++-v3/ChangeLog: * src/c++11/Makefile.am: Remove redundant -std=gnu++11 flags. * src/c++11/Makefile.in: Regenerate. 
--- libstdc++-v3/src/c++11/Makefile.am | 8 ++++---- libstdc++-v3/src/c++11/Makefile.in | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libstdc++-v3/src/c++11/Makefile.am b/libstdc++-v3/src/c++11/Makefile.am index 9cddb978928f..b626e477dde7 100644 --- a/libstdc++-v3/src/c++11/Makefile.am +++ b/libstdc++-v3/src/c++11/Makefile.am @@ -159,13 +159,13 @@ limits.lo: limits.cc limits.o: limits.cc $(CXXCOMPILE) -fchar8_t -c $< locale_init.lo: locale_init.cc - $(LTCXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(LTCXXCOMPILE) -fchar8_t -c $< locale_init.o: locale_init.cc - $(CXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(CXXCOMPILE) -fchar8_t -c $< localename.lo: localename.cc - $(LTCXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(LTCXXCOMPILE) -fchar8_t -c $< localename.o: localename.cc - $(CXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(CXXCOMPILE) -fchar8_t -c $< if ENABLE_DUAL_ABI # Rewrite the type info for __ios_failure. diff --git a/libstdc++-v3/src/c++11/Makefile.in b/libstdc++-v3/src/c++11/Makefile.in index e6d37c5464ca..4be021e80259 100644 --- a/libstdc++-v3/src/c++11/Makefile.in +++ b/libstdc++-v3/src/c++11/Makefile.in @@ -887,13 +887,13 @@ limits.lo: limits.cc limits.o: limits.cc $(CXXCOMPILE) -fchar8_t -c $< locale_init.lo: locale_init.cc - $(LTCXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(LTCXXCOMPILE) -fchar8_t -c $< locale_init.o: locale_init.cc - $(CXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(CXXCOMPILE) -fchar8_t -c $< localename.lo: localename.cc - $(LTCXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(LTCXXCOMPILE) -fchar8_t -c $< localename.o: localename.cc - $(CXXCOMPILE) -std=gnu++11 -fchar8_t -c $< + $(CXXCOMPILE) -fchar8_t -c $< @ENABLE_DUAL_ABI_TRUE@cxx11-ios_failure-lt.s: cxx11-ios_failure.cc @ENABLE_DUAL_ABI_TRUE@ $(LTCXXCOMPILE) -gno-as-loc-support -S $< -o tmp-cxx11-ios_failure-lt.s From a01462ae8bafa86e7df47a252917ba6899d587cf Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Mon, 11 Dec 2023 15:33:59 +0000 Subject: [PATCH 241/311] 
libstdc++: Fix std::format output of %C for negative years During discussion of LWG 4022 I noticed that we do not correctly implement floored division for the century. We were just truncating towards zero, rather than applying the floor function. For negative values that rounds the wrong way. libstdc++-v3/ChangeLog: * include/bits/chrono_io.h (__formatter_chrono::_M_C_y_Y): Fix rounding for negative centuries. * testsuite/std/time/year/io.cc: Check %C for negative years. --- libstdc++-v3/include/bits/chrono_io.h | 9 +++++++-- libstdc++-v3/testsuite/std/time/year/io.cc | 7 +++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/bits/chrono_io.h b/libstdc++-v3/include/bits/chrono_io.h index 16e8fc58dffb..b63b8592ebac 100644 --- a/libstdc++-v3/include/bits/chrono_io.h +++ b/libstdc++-v3/include/bits/chrono_io.h @@ -820,9 +820,14 @@ namespace __format if (__conv == 'Y' || __conv == 'C') { - if (__is_neg) - __s.assign(1, _S_plus_minus[1]); int __ci = __yi / 100; + if (__is_neg) [[unlikely]] + { + __s.assign(1, _S_plus_minus[1]); + // For floored division -123//100 is -2 and -100//100 is -1 + if ((__ci * 100) != __yi) + ++__ci; + } if (__ci >= 100) [[unlikely]] { __s += std::format(_S_empty_spec, __ci / 100); diff --git a/libstdc++-v3/testsuite/std/time/year/io.cc b/libstdc++-v3/testsuite/std/time/year/io.cc index 6157afae2533..a6683ae20df4 100644 --- a/libstdc++-v3/testsuite/std/time/year/io.cc +++ b/libstdc++-v3/testsuite/std/time/year/io.cc @@ -43,8 +43,11 @@ test_format() s = std::format("{}", --year::min()); // formatted via ostream VERIFY( s == "-32768 is not a valid year" ); - s = std::format("{:%y} {:%y}", 1976y, -1976y); - VERIFY( s == "76 76" ); // LWG 3831 + s = std::format("{:%C %y} {:%C %y}", 1976y, -1976y); + VERIFY( s == "19 76 -20 76" ); // LWG 3831 + + s = std::format("{:%C %y} {:%C %y} {:%C %y}", -9y, -900y, -555y); + VERIFY( s == "-01 09 -09 00 -06 55" ); // LWG 4022 s = std::format("{0:%EC}{0:%Ey} = {0:%EY}", 
1642y); VERIFY( s == "1642 = 1642" ); From 52de6aa1a8582208b519b6998389d3a801b0de7b Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Tue, 12 Dec 2023 20:53:08 +0000 Subject: [PATCH 242/311] libstdc++: Fix std::format("{}", 'c') When I added a fast path for std::format("{}", x) in r14-5587-g41a5ea4cab2c59 I forgot to handle char separately from other integral types. That caused std::format("{}", 'c') to return "99" instead of "c". libstdc++-v3/ChangeLog: * include/std/format (__do_vformat_to): Handle char separately from other integral types. * testsuite/std/format/functions/format.cc: Check for expected output for char and bool arguments. * testsuite/std/format/string.cc: Check that 0 filling is rejected for character and string formats. --- libstdc++-v3/include/std/format | 9 +++ .../testsuite/std/format/functions/format.cc | 56 +++++++++++++++++++ libstdc++-v3/testsuite/std/format/string.cc | 3 + 3 files changed, 68 insertions(+) diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index 04d03e0ceb70..1f8cd5c06be8 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -3968,6 +3968,15 @@ namespace __format __done = true; } } + else if constexpr (is_same_v<_Tp, char>) + { + if (auto __res = __sink_out._M_reserve(1)) + { + *__res.get() = __arg; + __res._M_bump(1); + __done = true; + } + } else if constexpr (is_integral_v<_Tp>) { make_unsigned_t<_Tp> __uval; diff --git a/libstdc++-v3/testsuite/std/format/functions/format.cc b/libstdc++-v3/testsuite/std/format/functions/format.cc index 9328dec88758..b3b4f0647bcd 100644 --- a/libstdc++-v3/testsuite/std/format/functions/format.cc +++ b/libstdc++-v3/testsuite/std/format/functions/format.cc @@ -256,12 +256,42 @@ test_width() } } +void +test_char() +{ + std::string s; + + s = std::format("{}", 'a'); + VERIFY( s == "a" ); + + s = std::format("{:c} {:d} {:o}", 'b', '\x17', '\x3f'); + VERIFY( s == "b 23 77" ); + + s = std::format("{:#d} {:#o}", '\x17', '\x3f'); + 
VERIFY( s == "23 077" ); + + s = std::format("{:04d} {:04o}", '\x17', '\x3f'); + VERIFY( s == "0023 0077" ); + + s = std::format("{:b} {:B} {:#b} {:#B}", '\xff', '\xa0', '\x17', '\x3f'); + if constexpr (std::is_unsigned_v) + VERIFY( s == "11111111 10100000 0b10111 0B111111" ); + else + VERIFY( s == "-1 -1100000 0b10111 0B111111" ); + + s = std::format("{:x} {:#x} {:#X}", '\x12', '\x34', '\x45'); + VERIFY( s == "12 0x34 0X45" ); +} + void test_wchar() { using namespace std::literals; std::wstring s; + s = std::format(L"{}", L'a'); + VERIFY( s == L"a" ); + s = std::format(L"{} {} {} {} {} {}", L'0', 1, 2LL, 3.4, L"five", L"six"s); VERIFY( s == L"0 1 2 3.4 five six" ); @@ -353,6 +383,9 @@ test_pointer() const void* pc = p; std::string s, str_int; + s = std::format("{}", p); + VERIFY( s == "0x0" ); + s = std::format("{} {} {}", p, pc, nullptr); VERIFY( s == "0x0 0x0 0x0" ); s = std::format("{:p} {:p} {:p}", p, pc, nullptr); @@ -385,6 +418,27 @@ test_pointer() #endif } +void +test_bool() +{ + std::string s; + + s = std::format("{}", true); + VERIFY( s == "true" ); + s = std::format("{:} {:s}", true, false); + VERIFY( s == "true false" ); + s = std::format("{:b} {:#b}", true, false); + VERIFY( s == "1 0b0" ); + s = std::format("{:B} {:#B}", false, true); + VERIFY( s == "0 0B1" ); + s = std::format("{:d} {:#d}", false, true); + VERIFY( s == "0 1" ); + s = std::format("{:o} {:#o} {:#o}", false, true, false); + VERIFY( s == "0 01 0" ); + s = std::format("{:x} {:#x} {:#X}", false, true, false); + VERIFY( s == "0 0x1 0X0" ); +} + int main() { test_no_args(); @@ -393,8 +447,10 @@ int main() test_alternate_forms(); test_locale(); test_width(); + test_char(); test_wchar(); test_minmax(); test_p1652r1(); test_pointer(); + test_bool(); } diff --git a/libstdc++-v3/testsuite/std/format/string.cc b/libstdc++-v3/testsuite/std/format/string.cc index 5d338644c620..40aaebae04ea 100644 --- a/libstdc++-v3/testsuite/std/format/string.cc +++ b/libstdc++-v3/testsuite/std/format/string.cc @@ 
-109,6 +109,9 @@ test_format_spec() VERIFY( ! is_format_string_for("{:#?}", "str") ); VERIFY( ! is_format_string_for("{:#?}", 'c') ); + VERIFY( ! is_format_string_for("{:0c}", 'c') ); + VERIFY( ! is_format_string_for("{:0s}", true) ); + // Precision only valid for string and floating-point types. VERIFY( ! is_format_string_for("{:.3d}", 1) ); VERIFY( ! is_format_string_for("{:3.3d}", 1) ); From 788e0d48ec639d44294434f4f20ae94023c3759d Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Tue, 12 Dec 2023 16:46:16 -0600 Subject: [PATCH 243/311] testsuite: Add testcase for already fixed PR [PR112822] Adding a testcase for PR112822 to ensure we won't regress. 2023-12-12 Peter Bergner gcc/testsuite/ PR tree-optimization/112822 * g++.dg/pr112822.C: New test. --- gcc/testsuite/g++.dg/pr112822.C | 369 ++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100644 gcc/testsuite/g++.dg/pr112822.C diff --git a/gcc/testsuite/g++.dg/pr112822.C b/gcc/testsuite/g++.dg/pr112822.C new file mode 100644 index 000000000000..d1490405493a --- /dev/null +++ b/gcc/testsuite/g++.dg/pr112822.C @@ -0,0 +1,369 @@ +/* PR tree-optimization/112822 */ +/* { dg-options "-w -O2" } */ + +/* Verify we do not ICE on the following noisy creduced test case. 
*/ + +namespace b { +typedef int c; +template struct d; +template struct d { using f = e; }; +template struct aa; +template struct aa { using f = h; }; +template using ab = typename d::f; +template using n = typename aa::f; +template class af { +public: + typedef __complex__ ah; + template af operator+=(e) { + ah o; + x = o; + return *this; + } + ah x; +}; +} // namespace b +namespace { +enum { p }; +enum { ac, ad }; +struct ae; +struct al; +struct ag; +typedef b::c an; +namespace ai { +template struct ak { typedef aj f; }; +template using ar = typename ak::f; +template struct am { + enum { at }; +}; +template struct ao { + enum { at }; +}; +template struct ap; +template struct aq { + enum { at }; +}; +} // namespace ai +template struct ay; +template class as; +template class ba; +template class aw; +template class be; +template class az; +namespace ai { +template struct bg; +template ::bd> +struct bk; +template struct bf; +template struct bm; +template struct bh; +template ::bj>::at> struct bp { + typedef bi f; +}; +template struct br { + typedef typename bp::f>::f f; +}; +template struct bn; +template struct bn { + typedef aw f; +}; +template struct bx { + typedef typename bn::bs, aj ::bo>::f f; +}; +template struct bt { typedef b::n<0, aj, aj> f; }; +template ::f> struct cb { + enum { bw }; + typedef b::n::f> f; +}; +template ::bs> struct by { + typedef be f; +}; +template struct bz { + typedef typename by::f f; +}; +template struct ch; +template struct ch { typedef ci bd; }; +} // namespace ai +template > struct cg; +template struct cg { typedef aj cn; }; +namespace ai { +template cj cp; +template void cl(bu *cr, cj cs) { ct(cr, cs); } +typedef __attribute__((altivec(vector__))) double co; +void ct(double *cr, co cs) { *(co *)cr = cs; } +struct cq { + co q; +}; +template <> struct bm> { typedef cq f; }; +template <> struct bh { typedef cq bj; }; +void ct(b::af *cr, cq cs) { ct((double *)cr, cs.q); } +template struct cx { + template void cu(cw *a, cj) { + cl(a, 
cp); + } +}; +} // namespace ai +template class ba : public ay { +public: + typedef ai::ap bu; + typedef b::n::bo, bu, b::n::at, bu, bu>> cv; + typedef ay db; + db::dc; + cv coeff(an dd, an col) const { return dc().coeff(dd, col); } +}; +template class cz : public ba::at> { +public: + ai::ap b; + enum { da, dg, dh, bv, bq, di = dg, bo }; +}; +template class be : public cz { +public: + typedef typename ai::ap::bu bu; + typedef cz db; + db::dc; + template cd &operator+=(const be &); + template az df(de); +}; +template struct ay { + cd &dc() { return *static_cast(this); } + cd dc() const; +}; +template class dl; +namespace ai { +template struct ap> { + typedef bb dj; + typedef bc r; + typedef ap s; + typedef ap t; + typedef typename cg::cn bu; + typedef typename ch::bd>::bd cf; + enum { bo }; +}; +} // namespace ai +template +class az : public dl, ai::ap, ai::bg::bd>> { +public: + typedef dk bb; + typedef Rhs_ bc; + typedef typename ai::bt::f LhsNested; + typedef typename ai::bt::f dn; + typedef ai::ar u; + typedef ai::ar RhsNestedCleaned; + u lhs(); + RhsNestedCleaned rhs(); +}; +template +class dl : public ai::bz, al>::f {}; +namespace ai { +template struct v { typedef ag w; }; +template struct evaluator_traits_base { + typedef typename v::cf>::w w; +}; +template struct ax : evaluator_traits_base {}; +template struct y { static const bool at = false; }; +template class plainobjectbase_evaluator_data { +public: + plainobjectbase_evaluator_data(bu *ptr, an) : data(ptr) {} + an outerStride() { return z; } + bu *data; +}; +template struct evaluator { + typedef cd PlainObjectType; + typedef typename PlainObjectType::bu bu; + enum { IsVectorAtCompileTime }; + enum { OuterStrideAtCompileTime }; + evaluator(PlainObjectType &m) : m_d(m.data(), IsVectorAtCompileTime) {} + bu &coeffRef(an, an) { return m_d.data[m_d.outerStride()]; } + plainobjectbase_evaluator_data m_d; +}; +template +struct evaluator> + : evaluator>> { + typedef aw XprType; + evaluator(XprType &m) : 
evaluator>(m) {} +}; +template +struct copy_using_evaluator_traits { + typedef typename DstEvaluator::bu cw; + enum { RestrictedInnerSize }; + typedef typename br::f bi; +}; +template +struct copy_using_evaluator_innervec_CompleteUnrolling { + typedef typename Kernel::bi bi; + enum { outer, inner, SrcAlignment, DstAlignment }; + static void run(Kernel kernel) { + kernel.template assignPacketByOuterInner( + outer, inner); + } +}; +template struct dense_assignment_loop { + static void run(Kernel kernel) { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling< + Kernel, 0, DstXprType::dh>::run(kernel); + } +}; +template +class generic_dense_assignment_kernel { + typedef typename DstEvaluatorTypeT::XprType DstXprType; + +public: + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef copy_using_evaluator_traits + AssignmentTraits; + typedef typename AssignmentTraits::bi bi; + generic_dense_assignment_kernel(DstEvaluatorType dst, SrcEvaluatorType src, + Functor, DstXprType dstExpr) + : m_dst(dst), m_src(src), m_dstExpr(dstExpr) {} + template void cu(an dd, an col) { + m_functor.template cu( + &m_dst.coeffRef(dd, col), m_src.template packet(dd, col)); + } + template + void assignPacketByOuterInner(an, an) { + an dd; + an col; + cu(dd, col); + } + DstEvaluatorType m_dst; + SrcEvaluatorType &m_src; + Functor m_functor; + DstXprType m_dstExpr; +}; +template +void call_dense_assignment_loop(DstXprType &dst, SrcXprType src, Functor func) { + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + SrcEvaluatorType srcEvaluator(src); + DstEvaluatorType dstEvaluator(dst); + typedef generic_dense_assignment_kernel + Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst); + dense_assignment_loop::run(kernel); +} +template struct AssignmentKind; +struct Dense2Dense; +template <> struct AssignmentKind { typedef Dense2Dense Kind; }; +template ::w, + 
typename ax::w>::Kind, + typename = void> +struct Assignment; +template +void call_assignment(Dst &dst, Src src, Func func, + b::ab::at, void *> = 0) { + enum { NeedToTranspose }; + typedef b::n ActualDstTypeCleaned; + typedef b::n ActualDstType; + ActualDstType actualDst(dst); + Assignment::run(actualDst, src, func); +} +template +struct Assignment { + static void run(DstXprType &dst, SrcXprType src, Functor func) { + call_dense_assignment_loop(dst, src, func); + } +}; +template struct plain_array { aj array[bl]; }; +} // namespace ai +template class DenseStorage { + ai::plain_array m_data; + +public: + an cols() { return av; } + aj *data() { return m_data.array; } +}; +template class as : public ai::by::f { +public: + enum { Options }; + typedef typename ai::by::f db; + typedef typename ai::ap::bu bu; + DenseStorage m_storage; + an cols() { return m_storage.cols(); } + bu &coeffRef(an, an colId) { return data()[colId]; } + bu *data() { return m_storage.data(); } +}; +namespace ai { +template +struct ap> { + typedef Scalar_ bu; + typedef ae cf; + typedef al bs; + enum { bo }; +}; +} // namespace ai +template +class aw : public as> { +public: + template aw(T0, T1) {} +}; +template +template +cd &be::operator+=(const be &other) { + call_assignment(dc(), other.dc(), ai::cx()); + return dc(); +} +namespace ai { +template struct bg { + enum { bd }; +}; +template +struct evaluator> : bk> { + typedef az XprType; + typedef bk db; + evaluator(XprType xpr) : db(xpr) {} +}; +template struct bk, cc> { + typedef az XprType; + bk(XprType xpr) + : m_lhs(xpr.lhs()), m_rhs(xpr.rhs()), m_lhsImpl(m_lhs), m_rhsImpl(m_rhs) { + } + typedef typename cb::f LhsNested; + typedef typename cb::f dn; + typedef LhsNested u; + typedef dn RhsNestedCleaned; + typedef u LhsEtorType; + typedef RhsNestedCleaned RhsEtorType; + template bi packet(an, an); + LhsNested m_lhs; + dn m_rhs; + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; +}; +} // namespace ai +} // namespace +namespace Eigen { +template 
bool verifyIsApprox(Type1, Type2); +} +using namespace Eigen; +template TC ref_prod(TC C, TA, TB B) { + for (an i; i;) + for (an j = 0; j < C.cols(); ++j) + for (an k; k;) + C.coeffRef(i, j) += B.coeff(k, j); + return C; +} +template +b::ab test_lazy_single(int rows, int cols, int depth) { + aw ci(rows, depth); + aw B(depth, cols); + aw C(rows, cols); + aw D(C); + verifyIsApprox(C += ci.df(B), ref_prod(D, ci, B)); +} +template +void test_lazy_all_layout(int rows = Rows, int cols = Cols, int depth = Depth) { + test_lazy_single(rows, cols, depth); +} +template void test_lazy_l2() { + test_lazy_all_layout(); +} +void fn1() { test_lazy_l2>(); } From 26250632df1526bad2af9f2620204427008ac433 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Tue, 12 Dec 2023 18:54:11 +0800 Subject: [PATCH 244/311] RISC-V: Refactor Dynamic LMUL codes This patch refactors dynamic LMUL to remove the following variable: static hash_map loop_autovec_infos; which will keep growing on-the-fly. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (get_current_lmul): Remove it. (compute_estimated_lmul): New function. (costs::costs): Refactor. (costs::preferred_new_lmul_p): Ditto. (preferred_new_lmul_p): Ditto. (costs::better_main_loop_than_p): Ditto. * config/riscv/riscv-vector-costs.h (struct autovec_info): Remove it. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c: Adapt test.
* gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Adapt test. * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Adapt test. 
--- gcc/config/riscv/riscv-vector-costs.cc | 113 ++++++++---------- gcc/config/riscv/riscv-vector-costs.h | 9 -- .../riscv/rvv/dynamic-lmul-mixed-1.c | 4 +- .../costmodel/riscv/rvv/dynamic-lmul1-3.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul1-6.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-1.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-2.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-3.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-4.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-5.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul2-6.c | 4 +- .../costmodel/riscv/rvv/dynamic-lmul4-1.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-2.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-3.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-4.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-5.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-6.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-7.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-8.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul4-9.c | 14 +-- .../costmodel/riscv/rvv/dynamic-lmul8-1.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-10.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-2.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-3.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-4.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-5.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-6.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-7.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-8.c | 2 +- .../costmodel/riscv/rvv/dynamic-lmul8-9.c | 2 +- .../vect/costmodel/riscv/rvv/pr111848.c | 3 +- 31 files changed, 88 insertions(+), 109 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index c062c12a2633..3fcb5f3176f7 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -86,7 +86,6 @@ namespace riscv_vector { 2. M8 -> M1(M4) -> MF2(M2) -> MF4(M1) (stop analysis here) -> MF8(MF2) 3. 
M1(M8) -> MF2(M4) -> MF4(M2) -> MF8(M1) */ -static hash_map loop_autovec_infos; /* Collect all STMTs that are vectorized and compute their program points. Note that we don't care about the STMTs that are not vectorized and @@ -362,13 +361,6 @@ max_number_of_live_regs (const basic_block bb, return max_nregs; } -/* Return the LMUL of the current analysis. */ -static int -get_current_lmul (class loop *loop) -{ - return loop_autovec_infos.get (loop)->current_lmul; -} - /* Get STORE value. */ static tree get_store_value (gimple *stmt) @@ -394,6 +386,33 @@ non_contiguous_memory_access_p (stmt_vec_info stmt_info) && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info))); } +/* Return the LMUL of the current analysis. */ +static int +compute_estimated_lmul (loop_vec_info other_loop_vinfo, machine_mode mode) +{ + gcc_assert (GET_MODE_BITSIZE (mode).is_constant ()); + int regno_alignment + = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode); + if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U)) + { + int estimated_vf = vect_vf_for_cost (other_loop_vinfo); + return estimated_vf * GET_MODE_BITSIZE (mode).to_constant () + / TARGET_MIN_VLEN; + } + else if (regno_alignment > 1) + return regno_alignment; + else + { + int ratio; + if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR, + LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), + &ratio)) + return TARGET_MAX_LMUL / ratio; + else + gcc_unreachable (); + } +} + /* Update the live ranges according PHI. Loop: @@ -520,65 +539,25 @@ update_local_live_ranges ( } } -costs::costs (vec_info *vinfo, bool costing_for_scalar) - : vector_costs (vinfo, costing_for_scalar) -{} - /* Return true that the LMUL of new COST model is preferred. 
*/ -bool -costs::preferred_new_lmul_p (const vector_costs *uncast_other) const +static bool +preferred_new_lmul_p (loop_vec_info other_loop_vinfo) { - auto other = static_cast (uncast_other); - auto this_loop_vinfo = as_a (this->m_vinfo); - auto other_loop_vinfo = as_a (other->m_vinfo); - class loop *loop = LOOP_VINFO_LOOP (this_loop_vinfo); - - if (loop_autovec_infos.get (loop) && loop_autovec_infos.get (loop)->end_p) - return false; - else if (loop_autovec_infos.get (loop)) - loop_autovec_infos.get (loop)->current_lmul - = loop_autovec_infos.get (loop)->current_lmul / 2; - else - { - int regno_alignment - = riscv_get_v_regno_alignment (other_loop_vinfo->vector_mode); - if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (other_loop_vinfo), 1U)) - regno_alignment = RVV_M8; - loop_autovec_infos.put (loop, {regno_alignment, regno_alignment, false}); - } - - int lmul = get_current_lmul (loop); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "Comparing two main loops (%s at VF %d vs %s at VF %d)\n", - GET_MODE_NAME (this_loop_vinfo->vector_mode), - vect_vf_for_cost (this_loop_vinfo), - GET_MODE_NAME (other_loop_vinfo->vector_mode), - vect_vf_for_cost (other_loop_vinfo)); - /* Compute local program points. It's a fast and effective computation. */ hash_map> program_points_per_bb; - compute_local_program_points (other->m_vinfo, program_points_per_bb); + compute_local_program_points (other_loop_vinfo, program_points_per_bb); /* Compute local live ranges. */ hash_map> live_ranges_per_bb; machine_mode biggest_mode = compute_local_live_ranges (program_points_per_bb, live_ranges_per_bb); - /* If we can use simple VLS modes to handle NITERS element. - We don't need to use VLA modes with partial vector auto-vectorization. 
*/ - if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo) - && known_le (tree_to_poly_int64 (LOOP_VINFO_NITERS (this_loop_vinfo)) - * GET_MODE_SIZE (biggest_mode).to_constant (), - (int) RVV_M8 * BYTES_PER_RISCV_VECTOR) - && pow2p_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo))) - return vector_costs::better_main_loop_than_p (other); - /* Update live ranges according to PHI. */ - update_local_live_ranges (other->m_vinfo, program_points_per_bb, + update_local_live_ranges (other_loop_vinfo, program_points_per_bb, live_ranges_per_bb, &biggest_mode); + int lmul = compute_estimated_lmul (other_loop_vinfo, biggest_mode); /* TODO: We calculate the maximum live vars base on current STMTS sequence. We can support live range shrink if it can give us big improvement in the future. */ @@ -603,12 +582,7 @@ costs::preferred_new_lmul_p (const vector_costs *uncast_other) const live_ranges_per_bb.empty (); } live_ranges_per_bb.empty (); - if (loop_autovec_infos.get (loop)->current_lmul == RVV_M1 - || max_nregs <= V_REG_NUM) - loop_autovec_infos.get (loop)->end_p = true; - if (loop_autovec_infos.get (loop)->current_lmul > RVV_M1) - return max_nregs > V_REG_NUM; - return false; + return max_nregs > V_REG_NUM; } if (!program_points_per_bb.is_empty ()) { @@ -625,17 +599,34 @@ costs::preferred_new_lmul_p (const vector_costs *uncast_other) const return lmul > RVV_M1; } +costs::costs (vec_info *vinfo, bool costing_for_scalar) + : vector_costs (vinfo, costing_for_scalar) +{} + bool costs::better_main_loop_than_p (const vector_costs *uncast_other) const { auto other = static_cast (uncast_other); + auto this_loop_vinfo = as_a (this->m_vinfo); + auto other_loop_vinfo = as_a (other->m_vinfo); - if (riscv_autovec_lmul == RVV_DYNAMIC) + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Comparing two main loops (%s at VF %d vs %s at VF %d)\n", + GET_MODE_NAME (this_loop_vinfo->vector_mode), + vect_vf_for_cost (this_loop_vinfo), + GET_MODE_NAME (other_loop_vinfo->vector_mode), + 
vect_vf_for_cost (other_loop_vinfo)); + + if (!LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo) + && riscv_autovec_lmul == RVV_DYNAMIC) { + if (!riscv_v_ext_vector_mode_p (this_loop_vinfo->vector_mode)) + return false; bool post_dom_available_p = dom_info_available_p (CDI_POST_DOMINATORS); if (!post_dom_available_p) calculate_dominance_info (CDI_POST_DOMINATORS); - bool preferred_p = preferred_new_lmul_p (uncast_other); + bool preferred_p = preferred_new_lmul_p (other_loop_vinfo); if (!post_dom_available_p) free_dominance_info (CDI_POST_DOMINATORS); return preferred_p; diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h index 7b5814a4cffe..e18775e230b1 100644 --- a/gcc/config/riscv/riscv-vector-costs.h +++ b/gcc/config/riscv/riscv-vector-costs.h @@ -33,13 +33,6 @@ struct stmt_point /* Pair typedef used by live range: . */ typedef std::pair pair; -struct autovec_info -{ - unsigned int initial_lmul; - unsigned int current_lmul; - bool end_p; -}; - /* rvv-specific vector costs. 
*/ class costs : public vector_costs { @@ -56,8 +49,6 @@ private: tree vectype, int misalign, vect_cost_model_location where) override; void finish_cost (const vector_costs *) override; - - bool preferred_new_lmul_p (const vector_costs *) const; }; } // namespace riscv_vector diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c index 5f18262f1f26..9e2c65110e82 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c @@ -44,7 +44,7 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler {e32,m2} } } */ /* { dg-final { scan-assembler {e32,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c index d0481dc57ecb..e2511da135e1 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c @@ -88,4 +88,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c, /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */ +/* { dg-final { 
scan-tree-dump "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c index e6ccd0e2063c..f6be585b9274 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c @@ -147,4 +147,4 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c, /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 1" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c index 8df0e0a48d7e..81cb6954d493 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c @@ -47,5 +47,5 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c index 8f2423700cd9..a0e2311c2e0d 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c @@ -47,5 +47,5 @@ foo (int8_t *__restrict a, 
int8_t *__restrict b, int8_t *__restrict c, /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c index 8df0e0a48d7e..81cb6954d493 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c @@ -47,5 +47,5 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c index b3498ad82107..5169dcba8461 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c @@ -45,5 +45,5 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } 
*/ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c index a238050f28f2..2bc4c6831fc4 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c @@ -48,5 +48,5 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c index a155c080047e..c667ab78a3c5 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c @@ -48,7 +48,7 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler {e32,m2} } } */ /* { dg-final { scan-assembler {e8,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 2 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c index 97eb88f9447d..28fc49a2252a 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c @@ -30,6 +30,6 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict c, /* { dg-final { scan-assembler {e32,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c index a9edfb0d9803..4b6bd85b4a8e 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c @@ -30,6 +30,6 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict c, /* { dg-final { scan-assembler {e8,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c index c064bfaeb501..c61c53be8034 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c @@ -42,6 +42,6 @@ void foo2 (int64_t 
*__restrict a, /* { dg-final { scan-assembler {e64,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c index 4a90e5ef7879..01a359bc7c8b 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c @@ -42,6 +42,6 @@ void foo2 (int64_t *__restrict a, /* { dg-final { scan-assembler {e64,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c index 1a361ed4226f..1df1cca5e43e 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c @@ -42,6 +42,6 @@ void foo2 (int16_t *__restrict a, /* { dg-final { scan-assembler {e16,m2} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum 
lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c index c7b886359239..0abbdb614931 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c @@ -22,6 +22,6 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n) /* { dg-final { scan-assembler {e8,m4} } } */ /* { dg-final { scan-assembler-times {csrr} 1 } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c index 800b4e4e03c3..e4769df316a5 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c @@ -42,6 +42,6 @@ void foo2 (int8_t *__restrict a, /* { dg-final { scan-assembler {e64,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c index 4d9175e86f8f..6d2a56474285 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c 
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c @@ -31,6 +31,6 @@ foo (uint8_t *restrict a, uint8_t *restrict b, int n) /* { dg-final { scan-assembler {e8,m4} } } */ /* { dg-final { scan-assembler-times {csrr} 1 } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c index d05cd158f22b..7fda83ab9780 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fselective-scheduling -fdump-tree-vect-details" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param riscv-autovec-lmul=dynamic -fselective-scheduling -fdump-tree-vect-details" } */ #include @@ -8,23 +8,19 @@ int a[N]; __attribute__ ((noinline)) int -foo (){ +foo (int n){ int i,j; int sum,x; - for (i = 0; i < N; i++) { + for (i = 0; i < n; i++) { sum = 0; - for (j = 0; j < N; j++) { + for (j = 0; j < n; j++) { sum += (i + j); } a[i] = sum; } + return 0; } -/* { dg-final { scan-assembler {e32,m4} } } */ /* { dg-final { scan-assembler-not {jr} } } */ /* { dg-final { scan-assembler-times {ret} 1 } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ -/* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ -/* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git 
a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c index e8afc77e4522..31ee58a4b138 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c @@ -12,7 +12,7 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int n) /* { dg-final { scan-assembler {e32,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c index cb4540e42188..d767d2726a68 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c @@ -16,7 +16,7 @@ foo (int *x, int n, int res) /* { dg-final { scan-assembler {e32,m8} } } */ /* { dg-final { scan-assembler-times {csrr} 1 } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c index d6348247dbf9..4fa6d2f5b282 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c @@ -12,7 +12,7 @@ foo (int32_t 
*__restrict a, int16_t *__restrict b, int n) /* { dg-final { scan-assembler {e16,m4} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c index edf5dabc37dc..c9f4fbca0d83 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c @@ -12,7 +12,7 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int n) /* { dg-final { scan-assembler {e8,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c index 99191ddeb75c..e6d1b1670418 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c @@ -13,7 +13,7 @@ foo (size_t *__restrict a, size_t *__restrict b, int n) /* { dg-final { scan-assembler {e64,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 
4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c index 0abface480ff..3276585bacd2 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c @@ -19,7 +19,7 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int n) /* { dg-final { scan-assembler {e8,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c index e7caabdc8430..918b47f301a8 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c @@ -17,7 +17,7 @@ foo (int8_t *__restrict a, int8_t *__restrict b, int8_t *__restrict a2, /* { dg-final { scan-assembler {e8,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c index 
2be7a955f871..b0fb62aebb06 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c @@ -17,7 +17,7 @@ foo (int32_t *__restrict a, int32_t *__restrict b, int32_t *__restrict a2, /* { dg-final { scan-assembler {e32,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c index 1d94f341668f..d64c30ae1145 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c @@ -13,7 +13,7 @@ foo (int8_t *__restrict a, int8_t init, int n) /* { dg-final { scan-assembler {e8,m8} } } */ /* { dg-final { scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c index 2d2e64bfc949..2cf7de44e44b 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c @@ -13,7 +13,7 @@ foo (int64_t *__restrict a, int64_t init, int n) /* { dg-final { scan-assembler {e64,m8} } } */ /* { dg-final { 
scan-assembler-not {csrr} } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 8" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c index 31cac047d2c1..533f6d6dec96 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111848.c @@ -31,6 +31,7 @@ f3 (uint8_t *restrict a, uint8_t *restrict b, /* { dg-final { scan-assembler {e8,m4} } } */ /* { dg-final { scan-assembler-not {jr} } } */ /* { dg-final { scan-assembler-times {ret} 1 } } */ -/* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-not "Maximum lmul = 8" "vect" } } */ +/* { dg-final { scan-tree-dump "Maximum lmul = 4" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 2" "vect" } } */ /* { dg-final { scan-tree-dump-not "Maximum lmul = 1" "vect" } } */ From 8501edba91ea63bdfc045f1cb66fb1c242e44e80 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Tue, 12 Dec 2023 22:25:52 +0800 Subject: [PATCH 245/311] RISC-V: Apply vla vs. vls mode heuristic vector COST model This patch applies the VLA vs. VLS mode heuristic, which fixes the following FAILs: FAIL: gcc.target/riscv/rvv/autovec/pr111751.c -O3 -ftree-vectorize scan-assembler-not vset FAIL: gcc.target/riscv/rvv/autovec/pr111751.c -O3 -ftree-vectorize scan-assembler-times li\\s+[a-x0-9]+,0\\s+ret 2 The root cause of these FAILs is that we failed to pick a VLS mode for the vectorization.
Before this patch: foo2: addi sp,sp,-208 addi a2,sp,64 addi a5,sp,128 lui a6,%hi(.LANCHOR0) sd ra,200(sp) addi a6,a6,%lo(.LANCHOR0) mv a0,a2 mv a1,a5 li a3,16 mv a4,sp vsetivli zero,8,e64,m8,ta,ma vle64.v v8,0(a6) vse64.v v8,0(a2) vse64.v v8,0(a5) .L4: vsetvli a5,a3,e32,m1,ta,ma slli a2,a5,2 vle32.v v2,0(a1) vle32.v v1,0(a0) sub a3,a3,a5 vadd.vv v1,v1,v2 vse32.v v1,0(a4) add a1,a1,a2 add a0,a0,a2 add a4,a4,a2 bne a3,zero,.L4 lw a4,128(sp) lw a5,64(sp) addw a5,a5,a4 lw a4,0(sp) bne a4,a5,.L5 lw a4,132(sp) lw a5,68(sp) addw a5,a5,a4 lw a4,4(sp) bne a4,a5,.L5 lw a4,136(sp) lw a5,72(sp) addw a5,a5,a4 lw a4,8(sp) bne a4,a5,.L5 lw a4,140(sp) lw a5,76(sp) addw a5,a5,a4 lw a4,12(sp) bne a4,a5,.L5 lw a4,144(sp) lw a5,80(sp) addw a5,a5,a4 lw a4,16(sp) bne a4,a5,.L5 lw a4,148(sp) lw a5,84(sp) addw a5,a5,a4 lw a4,20(sp) bne a4,a5,.L5 lw a4,152(sp) lw a5,88(sp) addw a5,a5,a4 lw a4,24(sp) bne a4,a5,.L5 lw a4,156(sp) lw a5,92(sp) addw a5,a5,a4 lw a4,28(sp) bne a4,a5,.L5 lw a4,160(sp) lw a5,96(sp) addw a5,a5,a4 lw a4,32(sp) bne a4,a5,.L5 lw a4,164(sp) lw a5,100(sp) addw a5,a5,a4 lw a4,36(sp) bne a4,a5,.L5 lw a4,168(sp) lw a5,104(sp) addw a5,a5,a4 lw a4,40(sp) bne a4,a5,.L5 lw a4,172(sp) lw a5,108(sp) addw a5,a5,a4 lw a4,44(sp) bne a4,a5,.L5 lw a4,176(sp) lw a5,112(sp) addw a5,a5,a4 lw a4,48(sp) bne a4,a5,.L5 lw a4,180(sp) lw a5,116(sp) addw a5,a5,a4 lw a4,52(sp) bne a4,a5,.L5 lw a4,184(sp) lw a5,120(sp) addw a5,a5,a4 lw a4,56(sp) bne a4,a5,.L5 lw a4,188(sp) lw a5,124(sp) addw a5,a5,a4 lw a4,60(sp) bne a4,a5,.L5 ld ra,200(sp) li a0,0 addi sp,sp,208 jr ra .L5: call abort After this patch: li a0,0 ret The heuristic is borrowed from the ARM SVE cost model. It has been fully tested, and we confirmed that the behavior matches ARM SVE GCC and RVV Clang. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (costs::analyze_loop_vinfo): New function. (costs::record_potential_vls_unrolling): Ditto. (costs::prefer_unrolled_loop): Ditto. (costs::better_main_loop_than_p): Ditto. (costs::add_stmt_cost): Ditto.
* config/riscv/riscv-vector-costs.h (enum cost_type_enum): New enum. * config/riscv/t-riscv: Add new include files. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr111313.c: Adapt test. * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Ditto. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: New test. * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: New test. --- gcc/config/riscv/riscv-vector-costs.cc | 134 +++++++++++++++++- gcc/config/riscv/riscv-vector-costs.h | 43 ++++++ gcc/config/riscv/t-riscv | 2 +- .../vect/costmodel/riscv/rvv/vla_vs_vls-1.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-10.c | 28 ++++ .../vect/costmodel/riscv/rvv/vla_vs_vls-11.c | 28 ++++ .../vect/costmodel/riscv/rvv/vla_vs_vls-12.c | 28 ++++ .../vect/costmodel/riscv/rvv/vla_vs_vls-2.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-3.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-4.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-5.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-6.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-7.c | 13 ++ .../vect/costmodel/riscv/rvv/vla_vs_vls-8.c | 27 ++++ .../vect/costmodel/riscv/rvv/vla_vs_vls-9.c | 27 ++++ .../gcc.target/riscv/rvv/autovec/pr111313.c | 2 +- .../riscv/rvv/autovec/vls/shift-3.c | 2 +- 17 files changed, 408 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c create mode 
100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 3fcb5f3176f7..7888cef58fe9 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "ssa.h" #include "backend.h" #include "tree-data-ref.h" +#include "tree-ssa-loop-niter.h" /* This file should be included last. */ #include "riscv-vector-costs.h" @@ -601,7 +602,101 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo) costs::costs (vec_info *vinfo, bool costing_for_scalar) : vector_costs (vinfo, costing_for_scalar) -{} +{ + if (costing_for_scalar) + m_cost_type = SCALAR_COST; + else if (riscv_v_ext_vector_mode_p (vinfo->vector_mode)) + m_cost_type = VLA_VECTOR_COST; + else + m_cost_type = VLS_VECTOR_COST; +} + +/* Do one-time initialization of the costs given that we're + costing the loop vectorization described by LOOP_VINFO. */ +void +costs::analyze_loop_vinfo (loop_vec_info loop_vinfo) +{ + /* Record the number of times that the vector loop would execute, + if known. 
*/ + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + auto scalar_niters = max_stmt_executions_int (loop); + if (scalar_niters >= 0) + { + unsigned int vf = vect_vf_for_cost (loop_vinfo); + if (LOOP_VINFO_LENS (loop_vinfo).is_empty ()) + m_num_vector_iterations = scalar_niters / vf; + else + m_num_vector_iterations = CEIL (scalar_niters, vf); + } + + /* Detect whether we're vectorizing for VLA and should apply the unrolling + heuristic described above m_unrolled_vls_niters. */ + record_potential_vls_unrolling (loop_vinfo); +} + +/* Decide whether to use the unrolling heuristic described above + m_unrolled_vls_niters, updating that field if so. LOOP_VINFO + describes the loop that we're vectorizing. */ +void +costs::record_potential_vls_unrolling (loop_vec_info loop_vinfo) +{ + /* We only want to apply the heuristic if LOOP_VINFO is being + vectorized for VLA. */ + if (m_cost_type != VLA_VECTOR_COST) + return; + + /* We don't want to apply the heuristic to outer loops, since it's + harder to track two levels of unrolling. */ + if (LOOP_VINFO_LOOP (loop_vinfo)->inner) + return; + + /* Only handle cases in which the number of VLS iterations + would be known at compile time but the number of VLA iterations + would not. */ + if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + || BYTES_PER_RISCV_VECTOR.is_constant ()) + return; + + /* Guess how many times the VLS loop would iterate and make + sure that it is within the complete unrolling limit. Even if the + number of iterations is small enough, the number of statements might + not be, which is why we need to estimate the number of statements too. */ + unsigned int vls_vf = vect_vf_for_cost (loop_vinfo); + unsigned HOST_WIDE_INT unrolled_vls_niters + = LOOP_VINFO_INT_NITERS (loop_vinfo) / vls_vf; + if (unrolled_vls_niters > (unsigned int) param_max_completely_peel_times) + return; + + /* Record that we're applying the heuristic and should try to estimate + the number of statements in the VLS loop. 
*/ + m_unrolled_vls_niters = unrolled_vls_niters; +} + +/* Return true if (a) we're applying the VLS vs. VLA unrolling + heuristic described above m_unrolled_vls_niters and (b) the heuristic + says that we should prefer the VLS loop. */ +bool +costs::prefer_unrolled_loop () const +{ + if (!m_unrolled_vls_stmts) + return false; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Number of insns in" + " unrolled VLS loop = " HOST_WIDE_INT_PRINT_UNSIGNED "\n", + m_unrolled_vls_stmts); + + /* The balance here is tricky. On the one hand, we can't be sure whether + the code is vectorizable with VLS or not. However, even if + it isn't vectorizable with VLS, there's a possibility that + the scalar code could also be unrolled. Some of the code might then + benefit from SLP, or from using LDP and STP. We therefore apply + the heuristic regardless of can_use_vls_p. */ + return (m_unrolled_vls_stmts + && (m_unrolled_vls_stmts + <= (unsigned int) param_max_completely_peeled_insns)); +} bool costs::better_main_loop_than_p (const vector_costs *uncast_other) const @@ -618,6 +713,21 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const GET_MODE_NAME (other_loop_vinfo->vector_mode), vect_vf_for_cost (other_loop_vinfo)); + /* Apply the unrolling heuristic described above m_unrolled_vls_niters. */ + if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts)) + { + bool this_prefer_unrolled = this->prefer_unrolled_loop (); + bool other_prefer_unrolled = other->prefer_unrolled_loop (); + if (this_prefer_unrolled != other_prefer_unrolled) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Preferring VLS loop because" + " it can be unrolled\n"); + return other_prefer_unrolled; + } + } + if (!LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo) && riscv_autovec_lmul == RVV_DYNAMIC) { @@ -643,6 +753,28 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind, /* TODO: Use default STMT cost model. 
We will support more accurate STMT cost model later. */ int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign); + + /* Do one-time initialization based on the vinfo. */ + loop_vec_info loop_vinfo = dyn_cast (m_vinfo); + if (!m_analyzed_vinfo) + { + if (loop_vinfo) + analyze_loop_vinfo (loop_vinfo); + + m_analyzed_vinfo = true; + } + + if (stmt_info) + { + /* If we're applying the VLA vs. VLS unrolling heuristic, + estimate the number of statements in the unrolled VLS + loop. For simplicity, we assume that one iteration of the + VLS loop would need the same number of statements + as one iteration of the VLA loop. */ + if (where == vect_body && m_unrolled_vls_niters) + m_unrolled_vls_stmts += count * m_unrolled_vls_niters; + } + return record_stmt_cost (stmt_info, where, count * stmt_cost); } diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h index e18775e230b1..ff294a60aaf8 100644 --- a/gcc/config/riscv/riscv-vector-costs.h +++ b/gcc/config/riscv/riscv-vector-costs.h @@ -30,6 +30,13 @@ struct stmt_point gimple *stmt; }; +enum cost_type_enum +{ + SCALAR_COST, + VLA_VECTOR_COST, + VLS_VECTOR_COST +}; + /* Pair typedef used by live range: . */ typedef std::pair pair; @@ -49,6 +56,42 @@ private: tree vectype, int misalign, vect_cost_model_location where) override; void finish_cost (const vector_costs *) override; + + /* True if we have performed one-time initialization based on the + vec_info. */ + bool m_analyzed_vinfo = false; + + /* - If M_COST_TYPE = SCALAR_COST then we're costing the original scalar code. + - If M_COST_TYPE = VLA_VECTOR_COST then we're costing VLA + partial vectorization codes. + - If M_COST_TYPE = VLS_VECTOR_COST then we're costing VLS + minimum length vector codes. */ + enum cost_type_enum m_cost_type; + + /* On some CPUs, VLA and VLS provide the same theoretical vector + throughput, such as 4x128 VLS vs. 2x256 VLA. 
In those + situations, we try to predict whether a VLS implementation + of the loop could be completely unrolled and become straight-line code. + If so, it is generally better to use the VLS version rather + than length-agnostic VLA, since the VLA loop would execute an unknown + number of times and so could not be completely unrolled in the same way. + + If we're applying this heuristic, M_UNROLLED_VLS_NITERS is the + number of VLS loop iterations that would be unrolled and + M_UNROLLED_VLS_STMTS estimates the total number of statements + in the unrolled loop. Both values are zero if we're not applying + the heuristic. */ + unsigned HOST_WIDE_INT m_unrolled_vls_niters = 0; + unsigned HOST_WIDE_INT m_unrolled_vls_stmts = 0; + + /* If we're vectorizing a loop that executes a constant number of times, + this variable gives the number of times that the vector loop would + iterate, otherwise it is zero. */ + uint64_t m_num_vector_iterations = 0; + + void analyze_loop_vinfo (loop_vec_info); + void record_potential_vls_unrolling (loop_vec_info); + bool prefer_unrolled_loop () const; }; } // namespace riscv_vector diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 1aac8b58bb52..2b2ec4095254 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -74,7 +74,7 @@ riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(FUNCTION_H) \ $(TREE_H) basic-block.h $(RTL_H) gimple.h targhooks.h cfgloop.h \ fold-const.h $(TM_P_H) tree-vectorizer.h gimple-iterator.h bitmap.h \ - ssa.h backend.h \ + ssa.h backend.h tree-data-ref.h tree-ssa-loop-niter.h \ $(srcdir)/config/riscv/riscv-vector-costs.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-costs.cc diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c new file mode 100644 index 
000000000000..1ef4215e72ca --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 16; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c new file mode 100644 index 000000000000..3ddffa37fe49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */ + +#include + +#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (){ + int i,j; + int sum,x; + + for (i = 0; i < N; i++) { + sum = 0; + for (j = 0; j < N; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 2 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c new file mode 100644 index 000000000000..7625ec5c4b17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */ + +#include + 
+#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (){ + int i,j; + int sum,x; + + for (i = 0; i < N; i++) { + sum = 0; + for (j = 0; j < N; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c new file mode 100644 index 000000000000..7625ec5c4b17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */ + +#include + +#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (){ + int i,j; + int sum,x; + + for (i = 0; i < N; i++) { + sum = 0; + for (j = 0; j < N; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c new file mode 100644 index 000000000000..ca203f508471 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */ + +void +foo (int *__restrict a, int *__restrict b, int 
*__restrict c) +{ + for (int i = 0; i < 16; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c new file mode 100644 index 000000000000..f8e533507856 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 16; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c new file mode 100644 index 000000000000..4859d570c0c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 16; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c new file mode 100644 index 000000000000..8a568028bcf9 --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 16; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c new file mode 100644 index 000000000000..46ebd5fd49b6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 32; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetivli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c new file mode 100644 index 000000000000..f5aceca32d7c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */ + +void +foo (int *__restrict a, int *__restrict b, int *__restrict c) +{ + for (int i = 0; i < 32; i++) + a[i] = b[i] + c[i]; +} + +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ +/* { 
dg-final { scan-assembler-not {vsetivli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c new file mode 100644 index 000000000000..ea6a7cbe2b17 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include + +#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (){ + int i,j; + int sum,x; + + for (i = 0; i < N; i++) { + sum = 0; + for (j = 0; j < N; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c new file mode 100644 index 000000000000..7f03cb9ecbe0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */ + +#include + +#define N 40 + +int a[N]; + +__attribute__ ((noinline)) int +foo (){ + int i,j; + int sum,x; + + for (i = 0; i < N; i++) { + sum = 0; + for (j = 0; j < N; j++) { + sum += (i + j); + } + a[i] = sum; + } + return 0; +} + +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-not {vsetvli} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c index 1e01cfefd477..a4f8c37f95d9 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c @@ -1,5 
+1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2 -fno-vect-cost-model" } */ #define K 32 short in[2*K][K]; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c index e25e7b59c3eb..8de1b9c0c41d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c @@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, <<) DEF_OP_VV (shift, 256, int64_t, <<) DEF_OP_VV (shift, 512, int64_t, <<) -/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 43 } } */ +/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 46 } } */ /* { dg-final { scan-assembler-not {csrr} } } */ From b75683039960a621a0d7dd7c42eea32b7d2eff4d Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 14:41:39 -0500 Subject: [PATCH 246/311] c++: class hotness attribute and member template The FUNCTION_DECL check ignored member function templates. gcc/cp/ChangeLog: * class.cc (propagate_class_warmth_attribute): Handle member templates. gcc/testsuite/ChangeLog: * g++.dg/ext/attr-hotness.C: Add member templates. 
Co-authored-by: Jason Xu --- gcc/cp/class.cc | 4 ++-- gcc/testsuite/g++.dg/ext/attr-hotness.C | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc index 6fdb56abfb9f..1954e0a5ed3f 100644 --- a/gcc/cp/class.cc +++ b/gcc/cp/class.cc @@ -7805,8 +7805,8 @@ propagate_class_warmth_attribute (tree t) if (class_has_cold_attr || class_has_hot_attr) for (tree f = TYPE_FIELDS (t); f; f = DECL_CHAIN (f)) - if (TREE_CODE (f) == FUNCTION_DECL) - maybe_propagate_warmth_attributes (f, t); + if (DECL_DECLARES_FUNCTION_P (f)) + maybe_propagate_warmth_attributes (STRIP_TEMPLATE (f), t); } tree diff --git a/gcc/testsuite/g++.dg/ext/attr-hotness.C b/gcc/testsuite/g++.dg/ext/attr-hotness.C index f9a6930304de..24aa089ead30 100644 --- a/gcc/testsuite/g++.dg/ext/attr-hotness.C +++ b/gcc/testsuite/g++.dg/ext/attr-hotness.C @@ -2,15 +2,23 @@ /* { dg-options "-O0 -Wattributes -fdump-tree-gimple" } */ -struct __attribute((cold)) A { __attribute((noinline, used)) void foo(void) { } }; +struct __attribute((cold)) A { + __attribute((noinline, used)) void foo(void) { } + template void bar() {} +}; +template void A::bar(); -struct __attribute((hot)) B { __attribute((noinline, used)) void foo(void) { } }; +struct __attribute((hot)) B { + __attribute((noinline, used)) void foo(void) { } + template void bar() {} +}; +template void B::bar(); struct __attribute((hot, cold)) C { __attribute((noinline, used)) void foo(void) { } }; /* { dg-warning "ignoring attribute .cold. because it conflicts with attribute .hot." } */ struct __attribute((cold, hot)) D { __attribute((noinline, used)) void foo(void) { } }; /* { dg-warning "ignoring attribute .hot. because it conflicts with attribute .cold." 
} */ -/* { dg-final { scan-tree-dump-times "cold" 2 "gimple" } } */ -/* { dg-final { scan-tree-dump-times "hot" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "cold" 3 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "hot" 3 "gimple" } } */ From 8a5d6ce0e83c17a48541d27758291d6f7ef5c829 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 13 Dec 2023 00:17:49 +0000 Subject: [PATCH 247/311] Daily bump. --- ChangeLog | 12 ++ gcc/ChangeLog | 439 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 6 + gcc/cp/ChangeLog | 14 ++ gcc/m2/ChangeLog | 37 ++++ gcc/testsuite/ChangeLog | 144 +++++++++++++ libgm2/ChangeLog | 9 + libquadmath/ChangeLog | 8 + libstdc++-v3/ChangeLog | 20 ++ 10 files changed, 690 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index c9a1e91e8026..236f420c5d60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2023-12-12 Paul Iannetta + + * MAINTAINERS: Add myself to write after approval + +2023-12-12 Feng Wang + + * MAINTAINERS: Update my email address + +2023-12-12 Feng Wang + + * MAINTAINERS: Add myself to write after approval + 2023-11-23 Nathaniel Shead * MAINTAINERS: Add myself to write after approval and DCO diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 323e6ad3d070..bef20da0b6aa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,442 @@ +2023-12-12 Juzhe-Zhong + + * config/riscv/riscv-vector-costs.cc (costs::analyze_loop_vinfo): New function. + (costs::record_potential_vls_unrolling): Ditto. + (costs::prefer_unrolled_loop): Ditto. + (costs::better_main_loop_than_p): Ditto. + (costs::add_stmt_cost): Ditto. + * config/riscv/riscv-vector-costs.h (enum cost_type_enum): New enum. + * config/riscv/t-riscv: Add new include files. + +2023-12-12 Juzhe-Zhong + + * config/riscv/riscv-vector-costs.cc (get_current_lmul): Remove it. + (compute_estimated_lmul): New function. + (costs::costs): Refactor. + (costs::preferred_new_lmul_p): Ditto. + (preferred_new_lmul_p): Ditto. 
+ (costs::better_main_loop_than_p): Ditto. + * config/riscv/riscv-vector-costs.h (struct autovec_info): Remove it. + +2023-12-12 Martin Jambor + + PR tree-optimization/112822 + * tree-sra.cc (load_assign_lhs_subreplacements): Invoke + force_gimple_operand_gsi also when LHS has partial stores and RHS is a + VIEW_CONVERT_EXPR. + +2023-12-12 Jason Merrill + Nathaniel Shead + + * tree-core.h (enum clobber_kind): Rename CLOBBER_EOL to + CLOBBER_STORAGE_END. Add CLOBBER_STORAGE_BEGIN, + CLOBBER_OBJECT_BEGIN, CLOBBER_OBJECT_END. + * gimple-lower-bitint.cc + * gimple-ssa-warn-access.cc + * gimplify.cc + * tree-inline.cc + * tree-ssa-ccp.cc: Adjust for rename. + * tree-pretty-print.cc: And handle new values. + +2023-12-12 Szabolcs Nagy + + * config/aarch64/aarch64.cc (aarch64_override_options): Update. + (aarch64_handle_attr_branch_protection): Update. + * config/arm/aarch-common-protos.h (aarch_parse_branch_protection): + Remove. + (aarch_validate_mbranch_protection): Add new argument. + * config/arm/aarch-common.cc (aarch_handle_no_branch_protection): + Update. + (aarch_handle_standard_branch_protection): Update. + (aarch_handle_pac_ret_protection): Update. + (aarch_handle_pac_ret_leaf): Update. + (aarch_handle_pac_ret_b_key): Update. + (aarch_handle_bti_protection): Update. + (aarch_parse_branch_protection): Remove. + (next_tok): New. + (aarch_validate_mbranch_protection): Rewrite. + * config/arm/aarch-common.h (struct aarch_branch_protect_type): + Add field "alone". + * config/arm/arm.cc (arm_configure_build_target): Update. + +2023-12-12 Szabolcs Nagy + + * config/aarch64/aarch64.cc (aarch64_override_options_after_change_1): + Do not override branch_protection options. + (aarch64_override_options): Remove accepted_branch_protection_string. + * config/arm/aarch-common.cc (BRANCH_PROTECT_STR_MAX): Remove. + (aarch_parse_branch_protection): Remove + accepted_branch_protection_string. + * config/arm/arm.cc: Likewise. 
+ +2023-12-12 Richard Biener + + PR tree-optimization/112736 + * tree-vect-stmts.cc (vectorizable_load): Extend optimization + to avoid peeling for gaps to handle single-element non-groups + we now allow with SLP. + +2023-12-12 Richard Biener + + PR ipa/92606 + * ipa-icf.cc (sem_item_optimizer::merge_classes): Check + both source and alias for the no_icf attribute. + * doc/extend.texi (no_icf): Document variable attribute. + +2023-12-12 Richard Biener + + PR tree-optimization/112961 + * tree-if-conv.cc (tree_if_conversion): Instead of excluding + the latch block from VN, add a fake entry edge. + +2023-12-12 Xi Ruoyao + + PR middle-end/107723 + * convert.cc (convert_to_integer_1) [case BUILT_IN_TRUNC]: Break + early if !flag_fp_int_builtin_inexact and flag_trapping_math. + +2023-12-12 Pan Li + + * config/riscv/riscv-avlprop.cc (avl_can_be_propagated_p): + Disable the avl propagation for the vcompress. + +2023-12-12 Xi Ruoyao + + * config/loongarch/loongarch-opts.h (la_target): Move into #if + for loongarch-def.h. + (loongarch_init_target): Likewise. + (loongarch_config_target): Likewise. + (loongarch_update_gcc_opt_status): Likewise. + +2023-12-12 Xi Ruoyao + + * config/loongarch/loongarch.cc (loongarch_explicit_relocs_p): + Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP + if TARGET_CMODEL_EXTREME. + (loongarch_split_symbol): Check for la_opt_explicit_relocs != + EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS. + (loongarch_print_operand_reloc): Likewise. + (loongarch_option_override_internal): Likewise. + (loongarch_handle_model_attribute): Likewise. + * doc/invoke.texi (-mcmodel=extreme): Update the compatibility + between it and -mexplicit-relocs=. + +2023-12-12 Richard Biener + + PR tree-optimization/112939 + * tree-ssa-sccvn.cc (visit_phi): When all args are undefined + make sure we end up with a value that was visited, otherwise + fall back to .VN_TOP. 
+ +2023-12-12 liuhongt + + PR target/112891 + * config/i386/i386.cc (ix86_avx_u128_mode_after): Return + AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to + align with ix86_avx_u128_mode_needed. + (ix86_avx_u128_mode_needed): Return AVX_U128_CLEAN for + sibling_call. + +2023-12-12 Alexandre Oliva + + PR target/112334 + * builtins.h (target_builtins): Add fields for apply_args_size + and apply_result_size. + * builtins.cc (apply_args_size, apply_result_size): Cache + results in fields rather than in static variables. + (get_apply_args_size, set_apply_args_size): New. + (get_apply_result_size, set_apply_result_size): New. + +2023-12-12 Hongyu Wang + + PR target/112943 + * config/i386/i386.md (ashl3): Add TARGET_APX_NDD to + ix86_expand_binary_operator call. + (3): Likewise for rshift. + (di3): Likewise for DImode rotate. + (3): Likewise for SWI124 rotate. + +2023-12-12 Feng Wang + + * config/riscv/riscv-vector-builtins-functions.def (DEF_RVV_FUNCTION): + Add AVAIL argument. + (read_vl): Using AVAIL argument default value. + (vlenb): Ditto. + (vsetvl): Ditto. + (vsetvlmax): Ditto. + (vle): Ditto. + (vse): Ditto. + (vlm): Ditto. + (vsm): Ditto. + (vlse): Ditto. + (vsse): Ditto. + (vluxei8): Ditto. + (vluxei16): Ditto. + (vluxei32): Ditto. + (vluxei64): Ditto. + (vloxei8): Ditto. + (vloxei16): Ditto. + (vloxei32): Ditto. + (vloxei64): Ditto. + (vsuxei8): Ditto. + (vsuxei16): Ditto. + (vsuxei32): Ditto. + (vsuxei64): Ditto. + (vsoxei8): Ditto. + (vsoxei16): Ditto. + (vsoxei32): Ditto. + (vsoxei64): Ditto. + (vleff): Ditto. + (vadd): Ditto. + (vsub): Ditto. + (vrsub): Ditto. + (vneg): Ditto. + (vwaddu): Ditto. + (vwsubu): Ditto. + (vwadd): Ditto. + (vwsub): Ditto. + (vwcvt_x): Ditto. + (vwcvtu_x): Ditto. + (vzext): Ditto. + (vsext): Ditto. + (vadc): Ditto. + (vmadc): Ditto. + (vsbc): Ditto. + (vmsbc): Ditto. + (vand): Ditto. + (vor): Ditto. + (vxor): Ditto. + (vnot): Ditto. + (vsll): Ditto. + (vsra): Ditto. + (vsrl): Ditto. + (vnsrl): Ditto. + (vnsra): Ditto. 
+ (vncvt_x): Ditto. + (vmseq): Ditto. + (vmsne): Ditto. + (vmsltu): Ditto. + (vmslt): Ditto. + (vmsleu): Ditto. + (vmsle): Ditto. + (vmsgtu): Ditto. + (vmsgt): Ditto. + (vmsgeu): Ditto. + (vmsge): Ditto. + (vminu): Ditto. + (vmin): Ditto. + (vmaxu): Ditto. + (vmax): Ditto. + (vmul): Ditto. + (vmulh): Ditto. + (vmulhu): Ditto. + (vmulhsu): Ditto. + (vdivu): Ditto. + (vdiv): Ditto. + (vremu): Ditto. + (vrem): Ditto. + (vwmul): Ditto. + (vwmulu): Ditto. + (vwmulsu): Ditto. + (vmacc): Ditto. + (vnmsac): Ditto. + (vmadd): Ditto. + (vnmsub): Ditto. + (vwmaccu): Ditto. + (vwmacc): Ditto. + (vwmaccsu): Ditto. + (vwmaccus): Ditto. + (vmerge): Ditto. + (vmv_v): Ditto. + (vsaddu): Ditto. + (vsadd): Ditto. + (vssubu): Ditto. + (vssub): Ditto. + (vaaddu): Ditto. + (vaadd): Ditto. + (vasubu): Ditto. + (vasub): Ditto. + (vsmul): Ditto. + (vssrl): Ditto. + (vssra): Ditto. + (vnclipu): Ditto. + (vnclip): Ditto. + (vfadd): Ditto. + (vfsub): Ditto. + (vfrsub): Ditto. + (vfadd_frm): Ditto. + (vfsub_frm): Ditto. + (vfrsub_frm): Ditto. + (vfwadd): Ditto. + (vfwsub): Ditto. + (vfwadd_frm): Ditto. + (vfwsub_frm): Ditto. + (vfmul): Ditto. + (vfdiv): Ditto. + (vfrdiv): Ditto. + (vfmul_frm): Ditto. + (vfdiv_frm): Ditto. + (vfrdiv_frm): Ditto. + (vfwmul): Ditto. + (vfwmul_frm): Ditto. + (vfmacc): Ditto. + (vfnmsac): Ditto. + (vfmadd): Ditto. + (vfnmsub): Ditto. + (vfnmacc): Ditto. + (vfmsac): Ditto. + (vfnmadd): Ditto. + (vfmsub): Ditto. + (vfmacc_frm): Ditto. + (vfnmacc_frm): Ditto. + (vfmsac_frm): Ditto. + (vfnmsac_frm): Ditto. + (vfmadd_frm): Ditto. + (vfnmadd_frm): Ditto. + (vfmsub_frm): Ditto. + (vfnmsub_frm): Ditto. + (vfwmacc): Ditto. + (vfwnmacc): Ditto. + (vfwmsac): Ditto. + (vfwnmsac): Ditto. + (vfwmacc_frm): Ditto. + (vfwnmacc_frm): Ditto. + (vfwmsac_frm): Ditto. + (vfwnmsac_frm): Ditto. + (vfsqrt): Ditto. + (vfsqrt_frm): Ditto. + (vfrsqrt7): Ditto. + (vfrec7): Ditto. + (vfrec7_frm): Ditto. + (vfmin): Ditto. + (vfmax): Ditto. + (vfsgnj): Ditto. + (vfsgnjn): Ditto. 
+ (vfsgnjx): Ditto. + (vfneg): Ditto. + (vfabs): Ditto. + (vmfeq): Ditto. + (vmfne): Ditto. + (vmflt): Ditto. + (vmfle): Ditto. + (vmfgt): Ditto. + (vmfge): Ditto. + (vfclass): Ditto. + (vfmerge): Ditto. + (vfmv_v): Ditto. + (vfcvt_x): Ditto. + (vfcvt_xu): Ditto. + (vfcvt_rtz_x): Ditto. + (vfcvt_rtz_xu): Ditto. + (vfcvt_f): Ditto. + (vfcvt_x_frm): Ditto. + (vfcvt_xu_frm): Ditto. + (vfcvt_f_frm): Ditto. + (vfwcvt_x): Ditto. + (vfwcvt_xu): Ditto. + (vfwcvt_rtz_x): Ditto. + (vfwcvt_rtz_xu): Ditto. + (vfwcvt_f): Ditto. + (vfwcvt_x_frm): Ditto. + (vfwcvt_xu_frm): Ditto. + (vfncvt_x): Ditto. + (vfncvt_xu): Ditto. + (vfncvt_rtz_x): Ditto. + (vfncvt_rtz_xu): Ditto. + (vfncvt_f): Ditto. + (vfncvt_rod_f): Ditto. + (vfncvt_x_frm): Ditto. + (vfncvt_xu_frm): Ditto. + (vfncvt_f_frm): Ditto. + (vredsum): Ditto. + (vredmaxu): Ditto. + (vredmax): Ditto. + (vredminu): Ditto. + (vredmin): Ditto. + (vredand): Ditto. + (vredor): Ditto. + (vredxor): Ditto. + (vwredsum): Ditto. + (vwredsumu): Ditto. + (vfredusum): Ditto. + (vfredosum): Ditto. + (vfredmax): Ditto. + (vfredmin): Ditto. + (vfredusum_frm): Ditto. + (vfredosum_frm): Ditto. + (vfwredosum): Ditto. + (vfwredusum): Ditto. + (vfwredosum_frm): Ditto. + (vfwredusum_frm): Ditto. + (vmand): Ditto. + (vmnand): Ditto. + (vmandn): Ditto. + (vmxor): Ditto. + (vmor): Ditto. + (vmnor): Ditto. + (vmorn): Ditto. + (vmxnor): Ditto. + (vmmv): Ditto. + (vmclr): Ditto. + (vmset): Ditto. + (vmnot): Ditto. + (vcpop): Ditto. + (vfirst): Ditto. + (vmsbf): Ditto. + (vmsif): Ditto. + (vmsof): Ditto. + (viota): Ditto. + (vid): Ditto. + (vmv_x): Ditto. + (vmv_s): Ditto. + (vfmv_f): Ditto. + (vfmv_s): Ditto. + (vslideup): Ditto. + (vslidedown): Ditto. + (vslide1up): Ditto. + (vslide1down): Ditto. + (vfslide1up): Ditto. + (vfslide1down): Ditto. + (vrgather): Ditto. + (vrgatherei16): Ditto. + (vcompress): Ditto. + (vundefined): Ditto. + (vreinterpret): Ditto. + (vlmul_ext): Ditto. + (vlmul_trunc): Ditto. + (vset): Ditto. + (vget): Ditto. + (vcreate): Ditto. 
+ (vlseg): Ditto. + (vsseg): Ditto. + (vlsseg): Ditto. + (vssseg): Ditto. + (vluxseg): Ditto. + (vloxseg): Ditto. + (vsuxseg): Ditto. + (vsoxseg): Ditto. + (vlsegff): Ditto. + * config/riscv/riscv-vector-builtins.cc (DEF_RVV_FUNCTION): Using variadic macro. + * config/riscv/riscv-vector-builtins.h (struct function_group_info): + Add avail function interface into struct. + * config/riscv/t-riscv: Add dependency. + * config/riscv/riscv-vector-builtins-avail.h: New file. The definition of AVAIL macro. + +2023-12-12 Juzhe-Zhong + + * config/riscv/riscv-protos.h (estimated_poly_value): New function. + * config/riscv/riscv-v.cc (estimated_poly_value): Ditto. + * config/riscv/riscv.cc (riscv_estimated_poly_value): Move RVV POLY + VALUE estimation to riscv-v.cc + +2023-12-12 Yang Yujie + + * config/loongarch/loongarch.cc: Do not restore the saved eh_return + data registers ($r4-$r7) for a normal return of a function that calls + __builtin_eh_return elsewhere. + * config/loongarch/loongarch-protos.h: Same. + * config/loongarch/loongarch.md: Same. + 2023-12-11 Richard Sandiford * recog.cc (constrain_operands): Pass VOIDmode to diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 190b92f716be..ffab8353875e 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231212 +20231213 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 97e2cfdc449f..c6768968bd5b 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2023-12-12 Richard Biener + + PR ipa/92606 + * c-attribs.cc (handle_noicf_attribute): Also allow the + attribute on global variables. + 2023-12-10 Ken Matsui Patrick Palka diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 56089138a0b4..5aec6493de4f 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,17 @@ +2023-12-12 Jason Merrill + Jason Xu + + * class.cc (propagate_class_warmth_attribute): Handle + member templates. 
+ +2023-12-12 Jason Merrill + Nathaniel Shead + + * call.cc (build_trivial_dtor_call): Use CLOBBER_OBJECT_END. + * decl.cc (build_clobber_this): Take clobber_kind argument. + (start_preparsed_function): Pass CLOBBER_OBJECT_BEGIN. + (begin_destructor_body): Pass CLOBBER_OBJECT_END. + 2023-12-11 Patrick Palka * pt.cc (alias_ctad_tweaks): Pass use_spec_table=false to diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index c0ce63deb97d..2e479e4394a5 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,40 @@ +2023-12-12 Gaius Mulley + + PR modula2/112984 + * gm2-libs-coroutines/SYSTEM.mod: Remove redundant import of memcpy. + * gm2-libs-iso/ClientSocket.mod: Remove redundant import of IOConsts. + * gm2-libs-iso/IOChan.mod: Remove redundant import of IOConsts. + * gm2-libs-iso/IOLink.mod: Remove redundant import of IOChan and SYSTEM. + * gm2-libs-iso/IOResult.mod: Remove redundant import of IOChan. + * gm2-libs-iso/LongIO.mod: Remove redundant import of writeString. + * gm2-libs-iso/LongWholeIO.mod: Remove redundant import of IOChan. + * gm2-libs-iso/M2RTS.mod: Remove redundant import of ADDRESS. + * gm2-libs-iso/MemStream.mod: Remove redundant import of ADDRESS. + * gm2-libs-iso/RTdata.mod: Remove redundant import of DeviceTablePtr. + * gm2-libs-iso/RTfio.mod: Remove redundant import of DeviceTablePtr. + * gm2-libs-iso/RTgen.mod: Remove redundant import of DeviceTablePtr. + * gm2-libs-iso/RealIO.mod: Remove redundant import of writeString. + * gm2-libs-iso/RndFile.mod: Remove redundant import of SYSTEM. + * gm2-libs-iso/SYSTEM.mod: Remove redundant import of memcpy. + * gm2-libs-iso/ShortWholeIO.mod: Remove redundant import of IOConsts. + * gm2-libs-iso/TextIO.mod: Remove redundant import of IOChan. + * gm2-libs-iso/TextUtil.mod: Remove redundant import of IOChan. + * gm2-libs-iso/WholeIO.mod: Remove redundant import of IOChan. + * gm2-libs-log/BitByteOps.mod: Remove redundant import of BYTE. 
+ * gm2-libs-log/FileSystem.mod: Remove redundant import of BYTE and ADDRESS. + * gm2-libs-log/InOut.mod: Remove redundant import of String. + * gm2-libs-log/RealConversions.mod: Remove redundant import of StringToLongreal. + * gm2-libs/FIO.mod: Remove redundant import of SIZE. + * gm2-libs/FormatStrings.mod: Remove redundant import of String + and ConCatChar. + * gm2-libs/IO.mod: Remove redundant import of SIZE. + * gm2-libs/Indexing.mod: Remove redundant import of ADDRESS. + * gm2-libs/M2Dependent.mod: Remove redundant import of SIZE. + * gm2-libs/M2RTS.mod: Remove redundant import of ADDRESS. + * gm2-libs/OptLib.mod: Remove redundant import of DynamicStrings. + * gm2-libs/SYSTEM.mod: Remove redundant import of memcpy. + * gm2-libs/StringConvert.mod: Remove redundant import of String. + 2023-12-07 Gaius Mulley PR modula2/112893 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d76225408932..551015f80d35 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,147 @@ +2023-12-12 Jason Merrill + Jason Xu + + * g++.dg/ext/attr-hotness.C: Add member templates. + +2023-12-12 Juzhe-Zhong + + * gcc.target/riscv/rvv/autovec/pr111313.c: Adapt test. + * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Ditto. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c: New test. + * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: New test. 
+ * gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: New test. + +2023-12-12 Juzhe-Zhong + + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul-mixed-1.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-3.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul1-6.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-1.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-2.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-3.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-4.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-5.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul2-6.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-1.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-2.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-4.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-6.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-7.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-8.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-9.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-1.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-10.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-2.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-3.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-4.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-5.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-6.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-7.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-8.c: Adapt test. + * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul8-9.c: Adapt test. 
+ * gcc.dg/vect/costmodel/riscv/rvv/pr111848.c: Adapt test. + +2023-12-12 Peter Bergner + + PR tree-optimization/112822 + * g++.dg/pr112822.C: New test. + +2023-12-12 Gaius Mulley + + PR modula2/112984 + * gm2/switches/pedantic/pass/hello.mod: New test. + * gm2/switches/pedantic/pass/switches-pedantic-pass.exp: New test. + +2023-12-12 Xi Ruoyao + + * gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove. + +2023-12-12 Jason Merrill + + * g++.dg/ext/is_nothrow_constructible8.C: Require C++14. + +2023-12-12 Jason Merrill + Nathaniel Shead + + * gcc.dg/pr87052.c: Adjust expected CLOBBER output. + +2023-12-12 Szabolcs Nagy + + * gcc.target/aarch64/branch-protection-attr.c: Update. + * gcc.target/aarch64/branch-protection-option.c: Update. + +2023-12-12 Richard Biener + + PR tree-optimization/112736 + * gcc.dg/torture/pr112736.c: New testcase. + +2023-12-12 Richard Biener + + PR tree-optimization/112961 + * g++.dg/vect/pr112961.cc: New testcase. + +2023-12-12 Jakub Jelinek + + * gcc.dg/gomp/pr87887-1.c: Add missing comment argument to dg-warning. + * gcc.dg/gomp/pr89246-1.c: Likewise. + * gcc.dg/gomp/simd-clones-2.c: Add missing " after dump name. + +2023-12-12 Xi Ruoyao + + PR middle-end/107723 + * gcc.dg/torture/builtin-fp-int-inexact-trunc.c: New test. + +2023-12-12 Richard Sandiford + + * gcc.target/aarch64/prfm_imm_offset_2.c: Add dg-options. + +2023-12-12 Pan Li + + * gcc.target/riscv/rvv/autovec/binop/vcompress-avlprop-1.c: New test. + +2023-12-12 Xi Ruoyao + + * gcc.target/loongarch/attr-model-3.c: New test. + * gcc.target/loongarch/attr-model-4.c: New test. + * gcc.target/loongarch/func-call-extreme-3.c: New test. + * gcc.target/loongarch/func-call-extreme-4.c: New test. + +2023-12-12 Richard Biener + + PR tree-optimization/112939 + * gcc.dg/pr112939.c: New testcase. + +2023-12-12 liuhongt + + * gcc.target/i386/pr112891.c: New test. + * gcc.target/i386/pr112891-2.c: New test. 
+ +2023-12-12 Hongyu Wang + + PR target/112943 + * gcc.target/i386/pr112943.c: New test. + +2023-12-12 David Malcolm + + * gcc.dg/plugin/plugin.exp: Add taint-modulus.c to + analyzer_kernel_plugin.c tests. + * gcc.dg/plugin/taint-modulus.c: New test. + +2023-12-12 Yang Yujie + + * gcc.target/loongarch/eh_return-normal-return.c: New test. + 2023-12-11 Jakub Jelinek * c-c++-common/strub-O2fni.c: Add -fno-stack-protector to dg-options. diff --git a/libgm2/ChangeLog b/libgm2/ChangeLog index 2539778615ff..42cfcf950442 100644 --- a/libgm2/ChangeLog +++ b/libgm2/ChangeLog @@ -1,3 +1,12 @@ +2023-12-12 Gaius Mulley + + * libm2iso/Makefile.am (libm2iso_la_M2FLAGS): Added line breaks. + * libm2iso/Makefile.in: Regenerate. + * libm2log/Makefile.am (libm2log_la_M2FLAGS): Added line breaks. + * libm2log/Makefile.in: Regenerate. + * libm2pim/Makefile.am (libm2pim_la_M2FLAGS): Added line breaks. + * libm2pim/Makefile.in: Regenerate. + 2023-11-30 Gaius Mulley * configure: Regenerate. diff --git a/libquadmath/ChangeLog b/libquadmath/ChangeLog index e449028fc000..4d407867b543 100644 --- a/libquadmath/ChangeLog +++ b/libquadmath/ChangeLog @@ -1,3 +1,11 @@ +2023-12-12 Jakub Jelinek + + PR libquadmath/112963 + * configure.ac (LIBM): Readd AC_CHECK_LIBM-like check without doing + AC_CHECK_LIB in it. + * configure: Regenerated. + * Makefile.in: Regenerated. + 2023-10-23 Iain Sandoe PR libquadmath/111928 diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index cd9d121374e8..cdbd3b90d901 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,23 @@ +2023-12-12 Jonathan Wakely + + * include/std/format (__do_vformat_to): Handle char separately + from other integral types. + * testsuite/std/format/functions/format.cc: Check for expected + output for char and bool arguments. + * testsuite/std/format/string.cc: Check that 0 filling is + rejected for character and string formats. 
+ +2023-12-12 Jonathan Wakely + + * include/bits/chrono_io.h (__formatter_chrono::_M_C_y_Y): Fix + rounding for negative centuries. + * testsuite/std/time/year/io.cc: Check %C for negative years. + +2023-12-12 Jonathan Wakely + + * src/c++11/Makefile.am: Remove redundant -std=gnu++11 flags. + * src/c++11/Makefile.in: Regenerate. + 2023-12-09 Jonathan Wakely PR libstdc++/111826 From 97b3b38e5faec2b5486368f5a0ffb16eb4ab4190 Mon Sep 17 00:00:00 2001 From: Jiufu Guo Date: Wed, 13 Dec 2023 08:10:25 +0800 Subject: [PATCH 248/311] rs6000: accurate num_insns_constant_gpr Trunk gcc supports more constants to be built via two instructions: e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic". And then num_insns_constant should also be updated. Function "rs6000_emit_set_long_const" is used to build complicated constants; and "num_insns_constant_gpr" is used to compute "how many instructions are needed" to build the constant. So, these two functions should be aligned. The idea of this patch is: to reuse "rs6000_emit_set_long_const" to compute/record the instruction number (when computing the insn_num, no instructions are emitted). gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new parameter to record number of instructions to build the constant. (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute num_insn. 
--- gcc/config/rs6000/rs6000.cc | 290 ++++++++++++++++++------------------ 1 file changed, 149 insertions(+), 141 deletions(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 2d8afc121198..5cb94f6c0d66 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *); static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *); static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr); static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool); static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, @@ -6064,21 +6064,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) else if (TARGET_POWERPC64) { - HOST_WIDE_INT low = sext_hwi (value, 32); - HOST_WIDE_INT high = value >> 31; - - if (high == 0 || high == -1) - return 2; - - high >>= 1; - - if (low == 0 || low == high) - return num_insns_constant_gpr (high) + 1; - else if (high == 0) - return num_insns_constant_gpr (low) + 1; - else - return (num_insns_constant_gpr (high) - + num_insns_constant_gpr (low) + 1); + int num_insns = 0; + rs6000_emit_set_long_const (nullptr, value, &num_insns); + return num_insns; } else @@ -10504,14 +10492,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. Output insns to set DEST equal to the constant C as a series of - lis, ori and shl instructions. */ + lis, ori and shl instructions. If NUM_INSNS is not NULL, then + only increase *NUM_INSNS as the number of insns, and do not emit + any insns. 
*/ static void -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) { - rtx temp; - int shift; - HOST_WIDE_INT mask; HOST_WIDE_INT ud1, ud2, ud3, ud4; ud1 = c & 0xffff; @@ -10519,168 +10506,189 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) ud3 = (c >> 32) & 0xffff; ud4 = (c >> 48) & 0xffff; + /* This lambda is used to emit one insn or just increase the insn count. + When counting the insn number, no need to emit the insn. */ + auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) { + if (num_insns) + { + (*num_insns)++; + return; + } + + if (src) + emit_move_insn (dest_or_insn, src); + else + emit_insn (dest_or_insn); + }; + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); - - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) - || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - - emit_move_insn (ud1 != 0 ? temp : dest, - GEN_INT (sext_hwi (ud2 << 16, 32))); - if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + /* li */ + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16))); + return; } - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) + + rtx temp + = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode); + + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) + { + /* lis[; ori] */ + count_or_emit_insn (ud1 != 0 ? 
temp : dest, + GEN_INT (sext_hwi (ud2 << 16, 32))); + if (ud1 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + return; + } + + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) { /* lis; xoris */ - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); + count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); + count_or_emit_insn (dest, + gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); + return; } - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) + + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) { /* li; xoris */ - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, - GEN_INT ((ud2 ^ 0xffff) << 16))); + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16))); + count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp, + GEN_INT ((ud2 ^ 0xffff) << 16))); + return; } - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) - || can_be_built_by_li_and_rldic (c, &shift, &mask)) + + int shift; + HOST_WIDE_INT mask; + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) + || can_be_built_by_li_and_rldic (c, &shift, &mask)) { - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); + /* li/lis; rldicX */ unsigned HOST_WIDE_INT imm = (c | ~mask); imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); - emit_move_insn (temp, GEN_INT (imm)); + count_or_emit_insn (temp, GEN_INT (imm)); if (shift != 0) temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); if (mask != HOST_WIDE_INT_M1) temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); - emit_move_insn (dest, temp); + count_or_emit_insn (dest, temp); + + return; } - else if (ud3 == 0 && ud4 == 0) + + if (ud3 == 0 && ud4 == 0) { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); - - gcc_assert (ud2 & 0x8000); - - if (ud1 == 0) - { - /* lis; rldicl */ - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); - emit_move_insn (dest, - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); - } - else if (!(ud1 & 0x8000)) + gcc_assert ((ud2 & 0x8000) && ud1 != 0); + if (!(ud1 & 0x8000)) { /* li; oris */ - emit_move_insn (temp, GEN_INT (ud1)); - emit_move_insn (dest, - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); + count_or_emit_insn (temp, GEN_INT (ud1)); + count_or_emit_insn (dest, + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); + return; } - else - { - /* lis; ori; rldicl */ - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); - emit_move_insn (dest, + + /* lis; ori; rldicl */ + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + count_or_emit_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); - } + return; } - else if (ud1 == ud3 && ud2 == ud4) + + if (ud1 == ud3 && ud2 == ud4) { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + /* load low 32bits first, e.g. "lis; ori", then "rldimi". 
*/ HOST_WIDE_INT num = (ud2 << 16) | ud1; - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); - rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); - rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); - emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); - } - else if ((ud4 == 0xffff && (ud3 & 0x8000)) - || (ud4 == 0 && ! (ud3 & 0x8000))) - { - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); - emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); + rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp, + GEN_INT (0xffffffff)); + count_or_emit_insn (rldimi); + return; + } + + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) + { + /* li; [ori;] rldicl [;oir]. */ + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); if (ud2 != 0) - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); - emit_move_insn (ud1 != 0 ? temp : dest, - gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); + count_or_emit_insn (ud1 != 0 ? temp : dest, + gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); + return; } - else if (TARGET_PREFIXED) + + if (TARGET_PREFIXED) { if (can_create_pseudo_p ()) { - /* pli A,L + pli B,H + rldimi A,B,32,0. */ - temp = gen_reg_rtx (DImode); - rtx temp1 = gen_reg_rtx (DImode); - emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); - emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); - - emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, - GEN_INT (0xffffffff))); + /* pli A,L; pli B,H; rldimi A,B,32,0. */ + rtx temp1 = num_insns ? 
nullptr : gen_reg_rtx (DImode); + count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3)); + count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); + rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, + GEN_INT (0xffffffff)); + count_or_emit_insn (rldimi); + return; } - else - { - /* pli A,H + sldi A,32 + paddi A,A,L. */ - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); + /* pli A,H; sldi A,32; paddi A,A,L. */ + count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3)); + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; - - /* Use paddi for the low 32 bits. */ - if (ud2 != 0 && ud1 != 0 && can_use_paddi) - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, + bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false; + /* Use paddi for the low 32 bits. */ + if (ud2 != 0 && ud1 != 0 && can_use_paddi) + count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest, GEN_INT ((ud2 << 16) | ud1))); - - /* Use oris, ori for low 32 bits. */ - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) - emit_move_insn (dest, + /* Use oris, ori for low 32 bits. */ + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); - } + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + return; } - else + + if (can_create_pseudo_p ()) { - if (can_create_pseudo_p ()) - { - /* lis HIGH,UD4 ; ori HIGH,UD3 ; - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. 
*/ - rtx high = gen_reg_rtx (DImode); - rtx low = gen_reg_rtx (DImode); - HOST_WIDE_INT num = (ud2 << 16) | ud1; - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); - num = (ud4 << 16) | ud3; - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, - GEN_INT (0xffffffff))); - } - else - { - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; - oris DEST,UD2 ; ori DEST,UD1. */ - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); - if (ud3 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); + /* lis HIGH,UD4 ; ori HIGH,UD3 ; + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); + rtx low = num_insns ? nullptr : gen_reg_rtx (DImode); + HOST_WIDE_INT num = (ud2 << 16) | ud1; + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); + num = (ud4 << 16) | ud3; + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); - if (ud2 != 0) - emit_move_insn (dest, - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); - if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); - } + rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, + GEN_INT (0xffffffff)); + count_or_emit_insn (rldimi); + return; } + + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; + oris DEST,UD2 ; ori DEST,UD1. */ + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); + if (ud3 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); + + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); + if (ud2 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); + if (ud1 != 0) + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + + return; } /* Helper for the following. 
Get rid of [r+r] memory refs From a9046f1979f05c1fd4e69a3bbf5a8629e2573fd3 Mon Sep 17 00:00:00 2001 From: Jiufu Guo Date: Wed, 13 Dec 2023 08:10:25 +0800 Subject: [PATCH 249/311] rs6000: using pli for constant splitting For constant building e.g. r120=0x66666666, which does not fit 'li or lis', 'pli' is used to build this constant via 'emit_move_insn'. While for a complicated constant, e.g. 0x6666666666666666ULL, when using 'rs6000_emit_set_long_const' to split the constant recursively, it fails to use 'pli' to build the half part constant: 0x66666666. 'rs6000_emit_set_long_const' could be updated to use 'pli' to build half part of the constant when necessary. For example: 0x6666666666666666ULL, "pli 3,1717986918; rldimi 3,3,32,0" can be used. gcc/ChangeLog: * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use pli for 34bit constant. gcc/testsuite/ChangeLog: * gcc.target/powerpc/const-build-1.c: New test. --- gcc/config/rs6000/rs6000.cc | 7 +++++++ gcc/testsuite/gcc.target/powerpc/const-build-1.c | 9 +++++++++ 2 files changed, 16 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/const-build-1.c diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 5cb94f6c0d66..09a5d29ec8ca 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10521,6 +10521,13 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) emit_insn (dest_or_insn); }; + if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (c)) + { + /* li/lis/pli */ + count_or_emit_insn (dest, GEN_INT (c)); + return; + } + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) { diff --git a/gcc/testsuite/gcc.target/powerpc/const-build-1.c b/gcc/testsuite/gcc.target/powerpc/const-build-1.c new file mode 100644 index 000000000000..7e35f8c507f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/const-build-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target 
lp64 } } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10" } */ +/* { dg-require-effective-target power10_ok } */ + +unsigned long long msk66() { return 0x6666666666666666ULL; } + +/* { dg-final { scan-assembler-times {\mpli\M} 1 } } */ +/* { dg-final { scan-assembler-not {\mli\M} } } */ +/* { dg-final { scan-assembler-not {\mlis\M} } } */ From 1243a057beb53074c40805490b0e204e64000291 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Tue, 12 Dec 2023 16:38:47 +0800 Subject: [PATCH 250/311] i386: Fix PR110790 testcase gcc/testsuite/ChangeLog: * gcc.target/i386/pr110790-2.c: Change scan-assembler from shrq to shr\[qx\]. --- gcc/testsuite/gcc.target/i386/pr110790-2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr110790-2.c b/gcc/testsuite/gcc.target/i386/pr110790-2.c index 16c73cb74659..dbb526308e6d 100644 --- a/gcc/testsuite/gcc.target/i386/pr110790-2.c +++ b/gcc/testsuite/gcc.target/i386/pr110790-2.c @@ -21,5 +21,5 @@ refmpn_tstbit_bad (mp_srcptr ptr, unsigned long bit) shrq %cl, %rax andl $1, %eax */ -/* { dg-final { scan-assembler-times "shrq" 2 { target { lp64 } } } } */ +/* { dg-final { scan-assembler-times "shr\[qx\]" 2 { target { lp64 } } } } */ /* { dg-final { scan-assembler-times "andl" 2 { target { lp64 } } } } */ From fda8e2f8292a90dac9fcaf952bad6fff3aa7fff2 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Tue, 12 Dec 2023 20:39:34 -0600 Subject: [PATCH 251/311] range: Workaround different type precision between _Float128 and long double [PR112788] As PR112788 shows, on rs6000 with -mabi=ieeelongdouble type _Float128 has a different type precision (128) from that (127) of type long double, but actually they have the same underlying mode, so they have the same precision as the mode indicates the same real type format ieee_quad_format. It's not sensible to have such two types which have the same mode but different type precisions, some fix attempt was posted at [1]. 
As the discussion there, there are some historical reasons and practical issues. Considering we passed stage 1 and it also affected the build as reported, this patch is trying to temporarily workaround it. I thought to introduce a hookpod but that seems a bit overkill, assuming scalar float type with the same mode should have the same precision looks sensible. [1] https://inbox.sourceware.org/gcc-patches/718677e7-614d-7977-312d-05a75e1fd5b4@linux.ibm.com/ PR tree-optimization/112788 gcc/ChangeLog: * value-range.h (range_compatible_p): Workaround same type mode but different type precision issue for rs6000 scalar float types _Float128 and long double. --- gcc/value-range.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/gcc/value-range.h b/gcc/value-range.h index 33f204a71713..d0a84754a100 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -1558,7 +1558,13 @@ range_compatible_p (tree type1, tree type2) // types_compatible_p requires conversion in both directions to be useless. // GIMPLE only requires a cast one way in order to be compatible. // Ranges really only need the sign and precision to be the same. - return (TYPE_PRECISION (type1) == TYPE_PRECISION (type2) - && TYPE_SIGN (type1) == TYPE_SIGN (type2)); + return TYPE_SIGN (type1) == TYPE_SIGN (type2) + && (TYPE_PRECISION (type1) == TYPE_PRECISION (type2) + // FIXME: As PR112788 shows, for now on rs6000 _Float128 has + // type precision 128 while long double has type precision 127 + // but both have the same mode so their precision is actually + // the same, workaround it temporarily. + || (SCALAR_FLOAT_TYPE_P (type1) + && TYPE_MODE (type1) == TYPE_MODE (type2))); } #endif // GCC_VALUE_RANGE_H From 9fba6637687a197cb37c3c76110eb5d81dc56e60 Mon Sep 17 00:00:00 2001 From: Victor Do Nascimento Date: Wed, 10 May 2023 12:03:58 +0100 Subject: [PATCH 252/311] aarch64: Implement the ACLE instruction/data prefetch functions. 
Implement the ACLE data and instruction prefetch functions[1] with the following signatures: 1. Data prefetch intrinsics: ---------------------------- void __pldx (/*constant*/ unsigned int /*access_kind*/, /*constant*/ unsigned int /*cache_level*/, /*constant*/ unsigned int /*retention_policy*/, void const volatile *addr); void __pld (void const volatile *addr); 2. Instruction prefetch intrinsics: ----------------------------------- void __plix (/*constant*/ unsigned int /*cache_level*/, /*constant*/ unsigned int /*retention_policy*/, void const volatile *addr); void __pli (void const volatile *addr); `__pldx' affords the programmer more fine-grained control over the data prefetch behaviour than the analogous GCC builtin `__builtin_prefetch', and allows access to the "SLC" cache level. While `__builtin_prefetch' chooses both cache-level and retention policy automatically via the optional `locality' parameter, `__pldx' expects 2 (mandatory) arguments to explicitly define the desired cache-level and retention policies. `__plix' on the other hand, generates a code prefetch instruction and so extends functionality on aarch64 targets beyond that which is exposed by `builtin_prefetch'. `__pld' and `__pli' do prefetch of data and instructions, respectively, using default values for both cache-level and retention policies. Bootstrapped and tested on aarch64-none-linux-gnu. [1] https://arm-software.github.io/acle/main/acle.html#memory-prefetch-intrinsics gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc: (AARCH64_PLD): New enum aarch64_builtins entry. (AARCH64_PLDX): Likewise. (AARCH64_PLI): Likewise. (AARCH64_PLIX): Likewise. (aarch64_init_prefetch_builtin): New. (aarch64_general_init_builtins): Call prefetch init function. (aarch64_expand_prefetch_builtin): New. (aarch64_general_expand_builtin): Add prefetch expansion. (require_const_argument): New. * config/aarch64/aarch64.md (UNSPEC_PLDX): New. (aarch64_pldx): Likewise. * config/aarch64/arm_acle.h (__pld): Likewise. 
(__pli): Likewise. (__plix): Likewise. (__pldx): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/builtin_pld_pli.c: New. * gcc.target/aarch64/builtin_pld_pli_illegal.c: New. --- gcc/config/aarch64/aarch64-builtins.cc | 125 ++++++++++++++++++ gcc/config/aarch64/aarch64.md | 12 ++ gcc/config/aarch64/arm_acle.h | 30 +++++ .../gcc.target/aarch64/builtin_pld_pli.c | 90 +++++++++++++ .../aarch64/builtin_pld_pli_illegal.c | 33 +++++ 5 files changed, 290 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c create mode 100644 gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 452ff19f89b7..b57255ba1c88 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -822,6 +822,10 @@ enum aarch64_builtins AARCH64_WSRF, AARCH64_WSRF64, AARCH64_WSR128, + AARCH64_PLD, + AARCH64_PLDX, + AARCH64_PLI, + AARCH64_PLIX, AARCH64_BUILTIN_MAX }; @@ -1878,7 +1882,34 @@ aarch64_init_rwsr_builtins (void) = build_function_type_list (void_type_node, const_char_ptr_type, uint128_type_node, NULL); AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype); +} +/* Add builtins for data and instruction prefetch.
*/ +static void +aarch64_init_prefetch_builtin (void) +{ +#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \ + aarch64_builtin_decls[INDEX] = \ + aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX) + + tree ftype; + tree cv_argtype; + cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST + | TYPE_QUAL_VOLATILE); + cv_argtype = build_pointer_type (cv_argtype); + + ftype = build_function_type_list (void_type_node, cv_argtype, NULL); + AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld"); + AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli"); + + ftype = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, unsigned_type_node, + cv_argtype, NULL); + AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx"); + + ftype = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, cv_argtype, NULL); + AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix"); } /* Initialize the memory tagging extension (MTE) builtins. */ @@ -2103,6 +2134,7 @@ aarch64_general_init_builtins (void) aarch64_init_data_intrinsics (); aarch64_init_rwsr_builtins (); + aarch64_init_prefetch_builtin (); tree ftype_jcvt = build_function_type_list (intSI_type_node, double_type_node, NULL); @@ -2839,6 +2871,93 @@ aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode) } } +/* Ensure argument ARGNO in EXP represents a const-type argument in the range + [MINVAL, MAXVAL). 
*/ +static HOST_WIDE_INT +require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) +{ + maxval--; + tree arg = CALL_EXPR_ARG (exp, argno); + if (TREE_CODE (arg) != INTEGER_CST) + error_at (EXPR_LOCATION (exp), "Constant-type argument expected"); + + auto argval = wi::to_widest (arg); + + if (argval < minval || argval > maxval) + error_at (EXPR_LOCATION (exp), + "argument %d must be a constant immediate " + "in range [%wd,%wd]", argno + 1, minval, maxval); + + HOST_WIDE_INT retval = argval.to_shwi (); + return retval; +} + + +/* Expand a prefetch builtin EXP. */ +void +aarch64_expand_prefetch_builtin (tree exp, int fcode) +{ + int kind_id = -1; + int level_id = -1; + int rettn_id = -1; + char prfop[11]; + class expand_operand ops[2]; + + static const char *kind_s[] = {"PLD", "PST", "PLI"}; + static const char *level_s[] = {"L1", "L2", "L3", "SLC"}; + static const char *rettn_s[] = {"KEEP", "STRM"}; + + /* Each of the four prefetch builtins takes a different number of arguments, + but proceeds to call the PRFM insn which requires 4 pieces of information + to be fully defined. Where one of these takes less than 4 arguments, set + sensible defaults. */ + switch (fcode) + { + case AARCH64_PLDX: + break; + case AARCH64_PLIX: + kind_id = 2; + break; + case AARCH64_PLI: + case AARCH64_PLD: + kind_id = (fcode == AARCH64_PLD) ? 0 : 2; + level_id = 0; + rettn_id = 0; + break; + default: + gcc_unreachable (); + } + + /* Any -1 id variable is to be user-supplied. Here we fill these in and run + bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI & + AARCH64_PLIX, never explicitly. 
*/ + int argno = 0; + if (kind_id < 0) + kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1); + if (level_id < 0) + level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s)); + if (rettn_id < 0) + rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s)); + rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode, + EXPAND_NORMAL); + + if (seen_error ()) + return; + + sprintf (prfop, "%s%s%s", kind_s[kind_id], + level_s[level_id], + rettn_s[rettn_id]); + + rtx const_str = rtx_alloc (CONST_STRING); + PUT_CODE (const_str, CONST_STRING); + XSTR (const_str, 0) = ggc_strdup (prfop); + + create_fixed_operand (&ops[0], const_str); + create_address_operand (&ops[1], address); + maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops); +} + /* Expand an expression EXP that calls a MEMTAG built-in FCODE with result going to TARGET. */ static rtx @@ -3085,6 +3204,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, case AARCH64_WSRF64: case AARCH64_WSR128: return aarch64_expand_rwsr_builtin (exp, target, fcode); + case AARCH64_PLD: + case AARCH64_PLDX: + case AARCH64_PLI: + case AARCH64_PLIX: + aarch64_expand_prefetch_builtin (exp, fcode); + return target; } if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f4d4427a3a07..d70535e87beb 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -338,6 +338,7 @@ UNSPEC_SYSREG_RTI UNSPEC_SYSREG_WDI UNSPEC_SYSREG_WTI + UNSPEC_PLDX ;; Represents an SVE-style lane index, in which the indexing applies ;; within the containing 128-bit block. 
UNSPEC_SVE_LANE_SELECT @@ -970,6 +971,17 @@ [(set_attr "type" "load_4")] ) +(define_insn "aarch64_pldx" + [(unspec [(match_operand 0 "" "") + (match_operand:DI 1 "aarch64_prefetch_operand" "Dp")] UNSPEC_PLDX)] + "" + { + operands[1] = gen_rtx_MEM (DImode, operands[1]); + return "prfm\\t%0, %1"; + } + [(set_attr "type" "load_4")] +) + (define_insn "trap" [(trap_if (const_int 1) (const_int 8))] "" diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h index 80282b361a4f..601735b5fba3 100644 --- a/gcc/config/aarch64/arm_acle.h +++ b/gcc/config/aarch64/arm_acle.h @@ -78,6 +78,36 @@ _GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t) #undef _GCC_ARM_ACLE_DATA_FN +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__pld (void const volatile *__addr) +{ + return __builtin_aarch64_pld (__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__pli (void const volatile *__addr) +{ + return __builtin_aarch64_pli (__addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__plix (unsigned int __cache, unsigned int __rettn, + void const volatile *__addr) +{ + return __builtin_aarch64_plix (__cache, __rettn, __addr); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__pldx (unsigned int __access, unsigned int __cache, unsigned int __rettn, + void const volatile *__addr) +{ + return __builtin_aarch64_pldx (__access, __cache, __rettn, __addr); +} + __extension__ extern __inline unsigned long __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __revl (unsigned long __value) diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c b/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c new file mode 100644 index 000000000000..8cbaa97c00ca --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli.c @@ -0,0 +1,90 @@ +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a -O2" } */ + +#include <arm_acle.h> + +/* Check that we can generate the immediate-offset addressing + mode for PRFM. */ + +/* Access kind specifiers. */ +#define PLD 0 +#define PST 1 +/* Cache levels. */ +#define L1 0 +#define L2 1 +#define L3 2 +#define SLC 3 +/* Retention policies. */ +#define KEEP 0 +#define STRM 1 + +void +prefetch_for_read_write (void *a) +{ + __pldx (PLD, L1, KEEP, a); + __pldx (PLD, L1, STRM, a); + __pldx (PLD, L2, KEEP, a); + __pldx (PLD, L2, STRM, a); + __pldx (PLD, L3, KEEP, a); + __pldx (PLD, L3, STRM, a); + __pldx (PLD, SLC, KEEP, a); + __pldx (PLD, SLC, STRM, a); + __pldx (PST, L1, KEEP, a); + __pldx (PST, L1, STRM, a); + __pldx (PST, L2, KEEP, a); + __pldx (PST, L2, STRM, a); + __pldx (PST, L3, KEEP, a); + __pldx (PST, L3, STRM, a); + __pldx (PST, SLC, KEEP, a); + __pldx (PST, SLC, STRM, a); +} + +/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDL1STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDL2KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDL2STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDL3KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDL3STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDSLCKEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLDSLCSTRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL1KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL1STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL2KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL2STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL3KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTL3STRM,
\\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTSLCKEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPSTSLCSTRM, \\\[x\[0-9\]+\\\]" } } */ + +void +prefetch_simple (void *a) +{ + __pld (a); + __pli (a); +} + +/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */ + +void +prefetch_instructions (void *a) +{ + __plix (L1, KEEP, a); + __plix (L1, STRM, a); + __plix (L2, KEEP, a); + __plix (L2, STRM, a); + __plix (L3, KEEP, a); + __plix (L3, STRM, a); + __plix (SLC, KEEP, a); + __plix (SLC, STRM, a); +} + +/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL1STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL2KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL2STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL3KEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLIL3STRM, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLISLCKEEP, \\\[x\[0-9\]+\\\]" } } */ +/* { dg-final { scan-assembler "prfm\tPLISLCSTRM, \\\[x\[0-9\]+\\\]" } } */ + diff --git a/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c b/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c new file mode 100644 index 000000000000..b799a65eb253 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/builtin_pld_pli_illegal.c @@ -0,0 +1,33 @@ +/* Check that PRFM-related bounds checks are applied correctly. */ +/* { dg-do compile } */ +#include <arm_acle.h> + +/* Access kind specifiers. */ +#define KIND_LOW -1 +#define KIND_HIGH 2 +/* Cache levels. */ +#define LEVEL_LOW -1 +#define LEVEL_HIGH 4 +/* Retention policies.
*/ +#define POLICY_LOW -1 +#define POLICY_HIGH 2 + +void +data_rw_prefetch_bad_bounds (void *a) +{ + __builtin_aarch64_pldx (KIND_LOW, 0, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,1\]} } */ + __builtin_aarch64_pldx (KIND_HIGH, 0, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,1\]} } */ + __builtin_aarch64_pldx (0, LEVEL_LOW, 0, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,3\]} } */ + __builtin_aarch64_pldx (0, LEVEL_HIGH, 0, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,3\]} } */ + __builtin_aarch64_pldx (0, 0, POLICY_LOW, a); /* { dg-error {argument 3 must be a constant immediate in range \[0,1\]} } */ + __builtin_aarch64_pldx (0, 0, POLICY_HIGH, a); /* { dg-error {argument 3 must be a constant immediate in range \[0,1\]} } */ +} + +void +insn_prefetch_bad_bounds (void *a) +{ + __builtin_aarch64_plix (LEVEL_LOW, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,3\]} } */ + __builtin_aarch64_plix (LEVEL_HIGH, 0, a); /* { dg-error {argument 1 must be a constant immediate in range \[0,3\]} } */ + __builtin_aarch64_plix (0, POLICY_LOW, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,1\]} } */ + __builtin_aarch64_plix (0, POLICY_HIGH, a); /* { dg-error {argument 2 must be a constant immediate in range \[0,1\]} } */ +} From 8e0568d8ac9dbfc8199f970c5c7f26b9735e0e7b Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Wed, 13 Dec 2023 01:31:41 -0300 Subject: [PATCH 253/311] multiflags: fix doc warning properly Rather than a dubious fix for a dubious warning, namely adding a period after a parenthesized @xref because the warning demands it, use @pxref that is meant for exactly this case. Thanks to Joseph Myers for introducing me to it. for gcc/ChangeLog * doc/invoke.texi (multiflags): Drop extraneous period, use @pxref instead. 
--- gcc/doc/invoke.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1f26f80d26c8..19feba467a46 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20588,8 +20588,8 @@ allocation before or after interprocedural optimization. @item -fmultiflags This option enables multilib-aware @code{TFLAGS} to be used to build target libraries with options different from those the compiler is -configured to use by default, through the use of specs (@xref{Spec -Files}.) set up by compiler internals, by the target, or by builders at +configured to use by default, through the use of specs (@pxref{Spec +Files}) set up by compiler internals, by the target, or by builders at configure time. Like @code{TFLAGS}, this allows the target libraries to be built for From 97094d2ffd7d00261e6d7cc5d4a62dc7c2c89b64 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 13 Dec 2023 08:54:49 +0100 Subject: [PATCH 254/311] middle-end/111591 - explain why TBAA doesn't need adjustment While tidying the prototype patch I've done for the reduced testcase in PR111591 and in that process trying to produce a testcase that is miscompiled by stack slot coalescing and the TBAA info that remains un-altered I've realized we do not need to adjust TBAA info. The following documents this in the place we adjust points-to info which we do need to adjust. PR middle-end/111591 * cfgexpand.cc (update_alias_info_with_stack_vars): Document why not adjusting TBAA info on accesses is OK. --- gcc/cfgexpand.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index b860be8bb77b..ec887ef918f2 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -786,7 +786,13 @@ add_partitioned_vars_to_ptset (struct pt_solution *pt, /* Update points-to sets based on partition info, so we can use them on RTL. 
The bitmaps representing stack partitions will be saved until expand, where partitioned decls used as bases in memory expressions will be - rewritten. */ + rewritten. + + It is not necessary to update TBAA info on accesses to the coalesced + storage since our memory model doesn't allow TBAA to be used for + WAW or WAR dependences. For RAW when the write is to an old object + the new object would not have been initialized at the point of the + read, invoking undefined behavior. */ static void update_alias_info_with_stack_vars (void) From b9baead90d74e9211fc94d655ecd5d3af3858158 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 13 Dec 2023 09:38:59 +0100 Subject: [PATCH 255/311] tree-optimization/112990 - unsupported VEC_PERM from match pattern The following avoids creating an unsupported VEC_PERM after vector lowering from the pattern merging a bit-insert from a bit-field-ref to a VEC_PERM. For the already existing s390 testcase we get TImode vectors which later ICE during attempted expansion of a vec_perm_const. PR tree-optimization/112990 * match.pd (bit_insert @0 (BIT_FIELD_REF @1 ..) ..): Restrict to vector modes after lowering. 
--- gcc/match.pd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/match.pd b/gcc/match.pd index 15bca217aafb..e3dcff5c29ca 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8505,6 +8505,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (bit_insert @0 (BIT_FIELD_REF@2 @1 @rsize @rpos) @ipos) (if (VECTOR_TYPE_P (type) + && (VECTOR_MODE_P (TYPE_MODE (type)) + || optimize_vectors_before_lowering_p ()) && types_match (@0, @1) && types_match (TREE_TYPE (TREE_TYPE (@0)), TREE_TYPE (@2)) && TYPE_VECTOR_SUBPARTS (type).is_constant ()) From 93db32a4146afd2a6d90410691351a56768167c9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 13 Dec 2023 08:45:58 +0100 Subject: [PATCH 256/311] tree-optimization/112991 - re-do PR112961 fix The following does away with the fake edge adding as in the original PR112961 fix and instead exposes handling of entry PHIs as additional parameter of the region VN run. PR tree-optimization/112991 PR tree-optimization/112961 * tree-ssa-sccvn.h (do_rpo_vn): Add skip_entry_phis argument. * tree-ssa-sccvn.cc (do_rpo_vn): Likewise. (do_rpo_vn_1): Likewise, merge with auto-processing. (run_rpo_vn): Adjust. (pass_fre::execute): Likewise. * tree-if-conv.cc (tree_if_conversion): Revert last change. Value-number latch block but disable value-numbering of entry PHIs. * tree-ssa-uninit.cc (execute_early_warn_uninitialized): Adjust. * gcc.dg/torture/pr112991.c: New testcase. 
--- gcc/testsuite/gcc.dg/torture/pr112991.c | 21 ++++++++++++++++++++ gcc/tree-if-conv.cc | 26 ++++++++++--------------- gcc/tree-ssa-sccvn.cc | 24 ++++++++++++++--------- gcc/tree-ssa-sccvn.h | 1 + gcc/tree-ssa-uninit.cc | 2 +- 5 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr112991.c diff --git a/gcc/testsuite/gcc.dg/torture/pr112991.c b/gcc/testsuite/gcc.dg/torture/pr112991.c new file mode 100644 index 000000000000..aace98545997 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr112991.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ + +typedef struct { + unsigned links[2]; +} RMF_unit; +long RMF_recurseListsBound_count; +int RMF_recurseListsBound_tbl, RMF_recurseListsBound_list_head_1; +unsigned RMF_recurseListsBound_list_head_0; +void RMF_recurseListsBound() { + int list_count = RMF_recurseListsBound_list_head_1; + long link = RMF_recurseListsBound_list_head_0; + for (; RMF_recurseListsBound_count;) { + long next_link = + ((RMF_unit *)&RMF_recurseListsBound_tbl)[link >> 2].links[0]; + if (link) + --RMF_recurseListsBound_count; + link = next_link; + } + while (list_count) + ; +} diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index f9fd01499374..e169413bb44c 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3734,7 +3734,7 @@ tree_if_conversion (class loop *loop, vec *preds) auto_vec reads_to_lower; auto_vec writes_to_lower; bitmap exit_bbs; - edge pe, e; + edge pe; auto_vec refs; bool loop_versioned; @@ -3891,27 +3891,21 @@ tree_if_conversion (class loop *loop, vec *preds) combine_blocks (loop, loop_versioned); } - /* Perform local CSE, this esp. helps the vectorizer analysis if loads - and stores are involved. CSE only the loop body, not the entry - PHIs, those are to be kept in sync with the non-if-converted copy. - Do this by adding a fake entry edge - we do want to include the - latch as otherwise copies on a reduction path cannot be propagated out. - ??? We'll still keep dead stores though. 
*/ - e = make_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun), loop->header, EDGE_FAKE); - exit_bbs = BITMAP_ALLOC (NULL); - for (edge exit : get_loop_exit_edges (loop)) - bitmap_set_bit (exit_bbs, exit->dest->index); - std::pair *name_pair; unsigned ssa_names_idx; FOR_EACH_VEC_ELT (redundant_ssa_names, ssa_names_idx, name_pair) replace_uses_by (name_pair->first, name_pair->second); redundant_ssa_names.release (); - todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); - - /* Remove the fake edge again. */ - remove_edge (e); + /* Perform local CSE, this esp. helps the vectorizer analysis if loads + and stores are involved. CSE only the loop body, not the entry + PHIs, those are to be kept in sync with the non-if-converted copy. + ??? We'll still keep dead stores though. */ + exit_bbs = BITMAP_ALLOC (NULL); + for (edge exit : get_loop_exit_edges (loop)) + bitmap_set_bit (exit_bbs, exit->dest->index); + todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs, + false, true, true); /* Delete dead predicate computations. */ ifcvt_local_dce (loop); diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index a178b768459b..4d3088643c4f 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -7584,12 +7584,13 @@ eliminate_with_rpo_vn (bitmap inserted_exprs) static unsigned do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs, - bool iterate, bool eliminate, vn_lookup_kind kind); + bool iterate, bool eliminate, bool skip_entry_phis, + vn_lookup_kind kind); void run_rpo_vn (vn_lookup_kind kind) { - do_rpo_vn_1 (cfun, NULL, NULL, true, false, kind); + do_rpo_vn_1 (cfun, NULL, NULL, true, false, false, kind); /* ??? Prune requirement of these. */ constant_to_value_id = new hash_table (23); @@ -8290,11 +8291,13 @@ do_unwind (unwind_state *to, rpo_elim &avail) /* Do VN on a SEME region specified by ENTRY and EXIT_BBS in FN. If ITERATE is true then treat backedges optimistically as not executed and iterate. 
If ELIMINATE is true then perform - elimination, otherwise leave that to the caller. */ + elimination, otherwise leave that to the caller. If SKIP_ENTRY_PHIS + is true then force PHI nodes in ENTRY->dest to VARYING. */ static unsigned do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs, - bool iterate, bool eliminate, vn_lookup_kind kind) + bool iterate, bool eliminate, bool skip_entry_phis, + vn_lookup_kind kind) { unsigned todo = 0; default_vn_walk_kind = kind; @@ -8335,10 +8338,10 @@ do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs, if (e != entry && !(e->flags & EDGE_DFS_BACK)) break; - bool skip_entry_phis = e != NULL; - if (skip_entry_phis && dump_file && (dump_flags & TDF_DETAILS)) + if (e != NULL && dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Region does not contain all edges into " "the entry block, skipping its PHIs.\n"); + skip_entry_phis |= e != NULL; int *bb_to_rpo = XNEWVEC (int, last_basic_block_for_fn (fn)); for (int i = 0; i < n; ++i) @@ -8715,14 +8718,17 @@ do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs, If ITERATE is true then treat backedges optimistically as not executed and iterate. If ELIMINATE is true then perform elimination, otherwise leave that to the caller. + If SKIP_ENTRY_PHIS is true then force PHI nodes in ENTRY->dest to VARYING. KIND specifies the amount of work done for handling memory operations. 
*/ unsigned do_rpo_vn (function *fn, edge entry, bitmap exit_bbs, - bool iterate, bool eliminate, vn_lookup_kind kind) + bool iterate, bool eliminate, bool skip_entry_phis, + vn_lookup_kind kind) { auto_timevar tv (TV_TREE_RPO_VN); - unsigned todo = do_rpo_vn_1 (fn, entry, exit_bbs, iterate, eliminate, kind); + unsigned todo = do_rpo_vn_1 (fn, entry, exit_bbs, iterate, eliminate, + skip_entry_phis, kind); free_rpo_vn (); return todo; } @@ -8778,7 +8784,7 @@ pass_fre::execute (function *fun) if (iterate_p) loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - todo = do_rpo_vn_1 (fun, NULL, NULL, iterate_p, true, VN_WALKREWRITE); + todo = do_rpo_vn_1 (fun, NULL, NULL, iterate_p, true, false, VN_WALKREWRITE); free_rpo_vn (); if (iterate_p) diff --git a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h index 98d70e0ffe07..5e370b85213a 100644 --- a/gcc/tree-ssa-sccvn.h +++ b/gcc/tree-ssa-sccvn.h @@ -298,6 +298,7 @@ tree vn_nary_simplify (vn_nary_op_t); unsigned do_rpo_vn (function *, edge, bitmap, /* iterate */ bool = false, /* eliminate */ bool = true, + /* skip_entry_phis */ bool = false, vn_lookup_kind = VN_WALKREWRITE); /* Private interface for PRE. */ diff --git a/gcc/tree-ssa-uninit.cc b/gcc/tree-ssa-uninit.cc index 9a7c7d12dd84..8d169c337290 100644 --- a/gcc/tree-ssa-uninit.cc +++ b/gcc/tree-ssa-uninit.cc @@ -1500,7 +1500,7 @@ execute_early_warn_uninitialized (struct function *fun) elimination to compute edge reachability. Don't bother when we only warn for unconditionally executed code though. 
*/ if (!optimize) - do_rpo_vn (fun, NULL, NULL, false, false, VN_NOWALK); + do_rpo_vn (fun, NULL, NULL, false, false, false, VN_NOWALK); else set_all_edges_as_executable (fun); From 4dfc6bcabb5a040417e60cb8072e562e25974917 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 13 Dec 2023 09:05:59 +0100 Subject: [PATCH 257/311] Avoid losing MEM_REF offset in MEM_EXPR adjustment for stack slot sharing When investigating PR111591 with respect to TBAA and stack slot sharing I noticed we're eventually scrapping a [TARGET_]MEM_REF offset when rewriting the VAR_DECL base of the MEM_EXPR to use a pointer to the partition instead. The following makes sure to preserve that. * emit-rtl.cc (set_mem_attributes_minus_bitpos): Preserve the offset when rewriting an existing MEM_REF base for stack slot sharing. --- gcc/emit-rtl.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc index 84b6833225e6..4a7e420e7c05 100644 --- a/gcc/emit-rtl.cc +++ b/gcc/emit-rtl.cc @@ -2128,9 +2128,15 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp, tree *orig_base = &attrs.expr; while (handled_component_p (*orig_base)) orig_base = &TREE_OPERAND (*orig_base, 0); - tree aptrt = reference_alias_ptr_type (*orig_base); - *orig_base = build2 (MEM_REF, TREE_TYPE (*orig_base), *namep, - build_int_cst (aptrt, 0)); + if (TREE_CODE (*orig_base) == MEM_REF + || TREE_CODE (*orig_base) == TARGET_MEM_REF) + TREE_OPERAND (*orig_base, 0) = *namep; + else + { + tree aptrt = reference_alias_ptr_type (*orig_base); + *orig_base = build2 (MEM_REF, TREE_TYPE (*orig_base), + *namep, build_int_cst (aptrt, 0)); + } } } From 02c30fdad2f46a1f7b4e30d0eff0ac275cd108a5 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 13 Dec 2023 11:34:12 +0100 Subject: [PATCH 258/311] i386: Fix ICE on __builtin_ia32_pabsd128 without lhs [PR112962] The following patch fixes the ICE on the testcase in a similar way to how other folded builtins are handled in
ix86_gimple_fold_builtin when they don't have a lhs; these builtins are const or pure, so normally DCE would remove them later, but with -O0 that isn't guaranteed to happen, and during expansion if they are marked TREE_SIDE_EFFECTS it might still be attempted to be expanded. This removes them right away during the folding. Initially I wanted to also change all gsi_replace last args in that function to true, but Andrew pointed to PR107209, so I've kept them as is. 2023-12-13 Jakub Jelinek PR target/112962 * config/i386/i386.cc (ix86_gimple_fold_builtin): For shifts and abs without lhs replace with nop. * gcc.target/i386/pr112962.c: New test. --- gcc/config/i386/i386.cc | 10 ++++++++-- gcc/testsuite/gcc.target/i386/pr112962.c | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112962.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4706f0d40594..3e78949c7a66 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -19377,7 +19377,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) do_shift: gcc_assert (n_args >= 2); if (!gimple_call_lhs (stmt)) - break; + { + gsi_replace (gsi, gimple_build_nop (), false); + return true; + } arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); @@ -19523,7 +19526,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) case IX86_BUILTIN_PABSD256_MASK: gcc_assert (n_args >= 1); if (!gimple_call_lhs (stmt)) - break; + { + gsi_replace (gsi, gimple_build_nop (), false); + return true; + } arg0 = gimple_call_arg (stmt, 0); elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); /* For masked ABS, only optimize if the mask is all ones. 
*/ diff --git a/gcc/testsuite/gcc.target/i386/pr112962.c b/gcc/testsuite/gcc.target/i386/pr112962.c new file mode 100644 index 000000000000..8dfc0a6beafb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112962.c @@ -0,0 +1,11 @@ +/* PR target/112962 */ +/* { dg-do compile } */ +/* { dg-options "-fexceptions -mssse3" } */ + +typedef int __attribute__((__vector_size__ (16))) V; + +void +foo (void) +{ + __builtin_ia32_pabsd128 ((V) {}); +} From bb600f9822fb8b73d75df2b3fa7a0fab391254fa Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 13 Dec 2023 11:35:20 +0100 Subject: [PATCH 259/311] attribs: Fix valgrind failures on -Wno-attributes* tests [PR112953] The r14-6076 change changed the allocation of attribute tables from table = new attribute_spec[2]; to table = new attribute_spec { ... }; with ignored_attributes_table.safe_push (table); later in both cases, but didn't change the corresponding delete in free_attr_data, which means valgrind is unhappy about that: FAIL: c-c++-common/Wno-attributes-2.c -Wc++-compat (test for excess errors) Excess errors: ==974681== Mismatched free() / delete / delete [] ==974681== at 0x484965B: operator delete[](void*) (vg_replace_malloc.c:1103) ==974681== by 0x707434: free_attr_data() (attribs.cc:318) ==974681== by 0xCFF8A4: compile_file() (toplev.cc:454) ==974681== by 0x704D23: do_compile (toplev.cc:2150) ==974681== by 0x704D23: toplev::main(int, char**) (toplev.cc:2306) ==974681== by 0x7064BA: main (main.cc:39) ==974681== Address 0x51dffa0 is 0 bytes inside a block of size 40 alloc'd ==974681== at 0x4845FF5: operator new(unsigned long) (vg_replace_malloc.c:422) ==974681== by 0x70A040: handle_ignored_attributes_option(vec*) (attribs.cc:301) ==974681== by 0x7FA089: handle_pragma_diagnostic_impl (c-pragma.cc:934) ==974681== by 0x7FA089: handle_pragma_diagnostic(cpp_reader*) (c-pragma.cc:1028) ==974681== by 0x75814F: c_parser_pragma(c_parser*, pragma_context, bool*) (c-parser.cc:14707) ==974681== by 0x784A85: 
c_parser_external_declaration(c_parser*) (c-parser.cc:2027) ==974681== by 0x785223: c_parser_translation_unit (c-parser.cc:1900) ==974681== by 0x785223: c_parse_file() (c-parser.cc:26713) ==974681== by 0x7F6331: c_common_parse_file() (c-opts.cc:1301) ==974681== by 0xCFF87D: compile_file() (toplev.cc:446) ==974681== by 0x704D23: toplev::main(int, char**) (toplev.cc:2306) ==974681== by 0x7064BA: main (main.cc:39) 2023-12-13 Jakub Jelinek PR middle-end/112953 * attribs.cc (free_attr_data): Use delete x rather than delete[] x. --- gcc/attribs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/attribs.cc b/gcc/attribs.cc index 776655dde00e..4e313d38f0f0 100644 --- a/gcc/attribs.cc +++ b/gcc/attribs.cc @@ -315,7 +315,7 @@ void free_attr_data () { for (auto x : ignored_attributes_table) - delete[] x; + delete x; ignored_attributes_table.release (); } From 07efd5668537892e1c07264455bfd96f1a99a130 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 13 Dec 2023 11:36:27 +0100 Subject: [PATCH 260/311] lower-bitint: Fix lowering of non-_BitInt to _BitInt cast merged with some wider cast [PR112940] The following testcase ICEs, because a PHI argument from latch edge uses a SSA_NAME set only in a conditionally executed block inside of the loop. This happens when we have some outer cast which lowers its operand several times, under some condition with variable index, under different condition with some constant index, otherwise something else, and then there is an inner cast from non-_BitInt integer (or small/middle one). Such cast in certain conditions is emitted by initializing some SSA_NAMEs in the initialization statements before loops (say for casts from <= limb size precision by computing a SSA_NAME for the first limb and then extension of it for the later limbs) and uses the prepare_data_in_out function to create a PHI node. 
Such function is passed the value (constant or SSA_NAME) to use in the PHI argument from the pre-header edge, but for the latch edge it always created a new SSA_NAME and then caller emitted in the following 3 spots an extra assignment to set that SSA_NAME to whatever value we want from the latch edge. In all these 3 cases the argument from the latch edge is known already before the loop though, either constant or SSA_NAME computed in pre-header as well. But the need to emit an assignment combined with the handle_operand done in a conditional basic block results in the SSA verification failure. The following patch fixes it by extending the prpare_data_in_out method, so that when the latch edge argument is known before (constant or computed in pre-header), we can just use it directly and avoid the extra assignment that would normally be hopefully optimized away later to what we now emit directly. 2023-12-13 Jakub Jelinek PR tree-optimization/112940 * gimple-lower-bitint.cc (struct bitint_large_huge): Add another argument to prepare_data_in_out method defaulted to NULL_TREE. (bitint_large_huge::handle_operand): Pass another argument to prepare_data_in_out instead of emitting an assignment to set it. (bitint_large_huge::prepare_data_in_out): Add VAL_OUT argument. If non-NULL, use it as PHI argument instead of creating a new SSA_NAME. (bitint_large_huge::handle_cast): Pass rext as another argument to 2 prepare_data_in_out calls instead of emitting assignments to set them. * gcc.dg/bitint-53.c: New test. 
--- gcc/gimple-lower-bitint.cc | 26 +++++++++++--------------- gcc/testsuite/gcc.dg/bitint-53.c | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 15 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/bitint-53.c diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index 65a7bbe3fa9a..d52e4c44f065 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -405,7 +405,7 @@ struct bitint_large_huge profile_probability, profile_probability, edge &, edge &, edge &); tree handle_operand (tree, tree); - tree prepare_data_in_out (tree, tree, tree *); + tree prepare_data_in_out (tree, tree, tree *, tree = NULL_TREE); tree add_cast (tree, tree); tree handle_plus_minus (tree_code, tree, tree, tree); tree handle_lshift (tree, tree, tree); @@ -873,11 +873,8 @@ bitint_large_huge::handle_operand (tree op, tree idx) gcc_assert (m_first); m_data.pop (); m_data.pop (); - prepare_data_in_out (fold_convert (m_limb_type, op), idx, &out); - g = gimple_build_assign (m_data[m_data_cnt + 1], - build_int_cst (m_limb_type, ext)); - insert_before (g); - m_data[m_data_cnt + 1] = gimple_assign_rhs1 (g); + prepare_data_in_out (fold_convert (m_limb_type, op), idx, &out, + build_int_cst (m_limb_type, ext)); } else if (min_prec > prec - rem - 2 * limb_prec) { @@ -1009,10 +1006,13 @@ bitint_large_huge::handle_operand (tree op, tree idx) /* Helper method, add a PHI node with VAL from preheader edge if inside of a loop and m_first. Keep state in a pair of m_data - elements. */ + elements. If VAL_OUT is non-NULL, use that as PHI argument from + the latch edge, otherwise create a new SSA_NAME for it and let + caller initialize it. 
*/ tree -bitint_large_huge::prepare_data_in_out (tree val, tree idx, tree *data_out) +bitint_large_huge::prepare_data_in_out (tree val, tree idx, tree *data_out, + tree val_out) { if (!m_first) { @@ -1035,7 +1035,7 @@ bitint_large_huge::prepare_data_in_out (tree val, tree idx, tree *data_out) if (e1 == e2) e2 = EDGE_PRED (m_bb, 1); add_phi_arg (phi, val, e1, UNKNOWN_LOCATION); - tree out = make_ssa_name (TREE_TYPE (val)); + tree out = val_out ? val_out : make_ssa_name (TREE_TYPE (val)); add_phi_arg (phi, out, e2, UNKNOWN_LOCATION); m_data.safe_push (in); m_data.safe_push (out); @@ -1542,14 +1542,10 @@ bitint_large_huge::handle_cast (tree lhs_type, tree rhs1, tree idx) if (m_first) { tree out1, out2; - prepare_data_in_out (r1, idx, &out1); - g = gimple_build_assign (m_data[m_data_cnt + 1], rext); - insert_before (g); + prepare_data_in_out (r1, idx, &out1, rext); if (TYPE_PRECISION (rhs_type) > limb_prec) { - prepare_data_in_out (r2, idx, &out2); - g = gimple_build_assign (m_data[m_data_cnt + 3], rext); - insert_before (g); + prepare_data_in_out (r2, idx, &out2, rext); m_data.pop (); t = m_data.pop (); m_data[m_data_cnt + 1] = t; diff --git a/gcc/testsuite/gcc.dg/bitint-53.c b/gcc/testsuite/gcc.dg/bitint-53.c new file mode 100644 index 000000000000..2474660c4aa7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-53.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/112940 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +#if __BITINT_MAXWIDTH__ >= 1025 +_BitInt (1025) b; +#endif + +void +foo (long x) +{ +#if __BITINT_MAXWIDTH__ >= 1025 + b += (unsigned _BitInt (255)) x; +#else + (void) x; +#endif +} From f6d787c231905063dc3b55ce7028e348b74719be Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Wed, 13 Dec 2023 17:21:07 +0800 Subject: [PATCH 261/311] Middle-end: Adjust decrement IV style partial vectorization COST model Hi, before this patch, a simple conversion case for RVV codegen: foo: ble a2,zero,.L8 addiw a5,a2,-1 li a4,6 bleu a5,a4,.L6 
srliw a3,a2,3 slli a3,a3,3 add a3,a3,a0 mv a5,a0 mv a4,a1 vsetivli zero,8,e16,m1,ta,ma .L4: vle8.v v2,0(a5) addi a5,a5,8 vzext.vf2 v1,v2 vse16.v v1,0(a4) addi a4,a4,16 bne a3,a5,.L4 andi a5,a2,-8 beq a2,a5,.L10 .L3: slli a4,a5,32 srli a4,a4,32 subw a2,a2,a5 slli a2,a2,32 slli a5,a4,1 srli a2,a2,32 add a0,a0,a4 add a1,a1,a5 vsetvli zero,a2,e16,m1,ta,ma vle8.v v2,0(a0) vzext.vf2 v1,v2 vse16.v v1,0(a1) .L8: ret .L10: ret .L6: li a5,0 j .L3 This vectorization go through first loop: vsetivli zero,8,e16,m1,ta,ma .L4: vle8.v v2,0(a5) addi a5,a5,8 vzext.vf2 v1,v2 vse16.v v1,0(a4) addi a4,a4,16 bne a3,a5,.L4 Each iteration processes 8 elements. For a scalable vectorization with VLEN > 128 bits CPU, it's ok when VLEN = 128. But, as long as VLEN > 128 bits, it will waste the CPU resources. That is, e.g. VLEN = 256bits. only half of the vector units are working and another half is idle. After investigation, I realize that I forgot to adjust COST for SELECT_VL. So, adjust COST for SELECT_VL styple length vectorization. We adjust COST from 3 to 2. since after this patch: foo: ble a2,zero,.L5 .L3: vsetvli a5,a2,e16,m1,ta,ma -----> SELECT_VL cost. vle8.v v2,0(a0) slli a4,a5,1 -----> additional shift of outcome SELECT_VL for memory address calculation. vzext.vf2 v1,v2 sub a2,a2,a5 vse16.v v1,0(a1) add a0,a0,a5 add a1,a1,a4 bne a2,zero,.L3 .L5: ret This patch is a simple fix that I previous forgot. Ok for trunk ? If not, I am going to adjust cost in backend cost model. PR target/111317 gcc/ChangeLog: * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Adjust for COST for decrement IV. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr111317.c: New test. 
--- .../gcc.dg/vect/costmodel/riscv/rvv/pr111317.c | 12 ++++++++++++ gcc/tree-vect-loop.cc | 17 ++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111317.c diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111317.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111317.c new file mode 100644 index 000000000000..d4bea242a9a0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111317.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m1" } */ + +void +foo (char *__restrict a, short *__restrict b, int n) +{ + for (int i = 0; i < n; i++) + b[i] = (short) a[i]; +} + +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*[a-x0-9]+,\s*e16,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 6261cd1be1dd..19e38b8637b0 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -4870,10 +4870,21 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, if (partial_load_store_bias != 0) body_stmts += 1; - /* Each may need two MINs and one MINUS to update lengths in body - for next iteration. */ + unsigned int length_update_cost = 0; + if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo)) + /* For decrement IV style, we use a single SELECT_VL since + beginning to calculate the number of elements need to be + processed in current iteration, and a SHIFT operation to + compute the next memory address instead of adding vectorization + factor. */ + length_update_cost = 2; + else + /* For increment IV stype, Each may need two MINs and one MINUS to + update lengths in body for next iteration. 
*/ + length_update_cost = 3; + if (need_iterate_p) - body_stmts += 3 * num_vectors; + body_stmts += length_update_cost * num_vectors; } (void) add_stmt_cost (target_cost_data, prologue_stmts, From 90be333ad515b6126ce6dfe37aff6eb6b442b4ed Mon Sep 17 00:00:00 2001 From: "demin.han" Date: Wed, 13 Dec 2023 19:12:03 +0800 Subject: [PATCH 262/311] RISC-V: Fix dynamic lmul tests depended on abi Some toolchain configs would report: fatal error: gnu/stubs-ilp32.h: No such file or directory Fix method suggested by Juzhe-Zhong gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h: New file. Signed-off-by: demin.han Signed-off-by: demin.han --- .../gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h new file mode 100644 index 000000000000..fbb4858fc867 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h @@ -0,0 +1,11 @@ +/* Wrapper of riscv_vector.h, prevent riscv_vector.h including stdint.h from + C library, that might cause problem on testing RV32 related testcase when + we disable multilib. */ +#ifndef _RISCV_VECTOR_WRAP_H + +#define _GCC_WRAP_STDINT_H +#include "stdint-gcc.h" +#include_next +#define _RISCV_VECTOR_WRAP_H + +#endif From ef21ae5c45f3b79a36fadc1cb5723c095e2965ad Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Wed, 13 Dec 2023 13:48:11 +0800 Subject: [PATCH 263/311] RISC-V: Postpone full available optimization [VSETVL PASS] Fix VSETVL BUG that AVL is polluted .L15: li a3,9 lui a4,%hi(s) sw a3,%lo(j)(t2) sh a5,%lo(s)(a4) <--a4 is hold the address of s beq t0,zero,.L42 sw t5,8(t4) vsetvli zero,a4,e8,m8,ta,ma <<--- a4 as avl Actually, this vsetvl is redundant. The root cause we include full available optimization in LCM local data computation. 
full available optimization should be after LCM computation. PR target/112929 PR target/112988 gcc/ChangeLog: * config/riscv/riscv-vsetvl.cc (pre_vsetvl::compute_lcm_local_properties): Remove full available. (pre_vsetvl::pre_global_vsetvl_info): Add full available optimization. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/pr112929.c: New test. * gcc.target/riscv/rvv/vsetvl/pr112988.c: New test. --- gcc/config/riscv/riscv-vsetvl.cc | 14 +++- .../gcc.target/riscv/rvv/vsetvl/pr112929.c | 58 ++++++++++++++++ .../gcc.target/riscv/rvv/vsetvl/pr112988.c | 69 +++++++++++++++++++ 3 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index ed5a2b58ab04..6af8d8429ab4 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -2723,8 +2723,7 @@ pre_vsetvl::compute_lcm_local_properties () vsetvl_info &header_info = block_info.get_entry_info (); vsetvl_info &footer_info = block_info.get_exit_info (); - if (header_info.valid_p () - && (anticipated_exp_p (header_info) || block_info.full_available)) + if (header_info.valid_p () && anticipated_exp_p (header_info)) bitmap_set_bit (m_antloc[bb_index], get_expr_index (m_exprs, header_info)); @@ -3224,6 +3223,17 @@ pre_vsetvl::pre_global_vsetvl_info () info.set_delete (); } + /* Remove vsetvl infos if all precessors are available to the block. 
*/ + for (const bb_info *bb : crtl->ssa->bbs ()) + { + vsetvl_block_info &block_info = get_block_info (bb); + if (block_info.empty_p () || !block_info.full_available) + continue; + + vsetvl_info &info = block_info.get_entry_info (); + info.set_delete (); + } + for (const bb_info *bb : crtl->ssa->bbs ()) { vsetvl_block_info &block_info = get_block_info (bb); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c new file mode 100644 index 000000000000..0435e5dbc564 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c @@ -0,0 +1,58 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +int printf(char *, ...); +int a, l, i, p, q, t, n, o; +int *volatile c; +static int j; +static struct pack_1_struct d; +long e; +char m = 5; +short s; + +#pragma pack(1) +struct pack_1_struct { + long c; + int d; + int e; + int f; + int g; + int h; + int i; +} h, r = {1}, *f = &h, *volatile g; + +void add_em_up(int count, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, count); + __builtin_va_end(ap); +} + +int main() { + int u; + j = 0; + + for (; j < 9; ++j) { + u = ++t ? 
a : 0; + if (u) { + int *v = &d.d; + *v = g || e; + *c = 0; + *f = h; + } + s = l && c; + o = i; + d.f || (p = 0); + q |= n; + } + + r = *f; + + add_em_up(1, 1); + + printf("%d\n", m); +} + +/* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-not {vsetivli} } } */ +/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c new file mode 100644 index 000000000000..6f983ef8bb53 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +int a = 0; +int p, q, r, x = 230; +short d; +int e[256]; +static struct f w; +int *c = &r; + +short y(short z) { + return z * d; +} + +#pragma pack(1) +struct f { + int g; + short h; + int j; + char k; + char l; + long m; + long n; + int o; +} s = {1}, v, t, *u = &v, *b = &s; + +void add_em_up(int count, ...) 
{ + __builtin_va_list ap; + __builtin_va_start(ap, count); + __builtin_va_end(ap); +} + +int main() { + int i = 0; + for (; i < 256; i++) + e[i] = i; + + p = 0; + for (; p <= 0; p++) { + *c = 4; + *u = t; + x |= y(6 >= q); + } + + *b = w; + + add_em_up(1, 1); + + if (a != 0) + return 1; + if (q != 0) + return 2; + if (p != 1) + return 3; + if (r != 4) + return 4; + if (x != 0xE6) + return 5; + if (d != 0) + return 6; + + return 0; +} + +/* { dg-final { scan-assembler-times {vsetvli} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli} 1 } } */ +/* { dg-final { scan-assembler-times {vsetivli\tzero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {vsetvli\tzero,\s*[a-x0-9]+,\s*e8,\s*m8,\s*t[au],\s*m[au]} 1 } } */ +/* { dg-final { scan-assembler-times {li\t[a-x0-9]+,\s*32} 1 } } */ From eb6c2bcb673c1de622395f0b4c30b65b776ad55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arsen=20Arsenovi=C4=87?= Date: Sat, 2 Dec 2023 16:37:17 +0100 Subject: [PATCH 264/311] download_prerequisites: add --only-gettext contrib/ChangeLog: * download_prerequisites : Parse --only-gettext. (echo_archives): Check only_gettext and stop early if true. (helptext): Document --only-gettext. 
--- contrib/download_prerequisites | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites index 9568091c0dba..30ff0cc9491a 100755 --- a/contrib/download_prerequisites +++ b/contrib/download_prerequisites @@ -36,16 +36,18 @@ gettext='gettext-0.22.tar.gz' base_url='http://gcc.gnu.org/pub/gcc/infrastructure/' echo_archives() { + echo "${gettext}" + if "${only_gettext}"; then return; fi echo "${gmp}" echo "${mpfr}" echo "${mpc}" - echo "${gettext}" if [ ${graphite} -gt 0 ]; then echo "${isl}"; fi } graphite=1 verify=1 force=0 +only_gettext=false OS=$(uname) if type wget > /dev/null ; then @@ -74,6 +76,7 @@ The following options are available: --no-verify don't verify package integrity --sha512 use SHA512 checksum to verify package integrity (default) --md5 use MD5 checksum to verify package integrity + --only-gettext inhibit downloading any package but gettext --help show this text and exit --version show version information and exit " @@ -159,6 +162,9 @@ do chksum_extension='md5' verify=1 ;; + --only-gettext) + only_gettext=true + ;; -*) die "unknown option: ${arg}" ;; From 36cb7be477885a2464fe9a70467278c7debd5e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arsen=20Arsenovi=C4=87?= Date: Thu, 16 Nov 2023 23:50:30 +0100 Subject: [PATCH 265/311] gettext: disable install, docs targets, libasprintf, threads This fixes issues reported by David Edelsohn , and by Eric Gallager . ChangeLog: * Makefile.def (gettext): Disable (via missing) {install-,}{pdf,html,info,dvi} and TAGS targets. Set no_install to true. Add --disable-threads --disable-libasprintf. Drop the lib_path (as there are no shared libs). * Makefile.in: Regenerate. 
--- Makefile.def | 13 +++- Makefile.in | 202 ++++++++------------------------------------------- 2 files changed, 40 insertions(+), 175 deletions(-) diff --git a/Makefile.def b/Makefile.def index 792f81447e1b..ba89d46b2495 100644 --- a/Makefile.def +++ b/Makefile.def @@ -80,8 +80,17 @@ host_modules= { module= gettext; bootstrap=true; no_install=true; // need it in some configuratons, which is determined via nontrivial tests. // Always enabling pic seems to make sense for something tied to // user-facing output. - extra_configure_flags='--disable-shared --disable-java --disable-csharp --with-pic'; - lib_path=intl/.libs; }; + extra_configure_flags='--disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf'; + missing= pdf; + missing= html; + missing= info; + missing= dvi; + missing= install-pdf; + missing= install-html; + missing= install-info; + missing= install-dvi; + missing= TAGS; + no_install= true; }; host_modules= { module= tcl; missing=mostlyclean; }; host_modules= { module= itcl; }; diff --git a/Makefile.in b/Makefile.in index da2344b3f3dc..3bd7d37e9605 100644 --- a/Makefile.in +++ b/Makefile.in @@ -768,7 +768,7 @@ TARGET_LIB_PATH_libatomic = $$r/$(TARGET_SUBDIR)/libatomic/.libs: # This is the list of directories that may be needed in RPATH_ENVVAR # so that programs built for the host machine work. 
-HOST_LIB_PATH = $(HOST_LIB_PATH_gmp)$(HOST_LIB_PATH_mpfr)$(HOST_LIB_PATH_mpc)$(HOST_LIB_PATH_isl)$(HOST_LIB_PATH_gettext) +HOST_LIB_PATH = $(HOST_LIB_PATH_gmp)$(HOST_LIB_PATH_mpfr)$(HOST_LIB_PATH_mpc)$(HOST_LIB_PATH_isl) # Define HOST_LIB_PATH_gcc here, for the sake of TARGET_LIB_PATH, ouch @if gcc @@ -796,11 +796,6 @@ HOST_LIB_PATH_isl = \ $$r/$(HOST_SUBDIR)/isl/.libs:$$r/$(HOST_SUBDIR)/prev-isl/.libs: @endif isl -@if gettext -HOST_LIB_PATH_gettext = \ - $$r/$(HOST_SUBDIR)/gettext/intl/.libs:$$r/$(HOST_SUBDIR)/prev-gettext/intl/.libs: -@endif gettext - CXX_FOR_TARGET_FLAG_TO_PASS = \ "CXX_FOR_TARGET=$(CXX_FOR_TARGET)" @@ -19827,7 +19822,7 @@ configure-gettext: $$s/$$module_srcdir/configure \ --srcdir=$${topdir}/$$module_srcdir \ $(HOST_CONFIGARGS) --build=${build_alias} --host=${host_alias} \ - --target=${target_alias} --disable-shared --disable-java --disable-csharp --with-pic \ + --target=${target_alias} --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf \ || exit 1 @endif gettext @@ -19863,7 +19858,7 @@ configure-stage1-gettext: --target=${target_alias} \ \ $(STAGE1_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stage2-gettext maybe-configure-stage2-gettext @@ -19897,7 +19892,7 @@ configure-stage2-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE2_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stage3-gettext maybe-configure-stage3-gettext @@ -19931,7 +19926,7 @@ configure-stage3-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE3_CONFIGURE_FLAGS) \ - --disable-shared --disable-java 
--disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stage4-gettext maybe-configure-stage4-gettext @@ -19965,7 +19960,7 @@ configure-stage4-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGE4_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stageprofile-gettext maybe-configure-stageprofile-gettext @@ -19999,7 +19994,7 @@ configure-stageprofile-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEprofile_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stagetrain-gettext maybe-configure-stagetrain-gettext @@ -20033,7 +20028,7 @@ configure-stagetrain-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEtrain_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stagefeedback-gettext maybe-configure-stagefeedback-gettext @@ -20067,7 +20062,7 @@ configure-stagefeedback-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEfeedback_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stageautoprofile-gettext maybe-configure-stageautoprofile-gettext @@ -20101,7 +20096,7 @@ configure-stageautoprofile-gettext: --target=${target_alias} \ 
--with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautoprofile_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap .PHONY: configure-stageautofeedback-gettext maybe-configure-stageautofeedback-gettext @@ -20135,7 +20130,7 @@ configure-stageautofeedback-gettext: --target=${target_alias} \ --with-build-libsubdir=$(HOST_SUBDIR) \ $(STAGEautofeedback_CONFIGURE_FLAGS) \ - --disable-shared --disable-java --disable-csharp --with-pic + --disable-shared --disable-threads --disable-java --disable-csharp --with-pic --disable-libasprintf @endif gettext-bootstrap @@ -20592,23 +20587,8 @@ maybe-info-gettext: @if gettext maybe-info-gettext: info-gettext -info-gettext: \ - configure-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing info in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - info) \ - || exit 1 +# gettext doesn't support info. 
+info-gettext: @endif gettext @@ -20617,23 +20597,8 @@ maybe-dvi-gettext: @if gettext maybe-dvi-gettext: dvi-gettext -dvi-gettext: \ - configure-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing dvi in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - dvi) \ - || exit 1 +# gettext doesn't support dvi. +dvi-gettext: @endif gettext @@ -20642,23 +20607,8 @@ maybe-pdf-gettext: @if gettext maybe-pdf-gettext: pdf-gettext -pdf-gettext: \ - configure-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing pdf in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - pdf) \ - || exit 1 +# gettext doesn't support pdf. 
+pdf-gettext: @endif gettext @@ -20667,23 +20617,8 @@ maybe-html-gettext: @if gettext maybe-html-gettext: html-gettext -html-gettext: \ - configure-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing html in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - html) \ - || exit 1 +# gettext doesn't support html. +html-gettext: @endif gettext @@ -20692,23 +20627,8 @@ maybe-TAGS-gettext: @if gettext maybe-TAGS-gettext: TAGS-gettext -TAGS-gettext: \ - configure-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing TAGS in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - TAGS) \ - || exit 1 +# gettext doesn't support TAGS. 
+TAGS-gettext: @endif gettext @@ -20717,24 +20637,8 @@ maybe-install-info-gettext: @if gettext maybe-install-info-gettext: install-info-gettext -install-info-gettext: \ - configure-gettext \ - info-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing install-info in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-info) \ - || exit 1 +# gettext doesn't support install-info. +install-info-gettext: @endif gettext @@ -20743,24 +20647,8 @@ maybe-install-dvi-gettext: @if gettext maybe-install-dvi-gettext: install-dvi-gettext -install-dvi-gettext: \ - configure-gettext \ - dvi-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing install-dvi in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-dvi) \ - || exit 1 +# gettext doesn't support install-dvi. 
+install-dvi-gettext: @endif gettext @@ -20769,24 +20657,8 @@ maybe-install-pdf-gettext: @if gettext maybe-install-pdf-gettext: install-pdf-gettext -install-pdf-gettext: \ - configure-gettext \ - pdf-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing install-pdf in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-pdf) \ - || exit 1 +# gettext doesn't support install-pdf. +install-pdf-gettext: @endif gettext @@ -20795,24 +20667,8 @@ maybe-install-html-gettext: @if gettext maybe-install-html-gettext: install-html-gettext -install-html-gettext: \ - configure-gettext \ - html-gettext - @[ -f ./gettext/Makefile ] || exit 0; \ - r=`${PWD_COMMAND}`; export r; \ - s=`cd $(srcdir); ${PWD_COMMAND}`; export s; \ - $(HOST_EXPORTS) \ - for flag in $(EXTRA_HOST_FLAGS) ; do \ - eval `echo "$$flag" | sed -e "s|^\([^=]*\)=\(.*\)|\1='\2'; export \1|"`; \ - done; \ - echo "Doing install-html in gettext"; \ - (cd $(HOST_SUBDIR)/gettext && \ - $(MAKE) $(BASE_FLAGS_TO_PASS) "AR=$${AR}" "AS=$${AS}" \ - "CC=$${CC}" "CXX=$${CXX}" "LD=$${LD}" "NM=$${NM}" \ - "RANLIB=$${RANLIB}" \ - "DLLTOOL=$${DLLTOOL}" "WINDRES=$${WINDRES}" "WINDMC=$${WINDMC}" \ - install-html) \ - || exit 1 +# gettext doesn't support install-html. 
+install-html-gettext: @endif gettext From ad537ccd525fd3af759febd6c0936f82de808a70 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Wed, 13 Dec 2023 12:00:45 +0000 Subject: [PATCH 266/311] libstdc++: Fix regression in std::format output of %Y for negative years The change in r14-6468-ga01462ae8bafa8 was only supposed to apply to %C formats, not %Y. libstdc++-v3/ChangeLog: * include/bits/chrono_io.h (__formatter_chrono::_M_C_y_Y): Do not round century down for %Y formats. --- libstdc++-v3/include/bits/chrono_io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/chrono_io.h b/libstdc++-v3/include/bits/chrono_io.h index b63b8592ebac..bcd76e4ab7b0 100644 --- a/libstdc++-v3/include/bits/chrono_io.h +++ b/libstdc++-v3/include/bits/chrono_io.h @@ -825,7 +825,7 @@ namespace __format { __s.assign(1, _S_plus_minus[1]); // For floored division -123//100 is -2 and -100//100 is -1 - if ((__ci * 100) != __yi) + if (__conv == 'C' && (__ci * 100) != __yi) ++__ci; } if (__ci >= 100) [[unlikely]] From 6a737ec24a98b3aa13605988c54c827f1bdae308 Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Wed, 13 Dec 2023 09:12:47 +0000 Subject: [PATCH 267/311] RISC-V:Add crypto vector implied ISA info. Due to the crypto vector entension is depend on the Vector extension, so add the implied ISA info with the corresponding crypto vector extension. gcc/ChangeLog: * common/config/riscv/riscv-common.cc: Modify implied ISA info. * config/riscv/arch-canonicalize: Add crypto vector implied info. 
--- gcc/common/config/riscv/riscv-common.cc | 9 +++++++++ gcc/config/riscv/arch-canonicalize | 21 +++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 4d5a2f874a25..769875981433 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -145,6 +145,15 @@ static const riscv_implied_info_t riscv_implied_info[] = {"zvksc", "zvbc"}, {"zvksg", "zvks"}, {"zvksg", "zvkg"}, + {"zvbb", "zvkb"}, + {"zvbc", "zve64x"}, + {"zvkb", "zve32x"}, + {"zvkg", "zve32x"}, + {"zvkned", "zve32x"}, + {"zvknha", "zve32x"}, + {"zvknhb", "zve64x"}, + {"zvksed", "zve32x"}, + {"zvksh", "zve32x"}, {"zfh", "zfhmin"}, {"zfhmin", "f"}, diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize index ea2f67a09445..a8f47a1752b0 100755 --- a/gcc/config/riscv/arch-canonicalize +++ b/gcc/config/riscv/arch-canonicalize @@ -69,12 +69,21 @@ IMPLIED_EXT = { "zvl32768b" : ["zvl16384b"], "zvl65536b" : ["zvl32768b"], - "zvkn" : ["zvkned", "zvknhb", "zvbb", "zvkt"], - "zvknc" : ["zvkn", "zvbc"], - "zvkng" : ["zvkn", "zvkg"], - "zvks" : ["zvksed", "zvksh", "zvbb", "zvkt"], - "zvksc" : ["zvks", "zvbc"], - "zvksg" : ["zvks", "zvkg"], + "zvkn" : ["zvkned", "zvknhb", "zvkb", "zvkt"], + "zvknc" : ["zvkn", "zvbc"], + "zvkng" : ["zvkn", "zvkg"], + "zvks" : ["zvksed", "zvksh", "zvkb", "zvkt"], + "zvksc" : ["zvks", "zvbc"], + "zvksg" : ["zvks", "zvkg"], + "zvbb" : ["zvkb"], + "zvbc" : ["zve64x"], + "zvkb" : ["zve32x"], + "zvkg" : ["zve32x"], + "zvkned" : ["zve32x"], + "zvknha" : ["zve32x"], + "zvknhb" : ["zve64x"], + "zvksed" : ["zve32x"], + "zvksh" : ["zve32x"], } def arch_canonicalize(arch, isa_spec): From ff8d0ce17fb585a29a83349acbc67b2dd3556629 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Wed, 13 Dec 2023 13:36:44 +0000 Subject: [PATCH 268/311] ARC: Add *extvsi_n_0 define_insn_and_split for PR 110717. 
This patch improves the code generated for bitfield sign extensions on ARC cpus without a barrel shifter. Compiling the following test case: int foo(int x) { return (x<<27)>>27; } with -O2 -mcpu=em, generates two loops: foo: mov lp_count,27 lp 2f add r0,r0,r0 nop 2: # end single insn loop mov lp_count,27 lp 2f asr r0,r0 nop 2: # end single insn loop j_s [blink] and the closely related test case: struct S { int a : 5; }; int bar (struct S *p) { return p->a; } generates the slightly better: bar: ldb_s r0,[r0] mov_s r2,0 ;3 add3 r0,r2,r0 sexb_s r0,r0 asr_s r0,r0 asr_s r0,r0 j_s.d [blink] asr_s r0,r0 which uses 6 instructions to perform this particular sign extension. It turns out that sign extensions can always be implemented using at most three instructions on ARC (without a barrel shifter) using the idiom ((x&mask)^msb)-msb [as described in section "2-5 Sign Extension" of Henry Warren's book "Hacker's Delight"]. Using this, the sign extensions above on ARC's EM both become: bmsk_s r0,r0,4 xor r0,r0,16 sub r0,r0,16 which takes about 3 cycles, compared to the ~112 cycles for the loops in foo. 2023-12-13 Roger Sayle Jeff Law gcc/ChangeLog * config/arc/arc.md (*extvsi_n_0): New define_insn_and_split to implement SImode sign extract using a AND, XOR and MINUS sequence. gcc/testsuite/ChangeLog * gcc.target/arc/extvsi-1.c: New test case. * gcc.target/arc/extvsi-2.c: Likewise. 
--- gcc/config/arc/arc.md | 20 ++++++++++++++++++++ gcc/testsuite/gcc.target/arc/extvsi-1.c | 15 +++++++++++++++ gcc/testsuite/gcc.target/arc/extvsi-2.c | 12 ++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arc/extvsi-1.c create mode 100644 gcc/testsuite/gcc.target/arc/extvsi-2.c diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index bf9f88eff047..d980876eff8f 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -6127,6 +6127,26 @@ archs4x, archs4xd" "" [(set_attr "length" "8")]) +(define_insn_and_split "*extvsi_n_0" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extract:SI (match_operand:SI 1 "register_operand" "0") + (match_operand:QI 2 "const_int_operand") + (const_int 0)))] + "!TARGET_BARREL_SHIFTER + && IN_RANGE (INTVAL (operands[2]), 2, + (optimize_insn_for_size_p () ? 28 : 30))" + "#" + "&& 1" +[(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3))) + (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4))) + (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4)))] +{ + int tmp = INTVAL (operands[2]); + operands[3] = GEN_INT (~(HOST_WIDE_INT_M1U << tmp)); + operands[4] = GEN_INT (HOST_WIDE_INT_1U << (tmp - 1)); +} + [(set_attr "length" "14")]) + (define_insn_and_split "rotlsi3_cnt1" [(set (match_operand:SI 0 "dest_reg_operand" "=r") (rotate:SI (match_operand:SI 1 "register_operand" "r") diff --git a/gcc/testsuite/gcc.target/arc/extvsi-1.c b/gcc/testsuite/gcc.target/arc/extvsi-1.c new file mode 100644 index 000000000000..5ac6feafae30 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/extvsi-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=em" } */ +struct S { int a : 5; }; + +int foo (struct S *p) +{ + return p->a; +} + +/* { dg-final { scan-assembler "msk_s\\s+r0,r0,4" } } */ +/* { dg-final { scan-assembler "xor\\s+r0,r0,16" } } */ +/* { dg-final { scan-assembler "sub\\s+r0,r0,16" } } */ +/* { dg-final { scan-assembler-not "add3\\s+r0,r2,r0" } } */ +/* 
{ dg-final { scan-assembler-not "sext_s\\s+r0,r0" } } */ +/* { dg-final { scan-assembler-not "asr_s\\s+r0,r0" } } */ diff --git a/gcc/testsuite/gcc.target/arc/extvsi-2.c b/gcc/testsuite/gcc.target/arc/extvsi-2.c new file mode 100644 index 000000000000..953ea6a8b243 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/extvsi-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcpu=em" } */ + +int foo(int x) +{ + return (x<<27)>>27; +} + +/* { dg-final { scan-assembler "msk_s\\s+r0,r0,4" } } */ +/* { dg-final { scan-assembler "xor\\s+r0,r0,16" } } */ +/* { dg-final { scan-assembler "sub\\s+r0,r0,16" } } */ +/* { dg-final { scan-assembler-not "lp\\s+2f" } } */ From f6f76583fcf381b3a778e196f11634e458d3b36d Mon Sep 17 00:00:00 2001 From: Andrew Carlotti Date: Wed, 13 Dec 2023 12:34:19 +0000 Subject: [PATCH 269/311] aarch64 testsuite: Only run aarch64-ssve tests once gcc/testsuite/ChangeLog: * g++.target/aarch64/sve/aarch64-ssve.exp: --- gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp index d6a5a561a33e..98242a97b46e 100644 --- a/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp +++ b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp @@ -27,6 +27,10 @@ if {![istarget aarch64*-*-*] } { load_lib gcc-defs.exp +if ![gcc_parallel_test_run_p aarch64-ssve] { + return +} + gcc_parallel_test_enable 0 # Code shared by all tests. From d702387b1b765f85e19961a0b2a29df14df89bfd Mon Sep 17 00:00:00 2001 From: Pan Li Date: Wed, 13 Dec 2023 21:46:14 +0800 Subject: [PATCH 270/311] RISC-V: Refine test cases for both PR112929 and PR112988 Refine the test cases for: * Name convention. * Add run case. These test cases used to cause out-of-bounds writes to the stack and therefore showed unreliable behavior. Depending on the execution environment they can either pass or fail. 
As of now, with the latest QEMU version, they will pass even without the underlying issue fixed. As the test case is known to have caused the problem before we keep it as a run test case for future reference. PR target/112929 PR target/112988 gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/vsetvl/pr112929.c: Moved to... * gcc.target/riscv/rvv/vsetvl/pr112929-1.c: ...here. * gcc.target/riscv/rvv/vsetvl/pr112988.c: Moved to... * gcc.target/riscv/rvv/vsetvl/pr112988-1.c: ...here. * gcc.target/riscv/rvv/vsetvl/pr112929-2.c: New test. * gcc.target/riscv/rvv/vsetvl/pr112988-2.c: New test. Signed-off-by: Pan Li --- .../rvv/vsetvl/{pr112929.c => pr112929-1.c} | 0 .../gcc.target/riscv/rvv/vsetvl/pr112929-2.c | 57 +++++++++++++++++++ .../rvv/vsetvl/{pr112988.c => pr112988-1.c} | 0 .../gcc.target/riscv/rvv/vsetvl/pr112988-2.c | 53 +++++++++++++++++ 4 files changed, 110 insertions(+) rename gcc/testsuite/gcc.target/riscv/rvv/vsetvl/{pr112929.c => pr112929-1.c} (100%) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-2.c rename gcc/testsuite/gcc.target/riscv/rvv/vsetvl/{pr112988.c => pr112988-1.c} (100%) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-2.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c similarity index 100% rename from gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929.c rename to gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-1.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-2.c new file mode 100644 index 000000000000..f20220266391 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112929-2.c @@ -0,0 +1,57 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model" } */ + +int printf(char *, ...); +int a, l, i, p, q, t, n, o; +int *volatile c; +static int j; +static struct pack_1_struct d; +long e; +char 
m = 5; +short s; + +#pragma pack(1) +struct pack_1_struct { + long c; + int d; + int e; + int f; + int g; + int h; + int i; +} h, r = {1}, *f = &h, *volatile g; + +void add_em_up(int count, ...) { + __builtin_va_list ap; + __builtin_va_start(ap, count); + __builtin_va_end(ap); +} + +int main() { + int u; + j = 0; + + for (; j < 9; ++j) { + u = ++t ? a : 0; + if (u) { + int *v = &d.d; + *v = g || e; + *c = 0; + *f = h; + } + s = l && c; + o = i; + d.f || (p = 0); + q |= n; + } + + r = *f; + + add_em_up(1, 1); + printf("%d\n", m); + + if (m != 5) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c similarity index 100% rename from gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988.c rename to gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-1.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-2.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-2.c new file mode 100644 index 000000000000..e952b85b6309 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr112988-2.c @@ -0,0 +1,53 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 -O3 -fno-vect-cost-model" } */ + +int a = 0; +int p, q, r, x = 230; +short d; +int e[256]; +static struct f w; +int *c = &r; + +short y(short z) { + return z * d; +} + +#pragma pack(1) +struct f { + int g; + short h; + int j; + char k; + char l; + long m; + long n; + int o; +} s = {1}, v, t, *u = &v, *b = &s; + +void add_em_up(int count, ...) 
{ + __builtin_va_list ap; + __builtin_va_start(ap, count); + __builtin_va_end(ap); +} + +int main() { + int i = 0; + for (; i < 256; i++) + e[i] = i; + + p = 0; + for (; p <= 0; p++) { + *c = 4; + *u = t; + x |= y(6 >= q); + } + + *b = w; + + add_em_up(1, 1); + + if (a != 0 || q != 0 || p != 1 || r != 4 || x != 0xE6 || d != 0) + __builtin_abort (); + + return 0; +} From 373a85a82650d9938adb02a14686049cbcbe6892 Mon Sep 17 00:00:00 2001 From: Peter Bergner Date: Wed, 13 Dec 2023 08:13:34 -0600 Subject: [PATCH 271/311] testsuite: Add dg-do compile target c++17 directive for testcase [PR112822] Add dg-do compile target directive that limits the test case to being built on c++17 compiles or greater. 2023-12-13 Peter Bergner gcc/testsuite/ PR tree-optimization/112822 * g++.dg/pr112822.C: Add dg-do compile target c++17 directive. --- gcc/testsuite/g++.dg/pr112822.C | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/g++.dg/pr112822.C b/gcc/testsuite/g++.dg/pr112822.C index d1490405493a..a8557522467d 100644 --- a/gcc/testsuite/g++.dg/pr112822.C +++ b/gcc/testsuite/g++.dg/pr112822.C @@ -1,4 +1,5 @@ /* PR tree-optimization/112822 */ +/* { dg-do compile { target c++17 } } */ /* { dg-options "-w -O2" } */ /* Verify we do not ICE on the following noisy creduced test case. */ From 348874f0baac0f22c98ab11abbfa65fd172f6bdd Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Tue, 4 Jan 2022 12:22:01 +0000 Subject: [PATCH 272/311] libgomp: basic pinned memory on Linux Implement the OpenMP pinned memory trait on Linux hosts using the mlock syscall. Pinned allocations are performed using mmap, not malloc, to ensure that they can be unpinned safely when freed. This implementation will work OK for page-scale allocations, and finer-grained allocations will be implemented in a future patch. libgomp/ChangeLog: * allocator.c (MEMSPACE_ALLOC): Add PIN. (MEMSPACE_CALLOC): Add PIN. (MEMSPACE_REALLOC): Add PIN. (MEMSPACE_FREE): Add PIN. (MEMSPACE_VALIDATE): Add PIN. 
(omp_init_allocator): Use MEMSPACE_VALIDATE to check pinning. (omp_aligned_alloc): Add pinning to all MEMSPACE_* calls. (omp_aligned_calloc): Likewise. (omp_realloc): Likewise. (omp_free): Likewise. * config/linux/allocator.c: New file. * config/nvptx/allocator.c (MEMSPACE_ALLOC): Add PIN. (MEMSPACE_CALLOC): Add PIN. (MEMSPACE_REALLOC): Add PIN. (MEMSPACE_FREE): Add PIN. (MEMSPACE_VALIDATE): Add PIN. * config/gcn/allocator.c (MEMSPACE_ALLOC): Add PIN. (MEMSPACE_CALLOC): Add PIN. (MEMSPACE_REALLOC): Add PIN. (MEMSPACE_FREE): Add PIN. * libgomp.texi: Switch pinned trait to supported. (MEMSPACE_VALIDATE): Add PIN. * testsuite/libgomp.c/alloc-pinned-1.c: New test. * testsuite/libgomp.c/alloc-pinned-2.c: New test. * testsuite/libgomp.c/alloc-pinned-3.c: New test. * testsuite/libgomp.c/alloc-pinned-4.c: New test. Co-Authored-By: Thomas Schwinge --- libgomp/allocator.c | 65 +++++--- libgomp/config/gcn/allocator.c | 21 +-- libgomp/config/linux/allocator.c | 111 +++++++++++++ libgomp/config/nvptx/allocator.c | 21 +-- libgomp/libgomp.texi | 3 +- libgomp/testsuite/libgomp.c/alloc-pinned-1.c | 115 ++++++++++++++ libgomp/testsuite/libgomp.c/alloc-pinned-2.c | 120 ++++++++++++++ libgomp/testsuite/libgomp.c/alloc-pinned-3.c | 156 +++++++++++++++++++ libgomp/testsuite/libgomp.c/alloc-pinned-4.c | 150 ++++++++++++++++++ 9 files changed, 716 insertions(+), 46 deletions(-) create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-1.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-2.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-3.c create mode 100644 libgomp/testsuite/libgomp.c/alloc-pinned-4.c diff --git a/libgomp/allocator.c b/libgomp/allocator.c index 58a4c57f8835..e446267c2ad1 100644 --- a/libgomp/allocator.c +++ b/libgomp/allocator.c @@ -101,27 +101,30 @@ GOMP_is_alloc (void *ptr) #define omp_max_predefined_alloc omp_thread_mem_alloc /* These macros may be overridden in config//allocator.c. 
+ The defaults (no override) are to return NULL for pinned memory requests + and pass through to the regular OS calls otherwise. The following definitions (ab)use comma operators to avoid unused variable errors. */ #ifndef MEMSPACE_ALLOC -#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \ - malloc (((void)(MEMSPACE), (SIZE))) +#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \ + (PIN ? NULL : malloc (((void)(MEMSPACE), (SIZE)))) #endif #ifndef MEMSPACE_CALLOC -#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \ - calloc (1, (((void)(MEMSPACE), (SIZE)))) +#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \ + (PIN ? NULL : calloc (1, (((void)(MEMSPACE), (SIZE))))) #endif #ifndef MEMSPACE_REALLOC -#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \ - realloc (ADDR, (((void)(MEMSPACE), (void)(OLDSIZE), (SIZE)))) +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \ + ((PIN) || (OLDPIN) ? NULL \ + : realloc (ADDR, (((void)(MEMSPACE), (void)(OLDSIZE), (SIZE))))) #endif #ifndef MEMSPACE_FREE -#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \ - free (((void)(MEMSPACE), (void)(SIZE), (ADDR))) +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \ + if (!(PIN)) free (((void)(MEMSPACE), (void)(SIZE), (ADDR))) #endif #ifndef MEMSPACE_VALIDATE -#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS) \ - (((void)(MEMSPACE), (void)(ACCESS), 1)) +#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \ + (PIN ? 0 : ((void)(MEMSPACE), (void)(ACCESS), 1)) #endif /* Map the predefined allocators to the correct memory space. @@ -502,12 +505,8 @@ omp_init_allocator (omp_memspace_handle_t memspace, int ntraits, } #endif - /* No support for this so far. */ - if (data.pinned) - return omp_null_allocator; - /* Reject unsupported memory spaces.
*/ - if (!MEMSPACE_VALIDATE (data.memspace, data.access)) + if (!MEMSPACE_VALIDATE (data.memspace, data.access, data.pinned)) return omp_null_allocator; ret = gomp_malloc (sizeof (struct omp_allocator_data)); @@ -649,7 +648,8 @@ retry: } else #endif - ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size); + ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size, + allocator_data->pinned); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -686,7 +686,8 @@ retry: memspace = (allocator_data ? allocator_data->memspace : predefined_alloc_mapping[allocator]); - ptr = MEMSPACE_ALLOC (memspace, new_size); + ptr = MEMSPACE_ALLOC (memspace, new_size, + allocator_data && allocator_data->pinned); } if (ptr == NULL) goto fail; @@ -757,6 +758,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) { struct omp_mem_header *data; omp_memspace_handle_t memspace = omp_default_mem_space; + int pinned = false; if (ptr == NULL) return; @@ -798,6 +800,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) #endif memspace = allocator_data->memspace; + pinned = allocator_data->pinned; } else { @@ -822,7 +825,7 @@ omp_free (void *ptr, omp_allocator_handle_t allocator) memspace = predefined_alloc_mapping[data->allocator]; } - MEMSPACE_FREE (memspace, data->ptr, data->size); + MEMSPACE_FREE (memspace, data->ptr, data->size, pinned); } ialias (omp_free) @@ -953,7 +956,8 @@ retry: } else #endif - ptr = MEMSPACE_CALLOC (allocator_data->memspace, new_size); + ptr = MEMSPACE_CALLOC (allocator_data->memspace, new_size, + allocator_data->pinned); if (ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -992,7 +996,8 @@ retry: memspace = (allocator_data ? 
allocator_data->memspace : predefined_alloc_mapping[allocator]); - ptr = MEMSPACE_CALLOC (memspace, new_size); + ptr = MEMSPACE_CALLOC (memspace, new_size, + allocator_data && allocator_data->pinned); } if (ptr == NULL) goto fail; @@ -1224,9 +1229,13 @@ retry: #endif if (prev_size) new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr, - data->size, new_size); + data->size, new_size, + (free_allocator_data + && free_allocator_data->pinned), + allocator_data->pinned); else - new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size); + new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size, + allocator_data->pinned); if (new_ptr == NULL) { #ifdef HAVE_SYNC_BUILTINS @@ -1279,10 +1288,14 @@ retry: memspace = (allocator_data ? allocator_data->memspace : predefined_alloc_mapping[allocator]); - new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size); + new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size, + (free_allocator_data + && free_allocator_data->pinned), + allocator_data && allocator_data->pinned); } if (new_ptr == NULL) goto fail; + ret = (char *) new_ptr + sizeof (struct omp_mem_header); ((struct omp_mem_header *) ret)[-1].ptr = new_ptr; ((struct omp_mem_header *) ret)[-1].size = new_size; @@ -1312,7 +1325,8 @@ retry: memspace = (allocator_data ? allocator_data->memspace : predefined_alloc_mapping[allocator]); - new_ptr = MEMSPACE_ALLOC (memspace, new_size); + new_ptr = MEMSPACE_ALLOC (memspace, new_size, + allocator_data && allocator_data->pinned); } if (new_ptr == NULL) goto fail; @@ -1367,7 +1381,8 @@ retry: was_memspace = (free_allocator_data ? 
free_allocator_data->memspace : predefined_alloc_mapping[free_allocator]); - MEMSPACE_FREE (was_memspace, data->ptr, data->size); + int was_pinned = (free_allocator_data && free_allocator_data->pinned); + MEMSPACE_FREE (was_memspace, data->ptr, data->size, was_pinned); } return ret; diff --git a/libgomp/config/gcn/allocator.c b/libgomp/config/gcn/allocator.c index e9a95d683f9c..679218f08d2c 100644 --- a/libgomp/config/gcn/allocator.c +++ b/libgomp/config/gcn/allocator.c @@ -109,16 +109,17 @@ gcn_memspace_validate (omp_memspace_handle_t memspace, unsigned access) || access != omp_atv_all); } -#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \ - gcn_memspace_alloc (MEMSPACE, SIZE) -#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \ - gcn_memspace_calloc (MEMSPACE, SIZE) -#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \ - gcn_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE) -#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \ - gcn_memspace_free (MEMSPACE, ADDR, SIZE) -#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS) \ - gcn_memspace_validate (MEMSPACE, ACCESS) +#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \ + gcn_memspace_alloc (MEMSPACE, ((void)(PIN), (SIZE))) +#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \ + gcn_memspace_calloc (MEMSPACE, ((void)(PIN), (SIZE))) +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \ + gcn_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, \ + ((void)(PIN), (void)(OLDPIN), (SIZE))) +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \ + gcn_memspace_free (MEMSPACE, ADDR, ((void)(PIN), (SIZE))) +#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \ + gcn_memspace_validate (MEMSPACE, ((void)(PIN), (ACCESS))) /* The default low-latency memspace implies omp_atv_all, which is incompatible with the LDS memory space. 
*/ diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index 64b1b4b96233..269d0d607d83 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -34,4 +34,115 @@ #define LIBGOMP_USE_LIBNUMA #endif +/* Implement malloc routines that can handle pinned memory on Linux. + + It's possible to use mlock on any heap memory, but using munlock is + problematic if there are multiple pinned allocations on the same page. + Tracking all that manually would be possible, but adds overhead. This may + be worth it if there are a lot of small allocations getting pinned, but + this seems less likely in a HPC application. + + Instead we optimize for large pinned allocations, and use mmap to ensure + that two pinned allocations don't share the same page. This also means + that large allocations don't pin extra pages by being poorly aligned. */ + +#define _GNU_SOURCE +#include +#include +#include "libgomp.h" + +static void * +linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) +{ + (void)memspace; + + if (pin) + { + /* Note that mmap always returns zeroed memory and is therefore also a + suitable implementation of calloc. 
*/ + void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) + return NULL; + + if (mlock (addr, size)) + { + gomp_debug (0, "libgomp: failed to pin %zu bytes of" + " memory (ulimit too low?)\n", size); + munmap (addr, size); + return NULL; + } + + return addr; + } + else + return malloc (size); +} + +static void * +linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) +{ + if (pin) + return linux_memspace_alloc (memspace, size, pin); + else + return calloc (1, size); +} + +static void +linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, + int pin) +{ + (void)memspace; + + if (pin) + munmap (addr, size); + else + free (addr); +} + +static void * +linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, + size_t oldsize, size_t size, int oldpin, int pin) +{ + if (oldpin && pin) + { + void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE); + if (newaddr == MAP_FAILED) + return NULL; + + return newaddr; + } + else if (oldpin || pin) + { + void *newaddr = linux_memspace_alloc (memspace, size, pin); + if (newaddr) + { + memcpy (newaddr, addr, oldsize < size ? oldsize : size); + linux_memspace_free (memspace, addr, oldsize, oldpin); + } + + return newaddr; + } + else + return realloc (addr, size); +} + +static int +linux_memspace_validate (omp_memspace_handle_t, unsigned, int) +{ + /* Everything should be accepted on Linux, including pinning.
*/ + return 1; +} + +#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \ + linux_memspace_alloc (MEMSPACE, SIZE, PIN) +#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \ + linux_memspace_calloc (MEMSPACE, SIZE, PIN) +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \ + linux_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \ + linux_memspace_free (MEMSPACE, ADDR, SIZE, PIN) +#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \ + linux_memspace_validate (MEMSPACE, ACCESS, PIN) + #include "../../allocator.c" diff --git a/libgomp/config/nvptx/allocator.c b/libgomp/config/nvptx/allocator.c index a3302411bcb5..6a226a81b75c 100644 --- a/libgomp/config/nvptx/allocator.c +++ b/libgomp/config/nvptx/allocator.c @@ -123,16 +123,17 @@ nvptx_memspace_validate (omp_memspace_handle_t memspace, unsigned access) #endif } -#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \ - nvptx_memspace_alloc (MEMSPACE, SIZE) -#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \ - nvptx_memspace_calloc (MEMSPACE, SIZE) -#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \ - nvptx_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE) -#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \ - nvptx_memspace_free (MEMSPACE, ADDR, SIZE) -#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS) \ - nvptx_memspace_validate (MEMSPACE, ACCESS) +#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \ + nvptx_memspace_alloc (MEMSPACE, ((void)(PIN), (SIZE))) +#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \ + nvptx_memspace_calloc (MEMSPACE, ((void)(PIN), (SIZE))) +#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \ + nvptx_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, \ + ((void)(OLDPIN), (void)(PIN), (SIZE))) +#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \ + nvptx_memspace_free (MEMSPACE, ADDR, ((void)(PIN), (SIZE))) +#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \ + nvptx_memspace_validate (MEMSPACE, ((void)(PIN), (ACCESS))) /* The default low-latency memspace 
implies omp_atv_all, which is incompatible with the .shared memory space. */ diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index cff2a2a00800..85edb9f39aba 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -5757,7 +5757,8 @@ a @code{nearest} allocation. Additional notes regarding the traits: @itemize -@item The @code{pinned} trait is unsupported. +@item The @code{pinned} trait is supported on Linux hosts, but is subject to + the OS @code{ulimit}/@code{rlimit} locked memory settings. @item The default for the @code{pool_size} trait is no pool and for every (re)allocation the associated library routine is called, which might internally use a memory pool. diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-1.c b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c new file mode 100644 index 000000000000..e17a21f0a6c0 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/alloc-pinned-1.c @@ -0,0 +1,115 @@ +/* { dg-do run } */ + +/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */ + +/* Test that pinned memory works. 
*/ + +#include +#include + +#ifdef __linux__ +#include +#include + +#include +#include + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#define CHECK_SIZE(SIZE) { \ + struct rlimit limit; \ + if (getrlimit (RLIMIT_MEMLOCK, &limit) \ + || limit.rlim_cur <= SIZE) \ + fprintf (stderr, "unsufficient lockable memory; please increase ulimit\n"); \ + } + +int +get_pinned_mem () +{ + int pid = getpid (); + char buf[100]; + sprintf (buf, "/proc/%d/status", pid); + + FILE *proc = fopen (buf, "r"); + if (!proc) + abort (); + while (fgets (buf, 100, proc)) + { + int val; + if (sscanf (buf, "VmLck: %d", &val)) + { + fclose (proc); + return val; + } + } + abort (); +} +#else +#define PAGE_SIZE 1024 /* unknown */ +#define CHECK_SIZE(SIZE) fprintf (stderr, "OS unsupported\n"); +#define EXPECT_OMP_NULL_ALLOCATOR + +int +get_pinned_mem () +{ + return 0; +} +#endif + +static void +verify0 (char *p, size_t s) +{ + for (size_t i = 0; i < s; ++i) + if (p[i] != 0) + abort (); +} + +#include + +int +main () +{ + /* Allocate at least a page each time, allowing space for overhead, + but stay within the ulimit. */ + const int SIZE = PAGE_SIZE - 128; + CHECK_SIZE (SIZE * 5); // This is intended to help diagnose failures + + const omp_alloctrait_t traits[] = { + { omp_atk_pinned, 1 } + }; + omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, + 1, traits); + +#ifdef EXPECT_OMP_NULL_ALLOCATOR + if (allocator == omp_null_allocator) + return 0; +#endif + + // Sanity check + if (get_pinned_mem () != 0) + abort (); + + void *p = omp_alloc (SIZE, allocator); + if (!p) + abort (); + + int amount = get_pinned_mem (); + if (amount == 0) + abort (); + + p = omp_realloc (p, SIZE * 2, allocator, allocator); + + int amount2 = get_pinned_mem (); + if (amount2 <= amount) + abort (); + + /* SIZE*2 ensures that it doesn't slot into the space possibly + vacated by realloc. 
*/ + p = omp_calloc (1, SIZE * 2, allocator); + + if (get_pinned_mem () <= amount2) + abort (); + + verify0 (p, SIZE * 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-2.c b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c new file mode 100644 index 000000000000..3cf322cfbc86 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/alloc-pinned-2.c @@ -0,0 +1,120 @@ +/* { dg-do run } */ + +/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */ + +/* Test that pinned memory works (pool_size code path). */ + +#include +#include + +#ifdef __linux__ +#include +#include + +#include +#include + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#define CHECK_SIZE(SIZE) { \ + struct rlimit limit; \ + if (getrlimit (RLIMIT_MEMLOCK, &limit) \ + || limit.rlim_cur <= SIZE) \ + fprintf (stderr, "unsufficient lockable memory; please increase ulimit\n"); \ + } + +int +get_pinned_mem () +{ + int pid = getpid (); + char buf[100]; + sprintf (buf, "/proc/%d/status", pid); + + FILE *proc = fopen (buf, "r"); + if (!proc) + abort (); + while (fgets (buf, 100, proc)) + { + int val; + if (sscanf (buf, "VmLck: %d", &val)) + { + fclose (proc); + return val; + } + } + abort (); +} +#else +#define PAGE_SIZE 1024 /* unknown */ +#define CHECK_SIZE(SIZE) fprintf (stderr, "OS unsupported\n"); +#define EXPECT_OMP_NULL_ALLOCATOR + +int +get_pinned_mem () +{ + return 0; +} +#endif + +static void +verify0 (char *p, size_t s) +{ + for (size_t i = 0; i < s; ++i) + if (p[i] != 0) + abort (); +} + +#include + +int +main () +{ + /* Allocate at least a page each time, allowing space for overhead, + but stay within the ulimit. 
*/ + const int SIZE = PAGE_SIZE - 128; + CHECK_SIZE (SIZE * 5); // This is intended to help diagnose failures + + const omp_alloctrait_t traits[] = { + { omp_atk_pinned, 1 }, + { omp_atk_pool_size, SIZE * 8 } + }; + omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, + 2, traits); + +#ifdef EXPECT_OMP_NULL_ALLOCATOR + if (allocator == omp_null_allocator) + return 0; +#endif + + // Sanity check + if (get_pinned_mem () != 0) + abort (); + + void *p = omp_alloc (SIZE, allocator); + if (!p) + abort (); + + int amount = get_pinned_mem (); + if (amount == 0) + abort (); + + p = omp_realloc (p, SIZE * 2, allocator, allocator); + if (!p) + abort (); + + int amount2 = get_pinned_mem (); + if (amount2 <= amount) + abort (); + + /* SIZE*2 ensures that it doesn't slot into the space possibly + vacated by realloc. */ + p = omp_calloc (1, SIZE * 2, allocator); + if (!p) + abort (); + + if (get_pinned_mem () <= amount2) + abort (); + + verify0 (p, SIZE * 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-3.c b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c new file mode 100644 index 000000000000..53e4720cc9c1 --- /dev/null +++ b/libgomp/testsuite/libgomp.c/alloc-pinned-3.c @@ -0,0 +1,156 @@ +/* { dg-do run } */ + +/* Test that pinned memory fails correctly. */ + +#include +#include + +#ifdef __linux__ +#include +#include + +#include +#include + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +int +get_pinned_mem () +{ + int pid = getpid (); + char buf[100]; + sprintf (buf, "/proc/%d/status", pid); + + FILE *proc = fopen (buf, "r"); + if (!proc) + abort (); + while (fgets (buf, 100, proc)) + { + int val; + if (sscanf (buf, "VmLck: %d", &val)) + { + fclose (proc); + return val; + } + } + abort (); +} + +void +set_pin_limit (int size) +{ + struct rlimit limit; + if (getrlimit (RLIMIT_MEMLOCK, &limit)) + abort (); + limit.rlim_cur = (limit.rlim_max < size ? 
limit.rlim_max : size); + if (setrlimit (RLIMIT_MEMLOCK, &limit)) + abort (); +} +#else +#define PAGE_SIZE 10000 * 1024 /* unknown */ +#define EXPECT_OMP_NULL_ALLOCATOR + +int +get_pinned_mem () +{ + return 0; +} + +void +set_pin_limit () +{ +} +#endif + +static void +verify0 (char *p, size_t s) +{ + for (size_t i = 0; i < s; ++i) + if (p[i] != 0) + abort (); +} + +#include + +int +main () +{ + /* This needs to be large enough to cover multiple pages. */ + const int SIZE = PAGE_SIZE * 4; + + /* Pinned memory, no fallback. */ + const omp_alloctrait_t traits1[] = { + { omp_atk_pinned, 1 }, + { omp_atk_fallback, omp_atv_null_fb } + }; + omp_allocator_handle_t allocator1 = omp_init_allocator (omp_default_mem_space, + 2, traits1); + + /* Pinned memory, plain memory fallback. */ + const omp_alloctrait_t traits2[] = { + { omp_atk_pinned, 1 }, + { omp_atk_fallback, omp_atv_default_mem_fb } + }; + omp_allocator_handle_t allocator2 = omp_init_allocator (omp_default_mem_space, + 2, traits2); + +#ifdef EXPECT_OMP_NULL_ALLOCATOR + if (allocator1 == omp_null_allocator + && allocator2 == omp_null_allocator) + return 0; +#endif + + /* Ensure that the limit is smaller than the allocation. 
*/ + set_pin_limit (SIZE / 2); + + // Sanity check + if (get_pinned_mem () != 0) + abort (); + + // Should fail + void *p1 = omp_alloc (SIZE, allocator1); + if (p1) + abort (); + + // Should fail + void *p2 = omp_calloc (1, SIZE, allocator1); + if (p2) + abort (); + + // Should fall back + void *p3 = omp_alloc (SIZE, allocator2); + if (!p3) + abort (); + + // Should fall back + void *p4 = omp_calloc (1, SIZE, allocator2); + if (!p4) + abort (); + verify0 (p4, SIZE); + + // Should fail to realloc + void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc); + void *p5 = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc); + if (!notpinned || p5) + abort (); + + // Should fall back to no realloc needed + void *p6 = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc); + if (p6 != notpinned) + abort (); + + // No memory should have been pinned + int amount = get_pinned_mem (); + if (amount != 0) + abort (); + + // Ensure free works correctly + if (p1) omp_free (p1, allocator1); + if (p2) omp_free (p2, allocator1); + if (p3) omp_free (p3, allocator2); + if (p4) omp_free (p4, allocator2); + // p5 and notpinned have been reallocated + if (p6) omp_free (p6, omp_default_mem_alloc); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-4.c b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c new file mode 100644 index 000000000000..9d850c23e4bb --- /dev/null +++ b/libgomp/testsuite/libgomp.c/alloc-pinned-4.c @@ -0,0 +1,150 @@ +/* { dg-do run } */ + +/* Test that pinned memory fails correctly, pool_size code path. 
*/ + +#include +#include + +#ifdef __linux__ +#include +#include + +#include +#include + +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +int +get_pinned_mem () +{ + int pid = getpid (); + char buf[100]; + sprintf (buf, "/proc/%d/status", pid); + + FILE *proc = fopen (buf, "r"); + if (!proc) + abort (); + while (fgets (buf, 100, proc)) + { + int val; + if (sscanf (buf, "VmLck: %d", &val)) + { + fclose (proc); + return val; + } + } + abort (); +} + +void +set_pin_limit (int size) +{ + struct rlimit limit; + if (getrlimit (RLIMIT_MEMLOCK, &limit)) + abort (); + limit.rlim_cur = (limit.rlim_max < size ? limit.rlim_max : size); + if (setrlimit (RLIMIT_MEMLOCK, &limit)) + abort (); +} +#else +#define PAGE_SIZE 10000 * 1024 /* unknown */ +#define EXPECT_OMP_NULL_ALLOCATOR + +int +get_pinned_mem () +{ + return 0; +} + +void +set_pin_limit () +{ +} +#endif + +static void +verify0 (char *p, size_t s) +{ + for (size_t i = 0; i < s; ++i) + if (p[i] != 0) + abort (); +} + +#include + +int +main () +{ + /* This needs to be large enough to cover multiple pages. */ + const int SIZE = PAGE_SIZE * 4; + + /* Pinned memory, no fallback. */ + const omp_alloctrait_t traits1[] = { + { omp_atk_pinned, 1 }, + { omp_atk_fallback, omp_atv_null_fb }, + { omp_atk_pool_size, SIZE * 8 } + }; + omp_allocator_handle_t allocator1 = omp_init_allocator (omp_default_mem_space, + 3, traits1); + + /* Pinned memory, plain memory fallback. */ + const omp_alloctrait_t traits2[] = { + { omp_atk_pinned, 1 }, + { omp_atk_fallback, omp_atv_default_mem_fb }, + { omp_atk_pool_size, SIZE * 8 } + }; + omp_allocator_handle_t allocator2 = omp_init_allocator (omp_default_mem_space, + 3, traits2); + +#ifdef EXPECT_OMP_NULL_ALLOCATOR + if (allocator1 == omp_null_allocator + && allocator2 == omp_null_allocator) + return 0; +#endif + + /* Ensure that the limit is smaller than the allocation. 
*/ + set_pin_limit (SIZE / 2); + + // Sanity check + if (get_pinned_mem () != 0) + abort (); + + // Should fail + void *p = omp_alloc (SIZE, allocator1); + if (p) + abort (); + + // Should fail + p = omp_calloc (1, SIZE, allocator1); + if (p) + abort (); + + // Should fall back + p = omp_alloc (SIZE, allocator2); + if (!p) + abort (); + + // Should fall back + p = omp_calloc (1, SIZE, allocator2); + if (!p) + abort (); + verify0 (p, SIZE); + + // Should fail to realloc + void *notpinned = omp_alloc (SIZE, omp_default_mem_alloc); + p = omp_realloc (notpinned, SIZE, allocator1, omp_default_mem_alloc); + if (!notpinned || p) + abort (); + + // Should fall back to no realloc needed + p = omp_realloc (notpinned, SIZE, allocator2, omp_default_mem_alloc); + if (p != notpinned) + abort (); + + // No memory should have been pinned + int amount = get_pinned_mem (); + if (amount != 0) + abort (); + + return 0; +} From 943fd92254cccbfb5cd8bb0203ce47302afde319 Mon Sep 17 00:00:00 2001 From: Andrew Carlotti Date: Thu, 16 Nov 2023 18:56:43 +0000 Subject: [PATCH 273/311] aarch64: Add missing driver-aarch64 dependencies gcc/ChangeLog: * config/aarch64/x-aarch64: Add missing dependencies. 
--- gcc/config/aarch64/x-aarch64 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/x-aarch64 b/gcc/config/aarch64/x-aarch64 index 3cf701a0a01a..ee828c9af53a 100644 --- a/gcc/config/aarch64/x-aarch64 +++ b/gcc/config/aarch64/x-aarch64 @@ -1,3 +1,5 @@ driver-aarch64.o: $(srcdir)/config/aarch64/driver-aarch64.cc \ - $(CONFIG_H) $(SYSTEM_H) + $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(CORETYPES_H) \ + $(srcdir)/config/aarch64/aarch64-protos.h \ + $(srcdir)/config/aarch64/aarch64-feature-deps.h $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $< From e6bb4d997953d3ad28082a1dccb69657c6f441d9 Mon Sep 17 00:00:00 2001 From: Andrew Carlotti Date: Mon, 27 Nov 2023 16:12:01 +0000 Subject: [PATCH 274/311] aarch64 testsuite: Check entire .arch string Add a terminating newline to various tests, and add missing extensions to some test strings. The current output is broken for options_set_4.c, so this test is left unchanged, to be fixed in a subsequent patch. gcc/testsuite/ChangeLog: * gcc.target/aarch64/cpunative/native_cpu_18.c: Add \+nopauth\n * gcc.target/aarch64/options_set_7.c: Add \+crc\n * gcc.target/aarch64/options_set_8.c: Add \+crc\+nodotprod\n * gcc.target/aarch64/cpunative/native_cpu_0.c: Add \n * gcc.target/aarch64/cpunative/native_cpu_1.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_2.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_3.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_4.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_5.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_6.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_7.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_8.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_9.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_10.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_11.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_12.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_13.c: Ditto. 
* gcc.target/aarch64/cpunative/native_cpu_14.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_15.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_16.c: Ditto. * gcc.target/aarch64/cpunative/native_cpu_17.c: Ditto. * gcc.target/aarch64/options_set_1.c: Ditto. * gcc.target/aarch64/options_set_2.c: Ditto. * gcc.target/aarch64/options_set_3.c: Ditto. * gcc.target/aarch64/options_set_5.c: Ditto. * gcc.target/aarch64/options_set_6.c: Ditto. * gcc.target/aarch64/options_set_9.c: Ditto. * gcc.target/aarch64/options_set_11.c: Ditto. * gcc.target/aarch64/options_set_12.c: Ditto. * gcc.target/aarch64/options_set_13.c: Ditto. * gcc.target/aarch64/options_set_14.c: Ditto. * gcc.target/aarch64/options_set_15.c: Ditto. * gcc.target/aarch64/options_set_16.c: Ditto. * gcc.target/aarch64/options_set_17.c: Ditto. * gcc.target/aarch64/options_set_18.c: Ditto. * gcc.target/aarch64/options_set_19.c: Ditto. * gcc.target/aarch64/options_set_20.c: Ditto. * gcc.target/aarch64/options_set_21.c: Ditto. * gcc.target/aarch64/options_set_22.c: Ditto. * gcc.target/aarch64/options_set_23.c: Ditto. * gcc.target/aarch64/options_set_24.c: Ditto. * gcc.target/aarch64/options_set_25.c: Ditto. * gcc.target/aarch64/options_set_26.c: Ditto. 
--- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_11.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_12.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_14.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_15.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_3.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_4.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_5.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_8.c | 2 +- gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_9.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_11.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_12.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_13.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_14.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_15.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_16.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_17.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_18.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_19.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_2.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_20.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_21.c | 2 +- 
gcc/testsuite/gcc.target/aarch64/options_set_22.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_23.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_24.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_25.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_26.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_3.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_5.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_6.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_7.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_8.c | 2 +- gcc/testsuite/gcc.target/aarch64/options_set_9.c | 2 +- 43 files changed, 43 insertions(+), 43 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c index 8499f87c39b1..fb5a7a18ad1a 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\n} } } */ /* Test a normal looking procinfo. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_1.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_1.c index 2cf0e89994b1..cb50e3b73057 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_1.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_1.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+nosimd} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+nosimd\n} } } */ /* Test one where fp is on by default so turn off simd. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c index ddb06b822757..6a524bad371c 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\n} } } */ /* Test one with no entry in feature list. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_11.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_11.c index 96b9ca434ebb..644f4792275b 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_11.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_11.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+sb} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+sb\n} } } */ /* Test one with a feature name that overlaps with another one. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_12.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_12.c index c3b44adbf6c8..fb34ddf697cc 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_12.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_12.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+ssbs} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+ssbs\n} } } */ /* Test one where the longer feature overlaps with a shorter one. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c index 551669091c70..b29d50e1f79f 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\n} } } */ /* Test one with mixed order of feature bits. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_14.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_14.c index 781ab1ebbfb4..59846f76acf8 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_14.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_14.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+dotprod\n} } } */ /* Test one where valid feature bits are at a boundary > buffer size. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_15.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_15.c index c9205d95b793..68a51898eab4 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_15.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_15.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+sve2-sm4} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+sve2-sm4\n} } } */ /* Test one where the bounary of buffer size would cut off and leave a valid feature in the first full buffer. e.g. 
this will cut off at diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c index 2f963bb23127..b3613165a05b 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2\n} } } */ /* Test a normal looking procinfo. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c index c68a697aa3e9..a9dde5ffab14 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2\n} } } */ /* Test a normal looking procinfo. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c index b5f0a3005f50..10325df44972 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_18.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8.6-a\+crc\+fp16\+aes\+sha3\+rng} } } */ +/* { dg-final { scan-assembler {\.arch armv8.6-a\+crc\+fp16\+aes\+sha3\+rng\+nopauth\n} } } */ /* Test one where the boundary of buffer size would overwrite the last character read when stitching the fgets-calls together. 
With the diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c index edbdb56268ea..cfca02cb147f 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\n} } } */ /* Test one where asimd is provided byt no fp. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_3.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_3.c index 50685c297dbd..316ddbd2ab9a 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_3.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_3.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\n} } } */ /* Test where asimd and fp are the only ones provided, these are default and so shouldn't emit anything. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_4.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_4.c index 91ae809757a4..053dd2b6dbe6 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_4.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_4.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+crypto} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\n} } } */ /* Test one where all crypto bits are given so crypto should be enabled. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_5.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_5.c index 84139e58ee00..49dee9d6abcb 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_5.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_5.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\n} } } */ /* Test one where fp16 is available and so should be emitted. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c index 7608e8845a66..20012beff7b8 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto\n} } } */ /* Test one where the feature bits for crypto and fp16 are given in same order as declared in options file. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c index 72b14b4f6ed0..70a7e62fdffc 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto\n} } } */ /* Test one where the crypto and fp16 options are specified in different order from what is in the options file. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_8.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_8.c index 7a5a2144a397..795dd5ff61b3 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_8.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_8.c @@ -7,6 +7,6 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+sve} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+sve\n} } } */ /* Test one where sve is enabled. */ diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_9.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_9.c index 528b5d029f1a..6b55a739c851 100644 --- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_9.c +++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_9.c @@ -7,7 +7,7 @@ int main() return 0; } -/* { dg-final { scan-assembler {\.arch armv8-a\+sve2-sm4} } } */ +/* { dg-final { scan-assembler {\.arch armv8-a\+sve2-sm4\n} } } */ /* Test one here a feature that is a prefix of another is enabled. In this case sve is a prefix to svesm4, but sve2-sm4 should be diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_1.c b/gcc/testsuite/gcc.target/aarch64/options_set_1.c index 40d9a05c905e..dc5eff8c901d 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_1.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_1.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\n} 1 } } */ /* Check to see if crc is output by default. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_11.c b/gcc/testsuite/gcc.target/aarch64/options_set_11.c index d083bfdbd5c4..e0e82cf514dc 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_11.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_11.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\n} } } */ /* FP is default on, no need to pass on to assembler. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_12.c b/gcc/testsuite/gcc.target/aarch64/options_set_12.c index 58a09fda2c11..aef44b331131 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_12.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_12.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16\n} } } */ /* fp16 not default, should be emitted. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_13.c b/gcc/testsuite/gcc.target/aarch64/options_set_13.c index 2a517ecb58f8..b116e08e75b4 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_13.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_13.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16\n} } } */ /* FP is part of FP16, don't emit it. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_14.c b/gcc/testsuite/gcc.target/aarch64/options_set_14.c index c192bf6cb636..e9fc3e5dc2c3 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_14.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_14.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml\n} } } */ /* fmp16fml is smallest option to emit. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_15.c b/gcc/testsuite/gcc.target/aarch64/options_set_15.c index 32ec3ea46431..999791b9f0d8 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_15.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_15.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml*} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml*\n} } } */ /* fp included in fp16fml, only emit latter. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_16.c b/gcc/testsuite/gcc.target/aarch64/options_set_16.c index b45c01a915b9..477b71c38179 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_16.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_16.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+fp16fml\n} } } */ /* fp16fml is smallest options to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_17.c b/gcc/testsuite/gcc.target/aarch64/options_set_17.c index c490e1f47a0a..8b21e2e1a0a0 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_17.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_17.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+dotprod} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.2-a\+crc\+dotprod\n} } } */ /* dotprod needs to be emitted pre armv8.4. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_18.c b/gcc/testsuite/gcc.target/aarch64/options_set_18.c index 61587dbbd63a..977b41e3e21e 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_18.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_18.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\n} } } */ /* dotprod is default in armv8.4-a, don't emit. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_19.c b/gcc/testsuite/gcc.target/aarch64/options_set_19.c index 72b58126182f..0b2ec02e5c6b 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_19.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_19.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\n} } } */ /* fp default, don't emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c index f82cb5f7823b..937edc693c2d 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto\n} 1 } } */ /* Check to see if crc and crypto are maintained if crypto specified. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_20.c b/gcc/testsuite/gcc.target/aarch64/options_set_20.c index b383e0aced2d..452b48c7291b 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_20.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_20.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_21.c b/gcc/testsuite/gcc.target/aarch64/options_set_21.c index 19fcd6fda6e0..f142e70fb51e 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_21.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_21.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_22.c b/gcc/testsuite/gcc.target/aarch64/options_set_22.c index 77ae4089f398..04ddd461857a 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_22.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_22.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_23.c b/gcc/testsuite/gcc.target/aarch64/options_set_23.c index dee637c5d2cb..81cfe0189e06 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_23.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_23.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_24.c b/gcc/testsuite/gcc.target/aarch64/options_set_24.c index 54b0e3d4a831..425cc513eeeb 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_24.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_24.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_25.c b/gcc/testsuite/gcc.target/aarch64/options_set_25.c index a3b2d63c06eb..5a3c10593019 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_25.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_25.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_26.c b/gcc/testsuite/gcc.target/aarch64/options_set_26.c index b383e0aced2d..452b48c7291b 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_26.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_26.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16} } } */ +/* { dg-final { scan-assembler {\.arch armv8\.4-a\+crc\+fp16\n} } } */ /* fp16 smallest set to emit. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c index 7d350cfa3616..96140e36270d 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto\n} 1 } } */ /* Check if smallest set is maintained when outputting. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_5.c b/gcc/testsuite/gcc.target/aarch64/options_set_5.c index b4c0901195ed..028fbc46ef6a 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_5.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_5.c @@ -6,7 +6,7 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes\n} 1 } } */ /* Check if turning off feature bits works correctly and grouping is no longer valid. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c index 2a1d7fe5b8ea..09ebdaa212bd 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes\n} 1 } } */ /* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_7.c b/gcc/testsuite/gcc.target/aarch64/options_set_7.c index 71a2c8a19058..eb5724f74e09 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_7.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_7.c @@ -6,6 +6,6 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.4\-a} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.4\-a\+crc\n} 1 } } */ /* Checking if enabling default features drops the superfluous bits. */ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_8.c b/gcc/testsuite/gcc.target/aarch64/options_set_8.c index 83be1bd7a5c3..a0eacff9ce63 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_8.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_8.c @@ -6,7 +6,7 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\.4\-a} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\.4\-a\+crc\+nodotprod\n} 1 } } */ /* Checking if trying to turn off default features propagates the commandline option. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_9.c b/gcc/testsuite/gcc.target/aarch64/options_set_9.c index e3c7cdc54ffb..5052f891b41c 100644 --- a/gcc/testsuite/gcc.target/aarch64/options_set_9.c +++ b/gcc/testsuite/gcc.target/aarch64/options_set_9.c @@ -6,7 +6,7 @@ int main () return 0; } -/* { dg-final { scan-assembler-times {\.arch armv8\-a} 1 } } */ +/* { dg-final { scan-assembler-times {\.arch armv8\-a\n} 1 } } */ /* Check that grouping of bits that don't form a synthetic group don't turn on the parent. e.g. rdma turns on simd+fp, but simd+fp does not turn on From 392f70cc11089f6da2611177de5f3e8baad6d327 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Mon, 31 Jul 2023 17:52:06 +0100 Subject: [PATCH 275/311] amdgcn: Support XNACK mode The XNACK feature allows memory load instructions to restart safely following a page-miss interrupt. This is useful for shared-memory devices, like APUs, and to implement OpenMP Unified Shared Memory. To support the feature we must be able to set the appropriate meta-data and set the load instructions to early-clobber. When the port supports scheduling of s_waitcnt instructions there will be further requirements. gcc/ChangeLog: * config/gcn/gcn-hsa.h (NO_XNACK): Ignore missing -march. (XNACKOPT): Match on/off; ignore any. * config/gcn/gcn-valu.md (gather_insn_1offset): Add xnack compatible alternatives. (gather_insn_2offsets): Likewise. * config/gcn/gcn.cc (gcn_option_override): Permit -mxnack for devices other than Fiji and gfx1030. (gcn_expand_epilogue): Remove early-clobber problems. (gcn_hsa_declare_function_name): Obey -mxnack setting. * config/gcn/gcn.md (xnack): New attribute. (enabled): Rework to include "xnack" attribute. (*movbi): Add xnack compatible alternatives. (*mov<mode>_insn): Likewise. (*mov<mode>_insn): Likewise. (*mov<mode>_insn): Likewise. (*movti_insn): Likewise. * config/gcn/gcn.opt (-mxnack): Change the default to "any". * doc/invoke.texi: Remove placeholder notice for -mxnack.
--- gcc/config/gcn/gcn-hsa.h | 7 +- gcc/config/gcn/gcn-valu.md | 28 +++-- gcc/config/gcn/gcn.cc | 27 ++-- gcc/config/gcn/gcn.md | 249 +++++++++++++++++++++---------------- gcc/config/gcn/gcn.opt | 4 +- gcc/doc/invoke.texi | 3 +- 6 files changed, 180 insertions(+), 138 deletions(-) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 4d72299da9b7..bfb104526c52 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,16 +75,13 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" -#define NO_XNACK "!march=*:;march=fiji:;march=gfx1030:;" +#define NO_XNACK "march=fiji:;march=gfx1030:;" #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;" /* In HSACOv4 no attribute setting means the binary supports "any" hardware configuration. The name of the attribute also changed. */ #define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc" - -/* Replace once XNACK is supported: - #define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack" */ -#define XNACKOPT "!mnack=*:-mattr=-xnack;mnack=*:-mattr=-xnack" +#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack" /* Use LLVM assembler and linker options. 
*/ #define ASM_SPEC "-triple=amdgcn--amdhsa " \ diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index a928decd408c..64b8ea1057fc 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1145,13 +1145,13 @@ {}) (define_insn "gather_insn_1offset" - [(set (match_operand:V_MOV 0 "register_operand" "=v,a") + [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a") (unspec:V_MOV - [(plus: (match_operand: 1 "register_operand" " v,v") + [(plus: (match_operand: 1 "register_operand" " v,v, v, v") (vec_duplicate: - (match_operand 2 "immediate_operand" " n,n"))) - (match_operand 3 "immediate_operand" " n,n") - (match_operand 4 "immediate_operand" " n,n") + (match_operand 2 "immediate_operand" " n,n, n, n"))) + (match_operand 3 "immediate_operand" " n,n, n, n") + (match_operand 4 "immediate_operand" " n,n, n, n") (mem:BLK (scratch))] UNSPEC_GATHER))] "(AS_FLAT_P (INTVAL (operands[3])) @@ -1182,7 +1182,8 @@ } [(set_attr "type" "flat") (set_attr "length" "12") - (set_attr "gcn_version" "*,cdna2")]) + (set_attr "gcn_version" "*,cdna2,*,cdna2") + (set_attr "xnack" "off,off,on,on")]) (define_insn "gather_insn_1offset_ds" [(set (match_operand:V_MOV 0 "register_operand" "=v,a") @@ -1208,18 +1209,18 @@ (set_attr "gcn_version" "*,cdna2")]) (define_insn "gather_insn_2offsets" - [(set (match_operand:V_MOV 0 "register_operand" "=v,a") + [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a") (unspec:V_MOV [(plus: (plus: (vec_duplicate: - (match_operand:DI 1 "register_operand" "Sv,Sv")) + (match_operand:DI 1 "register_operand" "Sv,Sv,Sv,Sv")) (sign_extend: - (match_operand: 2 "register_operand" " v,v"))) + (match_operand: 2 "register_operand" " v, v, v, v"))) (vec_duplicate: (match_operand 3 "immediate_operand" - " n,n"))) - (match_operand 4 "immediate_operand" " n,n") - (match_operand 5 "immediate_operand" " n,n") + " n, n, n, n"))) + (match_operand 4 "immediate_operand" " n, n, n, n") + (match_operand 5 "immediate_operand" " n, n, n, n") 
(mem:BLK (scratch))] UNSPEC_GATHER))] "(AS_GLOBAL_P (INTVAL (operands[4])) @@ -1239,7 +1240,8 @@ } [(set_attr "type" "flat") (set_attr "length" "12") - (set_attr "gcn_version" "*,cdna2")]) + (set_attr "gcn_version" "*,cdna2,*,cdna2") + (set_attr "xnack" "off,off,on,on")]) (define_expand "scatter_store" [(match_operand:DI 0 "register_operand") diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 031b405e810a..d92cd01d03f0 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -160,11 +160,18 @@ gcn_option_override (void) acc_lds_size = 32768; } - /* The xnack option is a placeholder, for now. Before removing, update - gcn-hsa.h's XNACKOPT, gcn.opt's mxnack= default init+descr, and - invoke.texi's default description. */ - if (flag_xnack != HSACO_ATTR_OFF) - sorry ("XNACK support"); + /* gfx803 "Fiji" and gfx1030 do not support XNACK. */ + if (gcn_arch == PROCESSOR_FIJI + || gcn_arch == PROCESSOR_GFX1030) + { + if (flag_xnack == HSACO_ATTR_ON) + error ("-mxnack=on is incompatible with -march=%s", + (gcn_arch == PROCESSOR_FIJI ? "fiji" + : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030" + : NULL)); + /* Allow HSACO_ATTR_ANY silently because that's the default. */ + flag_xnack = HSACO_ATTR_OFF; + } } /* }}} */ @@ -3585,18 +3592,20 @@ gcn_expand_epilogue (void) /* Assume that an exit value compatible with gcn-run is expected. That is, the third input parameter is an int*. - We can't allocate any new registers, but the kernarg_reg is - dead after this, so we'll use that. */ + We can't allocate any new registers, but the dispatch_ptr and + kernarg_reg are dead after this, so we'll use those. 
*/ + rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg + [DISPATCH_PTR_ARG]); rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg [KERNARG_SEGMENT_PTR_ARG]); rtx retptr_mem = gen_rtx_MEM (DImode, gen_rtx_PLUS (DImode, kernarg_reg, GEN_INT (16))); set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT); - emit_move_insn (kernarg_reg, retptr_mem); + emit_move_insn (dispatch_ptr_reg, retptr_mem); rtx retval_addr = gen_rtx_REG (DImode, FIRST_VPARM_REG + 2); - emit_move_insn (retval_addr, kernarg_reg); + emit_move_insn (retval_addr, dispatch_ptr_reg); rtx retval_mem = gen_rtx_MEM (SImode, retval_addr); set_mem_addr_space (retval_mem, ADDR_SPACE_FLAT); emit_move_insn (retval_mem, gen_rtx_REG (SImode, RETURN_VALUE_REG)); diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index b7fbbaf830b0..c7f63d0a3ac8 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -295,6 +295,8 @@ (define_attr "gcn_version" "gcn3,gcn5,cdna2" (const_string "gcn3")) (define_attr "rdna" "any,no,yes" (const_string "any")) +(define_attr "xnack" "na,off,on" (const_string "na")) + (define_attr "enabled" "" (cond [(and (eq_attr "rdna" "no") (ne (symbol_ref "TARGET_RDNA2") (const_int 0))) @@ -302,14 +304,19 @@ (and (eq_attr "rdna" "yes") (eq (symbol_ref "TARGET_RDNA2") (const_int 0))) (const_int 0) - (eq_attr "gcn_version" "gcn3") (const_int 1) (and (eq_attr "gcn_version" "gcn5") - (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0))) - (const_int 1) + (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0))) + (const_int 0) (and (eq_attr "gcn_version" "cdna2") - (ne (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0))) - (const_int 1)] - (const_int 0))) + (eq (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0))) + (const_int 0) + (and (eq_attr "xnack" "off") + (ne (symbol_ref "TARGET_XNACK") (const_int 0))) + (const_int 0) + (and (eq_attr "xnack" "on") + (eq (symbol_ref "TARGET_XNACK") (const_int 0))) + (const_int 0)] + (const_int 1))) ; We need to be able to identify 
v_readlane and v_writelane with ; SGPR lane selection in order to handle "Manually Inserted Wait States". @@ -508,9 +515,9 @@ (define_insn "*movbi" [(set (match_operand:BI 0 "nonimmediate_operand" - "=Sg, v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM") + "=Sg, v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM") (match_operand:BI 1 "gcn_load_operand" - "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))] + "SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))] "" { /* SCC as an operand is currently not accepted by the LLVM assembler, so @@ -537,25 +544,29 @@ return "s_mov_b32\tvcc_lo, %1\;" "s_mov_b32\tvcc_hi, 0"; case 6: - return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)"; case 7: - return "s_store_dword\t%1, %A0"; + return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)"; case 8: - return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0"; + return "s_store_dword\t%1, %A0"; case 9: - return "flat_store_dword\t%A0, %1%O0%g0"; case 10: - return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)"; + return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0"; case 11: + return "flat_store_dword\t%A0, %1%O0%g0"; + case 12: + case 13: + return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)"; + case 14: return "global_store_dword\t%A0, %1%O0%g0"; default: gcc_unreachable (); } } - [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat, - flat,flat") - (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*") - (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")]) + [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat, + flat,flat,flat,flat") + (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12") + (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")]) ; 32bit move pattern @@ -563,32 +574,38 @@ [(set (match_operand:SISF 0 "nonimmediate_operand") (match_operand:SISF 1 "gcn_load_operand"))] "" - {@ [cons: =0, 1; attrs: type, exec, length, gcn_version] - [SD ,SSA ;sop1 ,* ,4 ,* ] s_mov_b32\t%0, %1 - [SD ,J 
;sopk ,* ,4 ,* ] s_movk_i32\t%0, %1 - [SD ,B ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1 - [SD ,RB ;smem ,* ,12,* ] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0) - [RB ,Sm ;smem ,* ,12,* ] s_buffer_store%s1\t%1, s[0:3], %0 - [Sm ,RS ;smem ,* ,12,* ] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0) - [RS ,Sm ;smem ,* ,12,* ] s_store_dword\t%1, %A0 - [v ,v ;vop1 ,* ,4 ,* ] v_mov_b32\t%0, %1 - [Sg ,v ;vop3a,none,8 ,* ] v_readlane_b32\t%0, %1, 0 - [v ,Sv ;vop3a,none,8 ,* ] v_writelane_b32\t%0, %1, 0 - [v ,^a ;vop3p_mai,*,8,* ] v_accvgpr_read_b32\t%0, %1 - [a ,v ;vop3p_mai,*,8,* ] v_accvgpr_write_b32\t%0, %1 - [a ,a ;vop1 ,* ,4,cdna2] v_accvgpr_mov_b32\t%0, %1 - [v ,RF ;flat ,* ,12,* ] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 - [^a ,RF ;flat ,* ,12,cdna2] ^ - [RF ,v ;flat ,* ,12,* ] flat_store_dword\t%A0, %1%O0%g0 - [RF ,a ;flat ,* ,12,cdna2] ^ - [v ,B ;vop1 ,* ,8 ,* ] v_mov_b32\t%0, %1 - [RLRG,v ;ds ,* ,12,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) - [v ,RLRG;ds ,* ,12,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) - [SD ,Y ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1 - [v ,RM ;flat ,* ,12,* ] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - [^a ,RM ;flat ,* ,12,cdna2] ^ - [RM ,v ;flat ,* ,12,* ] global_store_dword\t%A0, %1%O0%g0 - [RM ,a ;flat ,* ,12,cdna2] ^ + {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack] + [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1 + [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1 + [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1 + [SD ,RB ;smem ,* ,12,* ,off] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0) + [&SD ,RB ;smem ,* ,12,* ,on ] ^ + [RB ,Sm ;smem ,* ,12,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0 + [Sm ,RS ;smem ,* ,12,* ,off] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0) + [&Sm ,RS ;smem ,* ,12,* ,on ] ^ + [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dword\t%1, %A0 + [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1 + [Sg ,v ;vop3a,none,8 ,* ,* ] v_readlane_b32\t%0, %1, 0 + [v ,Sv ;vop3a,none,8 ,* ,* ] 
v_writelane_b32\t%0, %1, 0 + [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1 + [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1 + [a ,a ;vop1 ,* ,4,cdna2,* ] v_accvgpr_mov_b32\t%0, %1 + [v ,RF ;flat ,* ,12,* ,off] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 + [&v ,RF ;flat ,* ,12,* ,on ] ^ + [^a ,RF ;flat ,* ,12,cdna2,off] ^ + [&^a ,RF ;flat ,* ,12,cdna2,on ] ^ + [RF ,v ;flat ,* ,12,* ,* ] flat_store_dword\t%A0, %1%O0%g0 + [RF ,a ;flat ,* ,12,cdna2,* ] ^ + [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1 + [RLRG,v ;ds ,* ,12,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) + [v ,RLRG;ds ,* ,12,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) + [SD ,Y ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1 + [v ,RM ;flat ,* ,12,* ,off] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) + [&v ,RM ;flat ,* ,12,* ,on ] ^ + [^a ,RM ;flat ,* ,12,cdna2,off] ^ + [&^a ,RM ;flat ,* ,12,cdna2,on ] ^ + [RM ,v ;flat ,* ,12,* ,* ] global_store_dword\t%A0, %1%O0%g0 + [RM ,a ;flat ,* ,12,cdna2,* ] ^ }) ; 8/16bit move pattern @@ -598,27 +615,31 @@ [(set (match_operand:QIHI 0 "nonimmediate_operand") (match_operand:QIHI 1 "gcn_load_operand"))] "gcn_valid_move_p (mode, operands[0], operands[1])" - {@ [cons: =0, 1; attrs: type, exec, length, gcn_version] - [SD ,SSA ;sop1 ,* ,4 ,* ] s_mov_b32\t%0, %1 - [SD ,J ;sopk ,* ,4 ,* ] s_movk_i32\t%0, %1 - [SD ,B ;sop1 ,* ,8 ,* ] s_mov_b32\t%0, %1 - [v ,v ;vop1 ,* ,4 ,* ] v_mov_b32\t%0, %1 - [Sg ,v ;vop3a,none,4 ,* ] v_readlane_b32\t%0, %1, 0 - [v ,Sv ;vop3a,none,4 ,* ] v_writelane_b32\t%0, %1, 0 - [v ,^a ;vop3p_mai,*,8,* ] v_accvgpr_read_b32\t%0, %1 - [a ,v ;vop3p_mai,*,8,* ] v_accvgpr_write_b32\t%0, %1 - [a ,a ;vop1 ,* ,8,cdna2] v_accvgpr_mov_b32\t%0, %1 - [v ,RF ;flat ,* ,12,* ] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 - [^a ,RF ;flat ,* ,12,cdna2] ^ - [RF ,v ;flat ,* ,12,* ] flat_store%s0\t%A0, %1%O0%g0 - [RF ,a ;flat ,* ,12,cdna2] ^ - [v ,B ;vop1 ,* ,8 ,* ] v_mov_b32\t%0, %1 - [RLRG,v ;ds ,* ,12,* ] ds_write%b0\t%A0, 
%1%O0\;s_waitcnt\tlgkmcnt(0) - [v ,RLRG;ds ,* ,12,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) - [v ,RM ;flat ,* ,12,* ] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - [^a ,RM ;flat ,* ,12,cdna2] ^ - [RM ,v ;flat ,* ,12,* ] global_store%s0\t%A0, %1%O0%g0 - [RM ,a ;flat ,* ,12,cdna2] ^ + {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack] + [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1 + [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1 + [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1 + [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1 + [Sg ,v ;vop3a,none,4 ,* ,* ] v_readlane_b32\t%0, %1, 0 + [v ,Sv ;vop3a,none,4 ,* ,* ] v_writelane_b32\t%0, %1, 0 + [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1 + [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1 + [a ,a ;vop1 ,* ,8,cdna2,* ] v_accvgpr_mov_b32\t%0, %1 + [v ,RF ;flat ,* ,12,* ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 + [&v ,RF ;flat ,* ,12,* ,on ] ^ + [^a ,RF ;flat ,* ,12,cdna2,off] ^ + [&^a ,RF ;flat ,* ,12,cdna2,on ] ^ + [RF ,v ;flat ,* ,12,* ,* ] flat_store%s0\t%A0, %1%O0%g0 + [RF ,a ;flat ,* ,12,cdna2,* ] ^ + [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1 + [RLRG,v ;ds ,* ,12,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) + [v ,RLRG;ds ,* ,12,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) + [v ,RM ;flat ,* ,12,* ,off] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) + [&v ,RM ;flat ,* ,12,* ,on ] ^ + [^a ,RM ;flat ,* ,12,cdna2,off] ^ + [&^a ,RM ;flat ,* ,12,cdna2,on ] ^ + [RM ,v ;flat ,* ,12,* ,* ] global_store%s0\t%A0, %1%O0%g0 + [RM ,a ;flat ,* ,12,cdna2,* ] ^ }) ; 64bit move pattern @@ -627,29 +648,34 @@ [(set (match_operand:DIDF 0 "nonimmediate_operand") (match_operand:DIDF 1 "general_operand"))] "GET_CODE(operands[1]) != SYMBOL_REF" - {@ [cons: =0, 1; attrs: type, length, gcn_version] - [SD ,SSA ;sop1 ,4 ,* ] s_mov_b64\t%0, %1 - [SD ,C ;sop1 ,8 ,* ] ^ - [SD ,DB ;mult ,* ,* ] # - [RS ,Sm ;smem ,12,* ] s_store_dwordx2\t%1, %A0 - [Sm ,RS ;smem ,12,* ] 
s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0) - [v ,v ;vmult,* ,* ] # - [v ,DB ;vmult,* ,* ] # - [Sg ,v ;vmult,* ,* ] # - [v ,Sv ;vmult,* ,* ] # - [v ,^a ;vmult,* ,* ] # - [a ,v ;vmult,* ,* ] # - [a ,a ;vmult,* ,cdna2] # - [v ,RF ;flat ,12,* ] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 - [^a ,RF ;flat ,12,cdna2] ^ - [RF ,v ;flat ,12,* ] flat_store_dwordx2\t%A0, %1%O0%g0 - [RF ,a ;flat ,12,cdna2] ^ - [RLRG,v ;ds ,12,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) - [v ,RLRG;ds ,12,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) - [v ,RM ;flat ,12,* ] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - [^a ,RM ;flat ,12,cdna2] ^ - [RM ,v ;flat ,12,* ] global_store_dwordx2\t%A0, %1%O0%g0 - [RM ,a ;flat ,12,cdna2] ^ + {@ [cons: =0, 1; attrs: type, length, gcn_version, xnack] + [SD ,SSA ;sop1 ,4 ,* ,* ] s_mov_b64\t%0, %1 + [SD ,C ;sop1 ,8 ,* ,* ] ^ + [SD ,DB ;mult ,* ,* ,* ] # + [RS ,Sm ;smem ,12,* ,* ] s_store_dwordx2\t%1, %A0 + [Sm ,RS ;smem ,12,* ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0) + [&Sm ,RS ;smem ,12,* ,on ] ^ + [v ,v ;vmult,* ,* ,* ] # + [v ,DB ;vmult,* ,* ,* ] # + [Sg ,v ;vmult,* ,* ,* ] # + [v ,Sv ;vmult,* ,* ,* ] # + [v ,^a ;vmult,* ,* ,* ] # + [a ,v ;vmult,* ,* ,* ] # + [a ,a ;vmult,* ,cdna2,* ] # + [v ,RF ;flat ,12,* ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 + [&v ,RF ;flat ,12,* ,on ] ^ + [^a ,RF ;flat ,12,cdna2,off] ^ + [&^a ,RF ;flat ,12,cdna2,on ] ^ + [RF ,v ;flat ,12,* ,* ] flat_store_dwordx2\t%A0, %1%O0%g0 + [RF ,a ;flat ,12,cdna2,* ] ^ + [RLRG,v ;ds ,12,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) + [v ,RLRG;ds ,12,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) + [v ,RM ;flat ,12,* ,off] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) + [&v ,RM ;flat ,12,* ,on ] ^ + [^a ,RM ;flat ,12,cdna2,off] ^ + [&^a ,RM ;flat ,12,cdna2,on ] ^ + [RM ,v ;flat ,12,* ,* ] global_store_dwordx2\t%A0, %1%O0%g0 + [RM ,a ;flat ,12,cdna2,* ] ^ } "reload_completed && ((!MEM_P (operands[0]) && !MEM_P 
(operands[1]) @@ -687,26 +713,31 @@ [(set (match_operand:TI 0 "nonimmediate_operand") (match_operand:TI 1 "general_operand" ))] "" - {@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version] - [SD,SSB;mult ,* ,* ,* ] # - [RS,Sm ;smem ,* ,12,* ] s_store_dwordx4\t%1, %A0 - [Sm,RS ;smem ,yes,12,* ] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0) - [RF,v ;flat ,* ,12,* ] flat_store_dwordx4\t%A0, %1%O0%g0 - [RF,a ;flat ,* ,12,cdna2] ^ - [v ,RF ;flat ,* ,12,* ] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0 - [^a,RF ;flat ,* ,12,cdna2] ^ - [v ,v ;vmult,* ,* ,* ] # - [v ,Sv ;vmult,* ,* ,* ] # - [SD,v ;vmult,* ,* ,* ] # - [RM,v ;flat ,yes,12,* ] global_store_dwordx4\t%A0, %1%O0%g0 - [RM,a ;flat ,yes,12,cdna2] ^ - [v ,RM ;flat ,* ,12,* ] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - [^a,RM ;flat ,* ,12,cdna2] ^ - [RL,v ;ds ,* ,12,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) - [v ,RL ;ds ,* ,12,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) - [v ,^a ;vmult,* ,* ,* ] # - [a ,v ;vmult,* ,* ,* ] # - [a ,a ;vmult,* ,* ,cdna2] # + {@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version, xnack] + [SD ,SSB;mult ,* ,* ,* ,* ] # + [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dwordx4\t%1, %A0 + [Sm ,RS ;smem ,yes,12,* ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0) + [&Sm,RS ;smem ,yes,12,* ,on ] ^ + [RF ,v ;flat ,* ,12,* ,* ] flat_store_dwordx4\t%A0, %1%O0%g0 + [RF ,a ;flat ,* ,12,cdna2,* ] ^ + [v ,RF ;flat ,* ,12,* ,off] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0 + [&v ,RF ;flat ,* ,12,* ,on ] ^ + [^a ,RF ;flat ,* ,12,cdna2,off] ^ + [&^a,RF ;flat ,* ,12,cdna2,on ] ^ + [v ,v ;vmult,* ,* ,* ,* ] # + [v ,Sv ;vmult,* ,* ,* ,* ] # + [SD ,v ;vmult,* ,* ,* ,* ] # + [RM ,v ;flat ,yes,12,* ,* ] global_store_dwordx4\t%A0, %1%O0%g0 + [RM ,a ;flat ,yes,12,cdna2,* ] ^ + [v ,RM ;flat ,* ,12,* ,off] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) + [&v ,RM ;flat ,* ,12,* ,on ] ^ + [^a ,RM ;flat ,* ,12,cdna2,off] ^ + [&^a,RM ;flat ,* ,12,cdna2,on ] ^ + [RL 
,v ;ds ,* ,12,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) + [v ,RL ;ds ,* ,12,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) + [v ,^a ;vmult,* ,* ,* ,* ] # + [a ,v ;vmult,* ,* ,* ,* ] # + [a ,a ;vmult,* ,* ,cdna2,* ] # } "reload_completed && REG_P (operands[0]) @@ -889,6 +920,8 @@ (clobber (reg:BI SCC_REG))] "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF" { + /* This s_load may not be XNACK-safe on devices where the GOT may fault. + DGPUs are most likely fine. */ if (SYMBOL_REF_P (operands[1]) && SYMBOL_REF_WEAK (operands[1])) return "s_getpc_b64\t%0\;" @@ -913,6 +946,8 @@ { /* !!! These sequences clobber CC_SAVE_REG. */ + /* This s_load may not be XNACK-safe on devices where the GOT may fault. + DGPUs are most likely fine. */ if (SYMBOL_REF_P (operands[1]) && SYMBOL_REF_WEAK (operands[1])) return "s_mov_b32\ts22, scc\;" diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index e5db6df92d77..c356a0cbb089 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -98,8 +98,8 @@ EnumValue Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY) mxnack= -Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_OFF) -Compile for devices requiring XNACK enabled. Default \"off\". +Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY) +Compile for devices requiring XNACK enabled. Default \"any\". msram-ecc= Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 19feba467a46..db039c472209 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21628,8 +21628,7 @@ run-time performance. The default is 32KB when using OpenACC or OpenMP, and Compile binaries suitable for devices with the XNACK feature enabled, disabled, or either mode. Some devices always require XNACK and some allow the user to configure XNACK. 
The compiled code must match the device mode. -@c The default is @samp{-mxnack=any}. -At present this option is a placeholder for support that is not yet implemented. +The default is @samp{-mxnack=any}. @end table From 4c12bcbeb0c0fd6da4c56e7622814201daadd585 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Wed, 13 Dec 2023 12:00:52 +0000 Subject: [PATCH 276/311] amdgcn: Work around XNACK register allocation problem The extra register pressure is causing infinite loops in some cases, especially at -O0. I have not yet observed any issue on devices that have AVGPRs for spilling, and XNACK is only really useful on those devices anyway, so change the defaults. gcc/ChangeLog: * config/gcn/gcn-hsa.h (NO_XNACK): Change the defaults. * config/gcn/gcn-opts.h (enum hsaco_attr_type): Add HSACO_ATTR_DEFAULT. * config/gcn/gcn.cc (gcn_option_override): Set the default flag_xnack. * config/gcn/gcn.opt: Add -mxnack=default. * doc/invoke.texi: Document the -mxnack default. --- gcc/config/gcn/gcn-hsa.h | 4 +++- gcc/config/gcn/gcn-opts.h | 3 ++- gcc/config/gcn/gcn.cc | 23 +++++++++++++++++++++++ gcc/config/gcn/gcn.opt | 7 +++++-- gcc/doc/invoke.texi | 3 ++- 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index bfb104526c52..b44d42b02d6f 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,7 +75,9 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" -#define NO_XNACK "march=fiji:;march=gfx1030:;" +#define NO_XNACK "march=fiji:;march=gfx1030:;" \ + /* These match the defaults set in gcn.cc. 
*/ \ + "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};" #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;" /* In HSACOv4 no attribute setting means the binary supports "any" hardware diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index b4f494d868cd..634cec6d8324 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -65,7 +65,8 @@ enum hsaco_attr_type { HSACO_ATTR_OFF, HSACO_ATTR_ON, - HSACO_ATTR_ANY + HSACO_ATTR_ANY, + HSACO_ATTR_DEFAULT }; #endif diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index d92cd01d03f0..b67551a2e8ee 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -172,6 +172,29 @@ gcn_option_override (void) /* Allow HSACO_ATTR_ANY silently because that's the default. */ flag_xnack = HSACO_ATTR_OFF; } + + /* There's no need for XNACK on devices without USM, and there are register + allocation problems caused by the early-clobber when AVGPR spills are not + available. + FIXME: can the regalloc mean the default can be really "any"? 
*/ + if (flag_xnack == HSACO_ATTR_DEFAULT) + switch (gcn_arch) + { + case PROCESSOR_FIJI: + case PROCESSOR_VEGA10: + case PROCESSOR_VEGA20: + case PROCESSOR_GFX908: + flag_xnack = HSACO_ATTR_OFF; + break; + case PROCESSOR_GFX90a: + flag_xnack = HSACO_ATTR_ANY; + break; + default: + gcc_unreachable (); + } + + if (flag_sram_ecc == HSACO_ATTR_DEFAULT) + flag_sram_ecc = HSACO_ATTR_ANY; } /* }}} */ diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index c356a0cbb089..32486d9615fe 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -97,9 +97,12 @@ Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON) EnumValue Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY) +EnumValue +Enum(hsaco_attr_type) String(default) Value(HSACO_ATTR_DEFAULT) + mxnack= -Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY) -Compile for devices requiring XNACK enabled. Default \"any\". +Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_DEFAULT) +Compile for devices requiring XNACK enabled. Default \"any\" if USM is supported. msram-ecc= Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index db039c472209..8f885b8c6d69 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21628,7 +21628,8 @@ run-time performance. The default is 32KB when using OpenACC or OpenMP, and Compile binaries suitable for devices with the XNACK feature enabled, disabled, or either mode. Some devices always require XNACK and some allow the user to configure XNACK. The compiled code must match the device mode. -The default is @samp{-mxnack=any}. +The default is @samp{-mxnack=any} on devices that support Unified Shared +Memory, and @samp{-mxnack=off} otherwise.
@end table From d2b269ce30d77dbfc6c28c75887c330d4698b132 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 21:33:11 -0500 Subject: [PATCH 277/311] testsuite: fix g++.dg/pr112822.C gcc/testsuite/ChangeLog: * g++.dg/pr112822.C: Require C++17. --- gcc/testsuite/g++.dg/pr112822.C | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/g++.dg/pr112822.C b/gcc/testsuite/g++.dg/pr112822.C index a8557522467d..9949fbb08acf 100644 --- a/gcc/testsuite/g++.dg/pr112822.C +++ b/gcc/testsuite/g++.dg/pr112822.C @@ -1,6 +1,7 @@ /* PR tree-optimization/112822 */ /* { dg-do compile { target c++17 } } */ /* { dg-options "-w -O2" } */ +// { dg-do compile { target c++17 } } /* Verify we do not ICE on the following noisy creduced test case. */ From 52b4b7d7f5c7c09f5aaf3934978de9702d8c214b Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 23:48:57 -0500 Subject: [PATCH 278/311] c++: copy location to AGGR_INIT_EXPR When building an AGGR_INIT_EXPR from a CALL_EXPR, we shouldn't lose location information. gcc/cp/ChangeLog: * tree.cc (build_aggr_init_expr): Copy EXPR_LOCATION. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/constexpr-nsdmi7b.C: Adjust line. * g++.dg/template/copy1.C: Likewise. 
--- gcc/cp/tree.cc | 1 + gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi7b.C | 4 ++-- gcc/testsuite/g++.dg/template/copy1.C | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc index da4d5c51f074..c4e41fd7b5c9 100644 --- a/gcc/cp/tree.cc +++ b/gcc/cp/tree.cc @@ -689,6 +689,7 @@ build_aggr_init_expr (tree type, tree init) CALL_EXPR_OPERATOR_SYNTAX (rval) = CALL_EXPR_OPERATOR_SYNTAX (init); CALL_EXPR_ORDERED_ARGS (rval) = CALL_EXPR_ORDERED_ARGS (init); CALL_EXPR_REVERSE_ARGS (rval) = CALL_EXPR_REVERSE_ARGS (init); + SET_EXPR_LOCATION (rval, EXPR_LOCATION (init)); } else rval = init; diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi7b.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi7b.C index a410e4826647..586ee54124cf 100644 --- a/gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi7b.C +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-nsdmi7b.C @@ -20,8 +20,8 @@ bar() { A a = foo(); a.p->n = 5; - return a; -} // { dg-error "non-.constexpr." "" { target c++20_down } } + return a; // { dg-error "non-.constexpr." "" { target c++20_down } } +} constexpr int baz() diff --git a/gcc/testsuite/g++.dg/template/copy1.C b/gcc/testsuite/g++.dg/template/copy1.C index eacd9e2c0257..7e0a3805a773 100644 --- a/gcc/testsuite/g++.dg/template/copy1.C +++ b/gcc/testsuite/g++.dg/template/copy1.C @@ -6,10 +6,10 @@ struct A { - // { dg-error "reference" "" { target c++14_down } .+1 } A(A&); // { dg-message "A::A" "" { target c++14_down } } template A(T); // { dg-message "A::A" "" { target c++14_down } } }; +// { dg-error "reference" "" { target c++14_down } .+1 } A a = 0; // { dg-error "no match" "" { target c++14_down } } From 958940eb3511e341e57606f5a2f5399bc89533cb Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 22:53:10 -0500 Subject: [PATCH 279/311] c++: constant direct-initialization [PR108243] When testing the proposed patch for PR71093 I noticed that it changed the diagnostic for consteval-prop6.C. 
I then noticed that the diagnostic wasn't very helpful either way; it was complaining about modification of the 'x' variable, but it's not a problem to initialize a local variable with a consteval constructor as long as the value is actually constant; what we want to know is why the value isn't constant. And then it turned out that this also fixed a missed-optimization bug in the testsuite. PR c++/108243 gcc/cp/ChangeLog: * constexpr.cc (cxx_eval_outermost_constant_expr): Turn a constructor CALL_EXPR into an AGGR_INIT_EXPR. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/consteval-prop6.C: Adjust diagnostic. * g++.dg/opt/is_constant_evaluated3.C: Remove xfails. --- gcc/cp/constexpr.cc | 16 +++++++++++++++- gcc/testsuite/g++.dg/cpp2a/consteval-prop6.C | 2 +- .../g++.dg/opt/is_constant_evaluated3.C | 8 ++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 58187a4fd120..4cf9dd71b053 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -8651,7 +8651,21 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, } if (!object) { - if (TREE_CODE (t) == TARGET_EXPR) + if (TREE_CODE (t) == CALL_EXPR) + { + /* If T is calling a constructor to initialize an object, reframe + it as an AGGR_INIT_EXPR to avoid trying to modify an object + from outside the constant evaluation, which will fail even if + the value is actually constant (is_constant_evaluated3.C).
*/ + tree fn = cp_get_callee_fndecl_nofold (t); + if (fn && DECL_CONSTRUCTOR_P (fn)) + { + object = CALL_EXPR_ARG (t, 0); + object = build_fold_indirect_ref (object); + r = build_aggr_init_expr (type, r); + } + } + else if (TREE_CODE (t) == TARGET_EXPR) object = TARGET_EXPR_SLOT (t); else if (TREE_CODE (t) == AGGR_INIT_EXPR) object = AGGR_INIT_EXPR_SLOT (t); diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-prop6.C b/gcc/testsuite/g++.dg/cpp2a/consteval-prop6.C index 93ed398d9bf5..ca7db7c63d37 100644 --- a/gcc/testsuite/g++.dg/cpp2a/consteval-prop6.C +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-prop6.C @@ -48,7 +48,7 @@ struct X { int a = sizeof(undef(0)); int x = undef(0); - X() = default; // { dg-error "modification of .x. is not a constant expression" } + X() = default; // { dg-error {'consteval int undef\(int\)' used before its definition} } }; void diff --git a/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C b/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C index 0a1e46e5638d..783127cf909b 100644 --- a/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C +++ b/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C @@ -17,7 +17,7 @@ int main() { } // { dg-final { scan-tree-dump "a1 = {\\.n=42, \\.m=0}" "original" } } -// { dg-final { scan-tree-dump "a2 = {\\.n=42, \\.m=0}" "original" { xfail *-*-* } } } -// { dg-final { scan-tree-dump "a3 = {\\.n=42, \\.m=0}" "original" { xfail *-*-* } } } -// { dg-final { scan-tree-dump "a4 = {\\.n=42, \\.m=0}" "original" { xfail *-*-* } } } -// { dg-final { scan-tree-dump "a5 = {\\.n=42, \\.m=0}" "original" { xfail *-*-* } } } +// { dg-final { scan-tree-dump "a2 = {\\.n=42, \\.m=0}" "original" } } +// { dg-final { scan-tree-dump "a3 = {\\.n=42, \\.m=0}" "original" } } +// { dg-final { scan-tree-dump "a4 = {\\.n=42, \\.m=0}" "original" } } +// { dg-final { scan-tree-dump "a5 = {\\.n=42, \\.m=0}" "original" } } From e0659b5417b7f8a090ad2ed4dea830f11ef9c877 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 12 Dec 2023 19:20:27 
-0500 Subject: [PATCH 280/311] c++: fix in-charge parm in constexpr I was puzzled by the proposed patch for PR71093 specifically ignoring the in-charge parameter; the problem turned out to be that when cxx_eval_call_expression jumps from the clone to the cloned function, it assumes that the latter has the same parameters, and so the in-charge parm doesn't get an argument. Since a class with vbases can't have constexpr 'tors there isn't actually a need for an in-charge parameter in a destructor, but we used to use it for deleting destructors and never removed it. I have a patch to do that for GCC 15, but for now let's work around it. gcc/cp/ChangeLog: * constexpr.cc (cxx_eval_call_expression): Handle missing in-charge argument. --- gcc/cp/constexpr.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 4cf9dd71b053..9d9e96c2afdd 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -3169,6 +3169,19 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, ctx->global->put_value (remapped, arg); remapped = DECL_CHAIN (remapped); } + for (; remapped; remapped = TREE_CHAIN (remapped)) + if (DECL_NAME (remapped) == in_charge_identifier) + { + /* FIXME destructors unnecessarily have in-charge parameters + even in classes without vbases, map it to 0 for now. */ + gcc_assert (!CLASSTYPE_VBASECLASSES (DECL_CONTEXT (fun))); + ctx->global->put_value (remapped, integer_zero_node); + } + else + { + gcc_assert (seen_error ()); + *non_constant_p = true; + } /* Add the RESULT_DECL to the values map, too. 
*/ gcc_assert (!DECL_BY_REFERENCE (res)); ctx->global->put_value (res, NULL_TREE); From 90bc2d09b5bfcc913f79543c3b65202e7246e162 Mon Sep 17 00:00:00 2001 From: Nathaniel Shead Date: Fri, 3 Nov 2023 12:18:29 +1100 Subject: [PATCH 281/311] c++: End lifetime of objects in constexpr after destructor call [PR71093] This patch adds checks for using objects after they've been manually destroyed via explicit destructor call. Currently this is only implemented for 'top-level' objects; FIELD_DECLs and individual elements of arrays will need a lot more work to track correctly and are left for a future patch. The other limitation is that destruction of parameter objects is checked too 'early', happening at the end of the function call rather than the end of the owning full-expression as they should be for consistency; see cpp2a/constexpr-lifetime2.C. This is because I wasn't able to find a good way to link the constructed parameter declarations with the variable declarations that are actually destroyed later on to propagate their lifetime status, so I'm leaving this for a later patch. PR c++/71093 gcc/cp/ChangeLog: * constexpr.cc (constexpr_global_ctx::get_value_ptr): Don't return NULL_TREE for objects we're initializing. (constexpr_global_ctx::destroy_value): Rename from remove_value. Only mark real variables as outside lifetime. (constexpr_global_ctx::clear_value): New function. (destroy_value_checked): New function. (cxx_eval_call_expression): Defer complaining about non-constant arg0 for operator delete. Use destroy_value_checked. (cxx_fold_indirect_ref_1): Handle conversion to 'as base' type. (outside_lifetime_error): Include name of object we're accessing. (cxx_eval_store_expression): Handle clobbers. Improve error messages. (cxx_eval_constant_expression): Use destroy_value_checked. Clear bind variables before entering body. gcc/testsuite/ChangeLog: * g++.dg/cpp1y/constexpr-lifetime1.C: Improve error message. * g++.dg/cpp1y/constexpr-lifetime2.C: Likewise.
* g++.dg/cpp1y/constexpr-lifetime3.C: Likewise. * g++.dg/cpp1y/constexpr-lifetime4.C: Likewise. * g++.dg/cpp2a/bitfield2.C: Likewise. * g++.dg/cpp2a/constexpr-new3.C: Likewise. New check. * g++.dg/cpp1y/constexpr-lifetime7.C: New test. * g++.dg/cpp2a/constexpr-lifetime1.C: New test. * g++.dg/cpp2a/constexpr-lifetime2.C: New test. Signed-off-by: Nathaniel Shead --- gcc/cp/constexpr.cc | 148 +++++++++++++++--- .../g++.dg/cpp1y/constexpr-lifetime1.C | 2 +- .../g++.dg/cpp1y/constexpr-lifetime2.C | 2 +- .../g++.dg/cpp1y/constexpr-lifetime3.C | 2 +- .../g++.dg/cpp1y/constexpr-lifetime4.C | 2 +- .../g++.dg/cpp1y/constexpr-lifetime7.C | 93 +++++++++++ gcc/testsuite/g++.dg/cpp2a/bitfield2.C | 2 +- .../g++.dg/cpp2a/constexpr-lifetime1.C | 21 +++ .../g++.dg/cpp2a/constexpr-lifetime2.C | 23 +++ gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C | 17 +- 10 files changed, 284 insertions(+), 28 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime7.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime1.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime2.C diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 9d9e96c2afdd..e1b2d27fc36b 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -1193,13 +1193,20 @@ public: return *p; return NULL_TREE; } - tree *get_value_ptr (tree t) + tree *get_value_ptr (tree t, bool initializing) { if (modifiable && !modifiable->contains (t)) return nullptr; if (tree *p = values.get (t)) - if (*p != void_node) - return p; + { + if (*p != void_node) + return p; + else if (initializing) + { + *p = NULL_TREE; + return p; + } + } return nullptr; } void put_value (tree t, tree v) @@ -1208,13 +1215,19 @@ public: if (!already_in_map && modifiable) modifiable->add (t); } - void remove_value (tree t) + void destroy_value (tree t) { - if (DECL_P (t)) + if (TREE_CODE (t) == VAR_DECL + || TREE_CODE (t) == PARM_DECL + || TREE_CODE (t) == RESULT_DECL) values.put (t, void_node); else values.remove (t); } 
+ void clear_value (tree t) + { + values.remove (t); + } }; /* Helper class for constexpr_global_ctx. In some cases we want to avoid @@ -1238,7 +1251,7 @@ public: ~modifiable_tracker () { for (tree t: set) - global->remove_value (t); + global->clear_value (t); global->modifiable = nullptr; } }; @@ -1278,6 +1291,40 @@ struct constexpr_ctx { mce_value manifestly_const_eval; }; +/* Remove T from the global values map, checking for attempts to destroy + a value that has already finished its lifetime. */ + +static void +destroy_value_checked (const constexpr_ctx* ctx, tree t, bool *non_constant_p) +{ + if (t == error_mark_node || TREE_TYPE (t) == error_mark_node) + return; + + /* Don't error again here if we've already reported a problem. */ + if (!*non_constant_p + && DECL_P (t) + /* Non-trivial destructors have their lifetimes ended explicitly + with a clobber, so don't worry about it here. */ + && (!TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (t)) + /* ...except parameters are remapped in cxx_eval_call_expression, + and the destructor call during cleanup won't be able to tell that + this value has already been destroyed, so complain now. This is + not quite unobservable, but is extremely unlikely to crop up in + practice; see g++.dg/cpp2a/constexpr-lifetime2.C. */ + || TREE_CODE (t) == PARM_DECL) + && ctx->global->is_outside_lifetime (t)) + { + if (!ctx->quiet) + { + auto_diagnostic_group d; + error ("destroying %qE outside its lifetime", t); + inform (DECL_SOURCE_LOCATION (t), "declared here"); + } + *non_constant_p = true; + } + ctx->global->destroy_value (t); +} + /* This internal flag controls whether we should avoid doing anything during constexpr evaluation that would cause extra DECL_UID generation, such as template instantiation and function body copying. 
*/ @@ -2806,6 +2853,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, && (CALL_FROM_NEW_OR_DELETE_P (t) || is_std_allocator_allocate (ctx->call))) { + const bool new_op_p = IDENTIFIER_NEW_OP_P (DECL_NAME (fun)); const int nargs = call_expr_nargs (t); tree arg0 = NULL_TREE; for (int i = 0; i < nargs; ++i) @@ -2813,12 +2861,15 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, tree arg = CALL_EXPR_ARG (t, i); arg = cxx_eval_constant_expression (ctx, arg, vc_prvalue, non_constant_p, overflow_p); - VERIFY_CONSTANT (arg); + /* Deleting a non-constant pointer has a better error message + below. */ + if (new_op_p || i != 0) + VERIFY_CONSTANT (arg); if (i == 0) arg0 = arg; } gcc_assert (arg0); - if (IDENTIFIER_NEW_OP_P (DECL_NAME (fun))) + if (new_op_p) { tree type = build_array_type_nelts (char_type_node, tree_to_uhwi (arg0)); @@ -2867,7 +2918,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, return t; } DECL_NAME (var) = heap_deleted_identifier; - ctx->global->remove_value (var); + ctx->global->destroy_value (var); ctx->global->heap_dealloc_count++; return void_node; } @@ -2890,7 +2941,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, return t; } DECL_NAME (var) = heap_deleted_identifier; - ctx->global->remove_value (var); + ctx->global->destroy_value (var); ctx->global->heap_dealloc_count++; return void_node; } @@ -3255,9 +3306,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, non_constant_p, overflow_p); /* Remove the parms/result from the values map. */ - ctx->global->remove_value (res); + destroy_value_checked (ctx, res, non_constant_p); for (tree parm = parms; parm; parm = TREE_CHAIN (parm)) - ctx->global->remove_value (parm); + destroy_value_checked (ctx, parm, non_constant_p); /* Free any parameter CONSTRUCTORs we aren't returning directly. 
*/ while (!ctors->is_empty ()) @@ -5657,6 +5708,10 @@ cxx_fold_indirect_ref_1 (const constexpr_ctx *ctx, location_t loc, tree type, } } + /* Handle conversion to "as base" type. */ + if (CLASSTYPE_AS_BASE (optype) == type) + return op; + /* Handle conversion to an empty base class, which is represented with a NOP_EXPR. Do this before spelunking into the non-empty subobjects, which is likely to be a waste of time (109678). */ @@ -5908,7 +5963,7 @@ outside_lifetime_error (location_t loc, tree r) } else { - error_at (loc, "accessing object outside its lifetime"); + error_at (loc, "accessing %qE outside its lifetime", r); inform (DECL_SOURCE_LOCATION (r), "declared here"); } } @@ -6125,8 +6180,10 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, constexpr_ctx new_ctx = *ctx; tree init = TREE_OPERAND (t, 1); - if (TREE_CLOBBER_P (init)) - /* Just ignore clobbers. */ + + if (TREE_CLOBBER_P (init) + && CLOBBER_KIND (init) < CLOBBER_OBJECT_END) + /* Only handle clobbers ending the lifetime of objects. */ return void_node; /* First we figure out where we're storing to. */ @@ -6136,7 +6193,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, tree type = TREE_TYPE (target); bool preeval = SCALAR_TYPE_P (type) || TREE_CODE (t) == MODIFY_EXPR; - if (preeval) + if (preeval && !TREE_CLOBBER_P (init)) { /* Evaluate the value to be stored without knowing what object it will be stored in, so that any side-effects happen first. */ @@ -6244,11 +6301,18 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, && const_object_being_modified == NULL_TREE) const_object_being_modified = object; + if (DECL_P (object) + && TREE_CLOBBER_P (init) + && DECL_NAME (object) == heap_deleted_identifier) + /* Ignore clobbers of deleted allocations for now; we'll get a better error + message later when operator delete is called. */ + return void_node; + /* And then find/build up our initializer for the path to the subobject we're initializing. 
*/ tree *valp; if (DECL_P (object)) - valp = ctx->global->get_value_ptr (object); + valp = ctx->global->get_value_ptr (object, TREE_CODE (t) == INIT_EXPR); else valp = NULL; if (!valp) @@ -6256,10 +6320,45 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, /* A constant-expression cannot modify objects from outside the constant-expression. */ if (!ctx->quiet) - error ("modification of %qE is not a constant expression", object); + { + auto_diagnostic_group d; + if (DECL_P (object) && DECL_NAME (object) == heap_deleted_identifier) + { + error ("modification of allocated storage after deallocation " + "is not a constant expression"); + inform (DECL_SOURCE_LOCATION (object), "allocated here"); + } + else if (DECL_P (object) && ctx->global->is_outside_lifetime (object)) + { + if (TREE_CLOBBER_P (init)) + error ("destroying %qE outside its lifetime", object); + else + error ("modification of %qE outside its lifetime " + "is not a constant expression", object); + inform (DECL_SOURCE_LOCATION (object), "declared here"); + } + else + { + if (TREE_CLOBBER_P (init)) + error ("destroying %qE from outside current evaluation " + "is not a constant expression", object); + else + error ("modification of %qE from outside current evaluation " + "is not a constant expression", object); + } + } *non_constant_p = true; return t; } + + /* Handle explicit end-of-lifetime. */ + if (TREE_CLOBBER_P (init)) + { + if (refs->is_empty ()) + ctx->global->destroy_value (object); + return void_node; + } + type = TREE_TYPE (object); bool no_zero_init = true; @@ -6533,7 +6632,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, /* The hash table might have moved since the get earlier, and the initializer might have mutated the underlying CONSTRUCTORs, so we must recompute VALP. 
*/ - valp = ctx->global->get_value_ptr (object); + valp = ctx->global->get_value_ptr (object, TREE_CODE (t) == INIT_EXPR); for (unsigned i = 0; i < vec_safe_length (indexes); i++) { ctors[i] = valp; @@ -7650,7 +7749,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, /* Forget SAVE_EXPRs and TARGET_EXPRs created by this full-expression. */ for (tree save_expr : save_exprs) - ctx->global->remove_value (save_expr); + destroy_value_checked (ctx, save_expr, non_constant_p); } break; @@ -8203,13 +8302,18 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t, non_constant_p, overflow_p, jump_target); case BIND_EXPR: + /* Pre-emptively clear the vars declared by this BIND_EXPR from the value + map, so that when checking whether they're already destroyed later we + don't get confused by remnants of previous calls. */ + for (tree decl = BIND_EXPR_VARS (t); decl; decl = DECL_CHAIN (decl)) + ctx->global->clear_value (decl); r = cxx_eval_constant_expression (ctx, BIND_EXPR_BODY (t), lval, non_constant_p, overflow_p, jump_target); for (tree decl = BIND_EXPR_VARS (t); decl; decl = DECL_CHAIN (decl)) - ctx->global->remove_value (decl); - return r; + destroy_value_checked (ctx, decl, non_constant_p); + break; case PREINCREMENT_EXPR: case POSTINCREMENT_EXPR: diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime1.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime1.C index 43aa7c974c19..3fda29e0cc2d 100644 --- a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime1.C +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime1.C @@ -10,4 +10,4 @@ constexpr const int& test() { auto local = S{}; // { dg-message "note: declared here" } return local.get(); } -constexpr int x = test(); // { dg-error "accessing object outside its lifetime" } +constexpr int x = test(); // { dg-error "accessing .local. 
outside its lifetime" } diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime2.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime2.C index 2f5ae8db6d54..d82ba5c8b731 100644 --- a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime2.C +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime2.C @@ -8,7 +8,7 @@ struct S { constexpr int error() { const auto& local = S{}.get(); // { dg-message "note: declared here" } - return local; // { dg-error "accessing object outside its lifetime" } + return local; // { dg-error "accessing '\[^'\]+' outside its lifetime" } } constexpr int x = error(); // { dg-message "in .constexpr. expansion" } diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime3.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime3.C index 53785521d057..67e9b91c7235 100644 --- a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime3.C +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime3.C @@ -7,7 +7,7 @@ constexpr int f(int i) { int j = 123; // { dg-message "note: declared here" } p = &j; } - return *p; // { dg-error "accessing object outside its lifetime" } + return *p; // { dg-error "accessing 'j' outside its lifetime" } } constexpr int i = f(0); // { dg-message "in .constexpr. 
expansion" } diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime4.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime4.C index 181a12016632..6f0d749dcf2a 100644 --- a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime4.C +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime4.C @@ -5,7 +5,7 @@ constexpr const double& test() { return local; } -static_assert(test() == 3.0, ""); // { dg-error "constant|accessing object outside its lifetime" } +static_assert(test() == 3.0, ""); // { dg-error "constant|accessing '\[^'\]+' outside its lifetime" } // no deference, shouldn't error static_assert((test(), true), ""); diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime7.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime7.C new file mode 100644 index 000000000000..4148f42f7be8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-lifetime7.C @@ -0,0 +1,93 @@ +// PR c++/71093 +// { dg-do compile { target c++14 } } + +constexpr int f (const int *p) +{ + typedef int T; + p->~T (); // { dg-error "destroying" } + return *p; +} + +constexpr int i = 0; +constexpr int j = f (&i); + + +template +constexpr bool test_access() { + T x {}; + x.~T(); + T y = x; // { dg-error "lifetime" } + return true; +} + +template +constexpr bool test_modification() { + T x {}; + x.~T(); + x = T(); // { dg-error "lifetime" } + return true; +} + +template +constexpr bool test_scope() { + { + T x {}; + x.~T(); + } // { dg-error "destroying" } + return true; +} + +template +constexpr bool test_destroy_temp() { + T{}.~T(); // { dg-error "destroying" } + return true; +} + +template +constexpr bool test_parameter(T t) { + // note: error message occurs at point of call + t.~T(); + return true; +} + +template +constexpr void test_bindings_impl(int n) { + if (n == 0) return; + T a {}; + if (n == 1) return; + T b {}; +} + +template +constexpr bool test_bindings() { + test_bindings_impl(1); + test_bindings_impl(0); + test_bindings_impl(2); + return true; +} + +constexpr bool i1 = test_access(); // { 
dg-message "in .constexpr." } +constexpr bool i2 = test_modification(); // { dg-message "in .constexpr." } +constexpr bool i3 = test_scope(); // { dg-message "in .constexpr." } +constexpr bool i4 = test_destroy_temp(); // { dg-message "in .constexpr." "" { xfail *-*-* } } +constexpr bool i5 = test_parameter(int{}); // { dg-error "destroying" } +constexpr bool i6 = test_bindings(); + +struct Trivial { int x; }; +constexpr bool t1 = test_access(); // { dg-message "in .constexpr." } +constexpr bool t2 = test_modification(); // { dg-message "in .constexpr." } +constexpr bool t3 = test_scope(); // { dg-message "in .constexpr." } +constexpr bool t4 = test_destroy_temp(); // { dg-message "in .constexpr." } +constexpr bool t5 = test_parameter(Trivial{}); // { dg-error "destroying" } +constexpr bool t6 = test_bindings(); + +#if __cplusplus >= 202002L +struct NonTrivial { int x; constexpr ~NonTrivial() {} }; // { dg-error "destroying" "" { target c++20 } } +constexpr bool n1 = test_access(); // { dg-message "in .constexpr." "" { target c++20 } } +constexpr bool n2 = test_modification(); // { dg-message "in .constexpr." "" { target c++20 } } +constexpr bool n3 = test_scope(); // { dg-message "in .constexpr." "" { target c++20 } } +constexpr bool n4 = test_destroy_temp(); // { dg-message "in .constexpr." 
"" { target c++20 } } +constexpr bool n5 = test_parameter(NonTrivial{}); // { dg-error "destroying" "" { target c++20 } } +constexpr bool n6 = test_bindings(); +#endif + diff --git a/gcc/testsuite/g++.dg/cpp2a/bitfield2.C b/gcc/testsuite/g++.dg/cpp2a/bitfield2.C index dcb424fc8f66..885d4f0e26d5 100644 --- a/gcc/testsuite/g++.dg/cpp2a/bitfield2.C +++ b/gcc/testsuite/g++.dg/cpp2a/bitfield2.C @@ -13,7 +13,7 @@ template struct U { int j : W = 7; // { dg-warning "default member initializers for bit-fields only available with" "" { target c++17_down } } int k : W { 8 }; // { dg-warning "default member initializers for bit-fields only available with" "" { target c++17_down } } - int l : V ? 7 : a = 3; // { dg-error "modification of .a. is not a constant expression" } + int l : V ? 7 : a = 3; // { dg-error "modification of .a. from outside current evaluation is not a constant expression" } // { dg-error "width not an integer constant" "" { target *-*-* } .-1 } int m : (V ? W : b) = 9; // { dg-warning "default member initializers for bit-fields only available with" "" { target c++17_down } } // { dg-error "zero width for bit-field" "" { target *-*-* } .-1 } diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime1.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime1.C new file mode 100644 index 000000000000..36163844eca3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime1.C @@ -0,0 +1,21 @@ +// { dg-do compile { target c++20 } } + +#include "construct_at.h" + +struct S { int x; }; +constexpr int f() { + S s; + s.~S(); + std::construct_at(&s, 5); + return s.x; +} +static_assert(f() == 5); + +struct T { int x; constexpr ~T() {} }; +constexpr int g() { + T t; + t.~T(); + std::construct_at(&t, 12); + return t.x; +} +static_assert(g() == 12); diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime2.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime2.C new file mode 100644 index 000000000000..56cc9e3c1c80 --- /dev/null +++ 
b/gcc/testsuite/g++.dg/cpp2a/constexpr-lifetime2.C @@ -0,0 +1,23 @@ +// { dg-do compile { target c++20 } } + +#include "construct_at.h" + +struct S { int x; }; + +constexpr bool foo(S s, S*& p) { + p = &s; + s.~S(); + return true; +} + +constexpr bool bar() { + // This is, strictly speaking, implementation-defined behaviour; + // see [expr.call] p6. However, in all other cases we destroy + // at the end of the full-expression, so the below should be fixed. + S* p; + foo(S{}, p), std::construct_at(p); // { dg-bogus "destroying" "" { xfail *-*-* } } + + return true; +} + +constexpr bool x = bar(); diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C index 3ba440fec534..5d9f192507b8 100644 --- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new3.C @@ -34,7 +34,7 @@ constexpr auto v3 = f3 (); // { dg-message "in 'constexpr' expansion of" } constexpr bool f4 (int *p) { - delete p; // { dg-error "deallocation of storage that was not previously allocated" } + delete p; // { dg-error "destroying 'q' from outside current evaluation" } return false; } @@ -70,3 +70,18 @@ f7 () } constexpr auto v7 = f7 (); + +constexpr bool +f8_impl (int *p) +{ + delete p; // { dg-error "deallocation of storage that was not previously allocated" } + return false; +} + +constexpr bool +f8 () +{ + int q = 0; + return f8_impl (&q); +} +constexpr auto v8 = f8 (); // { dg-message "in 'constexpr' expansion of" } From 33a3f85ee4b5bc562a84c6896294278e0a5ab160 Mon Sep 17 00:00:00 2001 From: Gaius Mulley Date: Wed, 13 Dec 2023 17:35:02 +0000 Subject: [PATCH 282/311] PR modula2/112921 missing modules shortreal shortstr shortconv convstringshort For completeness here are three SHORTREAL modules which match their LONGREAL and REAL counterparts. The datatype SHORTREAL is a GNU extension and these modules were missing. gcc/m2/ChangeLog: PR modula2/112921 * gm2-libs-iso/ConvStringShort.def: New file. 
* gm2-libs-iso/ConvStringShort.mod: New file. * gm2-libs-iso/ShortConv.def: New file. * gm2-libs-iso/ShortConv.mod: New file. * gm2-libs-iso/ShortMath.def: New file. * gm2-libs-iso/ShortMath.mod: New file. * gm2-libs-iso/ShortStr.def: New file. * gm2-libs-iso/ShortStr.mod: New file. libgm2/ChangeLog: PR modula2/112921 * libm2iso/Makefile.am (M2DEFS): Add ConvStringShort.def, ShortConv.def, ShortMath.def and ShortStr.def. (M2MODS): Add ConvStringShort.mod, ShortConv.mod, ShortMath.mod and ShortStr.mod. * libm2iso/Makefile.in: Regenerate. gcc/testsuite/ChangeLog: PR modula2/112921 * gm2/iso/run/pass/shorttest.mod: New test. Signed-off-by: Gaius Mulley --- gcc/m2/gm2-libs-iso/ConvStringShort.def | 60 ++++ gcc/m2/gm2-libs-iso/ConvStringShort.mod | 69 ++++ gcc/m2/gm2-libs-iso/ShortConv.def | 73 ++++ gcc/m2/gm2-libs-iso/ShortConv.mod | 350 +++++++++++++++++++ gcc/m2/gm2-libs-iso/ShortMath.def | 76 ++++ gcc/m2/gm2-libs-iso/ShortMath.mod | 110 ++++++ gcc/m2/gm2-libs-iso/ShortStr.def | 87 +++++ gcc/m2/gm2-libs-iso/ShortStr.mod | 150 ++++++++ gcc/testsuite/gm2/iso/run/pass/shorttest.mod | 13 + libgm2/libm2iso/Makefile.am | 12 +- libgm2/libm2iso/Makefile.in | 35 +- 11 files changed, 1020 insertions(+), 15 deletions(-) create mode 100644 gcc/m2/gm2-libs-iso/ConvStringShort.def create mode 100644 gcc/m2/gm2-libs-iso/ConvStringShort.mod create mode 100644 gcc/m2/gm2-libs-iso/ShortConv.def create mode 100644 gcc/m2/gm2-libs-iso/ShortConv.mod create mode 100644 gcc/m2/gm2-libs-iso/ShortMath.def create mode 100644 gcc/m2/gm2-libs-iso/ShortMath.mod create mode 100644 gcc/m2/gm2-libs-iso/ShortStr.def create mode 100644 gcc/m2/gm2-libs-iso/ShortStr.mod create mode 100644 gcc/testsuite/gm2/iso/run/pass/shorttest.mod diff --git a/gcc/m2/gm2-libs-iso/ConvStringShort.def b/gcc/m2/gm2-libs-iso/ConvStringShort.def new file mode 100644 index 000000000000..a6b485c1d996 --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ConvStringShort.def @@ -0,0 +1,60 @@ +(* ConvStringShort.def converts floating point 
numbers to Strings. + +Copyright (C) 2009-2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. *) + +DEFINITION MODULE ConvStringShort ; + +FROM DynamicStrings IMPORT String ; + + +(* + RealToFloatString - converts a real with, sigFigs, into a string + and returns the result as a string. +*) + +PROCEDURE RealToFloatString (real: SHORTREAL; sigFigs: CARDINAL) : String ; + + +(* + RealToEngString - converts the value of real to floating-point + string form, with sigFigs significant figures. + The number is scaled with one to three digits + in the whole number part and with an exponent + that is a multiple of three. +*) + +PROCEDURE RealToEngString (real: SHORTREAL; sigFigs: CARDINAL) : String ; + + +(* + RealToFixedString - returns the fixed-point string representation + of real rounded to the given place relative + to the decimal point. +*) + +PROCEDURE RealToFixedString (real: SHORTREAL; place: INTEGER) : String ; + + +END ConvStringShort.
diff --git a/gcc/m2/gm2-libs-iso/ConvStringShort.mod b/gcc/m2/gm2-libs-iso/ConvStringShort.mod new file mode 100644 index 000000000000..064027c017c0 --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ConvStringShort.mod @@ -0,0 +1,69 @@ +(* ConvStringShort.mod converts floating point numbers to Strings. + +Copyright (C) 2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. *) + +IMPLEMENTATION MODULE ConvStringShort ; + +IMPORT ConvStringReal ; + + +(* + RealToFloatString - converts a real with, sigFigs, into a string + and returns the result as a string. +*) + +PROCEDURE RealToFloatString (real: SHORTREAL; sigFigs: CARDINAL) : String ; +BEGIN + RETURN ConvStringReal.RealToFloatString (real, sigFigs) +END RealToFloatString ; + + +(* + RealToEngString - converts the value of real to floating-point + string form, with sigFigs significant figures. + The number is scaled with one to three digits + in the whole number part and with an exponent + that is a multiple of three. 
+*) + +PROCEDURE RealToEngString (real: SHORTREAL; sigFigs: CARDINAL) : String ; +BEGIN + RETURN ConvStringReal.RealToEngString (real, sigFigs) +END RealToEngString ; + + +(* + RealToFixedString - returns the fixed-point string representation of + real rounded to the given place relative to the + decimal point. +*) + +PROCEDURE RealToFixedString (real: SHORTREAL; place: INTEGER) : String ; +BEGIN + RETURN ConvStringReal.RealToFixedString (real, place) +END RealToFixedString ; + + +END ConvStringShort. diff --git a/gcc/m2/gm2-libs-iso/ShortConv.def b/gcc/m2/gm2-libs-iso/ShortConv.def new file mode 100644 index 000000000000..2373c7b0b83e --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ShortConv.def @@ -0,0 +1,73 @@ +(* ShortConv.def provides low level SHORTREAL/string conversions. + +Copyright (C) 2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*) + +DEFINITION MODULE ShortConv; + +IMPORT + ConvTypes; + +TYPE + ConvResults = ConvTypes.ConvResults; (* strAllRight, strOutOfRange, + strWrongFormat, strEmpty *) + +PROCEDURE ScanReal (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState); + (* Represents the start state of a finite state scanner for real + numbers - assigns class of inputCh to chClass and a procedure + representing the next state to nextState. + *) + +PROCEDURE FormatReal (str: ARRAY OF CHAR): ConvResults; + (* Returns the format of the string value for conversion to SHORTREAL. *) + +PROCEDURE ValueReal (str: ARRAY OF CHAR): SHORTREAL; + (* Returns the value corresponding to the real number string value + str if str is well-formed; otherwise raises the ShortConv exception. + *) + +PROCEDURE LengthFloatReal (real: SHORTREAL; sigFigs: CARDINAL): CARDINAL; + (* Returns the number of characters in the floating-point string + representation of real with sigFigs significant figures. + *) + +PROCEDURE LengthEngReal (real: SHORTREAL; sigFigs: CARDINAL): CARDINAL; + (* Returns the number of characters in the floating-point engineering + string representation of real with sigFigs significant figures. + *) + +PROCEDURE LengthFixedReal (real: SHORTREAL; place: INTEGER): CARDINAL; + (* Returns the number of characters in the fixed-point string + representation of real rounded to the given place relative to the + decimal point. + *) + +PROCEDURE IsRConvException (): BOOLEAN; + (* Returns TRUE if the current coroutine is in the exceptional + execution state because of the raising of an exception in a + routine from this module; otherwise returns FALSE. + *) + +END ShortConv. diff --git a/gcc/m2/gm2-libs-iso/ShortConv.mod b/gcc/m2/gm2-libs-iso/ShortConv.mod new file mode 100644 index 000000000000..66f31b51960a --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ShortConv.mod @@ -0,0 +1,350 @@ +(* ShortConv.mod implements the ISO ShortConv specification. 
+ +Copyright (C) 2009-2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. *) + +IMPLEMENTATION MODULE ShortConv ; + +FROM SYSTEM IMPORT ADDRESS ; +FROM ConvTypes IMPORT ScanClass ; +FROM CharClass IMPORT IsNumeric, IsWhiteSpace ; +FROM DynamicStrings IMPORT String, InitString, InitStringCharStar, KillString, Length, Slice, Mark, Index, string ; +FROM dtoa IMPORT strtod ; +FROM ConvStringShort IMPORT RealToFloatString, RealToEngString, RealToFixedString ; +FROM M2RTS IMPORT Halt ; +FROM libc IMPORT free ; +IMPORT EXCEPTIONS ; + + +TYPE + RealConvException = (noException, invalid, outofrange) ; + +VAR + realConv: EXCEPTIONS.ExceptionSource ; + + +(* Low-level LONGREAL/string conversions. *) + +(* Represents the start state of a finite state scanner for real + numbers - assigns class of inputCh to chClass and a procedure + representing the next state to nextState. 
+*) + +PROCEDURE ScanReal (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanSecondDigit ; + chClass := valid + ELSIF (inputCh='+') OR (inputCh='-') + THEN + nextState := scanFirstDigit ; + chClass := valid + ELSIF IsWhiteSpace(inputCh) + THEN + nextState := ScanReal ; + chClass := padding + ELSE + nextState := ScanReal ; + chClass := invalid + END +END ScanReal ; + + +(* + scanFirstDigit - +*) + +PROCEDURE scanFirstDigit (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanSecondDigit ; + chClass := valid + ELSE + nextState := scanFirstDigit ; + chClass := invalid + END +END scanFirstDigit ; + + +(* + scanSecondDigit - +*) + +PROCEDURE scanSecondDigit (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanSecondDigit ; + chClass := valid + ELSIF inputCh='.' 
+ THEN + nextState := scanFixed ; + chClass := valid + ELSIF inputCh='E' + THEN + nextState := scanScientific ; + chClass := valid + ELSE + nextState := noOpFinished ; + chClass := terminator + END +END scanSecondDigit ; + + +(* + scanFixed - +*) + +PROCEDURE scanFixed (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanFixed ; + chClass := valid + ELSIF inputCh='E' + THEN + nextState := scanScientific ; + chClass := valid + ELSE + nextState := noOpFinished ; + chClass := terminator + END +END scanFixed ; + + +(* + scanScientific - +*) + +PROCEDURE scanScientific (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanScientificSecond ; + chClass := valid + ELSIF (inputCh='-') OR (inputCh='+') + THEN + nextState := scanScientificSign ; + chClass := valid + ELSE + nextState := scanScientific ; + chClass := invalid + END +END scanScientific ; + + +(* + scanScientificSign - +*) + +PROCEDURE scanScientificSign (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanScientificSecond ; + chClass := valid + ELSE + nextState := scanScientificSign ; + chClass := invalid + END +END scanScientificSign ; + + +(* + scanScientificSecond - +*) + +PROCEDURE scanScientificSecond (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + IF IsNumeric(inputCh) + THEN + nextState := scanScientificSecond ; + chClass := valid + ELSE + nextState := noOpFinished ; + chClass := terminator + END +END scanScientificSecond ; + + +(* + noOpFinished - +*) + +PROCEDURE noOpFinished (inputCh: CHAR; VAR chClass: ConvTypes.ScanClass; + VAR nextState: ConvTypes.ScanState) ; +BEGIN + nextState := noOpFinished ; + chClass := terminator ; + (* should we raise an exception here? 
*) +END noOpFinished ; + + +(* Returns the format of the string value for conversion to LONGREAL. *) + +PROCEDURE FormatReal (str: ARRAY OF CHAR) : ConvResults ; +VAR + proc : ConvTypes.ScanState ; + chClass: ConvTypes.ScanClass ; + i, h : CARDINAL ; +BEGIN + i := 1 ; + h := LENGTH(str) ; + ScanReal(str[0], chClass, proc) ; + WHILE (i. + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*) + +DEFINITION MODULE ShortMath; + + (* Mathematical functions for the type SHORTREAL *) + +CONST + pi = 3.1415926535897932384626433832795028841972; + exp1 = 2.7182818284590452353602874713526624977572; + +PROCEDURE __BUILTIN__ sqrt (x: SHORTREAL): SHORTREAL; + (* Returns the positive square root of x *) + +PROCEDURE __BUILTIN__ exp (x: SHORTREAL): SHORTREAL; + (* Returns the exponential of x *) + +PROCEDURE __BUILTIN__ ln (x: SHORTREAL): SHORTREAL; + (* Returns the natural logarithm of x *) + + (* The angle in all trigonometric functions is measured in radians *) + +PROCEDURE __BUILTIN__ sin (x: SHORTREAL): SHORTREAL; + (* Returns the sine of x *) + +PROCEDURE __BUILTIN__ cos (x: SHORTREAL): SHORTREAL; + (* Returns the cosine of x *) + +PROCEDURE tan (x: SHORTREAL): SHORTREAL; + (* Returns the tangent of x *) + +PROCEDURE arcsin (x: SHORTREAL): SHORTREAL; + (* Returns the arcsine of x *) + +PROCEDURE arccos (x: SHORTREAL): SHORTREAL; + (* Returns the arccosine of x *) + +PROCEDURE arctan (x: SHORTREAL): SHORTREAL; + (* Returns the arctangent of x *) + +PROCEDURE power (base, exponent: SHORTREAL): SHORTREAL; + (* Returns the value of the number base raised to the power exponent *) + +PROCEDURE round (x: SHORTREAL): INTEGER; + (* Returns the value of x rounded to the nearest integer *) + +PROCEDURE IsRMathException (): BOOLEAN; + (* Returns TRUE if the current coroutine is in the exceptional + execution state because of the raising of an exception in a + routine from this module; otherwise returns FALSE. + *) + +END ShortMath. diff --git a/gcc/m2/gm2-libs-iso/ShortMath.mod b/gcc/m2/gm2-libs-iso/ShortMath.mod new file mode 100644 index 000000000000..3776b3b99d7d --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ShortMath.mod @@ -0,0 +1,110 @@ +(* ShortMath.mod implements the ISO ShortMath specification. + +Copyright (C) 2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. 
+ +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. *) + +IMPLEMENTATION MODULE ShortMath ; + +IMPORT libm ; +IMPORT cbuiltin ; + +PROCEDURE __ATTRIBUTE__ __BUILTIN__ ((__builtin_sqrtf)) sqrt (x: SHORTREAL): SHORTREAL; + (* Returns the positive square root of x *) +BEGIN + RETURN cbuiltin.sqrtf (x) +END sqrt ; + +PROCEDURE __ATTRIBUTE__ __BUILTIN__ ((__builtin_expf)) exp (x: SHORTREAL): SHORTREAL; + (* Returns the exponential of x *) +BEGIN + RETURN cbuiltin.expf (x) +END exp ; + +PROCEDURE __ATTRIBUTE__ __BUILTIN__ ((__builtin_logf)) ln (x: SHORTREAL): SHORTREAL; + (* Returns the natural logarithm of x *) +BEGIN + RETURN cbuiltin.logf (x) +END ln ; + + (* The angle in all trigonometric functions is measured in radians *) + +PROCEDURE __ATTRIBUTE__ __BUILTIN__ ((__builtin_sinf)) sin (x: SHORTREAL): SHORTREAL; + (* Returns the sine of x *) +BEGIN + RETURN cbuiltin.sinf (x) +END sin ; + +PROCEDURE __ATTRIBUTE__ __BUILTIN__ ((__builtin_cosf)) cos (x: SHORTREAL): SHORTREAL; + (* Returns the cosine of x *) +BEGIN + RETURN cbuiltin.cosf (x) +END cos ; + +PROCEDURE tan (x: SHORTREAL): SHORTREAL; + (* Returns the tangent of x *) +BEGIN + RETURN libm.tanf (x) +END tan ; 
+ +PROCEDURE arcsin (x: SHORTREAL): SHORTREAL; + (* Returns the arcsine of x *) +BEGIN + RETURN libm.asinf (x) +END arcsin ; + +PROCEDURE arccos (x: SHORTREAL): SHORTREAL; + (* Returns the arccosine of x *) +BEGIN + RETURN libm.acosf (x) +END arccos ; + +PROCEDURE arctan (x: SHORTREAL): SHORTREAL; + (* Returns the arctangent of x *) +BEGIN + RETURN libm.atanf (x) +END arctan ; + +PROCEDURE power (base, exponent: SHORTREAL): SHORTREAL; + (* Returns the value of the number base raised to the power exponent *) +BEGIN + RETURN libm.powf (base, exponent) +END power ; + +PROCEDURE round (x: SHORTREAL) : INTEGER; + (* Returns the value of x rounded to the nearest integer *) +BEGIN + RETURN TRUNC (x) +END round ; + +PROCEDURE IsRMathException (): BOOLEAN; + (* Returns TRUE if the current coroutine is in the + exceptional execution state because of the raising + of an exception in a routine from this module; otherwise + returns FALSE. + *) +BEGIN + RETURN FALSE +END IsRMathException ; + +END ShortMath. diff --git a/gcc/m2/gm2-libs-iso/ShortStr.def b/gcc/m2/gm2-libs-iso/ShortStr.def new file mode 100644 index 000000000000..42fa9dd2e849 --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ShortStr.def @@ -0,0 +1,87 @@ +(* ShortStr.def provides conversion between shortreal and strings. + +Copyright (C) 2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. *) + +DEFINITION MODULE ShortStr; + + (* SHORTREAL/string conversions *) + +IMPORT + ConvTypes; + +TYPE + (* strAllRight, strOutOfRange, strWrongFormat, strEmpty *) + ConvResults = ConvTypes.ConvResults; + +(* the string form of a signed fixed-point real number is + ["+" | "-"], decimal digit, {decimal digit}, [".", + {decimal digit}] +*) + +(* the string form of a signed floating-point real number is + signed fixed-point real number, "E", ["+" | "-"], + decimal digit, {decimal digit} +*) + +PROCEDURE StrToReal (str: ARRAY OF CHAR; VAR real: SHORTREAL; + VAR res: ConvResults); + (* Ignores any leading spaces in str. If the subsequent characters + in str are in the format of a signed real number, assigns a + corresponding value to real. Assigns a value indicating the + format of str to res. + *) + +PROCEDURE RealToFloat (real: SHORTREAL; sigFigs: CARDINAL; + VAR str: ARRAY OF CHAR); + (* Converts the value of real to floating-point string form, with + sigFigs significant figures, and copies the possibly truncated + result to str. + *) + +PROCEDURE RealToEng (real: SHORTREAL; sigFigs: CARDINAL; + VAR str: ARRAY OF CHAR); + (* Converts the value of real to floating-point string form, with + sigFigs significant figures, and copies the possibly truncated + result to str. The number is scaled with one to three digits + in the whole number part and with an exponent that is a + multiple of three. 
+ *) + +PROCEDURE RealToFixed (real: SHORTREAL; place: INTEGER; + VAR str: ARRAY OF CHAR); + (* Converts the value of real to fixed-point string form, rounded + to the given place relative to the decimal point, and copies + the possibly truncated result to str. + *) + +PROCEDURE RealToStr (real: SHORTREAL; VAR str: ARRAY OF CHAR); + (* Converts the value of real as RealToFixed if the sign and + magnitude can be shown within the capacity of str, or + otherwise as RealToFloat, and copies the possibly truncated + result to str. The number of places or significant digits + depend on the capacity of str. + *) + +END ShortStr. diff --git a/gcc/m2/gm2-libs-iso/ShortStr.mod b/gcc/m2/gm2-libs-iso/ShortStr.mod new file mode 100644 index 000000000000..946d8ae681ef --- /dev/null +++ b/gcc/m2/gm2-libs-iso/ShortStr.mod @@ -0,0 +1,150 @@ +(* ShortStr.mod implement the ISO ShortStr specification. + +Copyright (C) 2009-2023 Free Software Foundation, Inc. +Contributed by Gaius Mulley . + +This file is part of GNU Modula-2. + +GNU Modula-2 is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GNU Modula-2 is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*) + +IMPLEMENTATION MODULE ShortStr; + +(* REAL/string conversions *) + +IMPORT ShortConv ; + +FROM DynamicStrings IMPORT String, InitString, KillString, Length, CopyOut ; + +FROM ConvStringShort IMPORT RealToFixedString, RealToFloatString, + RealToEngString ; + + +(* the string form of a signed fixed-point real number is + ["+" | "-"], decimal digit, {decimal digit}, [".", + {decimal digit}] +*) + +(* the string form of a signed floating-point real number is + signed fixed-point real number, "E", ["+" | "-"], + decimal digit, {decimal digit} +*) + +PROCEDURE StrToReal (str: ARRAY OF CHAR; VAR real: SHORTREAL; + VAR res: ConvResults) ; + (* Ignores any leading spaces in str. If the subsequent characters + in str are in the format of a signed real number, assigns a + corresponding value to real. Assigns a value indicating the + format of str to res. + *) +BEGIN + res := ShortConv.FormatReal(str) ; + IF res=strAllRight + THEN + real := ShortConv.ValueReal(str) + END +END StrToReal ; + + +PROCEDURE RealToFloat (real: SHORTREAL; sigFigs: CARDINAL; + VAR str: ARRAY OF CHAR) ; + (* Converts the value of real to floating-point string form, with + sigFigs significant figures, and copies the possibly truncated + result to str. + *) +VAR + s: String ; +BEGIN + s := RealToFloatString(real, sigFigs) ; + CopyOut(str, s) ; + s := KillString(s) +END RealToFloat ; + + +PROCEDURE RealToEng (real: SHORTREAL; sigFigs: CARDINAL; + VAR str: ARRAY OF CHAR) ; + (* Converts the value of real to floating-point string form, with + sigFigs significant figures, and copies the possibly truncated + result to str. The number is scaled with one to three digits + in the whole number part and with an exponent that is a multiple + of three. 
+ *) +VAR + s: String ; +BEGIN + s := RealToEngString(real, sigFigs) ; + CopyOut(str, s) ; + s := KillString(s) +END RealToEng ; + + +PROCEDURE RealToFixed (real: SHORTREAL; place: INTEGER; + VAR str: ARRAY OF CHAR) ; + (* Converts the value of real to fixed-point string form, rounded + to the given place relative to the decimal point, and copies + the possibly truncated result to str. + *) +VAR + s: String ; +BEGIN + s := RealToFixedString(real, place) ; + CopyOut(str, s) ; + s := KillString(s) +END RealToFixed ; + + +PROCEDURE RealToStr (real: SHORTREAL; VAR str: ARRAY OF CHAR) ; + (* Converts the value of real as RealToFixed if the sign and + magnitude can be shown within the capacity of str, or + otherwise as RealToFloat, and copies the possibly truncated + result to str. The number of places or significant digits + are implementation-defined. + *) +VAR + s : String ; + sigFigs: CARDINAL ; +BEGIN + sigFigs := HIGH(str) ; + WHILE sigFigs>1 DO + s := RealToFixedString(real, sigFigs) ; + IF Length(s)<=HIGH(str) + THEN + CopyOut(str, s) ; + s := KillString(s) ; + RETURN + END ; + s := KillString(s) ; + DEC(sigFigs) + END ; + sigFigs := HIGH(str) ; + WHILE sigFigs#0 DO + s := RealToFloatString(real, sigFigs) ; + IF Length(s)<=HIGH(str) + THEN + CopyOut(str, s) ; + s := KillString(s) ; + RETURN + END ; + s := KillString(s) ; + DEC(sigFigs) + END +END RealToStr ; + + +END ShortStr. diff --git a/gcc/testsuite/gm2/iso/run/pass/shorttest.mod b/gcc/testsuite/gm2/iso/run/pass/shorttest.mod new file mode 100644 index 000000000000..f6c343c21c82 --- /dev/null +++ b/gcc/testsuite/gm2/iso/run/pass/shorttest.mod @@ -0,0 +1,13 @@ +MODULE shorttest ; + +FROM ShortStr IMPORT RealToStr ; +FROM STextIO IMPORT WriteString, WriteLn ; +FROM ShortMath IMPORT pi ; + +VAR + buf: ARRAY [0..30] OF CHAR ; +BEGIN + WriteString ("pi = ") ; + RealToStr (pi, buf) ; + WriteString (buf) ; WriteLn +END shorttest. 
diff --git a/libgm2/libm2iso/Makefile.am b/libgm2/libm2iso/Makefile.am index 8e774c5ea195..01b5dc7001a3 100644 --- a/libgm2/libm2iso/Makefile.am +++ b/libgm2/libm2iso/Makefile.am @@ -101,6 +101,7 @@ if BUILD_ISOLIB M2DEFS = ChanConsts.def CharClass.def \ ClientSocket.def ComplexMath.def \ ConvStringLong.def ConvStringReal.def \ + ConvStringShort.def \ ConvTypes.def COROUTINES.def \ ErrnoCategory.def EXCEPTIONS.def \ GeneralUserExceptions.def IOChan.def \ @@ -124,7 +125,10 @@ M2DEFS = ChanConsts.def CharClass.def \ RTgenif.def RTio.def \ Semaphores.def SeqFile.def \ ShortComplexMath.def \ - ShortIO.def ShortWholeIO.def \ + ShortConv.def \ + ShortIO.def \ + ShortMath.def ShortStr.def \ + ShortWholeIO.def \ SimpleCipher.def SIOResult.def \ SLongIO.def SLongWholeIO.def \ SRawIO.def SRealIO.def \ @@ -143,6 +147,7 @@ M2DEFS = ChanConsts.def CharClass.def \ M2MODS = ChanConsts.mod CharClass.mod \ ClientSocket.mod ComplexMath.mod \ ConvStringLong.mod ConvStringReal.mod \ + ConvStringShort.mod \ ConvTypes.mod COROUTINES.mod \ EXCEPTIONS.mod GeneralUserExceptions.mod \ IOChan.mod IOConsts.mod \ @@ -164,7 +169,10 @@ M2MODS = ChanConsts.mod CharClass.mod \ RTgen.mod RTio.mod \ Semaphores.mod SeqFile.mod \ ShortComplexMath.mod \ - ShortIO.mod ShortWholeIO.mod \ + ShortConv.mod \ + ShortIO.mod \ + ShortMath.mod ShortStr.mod \ + ShortWholeIO.mod \ SimpleCipher.mod SIOResult.mod \ SLongIO.mod SLongWholeIO.mod \ SRawIO.mod SRealIO.mod \ diff --git a/libgm2/libm2iso/Makefile.in b/libgm2/libm2iso/Makefile.in index a82aa0c408bc..1d04835fbf09 100644 --- a/libgm2/libm2iso/Makefile.in +++ b/libgm2/libm2iso/Makefile.in @@ -160,7 +160,8 @@ libm2iso_la_LIBADD = @BUILD_ISOLIB_TRUE@am__objects_1 = ChanConsts.lo CharClass.lo \ @BUILD_ISOLIB_TRUE@ ClientSocket.lo ComplexMath.lo \ @BUILD_ISOLIB_TRUE@ ConvStringLong.lo ConvStringReal.lo \ -@BUILD_ISOLIB_TRUE@ ConvTypes.lo COROUTINES.lo EXCEPTIONS.lo \ +@BUILD_ISOLIB_TRUE@ ConvStringShort.lo ConvTypes.lo \ +@BUILD_ISOLIB_TRUE@ COROUTINES.lo 
EXCEPTIONS.lo \ @BUILD_ISOLIB_TRUE@ GeneralUserExceptions.lo IOChan.lo \ @BUILD_ISOLIB_TRUE@ IOConsts.lo IOLink.lo IOResult.lo \ @BUILD_ISOLIB_TRUE@ LongComplexMath.lo LongConv.lo LongIO.lo \ @@ -172,16 +173,16 @@ libm2iso_la_LIBADD = @BUILD_ISOLIB_TRUE@ RealIO.lo RealMath.lo RealStr.lo RndFile.lo \ @BUILD_ISOLIB_TRUE@ RTdata.lo RTentity.lo RTfio.lo RTgenif.lo \ @BUILD_ISOLIB_TRUE@ RTgen.lo RTio.lo Semaphores.lo SeqFile.lo \ -@BUILD_ISOLIB_TRUE@ ShortComplexMath.lo ShortIO.lo \ -@BUILD_ISOLIB_TRUE@ ShortWholeIO.lo SimpleCipher.lo \ -@BUILD_ISOLIB_TRUE@ SIOResult.lo SLongIO.lo SLongWholeIO.lo \ -@BUILD_ISOLIB_TRUE@ SRawIO.lo SRealIO.lo SShortIO.lo \ -@BUILD_ISOLIB_TRUE@ SShortWholeIO.lo StdChans.lo STextIO.lo \ -@BUILD_ISOLIB_TRUE@ Storage.lo StreamFile.lo StringChan.lo \ -@BUILD_ISOLIB_TRUE@ Strings.lo SWholeIO.lo SysClock.lo \ -@BUILD_ISOLIB_TRUE@ SYSTEM.lo TermFile.lo TERMINATION.lo \ -@BUILD_ISOLIB_TRUE@ TextIO.lo TextUtil.lo WholeConv.lo \ -@BUILD_ISOLIB_TRUE@ WholeIO.lo WholeStr.lo +@BUILD_ISOLIB_TRUE@ ShortComplexMath.lo ShortConv.lo ShortIO.lo \ +@BUILD_ISOLIB_TRUE@ ShortMath.lo ShortStr.lo ShortWholeIO.lo \ +@BUILD_ISOLIB_TRUE@ SimpleCipher.lo SIOResult.lo SLongIO.lo \ +@BUILD_ISOLIB_TRUE@ SLongWholeIO.lo SRawIO.lo SRealIO.lo \ +@BUILD_ISOLIB_TRUE@ SShortIO.lo SShortWholeIO.lo StdChans.lo \ +@BUILD_ISOLIB_TRUE@ STextIO.lo Storage.lo StreamFile.lo \ +@BUILD_ISOLIB_TRUE@ StringChan.lo Strings.lo SWholeIO.lo \ +@BUILD_ISOLIB_TRUE@ SysClock.lo SYSTEM.lo TermFile.lo \ +@BUILD_ISOLIB_TRUE@ TERMINATION.lo TextIO.lo TextUtil.lo \ +@BUILD_ISOLIB_TRUE@ WholeConv.lo WholeIO.lo WholeStr.lo @BUILD_ISOLIB_TRUE@am_libm2iso_la_OBJECTS = $(am__objects_1) \ @BUILD_ISOLIB_TRUE@ ErrnoCategory.lo RTco.lo wrapclock.lo \ @BUILD_ISOLIB_TRUE@ wraptime.lo libm2iso_la-wrapsock.lo @@ -488,6 +489,7 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_ISOLIB_TRUE@M2DEFS = ChanConsts.def CharClass.def \ @BUILD_ISOLIB_TRUE@ ClientSocket.def ComplexMath.def \ @BUILD_ISOLIB_TRUE@ ConvStringLong.def 
ConvStringReal.def \ +@BUILD_ISOLIB_TRUE@ ConvStringShort.def \ @BUILD_ISOLIB_TRUE@ ConvTypes.def COROUTINES.def \ @BUILD_ISOLIB_TRUE@ ErrnoCategory.def EXCEPTIONS.def \ @BUILD_ISOLIB_TRUE@ GeneralUserExceptions.def IOChan.def \ @@ -511,7 +513,10 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_ISOLIB_TRUE@ RTgenif.def RTio.def \ @BUILD_ISOLIB_TRUE@ Semaphores.def SeqFile.def \ @BUILD_ISOLIB_TRUE@ ShortComplexMath.def \ -@BUILD_ISOLIB_TRUE@ ShortIO.def ShortWholeIO.def \ +@BUILD_ISOLIB_TRUE@ ShortConv.def \ +@BUILD_ISOLIB_TRUE@ ShortIO.def \ +@BUILD_ISOLIB_TRUE@ ShortMath.def ShortStr.def \ +@BUILD_ISOLIB_TRUE@ ShortWholeIO.def \ @BUILD_ISOLIB_TRUE@ SimpleCipher.def SIOResult.def \ @BUILD_ISOLIB_TRUE@ SLongIO.def SLongWholeIO.def \ @BUILD_ISOLIB_TRUE@ SRawIO.def SRealIO.def \ @@ -530,6 +535,7 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_ISOLIB_TRUE@M2MODS = ChanConsts.mod CharClass.mod \ @BUILD_ISOLIB_TRUE@ ClientSocket.mod ComplexMath.mod \ @BUILD_ISOLIB_TRUE@ ConvStringLong.mod ConvStringReal.mod \ +@BUILD_ISOLIB_TRUE@ ConvStringShort.mod \ @BUILD_ISOLIB_TRUE@ ConvTypes.mod COROUTINES.mod \ @BUILD_ISOLIB_TRUE@ EXCEPTIONS.mod GeneralUserExceptions.mod \ @BUILD_ISOLIB_TRUE@ IOChan.mod IOConsts.mod \ @@ -551,7 +557,10 @@ FLAGS_TO_PASS = $(AM_MAKEFLAGS) @BUILD_ISOLIB_TRUE@ RTgen.mod RTio.mod \ @BUILD_ISOLIB_TRUE@ Semaphores.mod SeqFile.mod \ @BUILD_ISOLIB_TRUE@ ShortComplexMath.mod \ -@BUILD_ISOLIB_TRUE@ ShortIO.mod ShortWholeIO.mod \ +@BUILD_ISOLIB_TRUE@ ShortConv.mod \ +@BUILD_ISOLIB_TRUE@ ShortIO.mod \ +@BUILD_ISOLIB_TRUE@ ShortMath.mod ShortStr.mod \ +@BUILD_ISOLIB_TRUE@ ShortWholeIO.mod \ @BUILD_ISOLIB_TRUE@ SimpleCipher.mod SIOResult.mod \ @BUILD_ISOLIB_TRUE@ SLongIO.mod SLongWholeIO.mod \ @BUILD_ISOLIB_TRUE@ SRawIO.mod SRealIO.mod \ From da730b29f10fb48d5ed812535768c69ff7d74248 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Wed, 13 Dec 2023 14:22:25 -0500 Subject: [PATCH 283/311] Revert "testsuite: fix g++.dg/pr112822.C" This reverts commit 
d2b269ce30d77dbfc6c28c75887c330d4698b132. --- gcc/testsuite/g++.dg/pr112822.C | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/pr112822.C b/gcc/testsuite/g++.dg/pr112822.C index 9949fbb08acf..a8557522467d 100644 --- a/gcc/testsuite/g++.dg/pr112822.C +++ b/gcc/testsuite/g++.dg/pr112822.C @@ -1,7 +1,6 @@ /* PR tree-optimization/112822 */ /* { dg-do compile { target c++17 } } */ /* { dg-options "-w -O2" } */ -// { dg-do compile { target c++17 } } /* Verify we do not ICE on the following noisy creduced test case. */ From 063564ecbfc618cd019f86216a0224e144effae1 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Wed, 13 Dec 2023 14:15:44 -0500 Subject: [PATCH 284/311] c++: TARGET_EXPR location in default arg [PR96997] My r14-6505-g52b4b7d7f5c7c0 change to copy the location in build_aggr_init_expr reopened PR96997; let's fix it properly this time, by clearing the location like we do for other trees. PR c++/96997 gcc/cp/ChangeLog: * tree.cc (bot_manip): Check data.clear_location for TARGET_EXPR. gcc/testsuite/ChangeLog: * g++.dg/debug/cleanup2.C: New test. --- gcc/cp/tree.cc | 3 +++ gcc/testsuite/g++.dg/debug/cleanup2.C | 10 ++++++++++ 2 files changed, 13 insertions(+) create mode 100644 gcc/testsuite/g++.dg/debug/cleanup2.C diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc index c4e41fd7b5c9..d26e73aaf954 100644 --- a/gcc/cp/tree.cc +++ b/gcc/cp/tree.cc @@ -3170,6 +3170,9 @@ bot_manip (tree* tp, int* walk_subtrees, void* data_) if (TREE_OPERAND (u, 1) == error_mark_node) return error_mark_node; + if (data.clear_location) + SET_EXPR_LOCATION (u, input_location); + /* Replace the old expression with the new version. 
*/ *tp = u; /* We don't have to go below this point; the recursive call to diff --git a/gcc/testsuite/g++.dg/debug/cleanup2.C b/gcc/testsuite/g++.dg/debug/cleanup2.C new file mode 100644 index 000000000000..03bf92c8424e --- /dev/null +++ b/gcc/testsuite/g++.dg/debug/cleanup2.C @@ -0,0 +1,10 @@ +// PR c++/96997 +// { dg-additional-options "-g -fdump-tree-gimple-lineno" } + +struct A { A(); ~A(); }; +void f(const A& = A()); +int main() { f(); } + +// The destructor call for the A temporary should not have the location of the +// f declaration. +// { dg-final { scan-tree-dump-not ".C:5" "gimple" } } From 5445ff4a51fcee4d281f79b5f54b349290d0327d Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 13 Dec 2023 17:48:11 +0100 Subject: [PATCH 285/311] Fix 'libgomp/config/linux/allocator.c' 'size_t' vs. '%ld' format string mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix-up for commit 348874f0baac0f22c98ab11abbfa65fd172f6bdd "libgomp: basic pinned memory on Linux", which may result in build failures as follows, for example, for the '-m32' multilib of x86_64-pc-linux-gnu: In file included from [...]/source-gcc/libgomp/config/linux/allocator.c:31: [...]/source-gcc/libgomp/config/linux/allocator.c: In function ‘linux_memspace_alloc’: [...]/source-gcc/libgomp/config/linux/allocator.c:70:26: error: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘size_t’ {aka ‘unsigned int’} [-Werror=format=] 70 | gomp_debug (0, "libgomp: failed to pin %ld bytes of" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 71 | " memory (ulimit too low?)\n", size); | ~~~~ | | | size_t {aka unsigned int} [...]/source-gcc/libgomp/libgomp.h:186:29: note: in definition of macro ‘gomp_debug’ 186 | (gomp_debug) ((KIND), __VA_ARGS__); \ | ^~~~~~~~~~~ [...]/source-gcc/libgomp/config/linux/allocator.c:70:52: note: format string is defined here 70 | gomp_debug (0, "libgomp: failed to pin %ld bytes of" | ~~^ | | | long int | %d cc1: all
warnings being treated as errors make[9]: *** [allocator.lo] Error 1 make[9]: Leaving directory `[...]/build-gcc/x86_64-pc-linux-gnu/32/libgomp' [...] Fix this in the same way as used elsewhere in libgomp. libgomp/ * config/linux/allocator.c (linux_memspace_alloc): Fix 'size_t' vs. '%ld' format string mismatch. --- libgomp/config/linux/allocator.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index 269d0d607d83..6ffa2417913d 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -50,6 +50,9 @@ #include #include #include "libgomp.h" +#ifdef HAVE_INTTYPES_H +# include <inttypes.h> /* For PRIu64. */ +#endif static void * linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) @@ -67,8 +70,13 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin) if (mlock (addr, size)) { - gomp_debug (0, "libgomp: failed to pin %ld bytes of" - " memory (ulimit too low?)\n", size); +#ifdef HAVE_INTTYPES_H + gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes of" + " memory (ulimit too low?)\n", (uint64_t) size); +#else + gomp_debug (0, "libgomp: failed to pin %lu bytes of" + " memory (ulimit too low?)\n", (unsigned long) size); +#endif munmap (addr, size); return NULL; } From 819bc4f6700847068883eb862dfaed11f30db80b Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 13 Dec 2023 21:13:22 +0100 Subject: [PATCH 286/311] libcpp: Fix valgrind errors on pr88974.c [PR112956] On the c-c++-common/cpp/pr88974.c testcase I'm seeing ==600549== Conditional jump or move depends on uninitialised value(s) ==600549== at 0x1DD3A05: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:3050) ==600549== by 0x1DBFC7F: _cpp_parse_expr (expr.cc:1392) ==600549== by 0x1DB9471: do_if(cpp_reader*) (directives.cc:2087) ==600549== by 0x1DBB4D8: _cpp_handle_directive (directives.cc:572) ==600549== by 0x1DCD488: _cpp_lex_token (lex.cc:3682)
==600549== by 0x1DD3A97: cpp_get_token_1(cpp_reader*, unsigned int*) (macro.cc:2936) ==600549== by 0x7F7EE4: scan_translation_unit (c-ppoutput.cc:350) ==600549== by 0x7F7EE4: preprocess_file(cpp_reader*) (c-ppoutput.cc:106) ==600549== by 0x7F6235: c_common_init() (c-opts.cc:1280) ==600549== by 0x704C8B: lang_dependent_init (toplev.cc:1837) ==600549== by 0x704C8B: do_compile (toplev.cc:2135) ==600549== by 0x704C8B: toplev::main(int, char**) (toplev.cc:2306) ==600549== by 0x7064BA: main (main.cc:39) error. The problem is that _cpp_lex_direct can leave result->src_loc uninitialized in some cases and later on we use that location_t. _cpp_lex_direct essentially does: cppchar_t c; ... cpp_token *result = pfile->cur_token++; fresh_line: result->flags = 0; ... if (buffer->need_line) { if (pfile->state.in_deferred_pragma) { result->type = CPP_PRAGMA_EOL; ... // keeps result->src_loc uninitialized; return result; } if (!_cpp_get_fresh_line (pfile)) { result->type = CPP_EOF; if (!pfile->state.in_directive && !pfile->state.parsing_args) { result->src_loc = pfile->line_table->highest_line; ... } ... // otherwise result->src_loc is sometimes uninitialized here return result; } ... } ... result->src_loc = pfile->line_table->highest_line; ... c = *buffer->cur++; switch (c) { ... case '\n': ... buffer->need_line = true; if (pfile->state.in_deferred_pragma) { result->type = CPP_PRAGMA_EOL; ... return result; } goto fresh_line; ... } ... So, if _cpp_lex_direct is called without buffer->need_line initially set, result->src_loc is always initialized (and actually hundreds of tests rely on that exact value it has), even when c == '\n' and we set that flag later on and goto fresh_line. For CPP_PRAGMA_EOL case we have in that case separate handling and don't goto. 
But if _cpp_lex_direct is called with buffer->need_line initially set and we either decide to return a CPP_PRAGMA_EOL token, or getting a new line fails for some reason and we return a CPP_EOF token while in directive or parsing args state, result->src_loc is kept uninitialized and can be whatever the allocation left there. The following patch attempts to keep the status quo: use the value that was returned previously if it was initialized (i.e. we went through the goto fresh_line; statement in c == '\n' handling) and only initialize result->src_loc if it was uninitialized before. 2023-12-13 Jakub Jelinek PR preprocessor/112956 * lex.cc (_cpp_lex_direct): Initialize c to 0. For CPP_PRAGMA_EOL tokens and if c == 0 also for CPP_EOF set result->src_loc to highest locus. --- libcpp/lex.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libcpp/lex.cc b/libcpp/lex.cc index a5d2f31e766a..273f6b263c8e 100644 --- a/libcpp/lex.cc +++ b/libcpp/lex.cc @@ -3809,7 +3809,7 @@ _cpp_get_fresh_line (cpp_reader *pfile) cpp_token * _cpp_lex_direct (cpp_reader *pfile) { - cppchar_t c; + cppchar_t c = 0; cpp_buffer *buffer; const unsigned char *comment_start; bool fallthrough_comment = false; @@ -3833,6 +3833,7 @@ _cpp_lex_direct (cpp_reader *pfile) pfile->state.in_deferred_pragma = false; if (!pfile->state.pragma_allow_expansion) pfile->state.prevent_expansion--; + result->src_loc = pfile->line_table->highest_line; return result; } if (!_cpp_get_fresh_line (pfile)) @@ -3849,6 +3850,8 @@ _cpp_lex_direct (cpp_reader *pfile) /* Now pop the buffer that _cpp_get_fresh_line did not.
*/ _cpp_pop_buffer (pfile); } + else if (c == 0) + result->src_loc = pfile->line_table->highest_line; return result; } if (buffer != pfile->buffer) From e1fde9de3ffa0afc804beca654a7540405de54f7 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Tue, 6 Sep 2022 10:26:05 +0000 Subject: [PATCH 287/311] OpenMP/OpenACC: Reindent TO/FROM/_CACHE_ stanza in {c_}finish_omp_clauses This patch trivially adds braces and reindents the OMP_CLAUSE_TO/OMP_CLAUSE_FROM/OMP_CLAUSE__CACHE_ stanza in c_finish_omp_clauses and finish_omp_clauses, in preparation for the following patch (to clarify the diff a little). 2022-09-13 Julian Brown gcc/c/ * c-typeck.cc (c_finish_omp_clauses): Add braces and reindent OMP_CLAUSE_TO/OMP_CLAUSE_FROM/OMP_CLAUSE__CACHE_ stanza. gcc/cp/ * semantics.cc (finish_omp_clauses): Add braces and reindent OMP_CLAUSE_TO/OMP_CLAUSE_FROM/OMP_CLAUSE__CACHE_ stanza. --- gcc/c/c-typeck.cc | 615 +++++++++++++++++----------------- gcc/cp/semantics.cc | 786 ++++++++++++++++++++++---------------------- 2 files changed, 706 insertions(+), 695 deletions(-) diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 836893905fa0..1e4615a713a0 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -15318,321 +15318,326 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: case OMP_CLAUSE__CACHE_: - t = OMP_CLAUSE_DECL (c); - if (TREE_CODE (t) == TREE_LIST) - { - grp_start_p = pc; - grp_sentinel = OMP_CLAUSE_CHAIN (c); + { + t = OMP_CLAUSE_DECL (c); + if (TREE_CODE (t) == TREE_LIST) + { + grp_start_p = pc; + grp_sentinel = OMP_CLAUSE_CHAIN (c); - if (handle_omp_array_sections (c, ort)) - remove = true; - else - { - t = OMP_CLAUSE_DECL (c); - if (!omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "array section does not have mappable type " - "in %qs clause", - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (TYPE_ATOMIC (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION
(c), - "%<_Atomic%> %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - while (TREE_CODE (t) == ARRAY_REF) - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == COMPONENT_REF - && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) - { - do - { - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t)) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - while (TREE_CODE (t) == COMPONENT_REF - || TREE_CODE (t) == ARRAY_REF); - - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, - DECL_UID (t)))) - { - remove = true; - break; - } - if (bitmap_bit_p (&map_field_head, DECL_UID (t))) - break; - if (bitmap_bit_p (&map_head, DECL_UID (t))) - { - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in motion " - "clauses", t); - else if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data " - "clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in map " - "clauses", t); - remove = true; - } - else - { - bitmap_set_bit (&map_head, DECL_UID (t)); - bitmap_set_bit (&map_field_head, DECL_UID (t)); - } - } - } - if (c_oacc_check_attachments (c)) - remove = true; - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) - /* In this case, we have a single array element which is a - pointer, and we already set OMP_CLAUSE_SIZE in - handle_omp_array_sections above. For attach/detach clauses, - reset the OMP_CLAUSE_SIZE (representing a bias) to zero - here. 
*/ - OMP_CLAUSE_SIZE (c) = size_zero_node; - break; - } - if (t == error_mark_node) - { - remove = true; - break; - } - /* OpenACC attach / detach clauses must be pointers. */ - if (c_oacc_check_attachments (c)) - { - remove = true; - break; - } - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) - /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a - bias) to zero here, so it is not set erroneously to the pointer - size later on in gimplify.cc. */ - OMP_CLAUSE_SIZE (c) = size_zero_node; - while (INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - indir_component_ref_p = false; - if (TREE_CODE (t) == COMPONENT_REF - && (TREE_CODE (TREE_OPERAND (t, 0)) == MEM_REF - || INDIRECT_REF_P (TREE_OPERAND (t, 0)) - || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) - { - t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); - indir_component_ref_p = true; - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - - if (TREE_CODE (t) == COMPONENT_REF - && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) - { - if (DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + if (handle_omp_array_sections (c, ort)) remove = true; - } - else if (!omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE does not have a mappable type in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (TYPE_ATOMIC (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%<_Atomic%> %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - while (TREE_CODE (t) 
== COMPONENT_REF) - { - if (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) - == UNION_TYPE) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - remove = true; - break; - } - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF) - { - if (maybe_ne (mem_ref_offset (t), 0)) + else + { + t = OMP_CLAUSE_DECL (c); + if (!omp_mappable_type (TREE_TYPE (t))) + { error_at (OMP_CLAUSE_LOCATION (c), - "cannot dereference %qE in %qs clause", t, + "array section does not have mappable type " + "in %qs clause", omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { + remove = true; + } + else if (TYPE_ATOMIC (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%<_Atomic%> %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + while (TREE_CODE (t) == ARRAY_REF) t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - if (remove) - break; - if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) - { - if (bitmap_bit_p (&map_field_head, DECL_UID (t)) - || (ort != C_ORT_ACC - && bitmap_bit_p (&map_head, DECL_UID (t)))) - break; - } - } - if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is not a variable in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (VAR_P (t) && DECL_THREAD_LOCAL_P (t)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD is threadprivate variable in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP - || (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_POINTER)) - && !indir_component_ref_p - && !c_mark_addressable (t)) - remove = true; - else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER - || 
(OMP_CLAUSE_MAP_KIND (c) - == GOMP_MAP_FIRSTPRIVATE_POINTER) - || (OMP_CLAUSE_MAP_KIND (c) - == GOMP_MAP_FORCE_DEVICEPTR))) - && t == OMP_CLAUSE_DECL (c) - && !omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD does not have a mappable type in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (TREE_TYPE (t) == error_mark_node) - remove = true; - else if (TYPE_ATOMIC (strip_array_types (TREE_TYPE (t)))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%<_Atomic%> %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, DECL_UID (t)))) - remove = true; - else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER) - { - if (bitmap_bit_p (&generic_head, DECL_UID (t)) - || bitmap_bit_p (&firstprivate_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, DECL_UID (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); + if (TREE_CODE (t) == COMPONENT_REF + && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + { + do + { + t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == MEM_REF + || INDIRECT_REF_P (t)) + { + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + } + while (TREE_CODE (t) == COMPONENT_REF + || TREE_CODE (t) == ARRAY_REF); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_IMPLICIT (c) + && (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, + DECL_UID (t)))) + { + remove = true; + break; + } + if (bitmap_bit_p (&map_field_head, DECL_UID (t))) + break; + if (bitmap_bit_p (&map_head, DECL_UID 
(t))) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in motion " + "clauses", t); + else if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data " + "clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in map " + "clauses", t); + remove = true; + } + else + { + bitmap_set_bit (&map_head, DECL_UID (t)); + bitmap_set_bit (&map_field_head, DECL_UID (t)); + } + } + } + if (c_oacc_check_attachments (c)) remove = true; - } - else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) - { - if (ort == C_ORT_ACC) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + /* In this case, we have a single array element which is a + pointer, and we already set OMP_CLAUSE_SIZE in + handle_omp_array_sections above. For attach/detach + clauses, reset the OMP_CLAUSE_SIZE (representing a bias) + to zero here. */ + OMP_CLAUSE_SIZE (c) = size_zero_node; + break; + } + if (t == error_mark_node) + { + remove = true; + break; + } + /* OpenACC attach / detach clauses must be pointers. */ + if (c_oacc_check_attachments (c)) + { + remove = true; + break; + } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a + bias) to zero here, so it is not set erroneously to the pointer + size later on in gimplify.cc. 
*/ + OMP_CLAUSE_SIZE (c) = size_zero_node; + while (INDIRECT_REF_P (t) + || TREE_CODE (t) == ARRAY_REF) + { + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + while (TREE_CODE (t) == COMPOUND_EXPR) + { + t = TREE_OPERAND (t, 1); + STRIP_NOPS (t); + } + indir_component_ref_p = false; + if (TREE_CODE (t) == COMPONENT_REF + && (TREE_CODE (TREE_OPERAND (t, 0)) == MEM_REF + || INDIRECT_REF_P (TREE_OPERAND (t, 0)) + || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) + { + t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); + indir_component_ref_p = true; + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + + if (TREE_CODE (t) == COMPONENT_REF + && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) + { + if (DECL_BIT_FIELD (TREE_OPERAND (t, 1))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "bit-field %qE in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (!omp_mappable_type (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qE does not have a mappable type in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (TYPE_ATOMIC (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%<_Atomic%> %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + while (TREE_CODE (t) == COMPONENT_REF) + { + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) + == UNION_TYPE) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qE is a member of a union", t); + remove = true; + break; + } + t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == MEM_REF) + { + if (maybe_ne (mem_ref_offset (t), 0)) + error_at (OMP_CLAUSE_LOCATION (c), + "cannot dereference %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + else + t = TREE_OPERAND (t, 0); + } + while (TREE_CODE (t) == MEM_REF + || INDIRECT_REF_P (t) + || TREE_CODE (t) == ARRAY_REF) + { + t = TREE_OPERAND (t, 0); + 
STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + } + if (remove) + break; + if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) + { + if (bitmap_bit_p (&map_field_head, DECL_UID (t)) + || (ort != C_ORT_ACC + && bitmap_bit_p (&map_head, DECL_UID (t)))) + break; + } + } + if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qE is not a variable in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (VAR_P (t) && DECL_THREAD_LOCAL_P (t)) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qD is threadprivate variable in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_FIRSTPRIVATE_POINTER)) + && !indir_component_ref_p + && !c_mark_addressable (t)) + remove = true; + else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FIRSTPRIVATE_POINTER) + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FORCE_DEVICEPTR))) + && t == OMP_CLAUSE_DECL (c) + && !omp_mappable_type (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qD does not have a mappable type in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (TREE_TYPE (t) == error_mark_node) + remove = true; + else if (TYPE_ATOMIC (strip_array_types (TREE_TYPE (t)))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%<_Atomic%> %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_IMPLICIT (c) + && (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, + DECL_UID (t)))) + remove = true; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) + == 
GOMP_MAP_FIRSTPRIVATE_POINTER)) + { + if (bitmap_bit_p (&generic_head, DECL_UID (t)) + || bitmap_bit_p (&firstprivate_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, DECL_UID (t))) + { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears both in data and map clauses", t); - remove = true; - } - else - bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); - } - else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) - { - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in motion clauses", t); - else if (ort == C_ORT_ACC) + remove = true; + } + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + { + if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", + t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears both in data and map clauses", t); + remove = true; + } + else + bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); + } + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in motion clauses", t); + else if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in map clauses", t); + remove = true; + } + else if (ort == C_ORT_ACC + && bitmap_bit_p (&generic_head, DECL_UID (t))) + { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in map clauses", t); - remove = true; - } - else if (ort == C_ORT_ACC - && bitmap_bit_p (&generic_head, DECL_UID (t))) 
- { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); - remove = true; - } - else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) - || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) - { - if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears both in data and map clauses", t); - remove = true; - } - else - { - bitmap_set_bit (&map_head, DECL_UID (t)); - if (t != OMP_CLAUSE_DECL (c) - && TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF) - bitmap_set_bit (&map_field_head, DECL_UID (t)); - } + remove = true; + } + else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) + || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) + { + if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears both in data and map clauses", t); + remove = true; + } + else + { + bitmap_set_bit (&map_head, DECL_UID (t)); + if (t != OMP_CLAUSE_DECL (c) + && TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF) + bitmap_set_bit (&map_field_head, DECL_UID (t)); + } + } break; case OMP_CLAUSE_ENTER: diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 973b44f75ae9..508322b2616a 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -8210,408 +8210,414 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: case OMP_CLAUSE__CACHE_: - t = OMP_CLAUSE_DECL (c); - if (TREE_CODE (t) == TREE_LIST) - { - grp_start_p = pc; - grp_sentinel = OMP_CLAUSE_CHAIN (c); + { + t = OMP_CLAUSE_DECL (c); + if (TREE_CODE (t) == TREE_LIST) + { + grp_start_p = pc; + grp_sentinel = OMP_CLAUSE_CHAIN (c); - if (handle_omp_array_sections (c, ort)) - remove = true; - else - { - t = OMP_CLAUSE_DECL (c); - if (TREE_CODE (t) != TREE_LIST - && !type_dependent_expression_p (t) - && !omp_mappable_type 
(TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "array section does not have mappable type " - "in %qs clause", - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - if (TREE_TYPE (t) != error_mark_node - && !COMPLETE_TYPE_P (TREE_TYPE (t))) - cxx_incomplete_type_inform (TREE_TYPE (t)); - remove = true; - } - while (TREE_CODE (t) == ARRAY_REF) - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == COMPONENT_REF - && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) - { - do - { - t = TREE_OPERAND (t, 0); - if (REFERENCE_REF_P (t)) - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t)) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - while (TREE_CODE (t) == COMPONENT_REF - || TREE_CODE (t) == ARRAY_REF); - - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, - DECL_UID (t)))) - { - remove = true; - break; - } - if (bitmap_bit_p (&map_field_head, DECL_UID (t))) - break; - if (bitmap_bit_p (&map_head, DECL_UID (t))) - { - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in motion" - " clauses", t); - else if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data" - " clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in map" - " clauses", t); - remove = true; - } - else - { - bitmap_set_bit (&map_head, DECL_UID (t)); - bitmap_set_bit (&map_field_head, DECL_UID (t)); - } - } - } - if (cp_oacc_check_attachments (c)) - remove = true; - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) - /* In this case, we have a single array element which is a - pointer, and we already set OMP_CLAUSE_SIZE 
in - handle_omp_array_sections above. For attach/detach clauses, - reset the OMP_CLAUSE_SIZE (representing a bias) to zero - here. */ - OMP_CLAUSE_SIZE (c) = size_zero_node; - break; - } - if (t == error_mark_node) - { - remove = true; - break; - } - /* OpenACC attach / detach clauses must be pointers. */ - if (cp_oacc_check_attachments (c)) - { - remove = true; - break; - } - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) - /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a - bias) to zero here, so it is not set erroneously to the pointer - size later on in gimplify.cc. */ - OMP_CLAUSE_SIZE (c) = size_zero_node; - if (REFERENCE_REF_P (t) - && TREE_CODE (TREE_OPERAND (t, 0)) == COMPONENT_REF) - { - t = TREE_OPERAND (t, 0); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH) - OMP_CLAUSE_DECL (c) = t; - } - while (INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - if (TREE_CODE (t) == COMPONENT_REF - && invalid_nonstatic_memfn_p (EXPR_LOCATION (t), t, - tf_warning_or_error)) - remove = true; - indir_component_ref_p = false; - if (TREE_CODE (t) == COMPONENT_REF - && (INDIRECT_REF_P (TREE_OPERAND (t, 0)) - || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) - { - t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); - indir_component_ref_p = true; - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - if (TREE_CODE (t) == COMPONENT_REF - && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) - { - if (type_dependent_expression_p (t)) - break; - if (TREE_CODE (TREE_OPERAND (t, 1)) == FIELD_DECL - && DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - 
"bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + if (handle_omp_array_sections (c, ort)) remove = true; - } - else if (!omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE does not have a mappable type in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - if (TREE_TYPE (t) != error_mark_node - && !COMPLETE_TYPE_P (TREE_TYPE (t))) - cxx_incomplete_type_inform (TREE_TYPE (t)); - remove = true; - } - while (TREE_CODE (t) == COMPONENT_REF) - { - if (TREE_TYPE (TREE_OPERAND (t, 0)) - && (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) - == UNION_TYPE)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - remove = true; - break; - } - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF) - { - if (maybe_ne (mem_ref_offset (t), 0)) + else + { + t = OMP_CLAUSE_DECL (c); + if (TREE_CODE (t) != TREE_LIST + && !type_dependent_expression_p (t) + && !omp_mappable_type (TREE_TYPE (t))) + { error_at (OMP_CLAUSE_LOCATION (c), - "cannot dereference %qE in %qs clause", t, + "array section does not have mappable type " + "in %qs clause", omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { + if (TREE_TYPE (t) != error_mark_node + && !COMPLETE_TYPE_P (TREE_TYPE (t))) + cxx_incomplete_type_inform (TREE_TYPE (t)); + remove = true; + } + while (TREE_CODE (t) == ARRAY_REF) t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - if (remove) - break; - if (REFERENCE_REF_P (t)) - t = TREE_OPERAND (t, 0); - if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) - { - if (bitmap_bit_p (&map_field_head, DECL_UID (t)) - || (ort != C_ORT_ACC - && bitmap_bit_p (&map_head, DECL_UID (t)))) - goto handle_map_references; - } - } - if (!processing_template_decl - && TREE_CODE (t) == FIELD_DECL) - { - OMP_CLAUSE_DECL (c) = 
finish_non_static_data_member (t, NULL_TREE, - NULL_TREE); - break; - } - if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) - { - if (processing_template_decl && TREE_CODE (t) != OVERLOAD) - break; - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_POINTER - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)) - break; - if (DECL_P (t)) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD is not a variable in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is not a variable in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (VAR_P (t) && CP_DECL_THREAD_LOCAL_P (t)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD is threadprivate variable in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (!processing_template_decl - && !TYPE_REF_P (TREE_TYPE (t)) - && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP - || (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_POINTER)) - && !indir_component_ref_p - && !cxx_mark_addressable (t)) - remove = true; - else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER - || (OMP_CLAUSE_MAP_KIND (c) - == GOMP_MAP_FIRSTPRIVATE_POINTER))) - && t == OMP_CLAUSE_DECL (c) - && !type_dependent_expression_p (t) - && !omp_mappable_type (TYPE_REF_P (TREE_TYPE (t)) - ? 
TREE_TYPE (TREE_TYPE (t)) - : TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD does not have a mappable type in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - if (TREE_TYPE (t) != error_mark_node - && !COMPLETE_TYPE_P (TREE_TYPE (t))) - cxx_incomplete_type_inform (TREE_TYPE (t)); - remove = true; - } - else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR - && !type_dependent_expression_p (t) - && !INDIRECT_TYPE_P (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD is not a pointer variable", t); - remove = true; - } - else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, - DECL_UID (t)))) - remove = true; - else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER) - { - if (bitmap_bit_p (&generic_head, DECL_UID (t)) - || bitmap_bit_p (&firstprivate_head, DECL_UID (t)) - || bitmap_bit_p (&map_firstprivate_head, DECL_UID (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); + if (TREE_CODE (t) == COMPONENT_REF + && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + { + do + { + t = TREE_OPERAND (t, 0); + if (REFERENCE_REF_P (t)) + t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == MEM_REF + || INDIRECT_REF_P (t)) + { + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + } + while (TREE_CODE (t) == COMPONENT_REF + || TREE_CODE (t) == ARRAY_REF); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_IMPLICIT (c) + && (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, + DECL_UID (t)))) + { + remove = true; + break; + } + if (bitmap_bit_p (&map_field_head, DECL_UID (t))) + 
break; + if (bitmap_bit_p (&map_head, DECL_UID (t))) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in motion" + " clauses", t); + else if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data" + " clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in map" + " clauses", t); + remove = true; + } + else + { + bitmap_set_bit (&map_head, DECL_UID (t)); + bitmap_set_bit (&map_field_head, DECL_UID (t)); + } + } + } + if (cp_oacc_check_attachments (c)) remove = true; - } - else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) - { - if (ort == C_ORT_ACC) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + /* In this case, we have a single array element which is a + pointer, and we already set OMP_CLAUSE_SIZE in + handle_omp_array_sections above. For attach/detach + clauses, reset the OMP_CLAUSE_SIZE (representing a bias) + to zero here. */ + OMP_CLAUSE_SIZE (c) = size_zero_node; + break; + } + if (t == error_mark_node) + { + remove = true; + break; + } + /* OpenACC attach / detach clauses must be pointers. */ + if (cp_oacc_check_attachments (c)) + { + remove = true; + break; + } + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a + bias) to zero here, so it is not set erroneously to the + pointer size later on in gimplify.cc. 
*/ + OMP_CLAUSE_SIZE (c) = size_zero_node; + if (REFERENCE_REF_P (t) + && TREE_CODE (TREE_OPERAND (t, 0)) == COMPONENT_REF) + { + t = TREE_OPERAND (t, 0); + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH) + OMP_CLAUSE_DECL (c) = t; + } + while (INDIRECT_REF_P (t) + || TREE_CODE (t) == ARRAY_REF) + { + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + while (TREE_CODE (t) == COMPOUND_EXPR) + { + t = TREE_OPERAND (t, 1); + STRIP_NOPS (t); + } + if (TREE_CODE (t) == COMPONENT_REF + && invalid_nonstatic_memfn_p (EXPR_LOCATION (t), t, + tf_warning_or_error)) + remove = true; + indir_component_ref_p = false; + if (TREE_CODE (t) == COMPONENT_REF + && (INDIRECT_REF_P (TREE_OPERAND (t, 0)) + || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) + { + t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); + indir_component_ref_p = true; + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + if (TREE_CODE (t) == COMPONENT_REF + && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) + { + if (type_dependent_expression_p (t)) + break; + if (TREE_CODE (TREE_OPERAND (t, 1)) == FIELD_DECL + && DECL_BIT_FIELD (TREE_OPERAND (t, 1))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "bit-field %qE in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (!omp_mappable_type (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qE does not have a mappable type in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + if (TREE_TYPE (t) != error_mark_node + && !COMPLETE_TYPE_P (TREE_TYPE (t))) + cxx_incomplete_type_inform (TREE_TYPE (t)); + remove = true; + } + while (TREE_CODE (t) == COMPONENT_REF) + { + if (TREE_TYPE (TREE_OPERAND (t, 0)) + && (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) + == UNION_TYPE)) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qE is a member of a union", t); + remove = true; + break; + 
} + t = TREE_OPERAND (t, 0); + if (TREE_CODE (t) == MEM_REF) + { + if (maybe_ne (mem_ref_offset (t), 0)) + error_at (OMP_CLAUSE_LOCATION (c), + "cannot dereference %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + else + t = TREE_OPERAND (t, 0); + } + while (TREE_CODE (t) == MEM_REF + || INDIRECT_REF_P (t) + || TREE_CODE (t) == ARRAY_REF) + { + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); + } + } + if (remove) + break; + if (REFERENCE_REF_P (t)) + t = TREE_OPERAND (t, 0); + if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) + { + if (bitmap_bit_p (&map_field_head, DECL_UID (t)) + || (ort != C_ORT_ACC + && bitmap_bit_p (&map_head, DECL_UID (t)))) + goto handle_map_references; + } + } + if (!processing_template_decl + && TREE_CODE (t) == FIELD_DECL) + { + OMP_CLAUSE_DECL (c) + = finish_non_static_data_member (t, NULL_TREE, NULL_TREE); + break; + } + if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) + { + if (processing_template_decl && TREE_CODE (t) != OVERLOAD) + break; + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_POINTER + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH)) + break; + if (DECL_P (t)) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD is not a variable in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qE is not a variable in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (VAR_P (t) && CP_DECL_THREAD_LOCAL_P (t)) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qD is threadprivate variable in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + remove = true; + } + else if (!processing_template_decl + && !TYPE_REF_P (TREE_TYPE (t)) + && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_FIRSTPRIVATE_POINTER)) + && !indir_component_ref_p 
+ && !cxx_mark_addressable (t)) + remove = true; + else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FIRSTPRIVATE_POINTER))) + && t == OMP_CLAUSE_DECL (c) + && !type_dependent_expression_p (t) + && !omp_mappable_type (TYPE_REF_P (TREE_TYPE (t)) + ? TREE_TYPE (TREE_TYPE (t)) + : TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qD does not have a mappable type in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (c)]); + if (TREE_TYPE (t) != error_mark_node + && !COMPLETE_TYPE_P (TREE_TYPE (t))) + cxx_incomplete_type_inform (TREE_TYPE (t)); + remove = true; + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR + && !type_dependent_expression_p (t) + && !INDIRECT_TYPE_P (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (c), + "%qD is not a pointer variable", t); + remove = true; + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_IMPLICIT (c) + && (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, + DECL_UID (t)))) + remove = true; + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FIRSTPRIVATE_POINTER)) + { + if (bitmap_bit_p (&generic_head, DECL_UID (t)) + || bitmap_bit_p (&firstprivate_head, DECL_UID (t)) + || bitmap_bit_p (&map_firstprivate_head, DECL_UID (t))) + { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears both in data and map clauses", t); - remove = true; - } - else - bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); - } - else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) - { - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in motion 
clauses", t); - else if (ort == C_ORT_ACC) + remove = true; + } + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + { + if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", + t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears both in data and map clauses", t); + remove = true; + } + else + bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); + } + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + { + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in motion clauses", t); + else if (ort == C_ORT_ACC) + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in map clauses", t); + remove = true; + } + else if (ort == C_ORT_ACC + && bitmap_bit_p (&generic_head, DECL_UID (t))) + { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in map clauses", t); - remove = true; - } - else if (ort == C_ORT_ACC - && bitmap_bit_p (&generic_head, DECL_UID (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); - remove = true; - } - else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) - || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) - { - if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears both in data and map clauses", t); - remove = true; - } - else - { - bitmap_set_bit (&map_head, DECL_UID (t)); + remove = true; + } + else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) + || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) + { + if (ort == C_ORT_ACC) + error_at 
(OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); + else + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears both in data and map clauses", t); + remove = true; + } + else + { + bitmap_set_bit (&map_head, DECL_UID (t)); - tree decl = OMP_CLAUSE_DECL (c); - if (t != decl - && (TREE_CODE (decl) == COMPONENT_REF - || (INDIRECT_REF_P (decl) - && TREE_CODE (TREE_OPERAND (decl, 0)) == COMPONENT_REF - && TYPE_REF_P (TREE_TYPE (TREE_OPERAND (decl, 0)))))) - bitmap_set_bit (&map_field_head, DECL_UID (t)); - } - handle_map_references: - if (!remove - && !processing_template_decl - && ort != C_ORT_DECLARE_SIMD - && TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) - { - t = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - { - OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); - } - else if (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_POINTER - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_REFERENCE) - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_ALWAYS_POINTER) - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_ATTACH_DETACH)) - { - grp_start_p = pc; - grp_sentinel = OMP_CLAUSE_CHAIN (c); + tree decl = OMP_CLAUSE_DECL (c); + if (t != decl + && (TREE_CODE (decl) == COMPONENT_REF + || (INDIRECT_REF_P (decl) + && (TREE_CODE (TREE_OPERAND (decl, 0)) + == COMPONENT_REF) + && TYPE_REF_P (TREE_TYPE (TREE_OPERAND (decl, + 0)))))) + bitmap_set_bit (&map_field_head, DECL_UID (t)); + } + handle_map_references: + if (!remove + && !processing_template_decl + && ort != C_ORT_DECLARE_SIMD + && TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) + { + t = OMP_CLAUSE_DECL (c); + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + { + OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); + if (OMP_CLAUSE_SIZE (c) == NULL_TREE) + OMP_CLAUSE_SIZE (c) + = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); + } + else if (OMP_CLAUSE_MAP_KIND (c) + != 
GOMP_MAP_FIRSTPRIVATE_POINTER + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_FIRSTPRIVATE_REFERENCE) + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_ALWAYS_POINTER) + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_ATTACH_DETACH)) + { + grp_start_p = pc; + grp_sentinel = OMP_CLAUSE_CHAIN (c); - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - if (TREE_CODE (t) == COMPONENT_REF) - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER); - else - OMP_CLAUSE_SET_MAP_KIND (c2, - GOMP_MAP_FIRSTPRIVATE_REFERENCE); - OMP_CLAUSE_DECL (c2) = t; - OMP_CLAUSE_SIZE (c2) = size_zero_node; - OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = c2; - OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); - c = c2; - } - } + tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), + OMP_CLAUSE_MAP); + if (TREE_CODE (t) == COMPONENT_REF) + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER); + else + OMP_CLAUSE_SET_MAP_KIND (c2, + GOMP_MAP_FIRSTPRIVATE_REFERENCE); + OMP_CLAUSE_DECL (c2) = t; + OMP_CLAUSE_SIZE (c2) = size_zero_node; + OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = c2; + OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); + if (OMP_CLAUSE_SIZE (c) == NULL_TREE) + OMP_CLAUSE_SIZE (c) + = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); + c = c2; + } + } + } break; case OMP_CLAUSE_ENTER: From 5fdb150cd4bf8f2da335e3f5c3a17aafcbc66dbe Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Mon, 14 Aug 2023 12:41:56 +0000 Subject: [PATCH 288/311] OpenMP/OpenACC: Rework clause expansion and nested struct handling This patch reworks clause expansion in the C, C++ and (to a lesser extent) Fortran front ends for OpenMP and OpenACC mapping nodes used in GPU offloading support. At present a single clause may be turned into several mapping nodes, or have its mapping type changed, in several places scattered through the front- and middle-end. 
The analysis relating to which particular transformations are needed for some given expression has become quite hard to follow. Briefly, we manipulate clause types in the following places: 1. During parsing, in c_omp_adjust_map_clauses. Depending on a set of rules, we may change a FIRSTPRIVATE_POINTER (etc.) mapping into ATTACH_DETACH, or mark the decl addressable. 2. In semantics.cc or c-typeck.cc, clauses are expanded in handle_omp_array_sections (called via {c_}finish_omp_clauses), or in finish_omp_clauses itself. The two cases are for processing array sections (the former), or non-array sections (the latter). 3. In gimplify.cc, we build sibling lists for struct accesses, which groups and sorts accesses along with their struct base, creating new ALLOC/RELEASE nodes for pointers. 4. In gimplify.cc:gimplify_adjust_omp_clauses, mapping nodes may be adjusted or created. This patch doesn't completely disrupt this scheme, though clause types are no longer adjusted in c_omp_adjust_map_clauses (step 1). Clause expansion in step 2 (for C and C++) now uses a single, unified mechanism, parts of which are also reused for analysis in step 3. Rather than the kind-of "ad-hoc" pattern matching on addresses that is used to expand clauses at present, a new method for analysing addresses is introduced. This does a recursive-descent tree walk on expression nodes, and emits a vector of tokens describing each "part" of the address. This tokenized address can then be translated directly into mapping nodes, with the assurance that no part of the expression has been inadvertently skipped or misinterpreted. In this way, all the variations of ways pointers, arrays, references and component accesses might be combined can be teased apart into easily-understood cases - and we know we've "parsed" the whole address before we start analysis, so the right code paths can easily be selected.
For example, a simple access "arr[idx]" might parse as: base-decl access-indexed-array or "mystruct->foo[x]" with a pointer "foo" component might parse as: base-decl access-pointer component-selector access-pointer A key observation is that "array" bases, e.g. accesses whose root nodes are not structures, but describe scalars or arrays, and also *one-level deep* structure accesses, have first-class support in gimplify and beyond. Expressions that use deeper struct accesses or e.g. multiple indirections were more problematic: some cases worked, but lots of cases didn't. This patch reimplements the support for those in gimplify.cc, again using the new "address tokenization" support. An expression like "mystruct->foo->bar[0:10]" used in a mapping node will translate the right-hand access directly in the front-end. The base for the access will be "mystruct->foo". This is handled recursively in gimplify.cc -- there may be several accesses of "mystruct"'s members on the same directive, so the sibling-list building machinery can be used again. (This was already being done for OpenACC, but the new implementation differs somewhat in details, and is more robust.) For OpenMP, in the case where the base pointer itself, i.e. "mystruct->foo" here, is NOT mapped on the same directive, we create a "fragile" mapping. This turns the "foo" component access into a zero-length allocation (which is a new feature for the runtime, so support has been added there too). A couple of changes have been made to how mapping clauses are turned into mapping nodes: The first change is based on the observation that it is probably never correct to use GOMP_MAP_ALWAYS_POINTER for component accesses (e.g. for references), because if the containing struct is already mapped on the target then the host version of the pointer in question will be corrupted if the struct is copied back from the target. This patch removes all such uses, across each of C, C++ and Fortran.
The second change is to the way that GOMP_MAP_ATTACH_DETACH nodes are processed during sibling-list creation. For OpenMP, for pointer components, we must map the base pointer separately from an array section that uses the base pointer, so e.g. we must have both "map(mystruct.base)" and "map(mystruct.base[0:10])" mappings. These create nodes such as: GOMP_MAP_TOFROM mystruct.base G_M_TOFROM *mystruct.base [len: 10*elemsize] G_M_ATTACH_DETACH mystruct.base Instead of using the first of these directly when building the struct sibling list, then skipping the group using GOMP_MAP_ATTACH_DETACH, leading to: GOMP_MAP_STRUCT mystruct [len: 1] GOMP_MAP_TOFROM mystruct.base we now introduce a new "mini-pass", omp_resolve_clause_dependencies, that drops the GOMP_MAP_TOFROM for the base pointer, marks the second group as having had a base-pointer mapping, then omp_build_struct_sibling_lists can create: GOMP_MAP_STRUCT mystruct [len: 1] GOMP_MAP_ALLOC mystruct.base [len: ptrsize] This ends up working better in many cases, particularly those involving references. (The "alloc" space is immediately overwritten by a pointer attachment, so this is mildly more efficient than a redundant TO mapping at runtime also.) There is support in the address tokenizer for "arbitrary" base expressions which aren't rooted at a decl, but that is not used at present because such addresses are disallowed at parse time. In the front-ends, the address tokenization machinery is mostly only used for clause expansion and not for diagnostics at present. It could be used for those too, which would allow more of my previous "address inspector" implementation to be removed. The new bits in gimplify.cc work with OpenACC also. This version of the patch addresses several first-pass review comments from Tobias, and fixes a few previously-missed cases for manually-managed ragged array mappings (including cases using references). Some arbitrary differences between handling of clause expansion for C vs.
C++ have also been fixed, and some fragments from later in the patch series have been moved forward (where they were useful for fixing bugs). Several new test cases have been added. 2023-11-29 Julian Brown gcc/c-family/ * c-common.h (c_omp_region_type): Add C_ORT_EXIT_DATA, C_ORT_OMP_EXIT_DATA and C_ORT_ACC_TARGET. (omp_addr_token): Add forward declaration. (c_omp_address_inspector): New class. * c-omp.cc (c_omp_adjust_map_clauses): Mark decls addressable here, but do not change any mapping node types. (c_omp_address_inspector::unconverted_ref_origin, c_omp_address_inspector::component_access_p, c_omp_address_inspector::check_clause, c_omp_address_inspector::get_root_term, c_omp_address_inspector::map_supported_p, c_omp_address_inspector::get_origin, c_omp_address_inspector::maybe_unconvert_ref, c_omp_address_inspector::maybe_zero_length_array_section, c_omp_address_inspector::expand_array_base, c_omp_address_inspector::expand_component_selector, c_omp_address_inspector::expand_map_clause): New methods. (omp_expand_access_chain): New function. gcc/c/ * c-parser.cc (c_parser_oacc_all_clauses): Add TARGET_P parameter. Use to select region type for c_finish_omp_clauses call. (c_parser_oacc_loop): Update calls to c_parser_oacc_all_clauses. (c_parser_oacc_compute): Likewise. (c_parser_omp_target_data, c_parser_omp_target_enter_data): Support ATTACH kind. (c_parser_omp_target_exit_data): Support DETACH kind. (check_clauses): Handle GOMP_MAP_POINTER and GOMP_MAP_ATTACH here. * c-typeck.cc (handle_omp_array_sections_1, handle_omp_array_sections, c_finish_omp_clauses): Use c_omp_address_inspector class and OMP address tokenizer to analyze and expand map clause expressions. Fix some diagnostics. Fix "is OpenACC" condition for C_ORT_ACC_TARGET addition. gcc/cp/ * parser.cc (cp_parser_oacc_all_clauses): Add TARGET_P parameter. Use to select region type for finish_omp_clauses call. (cp_parser_omp_target_data, cp_parser_omp_target_enter_data): Support GOMP_MAP_ATTACH kind. 
(cp_parser_omp_target_exit_data): Support GOMP_MAP_DETACH kind. (cp_parser_oacc_declare): Update call to cp_parser_oacc_all_clauses. (cp_parser_oacc_loop): Update calls to cp_parser_oacc_all_clauses. (cp_parser_oacc_compute): Likewise. * pt.cc (tsubst_expr): Use C_ORT_ACC_TARGET for call to tsubst_omp_clauses for OpenACC compute regions. * semantics.cc (cp_omp_address_inspector): New class, derived from c_omp_address_inspector. (handle_omp_array_sections_1, handle_omp_array_sections, finish_omp_clauses): Use cp_omp_address_inspector class and OMP address tokenizer to analyze and expand OpenMP map clause expressions. Fix some diagnostics. Support C_ORT_ACC_TARGET. (finish_omp_target): Handle GOMP_MAP_POINTER. gcc/fortran/ * trans-openmp.cc (gfc_trans_omp_array_section): Add OPENMP parameter. Use GOMP_MAP_ATTACH_DETACH instead of GOMP_MAP_ALWAYS_POINTER for derived type components. (gfc_trans_omp_clauses): Update calls to gfc_trans_omp_array_section. gcc/ * gimplify.cc (build_struct_comp_nodes): Don't process GOMP_MAP_ATTACH_DETACH "middle" nodes here. (omp_mapping_group): Add REPROCESS_STRUCT and FRAGILE booleans for nested struct handling. (omp_strip_components_and_deref, omp_strip_indirections): Remove functions. (omp_get_attachment): Handle GOMP_MAP_DETACH here. (omp_group_last): Handle GOMP_MAP_*, GOMP_MAP_DETACH, GOMP_MAP_ATTACH_DETACH groups for "exit data" of reference-to-pointer component array sections. (omp_gather_mapping_groups_1): Initialise reprocess_struct and fragile fields. (omp_group_base): Handle GOMP_MAP_ATTACH_DETACH after GOMP_MAP_STRUCT. (omp_index_mapping_groups_1): Skip reprocess_struct groups. (omp_get_nonfirstprivate_group, omp_directive_maps_explicitly, omp_resolve_clause_dependencies, omp_first_chained_access_token): New functions. (omp_check_mapping_compatibility): Adjust accepted node combinations for "from" clauses using release instead of alloc. 
(omp_accumulate_sibling_list): Add GROUP_MAP, ADDR_TOKENS, FRAGILE_P, REPROCESSING_STRUCT, ADDED_TAIL parameters. Use OMP address tokenizer to analyze addresses. Reimplement nested struct handling, and implement "fragile groups". (omp_build_struct_sibling_lists): Adjust for changes to omp_accumulate_sibling_list. Recalculate bias for ATTACH_DETACH nodes after GOMP_MAP_STRUCT nodes. (gimplify_scan_omp_clauses): Call omp_resolve_clause_dependencies. Use OMP address tokenizer. (gimplify_adjust_omp_clauses_1): Use build_fold_indirect_ref_loc instead of build_simple_mem_ref_loc. * omp-general.cc (omp-general.h, tree-pretty-print.h): Include. (omp_addr_tokenizer): New namespace. (omp_addr_tokenizer::omp_addr_token): New. (omp_addr_tokenizer::omp_parse_component_selector, omp_addr_tokenizer::omp_parse_ref, omp_addr_tokenizer::omp_parse_pointer, omp_addr_tokenizer::omp_parse_access_method, omp_addr_tokenizer::omp_parse_access_methods, omp_addr_tokenizer::omp_parse_structure_base, omp_addr_tokenizer::omp_parse_structured_expr, omp_addr_tokenizer::omp_parse_array_expr, omp_addr_tokenizer::omp_access_chain_p, omp_addr_tokenizer::omp_accessed_addr): New functions. (omp_parse_expr, debug_omp_tokenized_addr): New functions. * omp-general.h (omp_addr_tokenizer::access_method_kinds, omp_addr_tokenizer::structure_base_kinds, omp_addr_tokenizer::token_type, omp_addr_tokenizer::omp_addr_token, omp_addr_tokenizer::omp_access_chain_p, omp_addr_tokenizer::omp_accessed_addr): New. (omp_addr_token, omp_parse_expr): New. * omp-low.cc (scan_sharing_clauses): Skip error check for references to pointers. * tree.h (OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED): New macro. gcc/testsuite/ * c-c++-common/gomp/clauses-2.c: Fix error output. * c-c++-common/gomp/target-implicit-map-2.c: Adjust scan output. * c-c++-common/gomp/target-50.c: Adjust scan output. * c-c++-common/gomp/target-enter-data-1.c: Adjust scan output. * g++.dg/gomp/static-component-1.C: New test. 
* gcc.dg/gomp/target-3.c: Adjust scan output. * gfortran.dg/gomp/map-9.f90: Adjust scan output. libgomp/ * target.c (gomp_map_pointer): Modify zero-length array section pointer handling. (gomp_attach_pointer): Likewise. (gomp_map_fields_existing): Use gomp_map_0len_lookup. (gomp_attach_pointer): Allow attaching null pointers (or Fortran "unassociated" pointers). (gomp_map_vars_internal): Handle zero-sized struct members. Add diagnostic for unmapped struct pointer members. * testsuite/libgomp.c-c++-common/baseptrs-1.c: New test. * testsuite/libgomp.c-c++-common/baseptrs-2.c: New test. * testsuite/libgomp.c-c++-common/baseptrs-6.c: New test. * testsuite/libgomp.c-c++-common/baseptrs-7.c: New test. * testsuite/libgomp.c-c++-common/ptr-attach-2.c: New test. * testsuite/libgomp.c-c++-common/target-implicit-map-2.c: Fix missing "free". * testsuite/libgomp.c-c++-common/target-implicit-map-5.c: New test. * testsuite/libgomp.c-c++-common/target-map-zlas-1.c: New test. * testsuite/libgomp.c++/class-array-1.C: New test. * testsuite/libgomp.c++/baseptrs-3.C: New test. * testsuite/libgomp.c++/baseptrs-4.C: New test. * testsuite/libgomp.c++/baseptrs-5.C: New test. * testsuite/libgomp.c++/baseptrs-8.C: New test. * testsuite/libgomp.c++/baseptrs-9.C: New test. * testsuite/libgomp.c++/ref-mapping-1.C: New test. * testsuite/libgomp.c++/target-48.C: New test. * testsuite/libgomp.c++/target-49.C: New test. * testsuite/libgomp.c++/target-exit-data-reftoptr-1.C: New test. * testsuite/libgomp.c++/target-lambda-1.C: Update for OpenMP 5.2 semantics. * testsuite/libgomp.c++/target-this-3.C: Likewise. * testsuite/libgomp.c++/target-this-4.C: Likewise. * testsuite/libgomp.fortran/struct-elem-map-1.f90: Add temporary XFAIL. * testsuite/libgomp.fortran/target-enter-data-6.f90: Likewise. 
--- gcc/c-family/c-common.h | 71 +- gcc/c-family/c-omp.cc | 875 ++++- gcc/c/c-parser.cc | 23 +- gcc/c/c-typeck.cc | 421 +-- gcc/cp/parser.cc | 23 +- gcc/cp/pt.cc | 4 +- gcc/cp/semantics.cc | 633 ++-- gcc/fortran/trans-openmp.cc | 34 +- gcc/gimplify.cc | 1079 +++++- gcc/omp-general.cc | 425 +++ gcc/omp-general.h | 69 + gcc/omp-low.cc | 7 +- gcc/testsuite/c-c++-common/gomp/clauses-2.c | 2 +- gcc/testsuite/c-c++-common/gomp/target-50.c | 2 +- .../c-c++-common/gomp/target-enter-data-1.c | 3 +- .../c-c++-common/gomp/target-implicit-map-2.c | 3 +- .../g++.dg/gomp/static-component-1.C | 23 + gcc/testsuite/gcc.dg/gomp/target-3.c | 2 +- gcc/testsuite/gfortran.dg/gomp/map-9.f90 | 2 +- gcc/tree.h | 4 + libgomp/target.c | 38 +- libgomp/testsuite/libgomp.c++/baseptrs-3.C | 275 ++ libgomp/testsuite/libgomp.c++/baseptrs-4.C | 3154 +++++++++++++++++ libgomp/testsuite/libgomp.c++/baseptrs-5.C | 62 + libgomp/testsuite/libgomp.c++/baseptrs-8.C | 70 + libgomp/testsuite/libgomp.c++/baseptrs-9.C | 57 + libgomp/testsuite/libgomp.c++/class-array-1.C | 59 + libgomp/testsuite/libgomp.c++/ref-mapping-1.C | 80 + libgomp/testsuite/libgomp.c++/target-48.C | 32 + libgomp/testsuite/libgomp.c++/target-49.C | 37 + .../libgomp.c++/target-exit-data-reftoptr-1.C | 34 + .../testsuite/libgomp.c++/target-lambda-1.C | 5 +- libgomp/testsuite/libgomp.c++/target-this-3.C | 11 +- libgomp/testsuite/libgomp.c++/target-this-4.C | 11 +- .../libgomp.c-c++-common/baseptrs-1.c | 50 + .../libgomp.c-c++-common/baseptrs-2.c | 70 + .../libgomp.c-c++-common/baseptrs-6.c | 69 + .../libgomp.c-c++-common/baseptrs-7.c | 56 + .../libgomp.c-c++-common/ptr-attach-2.c | 60 + .../target-implicit-map-2.c | 2 + .../target-implicit-map-5.c | 50 + .../libgomp.c-c++-common/target-map-zlas-1.c | 36 + .../libgomp.fortran/struct-elem-map-1.f90 | 3 + .../libgomp.fortran/target-enter-data-6.f90 | 10 + 44 files changed, 7216 insertions(+), 820 deletions(-) create mode 100644 gcc/testsuite/g++.dg/gomp/static-component-1.C create mode 100644 
libgomp/testsuite/libgomp.c++/baseptrs-3.C create mode 100644 libgomp/testsuite/libgomp.c++/baseptrs-4.C create mode 100644 libgomp/testsuite/libgomp.c++/baseptrs-5.C create mode 100644 libgomp/testsuite/libgomp.c++/baseptrs-8.C create mode 100644 libgomp/testsuite/libgomp.c++/baseptrs-9.C create mode 100644 libgomp/testsuite/libgomp.c++/class-array-1.C create mode 100644 libgomp/testsuite/libgomp.c++/ref-mapping-1.C create mode 100644 libgomp/testsuite/libgomp.c++/target-48.C create mode 100644 libgomp/testsuite/libgomp.c++/target-49.C create mode 100644 libgomp/testsuite/libgomp.c++/target-exit-data-reftoptr-1.C create mode 100644 libgomp/testsuite/libgomp.c-c++-common/baseptrs-1.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/baseptrs-6.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/baseptrs-7.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c create mode 100644 libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 62d76c87cc00..6e7fc1b3aa35 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -1279,8 +1279,11 @@ enum c_omp_region_type C_ORT_ACC = 1 << 1, C_ORT_DECLARE_SIMD = 1 << 2, C_ORT_TARGET = 1 << 3, + C_ORT_EXIT_DATA = 1 << 4, C_ORT_OMP_DECLARE_SIMD = C_ORT_OMP | C_ORT_DECLARE_SIMD, - C_ORT_OMP_TARGET = C_ORT_OMP | C_ORT_TARGET + C_ORT_OMP_TARGET = C_ORT_OMP | C_ORT_TARGET, + C_ORT_OMP_EXIT_DATA = C_ORT_OMP | C_ORT_EXIT_DATA, + C_ORT_ACC_TARGET = C_ORT_ACC | C_ORT_TARGET }; extern tree c_finish_omp_master (location_t, tree); @@ -1317,6 +1320,72 @@ extern tree c_omp_check_context_selector (location_t, tree); extern void c_omp_mark_declare_variant (location_t, tree, tree); extern void c_omp_adjust_map_clauses (tree, bool); +namespace omp_addr_tokenizer { struct 
omp_addr_token; } +typedef omp_addr_tokenizer::omp_addr_token omp_addr_token; + +class c_omp_address_inspector +{ + location_t loc; + tree root_term; + bool indirections; + int map_supported; + +protected: + tree orig; + +public: + c_omp_address_inspector (location_t loc, tree t) + : loc (loc), root_term (NULL_TREE), indirections (false), + map_supported (-1), orig (t) + { + } + + ~c_omp_address_inspector () + { + } + + virtual bool processing_template_decl_p () + { + return false; + } + + virtual void emit_unmappable_type_notes (tree) + { + } + + virtual tree convert_from_reference (tree) + { + gcc_unreachable (); + } + + virtual tree build_array_ref (location_t loc, tree arr, tree idx) + { + tree eltype = TREE_TYPE (TREE_TYPE (arr)); + return build4_loc (loc, ARRAY_REF, eltype, arr, idx, NULL_TREE, + NULL_TREE); + } + + virtual bool check_clause (tree); + tree get_root_term (bool); + + tree unconverted_ref_origin (); + bool component_access_p (); + + bool map_supported_p (); + + static tree get_origin (tree); + static tree maybe_unconvert_ref (tree); + + bool maybe_zero_length_array_section (tree); + + tree expand_array_base (tree, vec &, tree, unsigned *, + c_omp_region_type); + tree expand_component_selector (tree, vec &, tree, + unsigned *, c_omp_region_type); + tree expand_map_clause (tree, tree, vec &, + c_omp_region_type); +}; + enum c_omp_directive_kind { C_OMP_DIR_STANDALONE, C_OMP_DIR_CONSTRUCT, diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc index 95b6c1e623fb..5e534aa21cd2 100644 --- a/gcc/c-family/c-omp.cc +++ b/gcc/c-family/c-omp.cc @@ -3169,8 +3169,9 @@ struct map_clause decl_mapped (false), omp_declare_target (false) { } }; -/* Adjust map clauses after normal clause parsing, mainly to turn specific - base-pointer map cases into attach/detach and mark them addressable. 
*/ +/* Adjust map clauses after normal clause parsing, mainly to mark specific + base-pointer map cases addressable that may be turned into attach/detach + operations during gimplification. */ void c_omp_adjust_map_clauses (tree clauses, bool is_target) { @@ -3186,7 +3187,6 @@ c_omp_adjust_map_clauses (tree clauses, bool is_target) && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) { tree ptr = OMP_CLAUSE_DECL (c); - OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_ATTACH_DETACH); c_common_mark_addressable_vec (ptr); } return; @@ -3199,7 +3199,7 @@ c_omp_adjust_map_clauses (tree clauses, bool is_target) && DECL_P (OMP_CLAUSE_DECL (c))) { /* If this is for a target construct, the firstprivate pointer - is changed to attach/detach if either is true: + is marked addressable if either is true: (1) the base-pointer is mapped in this same construct, or (2) the base-pointer is a variable place on the device by "declare target" directives. @@ -3241,11 +3241,874 @@ c_omp_adjust_map_clauses (tree clauses, bool is_target) if (mc.firstprivate_ptr_p && (mc.decl_mapped || mc.omp_declare_target)) + c_common_mark_addressable_vec (OMP_CLAUSE_DECL (mc.clause)); + } +} + +/* Maybe strip off an indirection from a "converted" reference, then find the + origin of a pointer (i.e. without any offset). */ + +tree +c_omp_address_inspector::unconverted_ref_origin () +{ + tree t = orig; + + /* We may have a reference-typed component access at the outermost level + that has had convert_from_reference called on it. Get the un-dereferenced + reference itself. */ + t = maybe_unconvert_ref (t); + + /* Find base pointer for POINTER_PLUS_EXPR, etc. */ + t = get_origin (t); + + return t; +} + +/* Return TRUE if the address is a component access. 
*/ + +bool +c_omp_address_inspector::component_access_p () +{ + tree t = maybe_unconvert_ref (orig); + + t = get_origin (t); + + return TREE_CODE (t) == COMPONENT_REF; +} + +/* Perform various checks on the address, as described by clause CLAUSE (we + only use its code and location here). */ + +bool +c_omp_address_inspector::check_clause (tree clause) +{ + tree t = unconverted_ref_origin (); + + if (TREE_CODE (t) != COMPONENT_REF) + return true; + + if (TREE_CODE (TREE_OPERAND (t, 1)) == FIELD_DECL + && DECL_BIT_FIELD (TREE_OPERAND (t, 1))) + { + error_at (OMP_CLAUSE_LOCATION (clause), + "bit-field %qE in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (clause)]); + return false; + } + else if (!processing_template_decl_p () + && !omp_mappable_type (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (clause), + "%qE does not have a mappable type in %qs clause", + t, omp_clause_code_name[OMP_CLAUSE_CODE (clause)]); + emit_unmappable_type_notes (TREE_TYPE (t)); + return false; + } + else if (TREE_TYPE (t) && TYPE_ATOMIC (TREE_TYPE (t))) + { + error_at (OMP_CLAUSE_LOCATION (clause), + "%<_Atomic%> %qE in %qs clause", t, + omp_clause_code_name[OMP_CLAUSE_CODE (clause)]); + return false; + } + + return true; +} + +/* Find the "root term" for the address. This is the innermost decl, etc. + of the access. 
*/ + +tree +c_omp_address_inspector::get_root_term (bool checking) +{ + if (root_term && !checking) + return root_term; + + tree t = unconverted_ref_origin (); + + while (TREE_CODE (t) == COMPONENT_REF) + { + if (checking + && TREE_TYPE (TREE_OPERAND (t, 0)) + && TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) == UNION_TYPE) { - OMP_CLAUSE_SET_MAP_KIND (mc.clause, GOMP_MAP_ATTACH_DETACH); - c_common_mark_addressable_vec (OMP_CLAUSE_DECL (mc.clause)); + error_at (loc, "%qE is a member of a union", t); + return error_mark_node; + } + t = TREE_OPERAND (t, 0); + while (TREE_CODE (t) == MEM_REF + || TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == ARRAY_REF) + { + if (TREE_CODE (t) == MEM_REF + || TREE_CODE (t) == INDIRECT_REF) + indirections = true; + t = TREE_OPERAND (t, 0); + STRIP_NOPS (t); + if (TREE_CODE (t) == POINTER_PLUS_EXPR) + t = TREE_OPERAND (t, 0); } } + + root_term = t; + + return t; +} + +/* Return TRUE if the address is supported in mapping clauses. At present, + this means that the innermost expression is a DECL_P, but could be extended + to other types of expression in the future. */ + +bool +c_omp_address_inspector::map_supported_p () +{ + /* If we've already decided if the mapped address is supported, return + that. */ + if (map_supported != -1) + return map_supported; + + tree t = unconverted_ref_origin (); + + STRIP_NOPS (t); + + while (TREE_CODE (t) == INDIRECT_REF + || TREE_CODE (t) == MEM_REF + || TREE_CODE (t) == ARRAY_REF + || TREE_CODE (t) == COMPONENT_REF + || TREE_CODE (t) == COMPOUND_EXPR + || TREE_CODE (t) == SAVE_EXPR + || TREE_CODE (t) == POINTER_PLUS_EXPR + || TREE_CODE (t) == NON_LVALUE_EXPR + || TREE_CODE (t) == NOP_EXPR) + if (TREE_CODE (t) == COMPOUND_EXPR) + t = TREE_OPERAND (t, 1); + else + t = TREE_OPERAND (t, 0); + + STRIP_NOPS (t); + + map_supported = DECL_P (t); + + return map_supported; +} + +/* Get the origin of an address T, stripping off offsets and some other + bits. 
*/ + +tree +c_omp_address_inspector::get_origin (tree t) +{ + while (1) + { + if (TREE_CODE (t) == COMPOUND_EXPR) + { + t = TREE_OPERAND (t, 1); + STRIP_NOPS (t); + } + else if (TREE_CODE (t) == POINTER_PLUS_EXPR + || TREE_CODE (t) == SAVE_EXPR) + t = TREE_OPERAND (t, 0); + else if (TREE_CODE (t) == INDIRECT_REF + && TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) == REFERENCE_TYPE) + t = TREE_OPERAND (t, 0); + else + break; + } + STRIP_NOPS (t); + return t; +} + +/* For an address T that might be a reference that has had + "convert_from_reference" called on it, return the actual reference without + any indirection. */ + +tree +c_omp_address_inspector::maybe_unconvert_ref (tree t) +{ + if (TREE_CODE (t) == INDIRECT_REF + && TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) == REFERENCE_TYPE) + return TREE_OPERAND (t, 0); + + return t; +} + +/* Return TRUE if CLAUSE might describe a zero-length array section. */ + +bool +c_omp_address_inspector::maybe_zero_length_array_section (tree clause) +{ + switch (OMP_CLAUSE_MAP_KIND (clause)) + { + case GOMP_MAP_ALLOC: + case GOMP_MAP_IF_PRESENT: + case GOMP_MAP_TO: + case GOMP_MAP_FROM: + case GOMP_MAP_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_ALWAYS_TOFROM: + case GOMP_MAP_PRESENT_ALLOC: + case GOMP_MAP_PRESENT_TO: + case GOMP_MAP_PRESENT_FROM: + case GOMP_MAP_PRESENT_TOFROM: + case GOMP_MAP_ALWAYS_PRESENT_TO: + case GOMP_MAP_ALWAYS_PRESENT_FROM: + case GOMP_MAP_ALWAYS_PRESENT_TOFROM: + case GOMP_MAP_RELEASE: + case GOMP_MAP_DELETE: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_FORCE_PRESENT: + return true; + default: + return false; + } +} + +/* Expand a chained access. We only expect to see a quite limited range of + expression types here, because e.g. you can't have an array of + references. 
*/ + +static tree +omp_expand_access_chain (tree c, tree expr, vec &addr_tokens, + unsigned *idx, c_omp_region_type ort) +{ + using namespace omp_addr_tokenizer; + location_t loc = OMP_CLAUSE_LOCATION (c); + unsigned i = *idx; + tree c2 = NULL_TREE; + gomp_map_kind kind; + + if ((ort & C_ORT_EXIT_DATA) != 0 + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM + || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FROM + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DELETE + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_RELEASE + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_FROM + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_FROM + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_PRESENT_FROM + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_PRESENT_FROM))) + kind = GOMP_MAP_DETACH; + else + kind = GOMP_MAP_ATTACH; + + switch (addr_tokens[i]->u.access_kind) + { + case ACCESS_POINTER: + case ACCESS_POINTER_OFFSET: + { + tree virtual_origin + = fold_convert_loc (loc, ptrdiff_type_node, addr_tokens[i]->expr); + tree data_addr = omp_accessed_addr (addr_tokens, i, expr); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, kind); + OMP_CLAUSE_DECL (c2) = addr_tokens[i]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_INDEXED_ARRAY: + break; + + default: + return error_mark_node; + } + + if (c2) + { + OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = c2; + c = c2; + } + + *idx = ++i; + + if (i < addr_tokens.length () + && addr_tokens[i]->type == ACCESS_METHOD) + return omp_expand_access_chain (c, expr, addr_tokens, idx, ort); + + return c; +} + +/* Translate "array_base_decl access_method" to OMP mapping clauses. 
*/ + +tree +c_omp_address_inspector::expand_array_base (tree c, + vec &addr_tokens, + tree expr, unsigned *idx, + c_omp_region_type ort) +{ + using namespace omp_addr_tokenizer; + location_t loc = OMP_CLAUSE_LOCATION (c); + int i = *idx; + tree decl = addr_tokens[i + 1]->expr; + bool decl_p = DECL_P (decl); + bool declare_target_p = (decl_p + && is_global_var (decl) + && lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (decl))); + bool map_p = OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP; + bool implicit_p = map_p && OMP_CLAUSE_MAP_IMPLICIT (c); + bool chain_p = omp_access_chain_p (addr_tokens, i + 1); + tree c2 = NULL_TREE, c3 = NULL_TREE; + unsigned consume_tokens = 2; + bool target_p = (ort & C_ORT_TARGET) != 0; + bool openmp_p = (ort & C_ORT_OMP) != 0; + + gcc_assert (i == 0); + + if (!openmp_p + && map_p + && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + { + i += 2; + *idx = i; + return c; + } + + switch (addr_tokens[i + 1]->u.access_kind) + { + case ACCESS_DIRECT: + if (decl_p && !target_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + + case ACCESS_REF: + { + /* Copy the referenced object. Note that we do this even for !MAP_P + clauses. */ + tree obj = convert_from_reference (addr_tokens[i + 1]->expr); + if (TREE_CODE (TREE_TYPE (obj)) == ARRAY_TYPE) + /* We have a ref to array: add a [0] element as the ME expects. */ + OMP_CLAUSE_DECL (c) = build_array_ref (loc, obj, integer_zero_node); + else + OMP_CLAUSE_DECL (c) = obj; + OMP_CLAUSE_SIZE (c) = TYPE_SIZE_UNIT (TREE_TYPE (obj)); + + if (!map_p) + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + } + + if (!target_p) + break; + + /* If we have a reference to a pointer, avoid using + FIRSTPRIVATE_REFERENCE here in case the pointer is modified in the + offload region (we can only do that if the pointer does not point + to a mapped block). 
We could avoid doing this if we don't have a + FROM mapping... */ + bool ref_to_ptr = TREE_CODE (TREE_TYPE (obj)) == POINTER_TYPE; + + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + if (!ref_to_ptr + && !declare_target_p + && decl_p) + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_REFERENCE); + else + { + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + } + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) = size_zero_node; + + if (ref_to_ptr) + { + c3 = c2; + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALLOC); + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (c2))); + } + } + break; + + case ACCESS_INDEXED_REF_TO_ARRAY: + { + if (!map_p) + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + } + + if (!target_p) + break; + + tree virtual_origin + = convert_from_reference (addr_tokens[i + 1]->expr); + virtual_origin = build_fold_addr_expr (virtual_origin); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + tree data_addr = omp_accessed_addr (addr_tokens, i + 1, expr); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + if (decl_p && target_p && !declare_target_p) + { + /* It appears that omp-low.cc mishandles cases where we have a + [reference to an] array of pointers such as: + + int *arr[N]; (or "int *(&arr)[N] = ...") + #pragma omp target map(arr[a][b:c]) + { ... } + + in such cases chain_p will be true. For now, fall back to + GOMP_MAP_POINTER. */ + enum gomp_map_kind k = chain_p ? 
GOMP_MAP_POINTER + : GOMP_MAP_FIRSTPRIVATE_REFERENCE; + OMP_CLAUSE_SET_MAP_KIND (c2, k); + } + else + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + } + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_INDEXED_ARRAY: + { + if (!map_p) + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + } + + /* The code handling "firstprivatize_array_bases" in gimplify.cc is + relevant here. What do we need to create for arrays at this + stage? (This condition doesn't feel quite right. FIXME?) */ + if (!target_p + && (TREE_CODE (TREE_TYPE (addr_tokens[i + 1]->expr)) + == ARRAY_TYPE)) + break; + + tree virtual_origin + = build_fold_addr_expr (addr_tokens[i + 1]->expr); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + tree data_addr = omp_accessed_addr (addr_tokens, i + 1, expr); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + if (decl_p && target_p) + { + /* See comment for ACCESS_INDEXED_REF_TO_ARRAY above. */ + enum gomp_map_kind k = chain_p ? 
GOMP_MAP_POINTER + : GOMP_MAP_FIRSTPRIVATE_POINTER; + OMP_CLAUSE_SET_MAP_KIND (c2, k); + } + else + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + } + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_POINTER: + case ACCESS_POINTER_OFFSET: + { + if (!map_p) + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + } + + unsigned last_access = i + 1; + tree virtual_origin; + + if (chain_p + && addr_tokens[i + 2]->type == ACCESS_METHOD + && addr_tokens[i + 2]->u.access_kind == ACCESS_INDEXED_ARRAY) + { + /* !!! This seems wrong for ACCESS_POINTER_OFFSET. */ + consume_tokens = 3; + chain_p = omp_access_chain_p (addr_tokens, i + 2); + last_access = i + 2; + virtual_origin + = build_array_ref (loc, addr_tokens[last_access]->expr, + integer_zero_node); + virtual_origin = build_fold_addr_expr (virtual_origin); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + } + else + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + addr_tokens[last_access]->expr); + tree data_addr = omp_accessed_addr (addr_tokens, last_access, expr); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + /* For OpenACC, use FIRSTPRIVATE_POINTER for decls even on non-compute + regions (e.g. "acc data" constructs). It'll be removed anyway in + gimplify.cc, but doing it this way maintains diagnostic + behaviour. 
*/ + if (decl_p && (target_p || !openmp_p) && !chain_p && !declare_target_p) + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_POINTER); + else + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + } + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_REF_TO_POINTER: + case ACCESS_REF_TO_POINTER_OFFSET: + { + if (!map_p) + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + break; + } + + unsigned last_access = i + 1; + tree virtual_origin; + + if (chain_p + && addr_tokens[i + 2]->type == ACCESS_METHOD + && addr_tokens[i + 2]->u.access_kind == ACCESS_INDEXED_ARRAY) + { + /* !!! This seems wrong for ACCESS_POINTER_OFFSET. */ + consume_tokens = 3; + chain_p = omp_access_chain_p (addr_tokens, i + 2); + last_access = i + 2; + virtual_origin + = build_array_ref (loc, addr_tokens[last_access]->expr, + integer_zero_node); + virtual_origin = build_fold_addr_expr (virtual_origin); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + } + else + { + virtual_origin + = convert_from_reference (addr_tokens[last_access]->expr); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + } + + tree data_addr = omp_accessed_addr (addr_tokens, last_access, expr); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + if (decl_p && target_p && !chain_p && !declare_target_p) + { + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_REFERENCE); + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + } + else + { + if (decl_p) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c2) + = convert_from_reference (addr_tokens[i + 1]->expr); + } + OMP_CLAUSE_SIZE (c2) + = 
fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + default: + *idx = i + consume_tokens; + return error_mark_node; + } + + if (c3) + { + OMP_CLAUSE_CHAIN (c3) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c2) = c3; + OMP_CLAUSE_CHAIN (c) = c2; + if (implicit_p) + { + OMP_CLAUSE_MAP_IMPLICIT (c2) = 1; + OMP_CLAUSE_MAP_IMPLICIT (c3) = 1; + } + c = c3; + } + else if (c2) + { + OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = c2; + if (implicit_p) + OMP_CLAUSE_MAP_IMPLICIT (c2) = 1; + c = c2; + } + + i += consume_tokens; + *idx = i; + + if (chain_p && map_p) + return omp_expand_access_chain (c, expr, addr_tokens, idx, ort); + + return c; +} + +/* Translate "component_selector access_method" to OMP mapping clauses. */ + +tree +c_omp_address_inspector::expand_component_selector (tree c, + vec + &addr_tokens, + tree expr, unsigned *idx, + c_omp_region_type ort) +{ + using namespace omp_addr_tokenizer; + location_t loc = OMP_CLAUSE_LOCATION (c); + unsigned i = *idx; + tree c2 = NULL_TREE, c3 = NULL_TREE; + bool chain_p = omp_access_chain_p (addr_tokens, i + 1); + bool map_p = OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP; + + switch (addr_tokens[i + 1]->u.access_kind) + { + case ACCESS_DIRECT: + case ACCESS_INDEXED_ARRAY: + break; + + case ACCESS_REF: + { + /* Copy the referenced object. Note that we also do this for !MAP_P + clauses. 
*/ + tree obj = convert_from_reference (addr_tokens[i + 1]->expr); + OMP_CLAUSE_DECL (c) = obj; + OMP_CLAUSE_SIZE (c) = TYPE_SIZE_UNIT (TREE_TYPE (obj)); + + if (!map_p) + break; + + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) = size_zero_node; + } + break; + + case ACCESS_INDEXED_REF_TO_ARRAY: + { + if (!map_p) + break; + + tree virtual_origin + = convert_from_reference (addr_tokens[i + 1]->expr); + virtual_origin = build_fold_addr_expr (virtual_origin); + virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + virtual_origin); + tree data_addr = omp_accessed_addr (addr_tokens, i + 1, expr); + + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_POINTER: + case ACCESS_POINTER_OFFSET: + { + if (!map_p) + break; + + tree virtual_origin + = fold_convert_loc (loc, ptrdiff_type_node, + addr_tokens[i + 1]->expr); + tree data_addr = omp_accessed_addr (addr_tokens, i + 1, expr); + + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c2) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + } + break; + + case ACCESS_REF_TO_POINTER: + case ACCESS_REF_TO_POINTER_OFFSET: + { + if (!map_p) + break; + + tree ptr = convert_from_reference (addr_tokens[i + 1]->expr); + tree virtual_origin = fold_convert_loc (loc, ptrdiff_type_node, + ptr); + tree data_addr = omp_accessed_addr (addr_tokens, i + 1, expr); + + /* Attach the pointer... 
*/ + c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c2) = ptr; + OMP_CLAUSE_SIZE (c2) + = fold_build2_loc (loc, MINUS_EXPR, ptrdiff_type_node, + fold_convert_loc (loc, ptrdiff_type_node, + data_addr), + virtual_origin); + + /* ...and also the reference. */ + c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (c3) = addr_tokens[i + 1]->expr; + OMP_CLAUSE_SIZE (c3) = size_zero_node; + } + break; + + default: + *idx = i + 2; + return error_mark_node; + } + + if (c3) + { + OMP_CLAUSE_CHAIN (c3) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c2) = c3; + OMP_CLAUSE_CHAIN (c) = c2; + c = c3; + } + else if (c2) + { + OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = c2; + c = c2; + } + + i += 2; + *idx = i; + + if (chain_p && map_p) + return omp_expand_access_chain (c, expr, addr_tokens, idx, ort); + + return c; +} + +/* Expand a map clause into a group of mapping clauses, creating nodes to + attach/detach pointers and so forth as necessary. 
*/ + +tree +c_omp_address_inspector::expand_map_clause (tree c, tree expr, + vec &addr_tokens, + c_omp_region_type ort) +{ + using namespace omp_addr_tokenizer; + unsigned i, length = addr_tokens.length (); + + for (i = 0; i < length;) + { + int remaining = length - i; + + if (remaining >= 2 + && addr_tokens[i]->type == ARRAY_BASE + && addr_tokens[i]->u.structure_base_kind == BASE_DECL + && addr_tokens[i + 1]->type == ACCESS_METHOD) + { + c = expand_array_base (c, addr_tokens, expr, &i, ort); + if (c == error_mark_node) + return error_mark_node; + } + else if (remaining >= 2 + && addr_tokens[i]->type == ARRAY_BASE + && addr_tokens[i]->u.structure_base_kind == BASE_ARBITRARY_EXPR + && addr_tokens[i + 1]->type == ACCESS_METHOD) + { + c = expand_array_base (c, addr_tokens, expr, &i, ort); + if (c == error_mark_node) + return error_mark_node; + } + else if (remaining >= 2 + && addr_tokens[i]->type == STRUCTURE_BASE + && addr_tokens[i]->u.structure_base_kind == BASE_DECL + && addr_tokens[i + 1]->type == ACCESS_METHOD) + { + if (addr_tokens[i + 1]->u.access_kind == ACCESS_DIRECT) + c_common_mark_addressable_vec (addr_tokens[i + 1]->expr); + i += 2; + while (addr_tokens[i]->type == ACCESS_METHOD) + i++; + } + else if (remaining >= 2 + && addr_tokens[i]->type == STRUCTURE_BASE + && addr_tokens[i]->u.structure_base_kind == BASE_ARBITRARY_EXPR + && addr_tokens[i + 1]->type == ACCESS_METHOD) + { + switch (addr_tokens[i + 1]->u.access_kind) + { + case ACCESS_DIRECT: + case ACCESS_POINTER: + i += 2; + while (addr_tokens[i]->type == ACCESS_METHOD) + i++; + break; + default: + return error_mark_node; + } + } + else if (remaining >= 2 + && addr_tokens[i]->type == COMPONENT_SELECTOR + && addr_tokens[i + 1]->type == ACCESS_METHOD) + { + c = expand_component_selector (c, addr_tokens, expr, &i, ort); + /* We used 'expr', so these must have been the last tokens. 
*/ + gcc_assert (i == length); + if (c == error_mark_node) + return error_mark_node; + } + else if (remaining >= 3 + && addr_tokens[i]->type == COMPONENT_SELECTOR + && addr_tokens[i + 1]->type == STRUCTURE_BASE + && (addr_tokens[i + 1]->u.structure_base_kind + == BASE_COMPONENT_EXPR) + && addr_tokens[i + 2]->type == ACCESS_METHOD) + { + i += 3; + while (addr_tokens[i]->type == ACCESS_METHOD) + i++; + } + else + break; + } + + if (i == length) + return c; + + return error_mark_node; } const struct c_omp_directive c_omp_directives[] = { diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 0c301015d880..a7dc096011fa 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -19063,7 +19063,8 @@ c_parser_omp_clause_detach (c_parser *parser, tree list) static tree c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, - const char *where, bool finish_p = true) + const char *where, bool finish_p = true, + bool target_p = false) { tree clauses = NULL; bool first = true; @@ -19273,7 +19274,8 @@ c_parser_oacc_all_clauses (c_parser *parser, omp_clause_mask mask, c_parser_skip_to_pragma_eol (parser); if (finish_p) - return c_finish_omp_clauses (clauses, C_ORT_ACC); + return c_finish_omp_clauses (clauses, target_p ? 
C_ORT_ACC_TARGET + : C_ORT_ACC); return clauses; } @@ -20011,12 +20013,13 @@ c_parser_oacc_loop (location_t loc, c_parser *parser, char *p_name, mask |= OACC_LOOP_CLAUSE_MASK; tree clauses = c_parser_oacc_all_clauses (parser, mask, p_name, - cclauses == NULL); + /*finish_p=*/cclauses == NULL, + /*target=*/is_parallel); if (cclauses) { clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel); if (*cclauses) - *cclauses = c_finish_omp_clauses (*cclauses, C_ORT_ACC); + *cclauses = c_finish_omp_clauses (*cclauses, C_ORT_ACC_TARGET); if (clauses) clauses = c_finish_omp_clauses (clauses, C_ORT_ACC); } @@ -20144,7 +20147,9 @@ c_parser_oacc_compute (location_t loc, c_parser *parser, } } - tree clauses = c_parser_oacc_all_clauses (parser, mask, p_name); + tree clauses = c_parser_oacc_all_clauses (parser, mask, p_name, + /*finish_p=*/true, + /*target=*/true); tree block = c_begin_omp_parallel (); add_stmt (c_parser_omp_structured_block (parser, if_p)); @@ -23670,6 +23675,7 @@ c_parser_omp_target_data (location_t loc, c_parser *parser, bool *if_p) case GOMP_MAP_FIRSTPRIVATE_POINTER: case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH: break; default: map_seen |= 1; @@ -23835,6 +23841,7 @@ c_parser_omp_target_enter_data (location_t loc, c_parser *parser, case GOMP_MAP_FIRSTPRIVATE_POINTER: case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH: break; default: map_seen |= 1; @@ -23909,7 +23916,8 @@ c_parser_omp_target_exit_data (location_t loc, c_parser *parser, tree clauses = c_parser_omp_all_clauses (parser, OMP_TARGET_EXIT_DATA_CLAUSE_MASK, - "#pragma omp target exit data"); + "#pragma omp target exit data", false); + clauses = c_finish_omp_clauses (clauses, C_ORT_OMP_EXIT_DATA); c_omp_adjust_map_clauses (clauses, false); int map_seen = 0; for (tree *pc = &clauses; *pc;) @@ -23944,6 +23952,7 @@ c_parser_omp_target_exit_data (location_t loc, c_parser *parser, case GOMP_MAP_FIRSTPRIVATE_POINTER: case 
GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_DETACH: break; default: map_seen |= 1; @@ -24200,7 +24209,9 @@ check_clauses: case GOMP_MAP_PRESENT_ALLOC: case GOMP_MAP_FIRSTPRIVATE_POINTER: case GOMP_MAP_ALWAYS_POINTER: + case GOMP_MAP_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH: break; default: error_at (OMP_CLAUSE_LOCATION (*pc), diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 1e4615a713a0..18860c2373fb 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -13606,10 +13606,12 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, enum c_omp_region_type ort) { tree ret, low_bound, length, type; + bool openacc = (ort & C_ORT_ACC) != 0; if (TREE_CODE (t) != TREE_LIST) { if (error_operand_p (t)) return error_mark_node; + c_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); ret = t; if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_AFFINITY && OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND @@ -13619,59 +13621,17 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); return error_mark_node; } - while (INDIRECT_REF_P (t)) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - if (TREE_CODE (t) == COMPONENT_REF - && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM)) - { - if (DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - return error_mark_node; - } - while (TREE_CODE (t) == COMPONENT_REF) - { - if (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) == UNION_TYPE) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - return error_mark_node; - } - t = TREE_OPERAND (t, 0); - while (TREE_CODE (t) == MEM_REF - || 
INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - if (ort == C_ORT_ACC && TREE_CODE (t) == MEM_REF) - { - if (maybe_ne (mem_ref_offset (t), 0)) - error_at (OMP_CLAUSE_LOCATION (c), - "cannot dereference %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - t = TREE_OPERAND (t, 0); - } - } - } + if (!ai.check_clause (c)) + return error_mark_node; + else if (ai.component_access_p () + && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM)) + t = ai.get_root_term (true); + else + t = ai.unconverted_ref_origin (); + if (t == error_mark_node) + return error_mark_node; if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) { if (DECL_P (t)) @@ -13766,7 +13726,7 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, { error_at (OMP_CLAUSE_LOCATION (c), "expected single pointer in %qs clause", - user_omp_clause_code_name (c, ort == C_ORT_ACC)); + user_omp_clause_code_name (c, openacc)); return error_mark_node; } } @@ -13991,7 +13951,7 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, /* Handle array sections for clause C. */ static bool -handle_omp_array_sections (tree c, enum c_omp_region_type ort) +handle_omp_array_sections (tree &c, enum c_omp_region_type ort) { bool maybe_zero_len = false; unsigned int first_non_one = 0; @@ -14200,58 +14160,47 @@ handle_omp_array_sections (tree c, enum c_omp_region_type ort) OMP_CLAUSE_DECL (c) = first; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR) return false; - if (size) - size = c_fully_fold (size, false, NULL); - OMP_CLAUSE_SIZE (c) = size; + /* Don't set OMP_CLAUSE_SIZE for bare attach/detach clauses. 
*/ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP - || (TREE_CODE (t) == COMPONENT_REF - && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE)) - return false; - gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DEVICEPTR); - switch (OMP_CLAUSE_MAP_KIND (c)) + || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DETACH)) { - case GOMP_MAP_ALLOC: - case GOMP_MAP_IF_PRESENT: - case GOMP_MAP_TO: - case GOMP_MAP_FROM: - case GOMP_MAP_TOFROM: - case GOMP_MAP_ALWAYS_TO: - case GOMP_MAP_ALWAYS_FROM: - case GOMP_MAP_ALWAYS_TOFROM: - case GOMP_MAP_RELEASE: - case GOMP_MAP_DELETE: - case GOMP_MAP_FORCE_TO: - case GOMP_MAP_FORCE_FROM: - case GOMP_MAP_FORCE_TOFROM: - case GOMP_MAP_FORCE_PRESENT: - OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1; - break; - default: - break; + if (size) + size = c_fully_fold (size, false, NULL); + OMP_CLAUSE_SIZE (c) = size; } - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); - if (TREE_CODE (t) == COMPONENT_REF) - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); - else - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_POINTER); - OMP_CLAUSE_MAP_IMPLICIT (c2) = OMP_CLAUSE_MAP_IMPLICIT (c); - if (OMP_CLAUSE_MAP_KIND (c2) != GOMP_MAP_FIRSTPRIVATE_POINTER - && !c_mark_addressable (t)) + + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) return false; - OMP_CLAUSE_DECL (c2) = t; - t = build_fold_addr_expr (first); - t = fold_convert_loc (OMP_CLAUSE_LOCATION (c), ptrdiff_type_node, t); - tree ptr = OMP_CLAUSE_DECL (c2); - if (!POINTER_TYPE_P (TREE_TYPE (ptr))) - ptr = build_fold_addr_expr (ptr); - t = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR, - ptrdiff_type_node, t, - fold_convert_loc (OMP_CLAUSE_LOCATION (c), - ptrdiff_type_node, ptr)); - t = c_fully_fold (t, false, NULL); - OMP_CLAUSE_SIZE (c2) = t; - OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = c2; + + auto_vec addr_tokens; + + if (!omp_parse_expr 
(addr_tokens, first)) + return true; + + c_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + tree nc = ai.expand_map_clause (c, first, addr_tokens, ort); + if (nc != error_mark_node) + { + using namespace omp_addr_tokenizer; + + if (ai.maybe_zero_length_array_section (c)) + OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1; + + /* !!! If we're accessing a base decl via chained access + methods (e.g. multiple indirections), duplicate clause + detection won't work properly. Skip it in that case. */ + if ((addr_tokens[0]->type == STRUCTURE_BASE + || addr_tokens[0]->type == ARRAY_BASE) + && addr_tokens[0]->u.structure_base_kind == BASE_DECL + && addr_tokens[1]->type == ACCESS_METHOD + && omp_access_chain_p (addr_tokens, 1)) + c = nc; + + return false; + } } return false; } @@ -14517,7 +14466,6 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) tree ordered_clause = NULL_TREE; tree schedule_clause = NULL_TREE; bool oacc_async = false; - bool indir_component_ref_p = false; tree last_iterators = NULL_TREE; bool last_iterators_remove = false; tree *nogroup_seen = NULL; @@ -14528,6 +14476,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bool allocate_seen = false; bool implicit_moved = false; bool target_in_reduction_seen = false; + bool openacc = (ort & C_ORT_ACC) != 0; bitmap_obstack_initialize (NULL); bitmap_initialize (&generic_head, &bitmap_default_obstack); @@ -14543,7 +14492,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack); bitmap_initialize (&is_on_device_head, &bitmap_default_obstack); - if (ort & C_ORT_ACC) + if (openacc) for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC) { @@ -14937,8 +14886,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) omp_clause_code_name[OMP_CLAUSE_CODE (c)]); remove = true; } - else if ((ort == C_ORT_ACC - && OMP_CLAUSE_CODE (c) == 
OMP_CLAUSE_REDUCTION) + else if ((openacc && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) || (ort == C_ORT_OMP && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR || (OMP_CLAUSE_CODE (c) @@ -14961,7 +14909,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) if (bitmap_bit_p (&oacc_reduction_head, DECL_UID (t))) { error_at (OMP_CLAUSE_LOCATION (c), - ort == C_ORT_ACC + openacc ? "%qD appears more than once in reduction clauses" : "%qD appears more than once in data clauses", t); @@ -14984,7 +14932,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR) && bitmap_bit_p (&map_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -15049,9 +14997,10 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qE appears more than once in data clauses", t); remove = true; } - else if (bitmap_bit_p (&map_head, DECL_UID (t))) + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c) @@ -15319,6 +15268,9 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_FROM: case OMP_CLAUSE__CACHE_: { + using namespace omp_addr_tokenizer; + auto_vec addr_tokens; + t = OMP_CLAUSE_DECL (c); if (TREE_CODE (t) == TREE_LIST) { @@ -15347,56 +15299,68 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } while (TREE_CODE (t) == ARRAY_REF) t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == COMPONENT_REF - && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + + c_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + if (!omp_parse_expr (addr_tokens, t)) { - do - { - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t)) - { - t = 
TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - while (TREE_CODE (t) == COMPONENT_REF - || TREE_CODE (t) == ARRAY_REF); + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; + } + + /* This check is to determine if this will be the only map + node created for this clause. Otherwise, we'll check + the following FIRSTPRIVATE_POINTER or ATTACH_DETACH + node on the next iteration(s) of the loop. */ + if (addr_tokens.length () >= 4 + && addr_tokens[0]->type == STRUCTURE_BASE + && addr_tokens[0]->u.structure_base_kind == BASE_DECL + && addr_tokens[1]->type == ACCESS_METHOD + && addr_tokens[2]->type == COMPONENT_SELECTOR + && addr_tokens[3]->type == ACCESS_METHOD + && (addr_tokens[3]->u.access_kind == ACCESS_DIRECT + || (addr_tokens[3]->u.access_kind + == ACCESS_INDEXED_ARRAY))) + { + tree rt = addr_tokens[1]->expr; + + gcc_assert (DECL_P (rt)); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) + && (bitmap_bit_p (&map_head, DECL_UID (rt)) + || bitmap_bit_p (&map_field_head, DECL_UID (rt)) || bitmap_bit_p (&map_firstprivate_head, - DECL_UID (t)))) + DECL_UID (rt)))) { remove = true; break; } - if (bitmap_bit_p (&map_field_head, DECL_UID (t))) + if (bitmap_bit_p (&map_field_head, DECL_UID (rt))) break; - if (bitmap_bit_p (&map_head, DECL_UID (t))) + if (bitmap_bit_p (&map_head, DECL_UID (rt))) { if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in motion " - "clauses", t); - else if (ort == C_ORT_ACC) + "clauses", rt); + else if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data " - "clauses", t); + "clauses", rt); else error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in map " - "clauses", t); + "clauses", rt); remove = 
true; } else { - bitmap_set_bit (&map_head, DECL_UID (t)); - bitmap_set_bit (&map_field_head, DECL_UID (t)); + bitmap_set_bit (&map_head, DECL_UID (rt)); + bitmap_set_bit (&map_field_head, DECL_UID (rt)); } } } @@ -15404,7 +15368,8 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) remove = true; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH) + && !OMP_CLAUSE_SIZE (c)) /* In this case, we have a single array element which is a pointer, and we already set OMP_CLAUSE_SIZE in handle_omp_array_sections above. For attach/detach @@ -15413,6 +15378,14 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) OMP_CLAUSE_SIZE (c) = size_zero_node; break; } + else if (!omp_parse_expr (addr_tokens, t)) + { + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; + } if (t == error_mark_node) { remove = true; @@ -15426,101 +15399,47 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH) + && !OMP_CLAUSE_SIZE (c)) /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a bias) to zero here, so it is not set erroneously to the pointer size later on in gimplify.cc. 
*/ OMP_CLAUSE_SIZE (c) = size_zero_node; - while (INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) + + c_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + if (!ai.check_clause (c)) { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - indir_component_ref_p = false; - if (TREE_CODE (t) == COMPONENT_REF - && (TREE_CODE (TREE_OPERAND (t, 0)) == MEM_REF - || INDIRECT_REF_P (TREE_OPERAND (t, 0)) - || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) - { - t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); - indir_component_ref_p = true; - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); + remove = true; + break; } - if (TREE_CODE (t) == COMPONENT_REF - && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) + if (!ai.map_supported_p ()) { - if (DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (!omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE does not have a mappable type in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (TYPE_ATOMIC (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%<_Atomic%> %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - while (TREE_CODE (t) == COMPONENT_REF) - { - if (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) - == UNION_TYPE) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - remove = true; - break; - } - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF) - { - if (maybe_ne (mem_ref_offset (t), 0)) - error_at (OMP_CLAUSE_LOCATION (c), - "cannot dereference %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - t = TREE_OPERAND (t, 
0); - } - while (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - if (remove) - break; - if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) - { - if (bitmap_bit_p (&map_field_head, DECL_UID (t)) - || (ort != C_ORT_ACC - && bitmap_bit_p (&map_head, DECL_UID (t)))) - break; - } + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; } + + gcc_assert ((addr_tokens[0]->type == ARRAY_BASE + || addr_tokens[0]->type == STRUCTURE_BASE) + && addr_tokens[1]->type == ACCESS_METHOD); + + t = addr_tokens[1]->expr; + + if (addr_tokens[0]->u.structure_base_kind != BASE_DECL) + goto skip_decl_checks; + + /* For OpenMP, we can access a struct "t" and "t.d" on the same + mapping. OpenACC allows multiple fields of the same structure + to be written. */ + if (addr_tokens[0]->type == STRUCTURE_BASE + && (bitmap_bit_p (&map_field_head, DECL_UID (t)) + || (!openacc && bitmap_bit_p (&map_head, DECL_UID (t))))) + goto skip_decl_checks; + if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) { error_at (OMP_CLAUSE_LOCATION (c), @@ -15538,7 +15457,6 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) else if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER)) - && !indir_component_ref_p && !c_mark_addressable (t)) remove = true; else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -15584,27 +15502,25 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) remove = true; } else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + && !bitmap_bit_p (&map_field_head, DECL_UID (t)) + && openacc) { - if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", - t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears 
both in data and map clauses", t); + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); remove = true; } else bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); } else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + && !bitmap_bit_p (&map_field_head, DECL_UID (t)) + && ort != C_ORT_OMP + && ort != C_ORT_OMP_EXIT_DATA) { if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in motion clauses", t); - else if (ort == C_ORT_ACC) + else if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -15612,8 +15528,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qD appears more than once in map clauses", t); remove = true; } - else if (ort == C_ORT_ACC - && bitmap_bit_p (&generic_head, DECL_UID (t))) + else if (openacc && bitmap_bit_p (&generic_head, DECL_UID (t))) { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); @@ -15622,7 +15537,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -15630,13 +15545,37 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qD appears both in data and map clauses", t); remove = true; } - else + else if (!omp_access_chain_p (addr_tokens, 1)) { bitmap_set_bit (&map_head, DECL_UID (t)); if (t != OMP_CLAUSE_DECL (c) && TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF) bitmap_set_bit (&map_field_head, DECL_UID (t)); } + + skip_decl_checks: + /* If we call omp_expand_map_clause in handle_omp_array_sections, + the containing loop (here) iterates through the new nodes + created by that expansion. 
Avoid expanding those again (just + by checking the node type). */ + if (!remove + && ort != C_ORT_DECLARE_SIMD + && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_FIRSTPRIVATE_REFERENCE) + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ALWAYS_POINTER + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_DETACH))) + { + grp_start_p = pc; + grp_sentinel = OMP_CLAUSE_CHAIN (c); + tree nc = ai.expand_map_clause (c, OMP_CLAUSE_DECL (c), + addr_tokens, ort); + if (nc != error_mark_node) + c = nc; + } } break; @@ -15714,7 +15653,7 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) if (TREE_CODE (TREE_TYPE (t)) != POINTER_TYPE) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR - && ort != C_ORT_ACC) + && !openacc) { error_at (OMP_CLAUSE_LOCATION (c), "%qs variable is not a pointer", diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index fe8845b0fc31..58e910d64afe 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -41426,7 +41426,7 @@ cp_parser_oacc_compute_clause_self (cp_parser *parser, tree list) static tree cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, const char *where, cp_token *pragma_tok, - bool finish_p = true) + bool finish_p = true, bool target_p = false) { tree clauses = NULL; bool first = true; @@ -41645,7 +41645,8 @@ cp_parser_oacc_all_clauses (cp_parser *parser, omp_clause_mask mask, cp_parser_skip_to_pragma_eol (parser, pragma_tok); if (finish_p) - return finish_omp_clauses (clauses, C_ORT_ACC); + return finish_omp_clauses (clauses, target_p ? 
C_ORT_ACC_TARGET + : C_ORT_ACC); return clauses; } @@ -46236,6 +46237,7 @@ cp_parser_omp_target_data (cp_parser *parser, cp_token *pragma_tok, bool *if_p) case GOMP_MAP_FIRSTPRIVATE_REFERENCE: case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH: break; default: map_seen |= 1; @@ -46358,6 +46360,7 @@ cp_parser_omp_target_enter_data (cp_parser *parser, cp_token *pragma_tok, case GOMP_MAP_FIRSTPRIVATE_REFERENCE: case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH: break; default: map_seen |= 1; @@ -46436,7 +46439,9 @@ cp_parser_omp_target_exit_data (cp_parser *parser, cp_token *pragma_tok, tree clauses = cp_parser_omp_all_clauses (parser, OMP_TARGET_EXIT_DATA_CLAUSE_MASK, - "#pragma omp target exit data", pragma_tok); + "#pragma omp target exit data", pragma_tok, + false); + clauses = finish_omp_clauses (clauses, C_ORT_OMP_EXIT_DATA); c_omp_adjust_map_clauses (clauses, false); int map_seen = 0; for (tree *pc = &clauses; *pc;) @@ -46472,6 +46477,7 @@ cp_parser_omp_target_exit_data (cp_parser *parser, cp_token *pragma_tok, case GOMP_MAP_FIRSTPRIVATE_REFERENCE: case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_DETACH: break; default: map_seen |= 1; @@ -46853,7 +46859,7 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok) bool found_in_scope = global_bindings_p (); clauses = cp_parser_oacc_all_clauses (parser, OACC_DECLARE_CLAUSE_MASK, - "#pragma acc declare", pragma_tok, true); + "#pragma acc declare", pragma_tok); if (omp_find_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE) @@ -47101,12 +47107,13 @@ cp_parser_oacc_loop (cp_parser *parser, cp_token *pragma_tok, char *p_name, mask |= OACC_LOOP_CLAUSE_MASK; tree clauses = cp_parser_oacc_all_clauses (parser, mask, p_name, pragma_tok, - cclauses == NULL); + /*finish_p=*/cclauses == NULL, + /*target=*/is_parallel); if (cclauses) { clauses = c_oacc_split_loop_clauses (clauses, cclauses, is_parallel); if (*cclauses) - *cclauses = 
finish_omp_clauses (*cclauses, C_ORT_ACC); + *cclauses = finish_omp_clauses (*cclauses, C_ORT_ACC_TARGET); if (clauses) clauses = finish_omp_clauses (clauses, C_ORT_ACC); } @@ -47242,7 +47249,9 @@ cp_parser_oacc_compute (cp_parser *parser, cp_token *pragma_tok, } } - tree clauses = cp_parser_oacc_all_clauses (parser, mask, p_name, pragma_tok); + tree clauses = cp_parser_oacc_all_clauses (parser, mask, p_name, pragma_tok, + /*finish_p=*/true, + /*target=*/true); tree block = begin_omp_parallel (); unsigned int save = cp_parser_begin_omp_structured_block (parser); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 208fa21032e3..0dd0a9c644c1 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -18727,8 +18727,8 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t complain, tree in_decl) case OACC_KERNELS: case OACC_PARALLEL: case OACC_SERIAL: - tmp = tsubst_omp_clauses (OMP_CLAUSES (t), C_ORT_ACC, args, complain, - in_decl); + tmp = tsubst_omp_clauses (OMP_CLAUSES (t), C_ORT_ACC_TARGET, args, + complain, in_decl); stmt = begin_omp_parallel (); RECUR (OMP_BODY (t)); finish_omp_construct (TREE_CODE (t), stmt, tmp); diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 508322b2616a..64839b1ac871 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -5273,6 +5273,54 @@ omp_privatize_field (tree t, bool shared) return v; } +/* C++ specialisation of the c_omp_address_inspector class. 
*/ + +class cp_omp_address_inspector : public c_omp_address_inspector +{ +public: + cp_omp_address_inspector (location_t loc, tree t) + : c_omp_address_inspector (loc, t) + { + } + + ~cp_omp_address_inspector () + { + } + + bool processing_template_decl_p () + { + return processing_template_decl; + } + + void emit_unmappable_type_notes (tree t) + { + if (TREE_TYPE (t) != error_mark_node + && !COMPLETE_TYPE_P (TREE_TYPE (t))) + cxx_incomplete_type_inform (TREE_TYPE (t)); + } + + tree convert_from_reference (tree x) + { + return ::convert_from_reference (x); + } + + tree build_array_ref (location_t loc, tree arr, tree idx) + { + return ::build_array_ref (loc, arr, idx); + } + + bool check_clause (tree clause) + { + if (TREE_CODE (orig) == COMPONENT_REF + && invalid_nonstatic_memfn_p (EXPR_LOCATION (orig), orig, + tf_warning_or_error)) + return false; + if (!c_omp_address_inspector::check_clause (clause)) + return false; + return true; + } +}; + /* Helper function for handle_omp_array_sections. Called recursively to handle multiple array-section-subscripts. 
C is the clause, T current expression (initially OMP_CLAUSE_DECL), which is either @@ -5299,63 +5347,27 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, enum c_omp_region_type ort) { tree ret, low_bound, length, type; + bool openacc = (ort & C_ORT_ACC) != 0; if (TREE_CODE (t) != TREE_LIST) { if (error_operand_p (t)) return error_mark_node; - if (REFERENCE_REF_P (t) - && TREE_CODE (TREE_OPERAND (t, 0)) == COMPONENT_REF) - t = TREE_OPERAND (t, 0); - ret = t; - while (INDIRECT_REF_P (t)) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - if (TREE_CODE (t) == COMPONENT_REF - && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_TO - || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM) - && !type_dependent_expression_p (t)) - { - if (TREE_CODE (TREE_OPERAND (t, 1)) == FIELD_DECL - && DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - return error_mark_node; - } - while (TREE_CODE (t) == COMPONENT_REF) - { - if (TREE_TYPE (TREE_OPERAND (t, 0)) - && TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) == UNION_TYPE) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - return error_mark_node; - } - t = TREE_OPERAND (t, 0); - while (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - if (REFERENCE_REF_P (t)) - t = TREE_OPERAND (t, 0); - } + + cp_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + tree t_refto = ai.maybe_unconvert_ref (t); + + if (!ai.check_clause (c)) + return error_mark_node; + else if (ai.component_access_p () + && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + || OMP_CLAUSE_CODE (c) 
== OMP_CLAUSE_TO + || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FROM)) + t = ai.get_root_term (true); + else + t = ai.unconverted_ref_origin (); + if (t == error_mark_node) + return error_mark_node; + ret = t_refto; if (TREE_CODE (t) == FIELD_DECL) ret = finish_non_static_data_member (t, NULL_TREE, NULL_TREE); else if (!VAR_P (t) && TREE_CODE (t) != PARM_DECL) @@ -5452,7 +5464,7 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, { error_at (OMP_CLAUSE_LOCATION (c), "expected single pointer in %qs clause", - user_omp_clause_code_name (c, ort == C_ORT_ACC)); + user_omp_clause_code_name (c, openacc)); return error_mark_node; } } @@ -5689,7 +5701,7 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, /* Handle array sections for clause C. */ static bool -handle_omp_array_sections (tree c, enum c_omp_region_type ort) +handle_omp_array_sections (tree &c, enum c_omp_region_type ort) { bool maybe_zero_len = false; unsigned int first_non_one = 0; @@ -5897,118 +5909,73 @@ handle_omp_array_sections (tree c, enum c_omp_region_type ort) OMP_CLAUSE_DECL (c) = first; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_HAS_DEVICE_ADDR) return false; - OMP_CLAUSE_SIZE (c) = size; + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DETACH)) + OMP_CLAUSE_SIZE (c) = size; if (TREE_CODE (t) == FIELD_DECL) t = finish_non_static_data_member (t, NULL_TREE, NULL_TREE); - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP - || (TREE_CODE (t) == COMPONENT_REF - && TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE)) + + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) return false; - switch (OMP_CLAUSE_MAP_KIND (c)) - { - case GOMP_MAP_ALLOC: - case GOMP_MAP_IF_PRESENT: - case GOMP_MAP_TO: - case GOMP_MAP_FROM: - case GOMP_MAP_TOFROM: - case GOMP_MAP_ALWAYS_TO: - case GOMP_MAP_ALWAYS_FROM: - case GOMP_MAP_ALWAYS_TOFROM: - case GOMP_MAP_PRESENT_ALLOC: - case GOMP_MAP_PRESENT_TO: - case 
GOMP_MAP_PRESENT_FROM: - case GOMP_MAP_PRESENT_TOFROM: - case GOMP_MAP_ALWAYS_PRESENT_TO: - case GOMP_MAP_ALWAYS_PRESENT_FROM: - case GOMP_MAP_ALWAYS_PRESENT_TOFROM: - case GOMP_MAP_RELEASE: - case GOMP_MAP_DELETE: - case GOMP_MAP_FORCE_TO: - case GOMP_MAP_FORCE_FROM: - case GOMP_MAP_FORCE_TOFROM: - case GOMP_MAP_FORCE_PRESENT: - OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1; - break; - default: - break; - } - bool reference_always_pointer = true; - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - if (TREE_CODE (t) == COMPONENT_REF) - { - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ATTACH_DETACH); - if ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP - && TYPE_REF_P (TREE_TYPE (t))) + if (TREE_CODE (first) == INDIRECT_REF) + { + /* Detect and skip adding extra nodes for pointer-to-member + mappings. These are unsupported for now. */ + tree tmp = TREE_OPERAND (first, 0); + + if (TREE_CODE (tmp) == NON_LVALUE_EXPR) + tmp = TREE_OPERAND (tmp, 0); + + if (TREE_CODE (tmp) == INDIRECT_REF) + tmp = TREE_OPERAND (tmp, 0); + + if (TREE_CODE (tmp) == POINTER_PLUS_EXPR) { - if (TREE_CODE (TREE_TYPE (TREE_TYPE (t))) == ARRAY_TYPE) - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER); - else - t = convert_from_reference (t); - - reference_always_pointer = false; + tree offset = TREE_OPERAND (tmp, 1); + STRIP_NOPS (offset); + if (TYPE_PTRMEM_P (TREE_TYPE (offset))) + { + sorry_at (OMP_CLAUSE_LOCATION (c), + "pointer-to-member mapping %qE not supported", + OMP_CLAUSE_DECL (c)); + return true; + } } } - else if (REFERENCE_REF_P (t) - && TREE_CODE (TREE_OPERAND (t, 0)) == COMPONENT_REF) - { - gomp_map_kind k; - if ((ort & C_ORT_OMP_DECLARE_SIMD) == C_ORT_OMP - && TREE_CODE (TREE_TYPE (t)) == POINTER_TYPE) - k = GOMP_MAP_ATTACH_DETACH; - else - { - t = TREE_OPERAND (t, 0); - k = (ort == C_ORT_ACC - ? 
GOMP_MAP_ATTACH_DETACH : GOMP_MAP_ALWAYS_POINTER); - } - OMP_CLAUSE_SET_MAP_KIND (c2, k); - } - else - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FIRSTPRIVATE_POINTER); - OMP_CLAUSE_MAP_IMPLICIT (c2) = OMP_CLAUSE_MAP_IMPLICIT (c); - if (OMP_CLAUSE_MAP_KIND (c2) != GOMP_MAP_FIRSTPRIVATE_POINTER - && !cxx_mark_addressable (t)) - return false; - OMP_CLAUSE_DECL (c2) = t; - t = build_fold_addr_expr (first); - t = fold_convert_loc (OMP_CLAUSE_LOCATION (c), - ptrdiff_type_node, t); - tree ptr = OMP_CLAUSE_DECL (c2); - ptr = convert_from_reference (ptr); - if (!INDIRECT_TYPE_P (TREE_TYPE (ptr))) - ptr = build_fold_addr_expr (ptr); - t = fold_build2_loc (OMP_CLAUSE_LOCATION (c), MINUS_EXPR, - ptrdiff_type_node, t, - fold_convert_loc (OMP_CLAUSE_LOCATION (c), - ptrdiff_type_node, ptr)); - OMP_CLAUSE_SIZE (c2) = t; - OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = c2; - ptr = OMP_CLAUSE_DECL (c2); - if (reference_always_pointer - && OMP_CLAUSE_MAP_KIND (c2) != GOMP_MAP_FIRSTPRIVATE_POINTER - && TYPE_REF_P (TREE_TYPE (ptr)) - && INDIRECT_TYPE_P (TREE_TYPE (TREE_TYPE (ptr)))) + /* FIRST represents the first item of data that we are mapping. + E.g. if we're mapping an array, FIRST might resemble + "foo.bar.myarray[0]". 
*/ + + auto_vec addr_tokens; + + if (!omp_parse_expr (addr_tokens, first)) + return true; + + cp_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + tree nc = ai.expand_map_clause (c, first, addr_tokens, ort); + if (nc != error_mark_node) { - tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c3, OMP_CLAUSE_MAP_KIND (c2)); - OMP_CLAUSE_MAP_IMPLICIT (c2) = OMP_CLAUSE_MAP_IMPLICIT (c); - OMP_CLAUSE_DECL (c3) = ptr; - if (OMP_CLAUSE_MAP_KIND (c2) == GOMP_MAP_ALWAYS_POINTER - || OMP_CLAUSE_MAP_KIND (c2) == GOMP_MAP_ATTACH_DETACH) - { - OMP_CLAUSE_DECL (c2) = build_simple_mem_ref (ptr); - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER); - } - else - OMP_CLAUSE_DECL (c2) = convert_from_reference (ptr); - OMP_CLAUSE_SIZE (c3) = size_zero_node; - OMP_CLAUSE_CHAIN (c3) = OMP_CLAUSE_CHAIN (c2); - OMP_CLAUSE_CHAIN (c2) = c3; + using namespace omp_addr_tokenizer; + + if (ai.maybe_zero_length_array_section (c)) + OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c) = 1; + + /* !!! If we're accessing a base decl via chained access + methods (e.g. multiple indirections), duplicate clause + detection won't work properly. Skip it in that case. 
*/ + if ((addr_tokens[0]->type == STRUCTURE_BASE + || addr_tokens[0]->type == ARRAY_BASE) + && addr_tokens[0]->u.structure_base_kind == BASE_DECL + && addr_tokens[1]->type == ACCESS_METHOD + && omp_access_chain_p (addr_tokens, 1)) + c = nc; + + return false; } } } @@ -6936,6 +6903,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bitmap_head oacc_reduction_head, is_on_device_head; tree c, t, *pc; tree safelen = NULL_TREE; + bool openacc = (ort & C_ORT_ACC) != 0; bool branch_seen = false; bool copyprivate_seen = false; bool ordered_seen = false; @@ -6968,7 +6936,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) bitmap_initialize (&oacc_reduction_head, &bitmap_default_obstack); bitmap_initialize (&is_on_device_head, &bitmap_default_obstack); - if (ort & C_ORT_ACC) + if (openacc) for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_ASYNC) { @@ -7217,7 +7185,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) t = OMP_CLAUSE_DECL (c); check_dup_generic_t: if (t == current_class_ptr - && ((ort != C_ORT_OMP_DECLARE_SIMD && ort != C_ORT_ACC) + && ((ort != C_ORT_OMP_DECLARE_SIMD && !openacc) || (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR && OMP_CLAUSE_CODE (c) != OMP_CLAUSE_UNIFORM))) { @@ -7242,7 +7210,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) omp_clause_code_name[OMP_CLAUSE_CODE (c)]); remove = true; } - else if ((ort == C_ORT_ACC + else if ((openacc && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION) || (ort == C_ORT_OMP && (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR @@ -7266,7 +7234,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) if (bitmap_bit_p (&oacc_reduction_head, DECL_UID (t))) { error_at (OMP_CLAUSE_LOCATION (c), - ort == C_ORT_ACC + openacc ? 
"%qD appears more than once in reduction clauses" : "%qD appears more than once in data clauses", t); @@ -7289,7 +7257,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR) && bitmap_bit_p (&map_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -7351,7 +7319,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) omp_note_field_privatization (t, OMP_CLAUSE_DECL (c)); else t = OMP_CLAUSE_DECL (c); - if (ort != C_ORT_ACC && t == current_class_ptr) + if (!openacc && t == current_class_ptr) { error_at (OMP_CLAUSE_LOCATION (c), "% allowed in OpenMP only in %" @@ -7387,9 +7355,10 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qD appears more than once in data clauses", t); remove = true; } - else if (bitmap_bit_p (&map_head, DECL_UID (t))) + else if (bitmap_bit_p (&map_head, DECL_UID (t)) + || bitmap_bit_p (&map_field_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c) @@ -7410,7 +7379,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) omp_note_field_privatization (t, OMP_CLAUSE_DECL (c)); else t = OMP_CLAUSE_DECL (c); - if (ort != C_ORT_ACC && t == current_class_ptr) + if (!openacc && t == current_class_ptr) { error_at (OMP_CLAUSE_LOCATION (c), "% allowed in OpenMP only in %" @@ -8211,6 +8180,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) case OMP_CLAUSE_FROM: case OMP_CLAUSE__CACHE_: { + using namespace omp_addr_tokenizer; + auto_vec addr_tokens; + t = OMP_CLAUSE_DECL (c); if (TREE_CODE (t) == TREE_LIST) { @@ -8237,58 +8209,73 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } while (TREE_CODE (t) == ARRAY_REF) t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == COMPONENT_REF - && 
TREE_CODE (TREE_TYPE (t)) == ARRAY_TYPE) + + if (type_dependent_expression_p (t)) + break; + + cp_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + if (!ai.map_supported_p () + || !omp_parse_expr (addr_tokens, t)) { - do - { - t = TREE_OPERAND (t, 0); - if (REFERENCE_REF_P (t)) - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t)) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - while (TREE_CODE (t) == COMPONENT_REF - || TREE_CODE (t) == ARRAY_REF); + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; + } + + /* This check is to determine if this will be the only map + node created for this clause. Otherwise, we'll check + the following FIRSTPRIVATE_POINTER, + FIRSTPRIVATE_REFERENCE or ATTACH_DETACH node on the next + iteration(s) of the loop. */ + if (addr_tokens.length () >= 4 + && addr_tokens[0]->type == STRUCTURE_BASE + && addr_tokens[0]->u.structure_base_kind == BASE_DECL + && addr_tokens[1]->type == ACCESS_METHOD + && addr_tokens[2]->type == COMPONENT_SELECTOR + && addr_tokens[3]->type == ACCESS_METHOD + && (addr_tokens[3]->u.access_kind == ACCESS_DIRECT + || (addr_tokens[3]->u.access_kind + == ACCESS_INDEXED_ARRAY))) + { + tree rt = addr_tokens[1]->expr; + + gcc_assert (DECL_P (rt)); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_IMPLICIT (c) - && (bitmap_bit_p (&map_head, DECL_UID (t)) - || bitmap_bit_p (&map_field_head, DECL_UID (t)) + && (bitmap_bit_p (&map_head, DECL_UID (rt)) + || bitmap_bit_p (&map_field_head, DECL_UID (rt)) || bitmap_bit_p (&map_firstprivate_head, - DECL_UID (t)))) + DECL_UID (rt)))) { remove = true; break; } - if (bitmap_bit_p (&map_field_head, DECL_UID (t))) + if (bitmap_bit_p (&map_field_head, DECL_UID (rt))) break; - if (bitmap_bit_p (&map_head, DECL_UID (t))) + if (bitmap_bit_p (&map_head, DECL_UID (rt))) { if (OMP_CLAUSE_CODE (c) != 
OMP_CLAUSE_MAP) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in motion" - " clauses", t); - else if (ort == C_ORT_ACC) + " clauses", rt); + else if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data" - " clauses", t); + " clauses", rt); else error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in map" - " clauses", t); + " clauses", rt); remove = true; } else { - bitmap_set_bit (&map_head, DECL_UID (t)); - bitmap_set_bit (&map_field_head, DECL_UID (t)); + bitmap_set_bit (&map_head, DECL_UID (rt)); + bitmap_set_bit (&map_field_head, DECL_UID (rt)); } } } @@ -8296,7 +8283,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) remove = true; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH) + && !OMP_CLAUSE_SIZE (c)) /* In this case, we have a single array element which is a pointer, and we already set OMP_CLAUSE_SIZE in handle_omp_array_sections above. For attach/detach @@ -8305,6 +8293,16 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) OMP_CLAUSE_SIZE (c) = size_zero_node; break; } + else if (type_dependent_expression_p (t)) + break; + else if (!omp_parse_expr (addr_tokens, t)) + { + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; + } if (t == error_mark_node) { remove = true; @@ -8318,115 +8316,55 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) } if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH)) + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DETACH) + && !OMP_CLAUSE_SIZE (c)) /* For attach/detach clauses, set OMP_CLAUSE_SIZE (representing a bias) to zero here, so it is not set erroneously to the pointer size later on in gimplify.cc. 
*/ OMP_CLAUSE_SIZE (c) = size_zero_node; - if (REFERENCE_REF_P (t) - && TREE_CODE (TREE_OPERAND (t, 0)) == COMPONENT_REF) + + cp_omp_address_inspector ai (OMP_CLAUSE_LOCATION (c), t); + + if (!ai.check_clause (c)) { - t = TREE_OPERAND (t, 0); - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH) - OMP_CLAUSE_DECL (c) = t; + remove = true; + break; } - while (INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) + + if (!ai.map_supported_p ()) { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); + sorry_at (OMP_CLAUSE_LOCATION (c), + "unsupported map expression %qE", + OMP_CLAUSE_DECL (c)); + remove = true; + break; } - while (TREE_CODE (t) == COMPOUND_EXPR) - { - t = TREE_OPERAND (t, 1); - STRIP_NOPS (t); - } - if (TREE_CODE (t) == COMPONENT_REF - && invalid_nonstatic_memfn_p (EXPR_LOCATION (t), t, - tf_warning_or_error)) - remove = true; - indir_component_ref_p = false; - if (TREE_CODE (t) == COMPONENT_REF - && (INDIRECT_REF_P (TREE_OPERAND (t, 0)) - || TREE_CODE (TREE_OPERAND (t, 0)) == ARRAY_REF)) - { - t = TREE_OPERAND (TREE_OPERAND (t, 0), 0); - indir_component_ref_p = true; - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - if (TREE_CODE (t) == COMPONENT_REF - && OMP_CLAUSE_CODE (c) != OMP_CLAUSE__CACHE_) - { - if (type_dependent_expression_p (t)) - break; - if (TREE_CODE (TREE_OPERAND (t, 1)) == FIELD_DECL - && DECL_BIT_FIELD (TREE_OPERAND (t, 1))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "bit-field %qE in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - remove = true; - } - else if (!omp_mappable_type (TREE_TYPE (t))) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE does not have a mappable type in %qs clause", - t, omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - if (TREE_TYPE (t) != error_mark_node - && !COMPLETE_TYPE_P (TREE_TYPE (t))) - cxx_incomplete_type_inform (TREE_TYPE (t)); - remove = 
true; - } - while (TREE_CODE (t) == COMPONENT_REF) - { - if (TREE_TYPE (TREE_OPERAND (t, 0)) - && (TREE_CODE (TREE_TYPE (TREE_OPERAND (t, 0))) - == UNION_TYPE)) - { - error_at (OMP_CLAUSE_LOCATION (c), - "%qE is a member of a union", t); - remove = true; - break; - } - t = TREE_OPERAND (t, 0); - if (TREE_CODE (t) == MEM_REF) - { - if (maybe_ne (mem_ref_offset (t), 0)) - error_at (OMP_CLAUSE_LOCATION (c), - "cannot dereference %qE in %qs clause", t, - omp_clause_code_name[OMP_CLAUSE_CODE (c)]); - else - t = TREE_OPERAND (t, 0); - } - while (TREE_CODE (t) == MEM_REF - || INDIRECT_REF_P (t) - || TREE_CODE (t) == ARRAY_REF) - { - t = TREE_OPERAND (t, 0); - STRIP_NOPS (t); - if (TREE_CODE (t) == POINTER_PLUS_EXPR) - t = TREE_OPERAND (t, 0); - } - } - if (remove) - break; - if (REFERENCE_REF_P (t)) - t = TREE_OPERAND (t, 0); - if (VAR_P (t) || TREE_CODE (t) == PARM_DECL) - { - if (bitmap_bit_p (&map_field_head, DECL_UID (t)) - || (ort != C_ORT_ACC - && bitmap_bit_p (&map_head, DECL_UID (t)))) - goto handle_map_references; - } - } - if (!processing_template_decl - && TREE_CODE (t) == FIELD_DECL) + + gcc_assert ((addr_tokens[0]->type == ARRAY_BASE + || addr_tokens[0]->type == STRUCTURE_BASE) + && addr_tokens[1]->type == ACCESS_METHOD); + + t = addr_tokens[1]->expr; + + /* This is used to prevent cxx_mark_addressable from being called + on 'this' for expressions like 'this->a', i.e. typical member + accesses. */ + indir_component_ref_p + = (addr_tokens[0]->type == STRUCTURE_BASE + && addr_tokens[1]->u.access_kind != ACCESS_DIRECT); + + if (addr_tokens[0]->u.structure_base_kind != BASE_DECL) + goto skip_decl_checks; + + /* For OpenMP, we can access a struct "t" and "t.d" on the same + mapping. OpenACC allows multiple fields of the same structure + to be written. 
*/ + if (addr_tokens[0]->type == STRUCTURE_BASE + && (bitmap_bit_p (&map_field_head, DECL_UID (t)) + || (!openacc && bitmap_bit_p (&map_head, DECL_UID (t))))) + goto skip_decl_checks; + + if (!processing_template_decl && TREE_CODE (t) == FIELD_DECL) { OMP_CLAUSE_DECL (c) = finish_non_static_data_member (t, NULL_TREE, NULL_TREE); @@ -8464,12 +8402,17 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER)) && !indir_component_ref_p + && (t != current_class_ptr + || OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH) && !cxx_mark_addressable (t)) remove = true; else if (!(OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER || (OMP_CLAUSE_MAP_KIND (c) - == GOMP_MAP_FIRSTPRIVATE_POINTER))) + == GOMP_MAP_FIRSTPRIVATE_POINTER) + || (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_ATTACH_DETACH))) && t == OMP_CLAUSE_DECL (c) && !type_dependent_expression_p (t) && !omp_mappable_type (TYPE_REF_P (TREE_TYPE (t)) @@ -8513,27 +8456,29 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) remove = true; } else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + && !bitmap_bit_p (&map_field_head, DECL_UID (t)) + && openacc) { - if (ort == C_ORT_ACC) - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears more than once in data clauses", - t); - else - error_at (OMP_CLAUSE_LOCATION (c), - "%qD appears both in data and map clauses", t); + error_at (OMP_CLAUSE_LOCATION (c), + "%qD appears more than once in data clauses", t); remove = true; } else bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (c) + == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + bitmap_set_bit (&map_firstprivate_head, DECL_UID (t)); else if (bitmap_bit_p (&map_head, DECL_UID (t)) - && !bitmap_bit_p (&map_field_head, DECL_UID (t))) + && !bitmap_bit_p 
(&map_field_head, DECL_UID (t)) + && ort != C_ORT_OMP + && ort != C_ORT_OMP_EXIT_DATA) { if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in motion clauses", t); - else if (ort == C_ORT_ACC) + else if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -8541,8 +8486,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qD appears more than once in map clauses", t); remove = true; } - else if (ort == C_ORT_ACC - && bitmap_bit_p (&generic_head, DECL_UID (t))) + else if (openacc && bitmap_bit_p (&generic_head, DECL_UID (t))) { error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); @@ -8551,7 +8495,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) else if (bitmap_bit_p (&firstprivate_head, DECL_UID (t)) || bitmap_bit_p (&is_on_device_head, DECL_UID (t))) { - if (ort == C_ORT_ACC) + if (openacc) error_at (OMP_CLAUSE_LOCATION (c), "%qD appears more than once in data clauses", t); else @@ -8559,7 +8503,7 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) "%qD appears both in data and map clauses", t); remove = true; } - else + else if (!omp_access_chain_p (addr_tokens, 1)) { bitmap_set_bit (&map_head, DECL_UID (t)); @@ -8573,49 +8517,30 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) 0)))))) bitmap_set_bit (&map_field_head, DECL_UID (t)); } - handle_map_references: + + skip_decl_checks: + /* If we call ai.expand_map_clause in handle_omp_array_sections, + the containing loop (here) iterates through the new nodes + created by that expansion. Avoid expanding those again (just + by checking the node type). 
*/ if (!remove && !processing_template_decl && ort != C_ORT_DECLARE_SIMD - && TYPE_REF_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))) + && (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP + || (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_FIRSTPRIVATE_REFERENCE) + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ALWAYS_POINTER + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_DETACH))) { - t = OMP_CLAUSE_DECL (c); - if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) - { - OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); - } - else if (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_POINTER - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_FIRSTPRIVATE_REFERENCE) - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_ALWAYS_POINTER) - && (OMP_CLAUSE_MAP_KIND (c) - != GOMP_MAP_ATTACH_DETACH)) - { - grp_start_p = pc; - grp_sentinel = OMP_CLAUSE_CHAIN (c); - - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), - OMP_CLAUSE_MAP); - if (TREE_CODE (t) == COMPONENT_REF) - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_ALWAYS_POINTER); - else - OMP_CLAUSE_SET_MAP_KIND (c2, - GOMP_MAP_FIRSTPRIVATE_REFERENCE); - OMP_CLAUSE_DECL (c2) = t; - OMP_CLAUSE_SIZE (c2) = size_zero_node; - OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = c2; - OMP_CLAUSE_DECL (c) = build_simple_mem_ref (t); - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (t))); - c = c2; - } + grp_start_p = pc; + grp_sentinel = OMP_CLAUSE_CHAIN (c); + tree nc = ai.expand_map_clause (c, OMP_CLAUSE_DECL (c), + addr_tokens, ort); + if (nc != error_mark_node) + c = nc; } } break; @@ -9015,7 +8940,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort) if (grp_start_p) { /* If we found a clause to remove, we want to remove the whole - 
expanded group, otherwise gimplify can get confused. */ + expanded group, otherwise gimplify + (omp_resolve_clause_dependencies) can get confused. */ *grp_start_p = grp_sentinel; pc = grp_start_p; grp_start_p = NULL; @@ -10203,6 +10129,7 @@ finish_omp_target (location_t loc, tree clauses, tree body, bool combined_p) case GOMP_MAP_ATTACH_DETACH: case GOMP_MAP_ATTACH: case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_POINTER: case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: break; default: diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index f7c73a5d2734..c6061cea62a5 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -2405,8 +2405,8 @@ static vec *doacross_steps; static void gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, gfc_omp_namelist *n, tree decl, bool element, - gomp_map_kind ptr_kind, tree &node, tree &node2, - tree &node3, tree &node4) + bool openmp, gomp_map_kind ptr_kind, tree &node, + tree &node2, tree &node3, tree &node4) { gfc_se se; tree ptr, ptr2; @@ -2529,7 +2529,7 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, struct - and adding an 'alloc: for the 'desc.data' pointer, which would break as the 'desc' (the descriptor) is also mapped (see node4 above). 
*/ - if (ptr_kind == GOMP_MAP_ATTACH_DETACH) + if (ptr_kind == GOMP_MAP_ATTACH_DETACH && !openmp) STRIP_NOPS (OMP_CLAUSE_DECL (node3)); } else @@ -2547,7 +2547,7 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, decl, offset, NULL_TREE, NULL_TREE); OMP_CLAUSE_DECL (node) = offset; - if (ptr_kind == GOMP_MAP_ALWAYS_POINTER) + if (ptr_kind == GOMP_MAP_ATTACH_DETACH && openmp) return; } else @@ -3538,8 +3538,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && !(POINTER_TYPE_P (type) && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (type)))) k = GOMP_MAP_FIRSTPRIVATE_POINTER; - gfc_trans_omp_array_section (block, op, n, decl, element, k, - node, node2, node3, node4); + gfc_trans_omp_array_section (block, op, n, decl, element, + !openacc, k, node, node2, + node3, node4); } else if (n->expr && n->expr->expr_type == EXPR_VARIABLE @@ -3578,10 +3579,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); - gomp_map_kind kind - = (openacc ? GOMP_MAP_ATTACH_DETACH - : GOMP_MAP_ALWAYS_POINTER); - OMP_CLAUSE_SET_MAP_KIND (node2, kind); + OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = POINTER_TYPE_P (TREE_TYPE (se.expr)) ? se.expr @@ -3599,6 +3597,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, fold_convert (size_type_node, se.string_length), TYPE_SIZE_UNIT (tmp)); + gomp_map_kind kind; if (n->u.map_op == OMP_MAP_DELETE) kind = GOMP_MAP_DELETE; else if (op == EXEC_OMP_TARGET_EXIT_DATA) @@ -3685,9 +3684,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, - openacc - ? 
GOMP_MAP_ATTACH_DETACH - : GOMP_MAP_ALWAYS_POINTER); + GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = build_fold_addr_expr (data); OMP_CLAUSE_SIZE (node2) = size_int (0); } @@ -3788,9 +3785,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, - openacc - ? GOMP_MAP_ATTACH_DETACH - : GOMP_MAP_ALWAYS_POINTER); + GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (inner); /* Similar to gfc_trans_omp_array_section (details @@ -3813,11 +3808,10 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { /* An array element or section. */ bool element = lastref->u.ar.type == AR_ELEMENT; - gomp_map_kind kind = (openacc ? GOMP_MAP_ATTACH_DETACH - : GOMP_MAP_ALWAYS_POINTER); + gomp_map_kind kind = GOMP_MAP_ATTACH_DETACH; gfc_trans_omp_array_section (block, op, n, inner, element, - kind, node, node2, node3, - node4); + !openacc, kind, node, node2, + node3, node4); } else gcc_unreachable (); diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index afeaea873c08..26df5b0a8298 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -9195,8 +9195,7 @@ build_omp_struct_comp_nodes (enum tree_code code, tree grp_start, tree grp_end, if (grp_mid && OMP_CLAUSE_CODE (grp_mid) == OMP_CLAUSE_MAP - && (OMP_CLAUSE_MAP_KIND (grp_mid) == GOMP_MAP_ALWAYS_POINTER - || OMP_CLAUSE_MAP_KIND (grp_mid) == GOMP_MAP_ATTACH_DETACH)) + && OMP_CLAUSE_MAP_KIND (grp_mid) == GOMP_MAP_ALWAYS_POINTER) { tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP); @@ -9292,6 +9291,12 @@ struct omp_mapping_group { /* If we've removed the group but need to reindex, mark the group as deleted. */ bool deleted; + /* The group points to an already-created "GOMP_MAP_STRUCT + GOMP_MAP_ATTACH_DETACH" pair. */ + bool reprocess_struct; + /* The group should use "zero-length" allocations for pointers that are not + mapped "to" on the same directive. 
*/ + bool fragile; struct omp_mapping_group *sibling; struct omp_mapping_group *next; }; @@ -9333,38 +9338,6 @@ omp_get_base_pointer (tree expr) return NULL_TREE; } -/* Remove COMPONENT_REFS and indirections from EXPR. */ - -static tree -omp_strip_components_and_deref (tree expr) -{ - while (TREE_CODE (expr) == COMPONENT_REF - || INDIRECT_REF_P (expr) - || (TREE_CODE (expr) == MEM_REF - && integer_zerop (TREE_OPERAND (expr, 1))) - || TREE_CODE (expr) == POINTER_PLUS_EXPR - || TREE_CODE (expr) == COMPOUND_EXPR) - if (TREE_CODE (expr) == COMPOUND_EXPR) - expr = TREE_OPERAND (expr, 1); - else - expr = TREE_OPERAND (expr, 0); - - STRIP_NOPS (expr); - - return expr; -} - -static tree -omp_strip_indirections (tree expr) -{ - while (INDIRECT_REF_P (expr) - || (TREE_CODE (expr) == MEM_REF - && integer_zerop (TREE_OPERAND (expr, 1)))) - expr = TREE_OPERAND (expr, 0); - - return expr; -} - /* An attach or detach operation depends directly on the address being attached/detached. Return that address, or none if there are no attachments/detachments. */ @@ -9418,6 +9391,7 @@ omp_get_attachment (omp_mapping_group *grp) case GOMP_MAP_ATTACH_DETACH: case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DETACH: return OMP_CLAUSE_DECL (node); default: @@ -9493,23 +9467,43 @@ omp_group_last (tree *start_p) == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION) || (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION) + || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_ALWAYS_POINTER || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_TO_PSET)) { - grp_last_p = &OMP_CLAUSE_CHAIN (c); - c = nc; tree nc2 = OMP_CLAUSE_CHAIN (nc); + if (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH) + { + /* In the specific case we're doing "exit data" on an array + slice of a reference-to-pointer struct component, we will see + DETACH followed by ATTACH_DETACH here. We want to treat that + as a single group. 
In other cases DETACH might represent a + stand-alone "detach" clause, so we don't want to consider + that part of the group. */ + if (nc2 + && OMP_CLAUSE_CODE (nc2) == OMP_CLAUSE_MAP + && OMP_CLAUSE_MAP_KIND (nc2) == GOMP_MAP_ATTACH_DETACH) + goto consume_two_nodes; + else + break; + } if (nc2 && OMP_CLAUSE_CODE (nc2) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION) && OMP_CLAUSE_MAP_KIND (nc2) == GOMP_MAP_ATTACH) { + consume_two_nodes: grp_last_p = &OMP_CLAUSE_CHAIN (nc); c = nc2; - nc2 = OMP_CLAUSE_CHAIN (nc2); + nc = OMP_CLAUSE_CHAIN (nc2); + } + else + { + grp_last_p = &OMP_CLAUSE_CHAIN (c); + c = nc; + nc = nc2; } - nc = nc2; } break; @@ -9573,6 +9567,8 @@ omp_gather_mapping_groups_1 (tree *list_p, vec *groups, grp.mark = UNVISITED; grp.sibling = NULL; grp.deleted = false; + grp.reprocess_struct = false; + grp.fragile = false; grp.next = NULL; groups->safe_push (grp); @@ -9659,6 +9655,7 @@ omp_group_base (omp_mapping_group *grp, unsigned int *chained, case GOMP_MAP_ALWAYS_POINTER: case GOMP_MAP_ATTACH_DETACH: case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DETACH: return *grp->grp_start; default: @@ -9707,6 +9704,8 @@ omp_group_base (omp_mapping_group *grp, unsigned int *chained, *firstprivate = OMP_CLAUSE_DECL (node); node = OMP_CLAUSE_CHAIN (node); } + else if (OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_ATTACH_DETACH) + node = OMP_CLAUSE_CHAIN (node); *chained = num_mappings; return node; } @@ -9758,6 +9757,9 @@ omp_index_mapping_groups_1 (hash_mapreprocess_struct) + continue; + tree fpp; unsigned int chained; tree node = omp_group_base (grp, &chained, &fpp); @@ -10250,6 +10252,89 @@ omp_lastprivate_for_combined_outer_constructs (struct gimplify_omp_ctx *octx, omp_notice_variable (octx, decl, true); } +/* We might have indexed several groups for DECL, e.g. a "TO" mapping and also + a "FIRSTPRIVATE" mapping. Return the one that isn't firstprivate, etc. 
*/ + +static omp_mapping_group * +omp_get_nonfirstprivate_group (hash_map *grpmap, + tree decl, bool allow_deleted = false) +{ + omp_mapping_group **to_group_p = grpmap->get (decl); + + if (!to_group_p) + return NULL; + + omp_mapping_group *to_group = *to_group_p; + + for (; to_group; to_group = to_group->sibling) + { + tree grp_end = to_group->grp_end; + switch (OMP_CLAUSE_MAP_KIND (grp_end)) + { + case GOMP_MAP_FIRSTPRIVATE_POINTER: + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + break; + + default: + if (allow_deleted || !to_group->deleted) + return to_group; + } + } + + return NULL; +} + +/* Return TRUE if the directive (whose clauses are described by the hash table + of mapping groups, GRPMAP) maps DECL explicitly. If TO_SPECIFICALLY is + true, only count TO mappings. If ALLOW_DELETED is true, ignore the + "deleted" flag for groups. If CONTAINED_IN_STRUCT is true, also return + TRUE if DECL is mapped as a member of a whole-struct mapping. */ + +static bool +omp_directive_maps_explicitly (hash_map *grpmap, + tree decl, omp_mapping_group **base_group, + bool to_specifically, bool allow_deleted, + bool contained_in_struct) +{ + omp_mapping_group *decl_group + = omp_get_nonfirstprivate_group (grpmap, decl, allow_deleted); + + *base_group = NULL; + + if (decl_group) + { + tree grp_first = *decl_group->grp_start; + /* We might be called during omp_build_struct_sibling_lists, when + GOMP_MAP_STRUCT might have been inserted at the start of the group. + Skip over that, and also possibly the node after it. 
*/ + if (OMP_CLAUSE_MAP_KIND (grp_first) == GOMP_MAP_STRUCT) + { + grp_first = OMP_CLAUSE_CHAIN (grp_first); + if (OMP_CLAUSE_MAP_KIND (grp_first) == GOMP_MAP_FIRSTPRIVATE_POINTER + || (OMP_CLAUSE_MAP_KIND (grp_first) + == GOMP_MAP_FIRSTPRIVATE_REFERENCE) + || OMP_CLAUSE_MAP_KIND (grp_first) == GOMP_MAP_ATTACH_DETACH) + grp_first = OMP_CLAUSE_CHAIN (grp_first); + } + enum gomp_map_kind first_kind = OMP_CLAUSE_MAP_KIND (grp_first); + if (!to_specifically + || GOMP_MAP_COPY_TO_P (first_kind) + || first_kind == GOMP_MAP_ALLOC) + { + *base_group = decl_group; + return true; + } + } + + if (contained_in_struct + && omp_mapped_by_containing_struct (grpmap, decl, base_group)) + return true; + + return false; +} + /* If we have mappings INNER and OUTER, where INNER is a component access and OUTER is a mapping of the whole containing struct, check that the mappings are compatible. We'll be deleting the inner mapping, so we need to make @@ -10283,18 +10368,23 @@ omp_check_mapping_compatibility (location_t loc, case GOMP_MAP_ALWAYS_FROM: if (inner_kind == GOMP_MAP_FORCE_PRESENT - || inner_kind == GOMP_MAP_ALLOC + || inner_kind == GOMP_MAP_RELEASE || inner_kind == GOMP_MAP_FROM) return true; break; case GOMP_MAP_TO: - case GOMP_MAP_FROM: if (inner_kind == GOMP_MAP_FORCE_PRESENT || inner_kind == GOMP_MAP_ALLOC) return true; break; + case GOMP_MAP_FROM: + if (inner_kind == GOMP_MAP_RELEASE + || inner_kind == GOMP_MAP_FORCE_PRESENT) + return true; + break; + case GOMP_MAP_ALWAYS_TOFROM: case GOMP_MAP_TOFROM: if (inner_kind == GOMP_MAP_FORCE_PRESENT @@ -10316,6 +10406,261 @@ omp_check_mapping_compatibility (location_t loc, return false; } +/* This function handles several cases where clauses on a mapping directive + can interact with each other. + + If we have a FIRSTPRIVATE_POINTER node and we're also mapping the pointer + on the same directive, change the mapping of the first node to + ATTACH_DETACH. 
We should have detected that this will happen already in + c-omp.cc:c_omp_adjust_map_clauses and marked the appropriate decl + as addressable. (If we didn't, bail out.) + + If we have a FIRSTPRIVATE_REFERENCE (for a reference to pointer) and we're + mapping the base pointer also, we may need to change the mapping type to + ATTACH_DETACH and synthesize an alloc node for the reference itself. + + If we have an ATTACH_DETACH node, this is an array section with a pointer + base. If we're mapping the base on the same directive too, we can drop its + mapping. However, if we have a reference to pointer, make other appropriate + adjustments to the mapping nodes instead. + + If we have a component access but we're also mapping the whole of the + containing struct, drop the former access. + + If the expression is a component access, and we're also mapping a base + pointer used in that component access in the same expression, change the + mapping type of the latter to ALLOC (ready for processing by + omp_build_struct_sibling_lists). 
*/ + +void +omp_resolve_clause_dependencies (enum tree_code code, + vec *groups, + hash_map *grpmap) +{ + int i; + omp_mapping_group *grp; + bool repair_chain = false; + + FOR_EACH_VEC_ELT (*groups, i, grp) + { + tree grp_end = grp->grp_end; + tree decl = OMP_CLAUSE_DECL (grp_end); + + gcc_assert (OMP_CLAUSE_CODE (grp_end) == OMP_CLAUSE_MAP); + + switch (OMP_CLAUSE_MAP_KIND (grp_end)) + { + case GOMP_MAP_FIRSTPRIVATE_POINTER: + { + omp_mapping_group *to_group + = omp_get_nonfirstprivate_group (grpmap, decl); + + if (!to_group || to_group == grp) + continue; + + tree grp_first = *to_group->grp_start; + enum gomp_map_kind first_kind = OMP_CLAUSE_MAP_KIND (grp_first); + + if ((GOMP_MAP_COPY_TO_P (first_kind) + || first_kind == GOMP_MAP_ALLOC) + && (OMP_CLAUSE_MAP_KIND (to_group->grp_end) + != GOMP_MAP_FIRSTPRIVATE_POINTER)) + { + gcc_assert (TREE_ADDRESSABLE (OMP_CLAUSE_DECL (grp_end))); + OMP_CLAUSE_SET_MAP_KIND (grp_end, GOMP_MAP_ATTACH_DETACH); + } + } + break; + + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + { + tree ptr = build_fold_indirect_ref (decl); + + omp_mapping_group *to_group + = omp_get_nonfirstprivate_group (grpmap, ptr); + + if (!to_group || to_group == grp) + continue; + + tree grp_first = *to_group->grp_start; + enum gomp_map_kind first_kind = OMP_CLAUSE_MAP_KIND (grp_first); + + if (GOMP_MAP_COPY_TO_P (first_kind) + || first_kind == GOMP_MAP_ALLOC) + { + OMP_CLAUSE_SET_MAP_KIND (grp_end, GOMP_MAP_ATTACH_DETACH); + OMP_CLAUSE_DECL (grp_end) = ptr; + if ((OMP_CLAUSE_CHAIN (*to_group->grp_start) + == to_group->grp_end) + && (OMP_CLAUSE_MAP_KIND (to_group->grp_end) + == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + { + gcc_assert (TREE_ADDRESSABLE + (OMP_CLAUSE_DECL (to_group->grp_end))); + OMP_CLAUSE_SET_MAP_KIND (to_group->grp_end, + GOMP_MAP_ATTACH_DETACH); + + location_t loc = OMP_CLAUSE_LOCATION (to_group->grp_end); + tree alloc + = build_omp_clause (loc, OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (alloc, GOMP_MAP_ALLOC); + tree tmp = build_fold_addr_expr 
(OMP_CLAUSE_DECL + (to_group->grp_end)); + tree char_ptr_type = build_pointer_type (char_type_node); + OMP_CLAUSE_DECL (alloc) + = build2 (MEM_REF, char_type_node, + tmp, + build_int_cst (char_ptr_type, 0)); + OMP_CLAUSE_SIZE (alloc) = TYPE_SIZE_UNIT (TREE_TYPE (tmp)); + + OMP_CLAUSE_CHAIN (alloc) + = OMP_CLAUSE_CHAIN (*to_group->grp_start); + OMP_CLAUSE_CHAIN (*to_group->grp_start) = alloc; + } + } + } + break; + + case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + { + tree base_ptr, referenced_ptr_node = NULL_TREE; + + while (TREE_CODE (decl) == ARRAY_REF) + decl = TREE_OPERAND (decl, 0); + + if (TREE_CODE (decl) == INDIRECT_REF) + decl = TREE_OPERAND (decl, 0); + + /* Only component accesses. */ + if (DECL_P (decl)) + continue; + + /* We want the pointer itself when checking if the base pointer is + mapped elsewhere in the same directive -- if we have a + reference to the pointer, don't use that. */ + + if (TREE_CODE (TREE_TYPE (decl)) == REFERENCE_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == POINTER_TYPE) + { + referenced_ptr_node = OMP_CLAUSE_CHAIN (*grp->grp_start); + base_ptr = OMP_CLAUSE_DECL (referenced_ptr_node); + } + else + base_ptr = decl; + + gomp_map_kind zlas_kind + = (code == OACC_EXIT_DATA || code == OMP_TARGET_EXIT_DATA) + ? GOMP_MAP_DETACH : GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION; + + if (TREE_CODE (TREE_TYPE (base_ptr)) == POINTER_TYPE) + { + /* If we map the base TO, and we're doing an attachment, we can + skip the TO mapping altogether and create an ALLOC mapping + instead, since the attachment will overwrite the device + pointer in that location immediately anyway. Otherwise, + change our mapping to + GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION in case the + attachment target has not been copied to the device already + by some earlier directive. 
*/ + + bool base_mapped_to = false; + + omp_mapping_group *base_group; + + if (omp_directive_maps_explicitly (grpmap, base_ptr, + &base_group, false, true, + false)) + { + if (referenced_ptr_node) + { + base_mapped_to = true; + if ((OMP_CLAUSE_MAP_KIND (base_group->grp_end) + == GOMP_MAP_ATTACH_DETACH) + && (OMP_CLAUSE_CHAIN (*base_group->grp_start) + == base_group->grp_end)) + { + OMP_CLAUSE_CHAIN (*base_group->grp_start) + = OMP_CLAUSE_CHAIN (base_group->grp_end); + base_group->grp_end = *base_group->grp_start; + repair_chain = true; + } + } + else + { + base_group->deleted = true; + OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED (grp_end) = 1; + } + } + + /* We're dealing with a reference to a pointer, and we are + attaching both the reference and the pointer. We know the + reference itself is on the target, because we are going to + create an ALLOC node for it in accumulate_sibling_list. The + pointer might be on the target already or it might not, but + if it isn't then it's not an error, so use + GOMP_MAP_ATTACH_ZLAS for it. */ + if (!base_mapped_to && referenced_ptr_node) + OMP_CLAUSE_SET_MAP_KIND (referenced_ptr_node, zlas_kind); + } + else if (TREE_CODE (TREE_TYPE (base_ptr)) == REFERENCE_TYPE + && (TREE_CODE (TREE_TYPE (TREE_TYPE (base_ptr))) + == ARRAY_TYPE) + && OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION + (*grp->grp_start)) + OMP_CLAUSE_SET_MAP_KIND (grp->grp_end, zlas_kind); + } + break; + + case GOMP_MAP_ATTACH: + /* Ignore standalone attach here. */ + break; + + default: + { + omp_mapping_group *struct_group; + if (omp_mapped_by_containing_struct (grpmap, decl, &struct_group) + && *grp->grp_start == grp_end) + { + omp_check_mapping_compatibility (OMP_CLAUSE_LOCATION (grp_end), + struct_group, grp); + /* Remove the whole of this mapping -- redundant. 
*/ + grp->deleted = true; + } + + tree base = decl; + while ((base = omp_get_base_pointer (base))) + { + omp_mapping_group *base_group; + + if (omp_directive_maps_explicitly (grpmap, base, &base_group, + true, true, false)) + { + tree grp_first = *base_group->grp_start; + OMP_CLAUSE_SET_MAP_KIND (grp_first, GOMP_MAP_ALLOC); + } + } + } + } + } + + if (repair_chain) + { + /* Group start pointers may have become detached from the + OMP_CLAUSE_CHAIN of previous groups if elements were removed from the + end of those groups. Fix that now. */ + tree *new_next = NULL; + FOR_EACH_VEC_ELT (*groups, i, grp) + { + if (new_next) + grp->grp_start = new_next; + + new_next = &OMP_CLAUSE_CHAIN (grp->grp_end); + } + } +} + /* Similar to omp_resolve_clause_dependencies, but for OpenACC. The only clause dependencies we handle for now are struct element mappings and whole-struct mappings on the same directive, and duplicate clause @@ -10533,6 +10878,19 @@ omp_siblist_move_concat_nodes_after (tree first_new, tree *last_new_tail, return continue_at; } +static omp_addr_token * +omp_first_chained_access_token (vec &addr_tokens) +{ + using namespace omp_addr_tokenizer; + int idx = addr_tokens.length () - 1; + gcc_assert (idx >= 0); + if (addr_tokens[idx]->type != ACCESS_METHOD) + return addr_tokens[idx]; + while (idx > 0 && addr_tokens[idx - 1]->type == ACCESS_METHOD) + idx--; + return addr_tokens[idx]; +} + /* Mapping struct members causes an additional set of nodes to be created, starting with GOMP_MAP_STRUCT followed by a number of mappings equal to the number of members being mapped, in order of ascending position (address or @@ -10574,129 +10932,285 @@ static tree * omp_accumulate_sibling_list (enum omp_region_type region_type, enum tree_code code, hash_map - *&struct_map_to_clause, tree *grp_start_p, - tree grp_end, tree *inner) + *&struct_map_to_clause, + hash_map *group_map, + tree *grp_start_p, tree grp_end, + vec &addr_tokens, tree **inner, + bool *fragile_p, bool 
reprocessing_struct, + tree **added_tail) { + using namespace omp_addr_tokenizer; poly_offset_int coffset; poly_int64 cbitpos; tree ocd = OMP_CLAUSE_DECL (grp_end); bool openmp = !(region_type & ORT_ACC); + bool target = (region_type & ORT_TARGET) != 0; tree *continue_at = NULL; while (TREE_CODE (ocd) == ARRAY_REF) ocd = TREE_OPERAND (ocd, 0); - if (INDIRECT_REF_P (ocd)) - ocd = TREE_OPERAND (ocd, 0); + if (*fragile_p) + { + omp_mapping_group *to_group + = omp_get_nonfirstprivate_group (group_map, ocd, true); + + if (to_group) + return NULL; + } + + omp_addr_token *last_token = omp_first_chained_access_token (addr_tokens); + if (last_token->type == ACCESS_METHOD) + { + switch (last_token->u.access_kind) + { + case ACCESS_REF: + case ACCESS_REF_TO_POINTER: + case ACCESS_REF_TO_POINTER_OFFSET: + case ACCESS_INDEXED_REF_TO_ARRAY: + /* We may see either a bare reference or a dereferenced + "convert_from_reference"-like one here. Handle either way. */ + if (TREE_CODE (ocd) == INDIRECT_REF) + ocd = TREE_OPERAND (ocd, 0); + gcc_assert (TREE_CODE (TREE_TYPE (ocd)) == REFERENCE_TYPE); + break; + + default: + ; + } + } tree base = extract_base_bit_offset (ocd, &cbitpos, &coffset); + int base_token; + for (base_token = addr_tokens.length () - 1; base_token >= 0; base_token--) + { + if (addr_tokens[base_token]->type == ARRAY_BASE + || addr_tokens[base_token]->type == STRUCTURE_BASE) + break; + } + + /* The two expressions in the assertion below aren't quite the same: if we + have 'struct_base_decl access_indexed_array' for something like + "myvar[2].x" then base will be "myvar" and addr_tokens[base_token]->expr + will be "myvar[2]" -- the actual base of the structure. + The former interpretation leads to a strange situation where we get + struct(myvar) alloc(myvar[2].ptr1) + That is, the array of structures is kind of treated as one big structure + for the purposes of gathering sibling lists, etc. 
*/ + /* gcc_assert (base == addr_tokens[base_token]->expr); */ + bool ptr = (OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ALWAYS_POINTER); bool attach_detach = ((OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ATTACH_DETACH) || (OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)); - bool attach = (OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ATTACH - || OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_DETACH); - - /* FIXME: If we're not mapping the base pointer in some other clause on this - directive, I think we want to create ALLOC/RELEASE here -- i.e. not - early-exit. */ - if (openmp && attach_detach) - return NULL; if (!struct_map_to_clause || struct_map_to_clause->get (base) == NULL) { tree l = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP); - gomp_map_kind k = attach ? GOMP_MAP_FORCE_PRESENT : GOMP_MAP_STRUCT; - - OMP_CLAUSE_SET_MAP_KIND (l, k); + OMP_CLAUSE_SET_MAP_KIND (l, GOMP_MAP_STRUCT); OMP_CLAUSE_DECL (l) = unshare_expr (base); + OMP_CLAUSE_SIZE (l) = size_int (1); - OMP_CLAUSE_SIZE (l) - = (!attach ? size_int (1) - : (DECL_P (OMP_CLAUSE_DECL (l)) - ? DECL_SIZE_UNIT (OMP_CLAUSE_DECL (l)) - : TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (l))))); if (struct_map_to_clause == NULL) struct_map_to_clause = new hash_map; struct_map_to_clause->put (base, l); + /* On first iterating through the clause list, we insert the struct node + just before the component access node that triggers the initial + omp_accumulate_sibling_list call for a particular sibling list (and + it then forms the first entry in that list). When reprocessing + struct bases that are themselves component accesses, we insert the + struct node on an off-side list to avoid inserting the new + GOMP_MAP_STRUCT into the middle of the old one. */ + tree *insert_node_pos = reprocessing_struct ? 
*added_tail : grp_start_p; + if (ptr || attach_detach) { tree extra_node; tree alloc_node = build_omp_struct_comp_nodes (code, *grp_start_p, grp_end, &extra_node); + tree *tail; OMP_CLAUSE_CHAIN (l) = alloc_node; - tree *insert_node_pos = grp_start_p; - if (extra_node) { OMP_CLAUSE_CHAIN (extra_node) = *insert_node_pos; OMP_CLAUSE_CHAIN (alloc_node) = extra_node; + tail = &OMP_CLAUSE_CHAIN (extra_node); } else - OMP_CLAUSE_CHAIN (alloc_node) = *insert_node_pos; + { + OMP_CLAUSE_CHAIN (alloc_node) = *insert_node_pos; + tail = &OMP_CLAUSE_CHAIN (alloc_node); + } + + /* For OpenMP semantics, we don't want to implicitly allocate + space for the pointer here for non-compute regions (e.g. "enter + data"). A FRAGILE_P node is only being created so that + omp-low.cc is able to rewrite the struct properly. + For references (to pointers), we want to actually allocate the + space for the reference itself in the sorted list following the + struct node. + For pointers, we want to allocate space if we had an explicit + mapping of the attachment point, but not otherwise. */ + if (*fragile_p + || (openmp + && !target + && attach_detach + && TREE_CODE (TREE_TYPE (ocd)) == POINTER_TYPE + && !OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED (grp_end))) + { + if (!lang_GNU_Fortran ()) + /* In Fortran, pointers are dereferenced automatically, but may + be unassociated. So we still want to allocate space for the + pointer (as the base for an attach operation that should be + present in the same directive's clause list also). 
*/ + OMP_CLAUSE_SIZE (alloc_node) = size_zero_node; + OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (alloc_node) = 1; + } *insert_node_pos = l; + + if (reprocessing_struct) + { + /* When reprocessing a struct node group used as the base of a + subcomponent access, if we have a reference-to-pointer base, + we will see: + struct(**ptr) attach(*ptr) + whereas for a non-reprocess-struct group, we see, e.g.: + tofrom(**ptr) attach(*ptr) attach(ptr) + and we create the "alloc" for the second "attach", i.e. + for the reference itself. When reprocessing a struct group we + thus change the pointer attachment into a reference attachment + by stripping the indirection. (The attachment of the + referenced pointer must happen elsewhere, either on the same + directive, or otherwise.) */ + tree adecl = OMP_CLAUSE_DECL (alloc_node); + + if ((TREE_CODE (adecl) == INDIRECT_REF + || (TREE_CODE (adecl) == MEM_REF + && integer_zerop (TREE_OPERAND (adecl, 1)))) + && (TREE_CODE (TREE_TYPE (TREE_OPERAND (adecl, 0))) + == REFERENCE_TYPE) + && (TREE_CODE (TREE_TYPE (TREE_TYPE + (TREE_OPERAND (adecl, 0)))) == POINTER_TYPE)) + OMP_CLAUSE_DECL (alloc_node) = TREE_OPERAND (adecl, 0); + + *added_tail = tail; + } } else { gcc_assert (*grp_start_p == grp_end); - grp_start_p = omp_siblist_insert_node_after (l, grp_start_p); + if (reprocessing_struct) + { + /* If we don't have an attach/detach node, this is a + "target data" directive or similar, not an offload region. + Synthesize an "alloc" node using just the initiating + GOMP_MAP_STRUCT decl. */ + gomp_map_kind k = (code == OMP_TARGET_EXIT_DATA + || code == OACC_EXIT_DATA) + ? 
GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; + tree alloc_node + = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (alloc_node, k); + OMP_CLAUSE_DECL (alloc_node) = unshare_expr (last_token->expr); + OMP_CLAUSE_SIZE (alloc_node) + = TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (alloc_node))); + + OMP_CLAUSE_CHAIN (alloc_node) = OMP_CLAUSE_CHAIN (l); + OMP_CLAUSE_CHAIN (l) = alloc_node; + *insert_node_pos = l; + *added_tail = &OMP_CLAUSE_CHAIN (alloc_node); + } + else + grp_start_p = omp_siblist_insert_node_after (l, insert_node_pos); } - tree noind = omp_strip_indirections (base); + unsigned last_access = base_token + 1; - if (!openmp - && (region_type & ORT_TARGET) - && TREE_CODE (noind) == COMPONENT_REF) + while (last_access + 1 < addr_tokens.length () + && addr_tokens[last_access + 1]->type == ACCESS_METHOD) + last_access++; + + if ((region_type & ORT_TARGET) + && addr_tokens[base_token + 1]->type == ACCESS_METHOD) { - /* The base for this component access is a struct component access - itself. Insert a node to be processed on the next iteration of - our caller's loop, which will subsequently be turned into a new, - inner GOMP_MAP_STRUCT mapping. + bool base_ref = false; + access_method_kinds access_kind + = addr_tokens[last_access]->u.access_kind; - We need to do this else the non-DECL_P base won't be - rewritten correctly in the offloaded region. 
*/ + switch (access_kind) + { + case ACCESS_DIRECT: + case ACCESS_INDEXED_ARRAY: + return NULL; + + case ACCESS_REF: + case ACCESS_REF_TO_POINTER: + case ACCESS_REF_TO_POINTER_OFFSET: + case ACCESS_INDEXED_REF_TO_ARRAY: + base_ref = true; + break; + + default: + ; + } tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), OMP_CLAUSE_MAP); - OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_FORCE_PRESENT); - OMP_CLAUSE_DECL (c2) = unshare_expr (noind); - OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (TREE_TYPE (noind)); - *inner = c2; - return NULL; - } + enum gomp_map_kind mkind; + omp_mapping_group *decl_group; + tree use_base; + switch (access_kind) + { + case ACCESS_POINTER: + case ACCESS_POINTER_OFFSET: + use_base = addr_tokens[last_access]->expr; + break; + case ACCESS_REF_TO_POINTER: + case ACCESS_REF_TO_POINTER_OFFSET: + use_base + = build_fold_indirect_ref (addr_tokens[last_access]->expr); + break; + default: + use_base = addr_tokens[base_token]->expr; + } + bool mapped_to_p + = omp_directive_maps_explicitly (group_map, use_base, &decl_group, + true, false, true); + if (addr_tokens[base_token]->type == STRUCTURE_BASE + && DECL_P (addr_tokens[last_access]->expr) + && !mapped_to_p) + mkind = base_ref ? GOMP_MAP_FIRSTPRIVATE_REFERENCE + : GOMP_MAP_FIRSTPRIVATE_POINTER; + else + mkind = GOMP_MAP_ATTACH_DETACH; - tree sdecl = omp_strip_components_and_deref (base); - - if (POINTER_TYPE_P (TREE_TYPE (sdecl)) && (region_type & ORT_TARGET)) - { - tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), - OMP_CLAUSE_MAP); - bool base_ref - = (INDIRECT_REF_P (base) - && ((TREE_CODE (TREE_TYPE (TREE_OPERAND (base, 0))) - == REFERENCE_TYPE) - || (INDIRECT_REF_P (TREE_OPERAND (base, 0)) - && (TREE_CODE (TREE_TYPE (TREE_OPERAND - (TREE_OPERAND (base, 0), 0))) - == REFERENCE_TYPE)))); - enum gomp_map_kind mkind = base_ref ? 
GOMP_MAP_FIRSTPRIVATE_REFERENCE - : GOMP_MAP_FIRSTPRIVATE_POINTER; OMP_CLAUSE_SET_MAP_KIND (c2, mkind); - OMP_CLAUSE_DECL (c2) = sdecl; + /* If we have a reference to pointer base, we want to attach the + pointer here, not the reference. The reference attachment happens + elsewhere. */ + bool ref_to_ptr + = (access_kind == ACCESS_REF_TO_POINTER + || access_kind == ACCESS_REF_TO_POINTER_OFFSET); + tree sdecl = addr_tokens[last_access]->expr; + tree sdecl_ptr = ref_to_ptr ? build_fold_indirect_ref (sdecl) + : sdecl; + /* For the FIRSTPRIVATE_REFERENCE after the struct node, we + want to use the reference itself for the decl, but we + still want to use the pointer to calculate the bias. */ + OMP_CLAUSE_DECL (c2) = (mkind == GOMP_MAP_ATTACH_DETACH) + ? sdecl_ptr : sdecl; + sdecl = sdecl_ptr; tree baddr = build_fold_addr_expr (base); baddr = fold_convert_loc (OMP_CLAUSE_LOCATION (grp_end), ptrdiff_type_node, baddr); - /* This isn't going to be good enough when we add support for more - complicated lvalue expressions. FIXME. */ - if (TREE_CODE (TREE_TYPE (sdecl)) == REFERENCE_TYPE - && TREE_CODE (TREE_TYPE (TREE_TYPE (sdecl))) == POINTER_TYPE) - sdecl = build_simple_mem_ref (sdecl); tree decladdr = fold_convert_loc (OMP_CLAUSE_LOCATION (grp_end), ptrdiff_type_node, sdecl); OMP_CLAUSE_SIZE (c2) @@ -10705,24 +11219,46 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, /* Insert after struct node. 
*/ OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (l); OMP_CLAUSE_CHAIN (l) = c2; + + if (addr_tokens[base_token]->type == STRUCTURE_BASE + && (addr_tokens[base_token]->u.structure_base_kind + == BASE_COMPONENT_EXPR) + && mkind == GOMP_MAP_ATTACH_DETACH + && addr_tokens[last_access]->u.access_kind != ACCESS_REF) + { + *inner = insert_node_pos; + if (openmp) + *fragile_p = true; + return NULL; + } } + if (addr_tokens[base_token]->type == STRUCTURE_BASE + && (addr_tokens[base_token]->u.structure_base_kind + == BASE_COMPONENT_EXPR) + && addr_tokens[last_access]->u.access_kind == ACCESS_REF) + *inner = insert_node_pos; + return NULL; } else if (struct_map_to_clause) { tree *osc = struct_map_to_clause->get (base); tree *sc = NULL, *scp = NULL; + unsigned HOST_WIDE_INT i, elems = tree_to_uhwi (OMP_CLAUSE_SIZE (*osc)); sc = &OMP_CLAUSE_CHAIN (*osc); /* The struct mapping might be immediately followed by a - FIRSTPRIVATE_POINTER and/or FIRSTPRIVATE_REFERENCE -- if it's an - indirect access or a reference, or both. (This added node is removed - in omp-low.c after it has been processed there.) */ - if (*sc != grp_end - && (OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_FIRSTPRIVATE_POINTER - || OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) + FIRSTPRIVATE_POINTER, FIRSTPRIVATE_REFERENCE or an ATTACH_DETACH -- + if it's an indirect access or a reference, or if the structure base + is not a decl. The FIRSTPRIVATE_* nodes are removed in omp-low.cc + after they have been processed there, and ATTACH_DETACH nodes are + recomputed and moved out of the GOMP_MAP_STRUCT construct once + sibling list building is complete. 
*/ + if (OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_FIRSTPRIVATE_POINTER + || OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_FIRSTPRIVATE_REFERENCE + || OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_ATTACH_DETACH) sc = &OMP_CLAUSE_CHAIN (*sc); - for (; *sc != grp_end; sc = &OMP_CLAUSE_CHAIN (*sc)) + for (i = 0; i < elems; i++, sc = &OMP_CLAUSE_CHAIN (*sc)) if ((ptr || attach_detach) && sc == grp_start_p) break; else if (TREE_CODE (OMP_CLAUSE_DECL (*sc)) != COMPONENT_REF @@ -10754,6 +11290,27 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, break; if (scp) continue; + if ((region_type & ORT_ACC) != 0) + { + /* For OpenACC, allow (ignore) duplicate struct accesses in + the middle of a mapping clause, e.g. "mystruct->foo" in: + copy(mystruct->foo->bar) copy(mystruct->foo->qux). */ + if (reprocessing_struct + && known_eq (coffset, offset) + && known_eq (cbitpos, bitpos)) + return NULL; + } + else if (known_eq (coffset, offset) + && known_eq (cbitpos, bitpos)) + { + /* Having two struct members at the same offset doesn't work, + so make sure we don't. (We're allowed to ignore this. + Should we report the error?) */ + /*error_at (OMP_CLAUSE_LOCATION (grp_end), + "duplicate struct member %qE in map clauses", + OMP_CLAUSE_DECL (grp_end));*/ + return NULL; + } if (maybe_lt (coffset, offset) || (known_eq (coffset, offset) && maybe_lt (cbitpos, bitpos))) @@ -10765,9 +11322,48 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, } } - if (!attach) - OMP_CLAUSE_SIZE (*osc) - = size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc), size_one_node); + OMP_CLAUSE_SIZE (*osc) + = size_binop (PLUS_EXPR, OMP_CLAUSE_SIZE (*osc), size_one_node); + + if (reprocessing_struct) + { + /* If we're reprocessing a struct node, we don't want to do most of + the list manipulation below. We only need to handle the (pointer + or reference) attach/detach case. 
*/ + tree extra_node, alloc_node; + if (attach_detach) + alloc_node = build_omp_struct_comp_nodes (code, *grp_start_p, + grp_end, &extra_node); + else + { + /* If we don't have an attach/detach node, this is a + "target data" directive or similar, not an offload region. + Synthesize an "alloc" node using just the initiating + GOMP_MAP_STRUCT decl. */ + gomp_map_kind k = (code == OMP_TARGET_EXIT_DATA + || code == OACC_EXIT_DATA) + ? GOMP_MAP_RELEASE : GOMP_MAP_ALLOC; + alloc_node + = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (alloc_node, k); + OMP_CLAUSE_DECL (alloc_node) = unshare_expr (last_token->expr); + OMP_CLAUSE_SIZE (alloc_node) + = TYPE_SIZE_UNIT (TREE_TYPE (OMP_CLAUSE_DECL (alloc_node))); + } + + if (scp) + omp_siblist_insert_node_after (alloc_node, scp); + else + { + tree *new_end = omp_siblist_insert_node_after (alloc_node, sc); + if (sc == *added_tail) + *added_tail = new_end; + } + + return NULL; + } + if (ptr || attach_detach) { tree cl = NULL_TREE, extra_node; @@ -10775,6 +11371,18 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, grp_end, &extra_node); tree *tail_chain = NULL; + if (*fragile_p + || (openmp + && !target + && attach_detach + && TREE_CODE (TREE_TYPE (ocd)) == POINTER_TYPE + && !OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED (grp_end))) + { + if (!lang_GNU_Fortran ()) + OMP_CLAUSE_SIZE (alloc_node) = size_zero_node; + OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (alloc_node) = 1; + } + /* Here, we have: grp_end : the last (or only) node in this group. 
@@ -10860,12 +11468,15 @@ omp_build_struct_sibling_lists (enum tree_code code, omp_mapping_group *> **grpmap, tree *list_p) { + using namespace omp_addr_tokenizer; unsigned i; omp_mapping_group *grp; hash_map *struct_map_to_clause = NULL; bool success = true; tree *new_next = NULL; tree *tail = &OMP_CLAUSE_CHAIN ((*groups)[groups->length () - 1].grp_end); + tree added_nodes = NULL_TREE; + tree *added_tail = &added_nodes; auto_vec pre_hwm_groups; FOR_EACH_VEC_ELT (*groups, i, grp) @@ -10873,9 +11484,10 @@ omp_build_struct_sibling_lists (enum tree_code code, tree c = grp->grp_end; tree decl = OMP_CLAUSE_DECL (c); tree grp_end = grp->grp_end; + auto_vec addr_tokens; tree sentinel = OMP_CLAUSE_CHAIN (grp_end); - if (new_next) + if (new_next && !grp->reprocess_struct) grp->grp_start = new_next; new_next = NULL; @@ -10886,7 +11498,7 @@ omp_build_struct_sibling_lists (enum tree_code code, continue; /* Skip groups we marked for deletion in - oacc_resolve_clause_dependencies. */ + {omp,oacc}_resolve_clause_dependencies. */ if (grp->deleted) continue; @@ -10903,6 +11515,39 @@ omp_build_struct_sibling_lists (enum tree_code code, continue; } + tree expr = decl; + + while (TREE_CODE (expr) == ARRAY_REF) + expr = TREE_OPERAND (expr, 0); + + if (!omp_parse_expr (addr_tokens, expr)) + continue; + + omp_addr_token *last_token + = omp_first_chained_access_token (addr_tokens); + + /* A mapping of a reference to a pointer member that doesn't specify an + array section, etc., like this: + *mystruct.ref_to_ptr + should not be processed by the struct sibling-list handling code -- + it just transfers the referenced pointer. + + In contrast, the quite similar-looking construct: + *mystruct.ptr + which is equivalent to e.g. + mystruct.ptr[0] + *does* trigger sibling-list processing. + + An exception for the former case is for "fragile" groups where the + reference itself is not handled otherwise; this is subject to special + handling in omp_accumulate_sibling_list also. 
*/ + + if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE + && last_token->type == ACCESS_METHOD + && last_token->u.access_kind == ACCESS_REF + && !grp->fragile) + continue; + tree d = decl; if (TREE_CODE (d) == ARRAY_REF) { @@ -10931,14 +11576,7 @@ omp_build_struct_sibling_lists (enum tree_code code, omp_mapping_group *wholestruct; if (omp_mapped_by_containing_struct (*grpmap, OMP_CLAUSE_DECL (c), &wholestruct)) - { - if (!(region_type & ORT_ACC) - && *grp_start_p == grp_end) - /* Remove the whole of this mapping -- redundant. */ - grp->deleted = true; - - continue; - } + continue; if (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_TO_PSET && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH @@ -10965,27 +11603,30 @@ omp_build_struct_sibling_lists (enum tree_code code, goto error_out; } - tree inner = NULL_TREE; + tree *inner = NULL; + bool fragile_p = grp->fragile; new_next = omp_accumulate_sibling_list (region_type, code, - struct_map_to_clause, grp_start_p, - grp_end, &inner); + struct_map_to_clause, *grpmap, + grp_start_p, grp_end, addr_tokens, + &inner, &fragile_p, + grp->reprocess_struct, &added_tail); if (inner) { - if (new_next && *new_next == NULL_TREE) - *new_next = inner; - else - *tail = inner; - - OMP_CLAUSE_CHAIN (inner) = NULL_TREE; omp_mapping_group newgrp; - newgrp.grp_start = new_next ? 
new_next : tail; - newgrp.grp_end = inner; + newgrp.grp_start = inner; + if (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (*inner)) + == GOMP_MAP_ATTACH_DETACH) + newgrp.grp_end = OMP_CLAUSE_CHAIN (*inner); + else + newgrp.grp_end = *inner; newgrp.mark = UNVISITED; newgrp.sibling = NULL; newgrp.deleted = false; + newgrp.reprocess_struct = true; + newgrp.fragile = fragile_p; newgrp.next = NULL; groups->safe_push (newgrp); @@ -10996,8 +11637,6 @@ omp_build_struct_sibling_lists (enum tree_code code, *grpmap = omp_reindex_mapping_groups (list_p, groups, &pre_hwm_groups, sentinel); - - tail = &OMP_CLAUSE_CHAIN (inner); } } } @@ -11026,6 +11665,61 @@ omp_build_struct_sibling_lists (enum tree_code code, tail = &OMP_CLAUSE_CHAIN (*tail); } + /* Tack on the struct nodes added during nested struct reprocessing. */ + if (added_nodes) + { + *tail = added_nodes; + tail = added_tail; + } + + /* Now we have finished building the struct sibling lists, reprocess + newly-added "attach" nodes: we need the address of the first + mapped element of each struct sibling list for the bias of the attach + operation -- not necessarily the base address of the whole struct. */ + if (struct_map_to_clause) + for (hash_map::iterator iter + = struct_map_to_clause->begin (); + iter != struct_map_to_clause->end (); + ++iter) + { + tree struct_node = (*iter).second; + gcc_assert (OMP_CLAUSE_CODE (struct_node) == OMP_CLAUSE_MAP); + tree attach = OMP_CLAUSE_CHAIN (struct_node); + + if (OMP_CLAUSE_CODE (attach) != OMP_CLAUSE_MAP + || OMP_CLAUSE_MAP_KIND (attach) != GOMP_MAP_ATTACH_DETACH) + continue; + + OMP_CLAUSE_SET_MAP_KIND (attach, GOMP_MAP_ATTACH); + + /* Sanity check: the standalone attach node will not work if we have + an "enter data" operation (because for those, variables need to be + mapped separately and attach nodes must be grouped together with the + base they attach to). 
We should only have created the + ATTACH_DETACH node after GOMP_MAP_STRUCT for a target region, so + this should never be true. */ + gcc_assert ((region_type & ORT_TARGET) != 0); + + /* This is the first sorted node in the struct sibling list. Use it + to recalculate the correct bias to use. + (&first_node - attach_decl). */ + tree first_node = OMP_CLAUSE_DECL (OMP_CLAUSE_CHAIN (attach)); + first_node = build_fold_addr_expr (first_node); + first_node = fold_convert (ptrdiff_type_node, first_node); + tree attach_decl = OMP_CLAUSE_DECL (attach); + attach_decl = fold_convert (ptrdiff_type_node, attach_decl); + OMP_CLAUSE_SIZE (attach) + = fold_build2 (MINUS_EXPR, ptrdiff_type_node, first_node, + attach_decl); + + /* Remove GOMP_MAP_ATTACH node from after struct node. */ + OMP_CLAUSE_CHAIN (struct_node) = OMP_CLAUSE_CHAIN (attach); + /* ...and re-insert it at the end of our clause list. */ + *tail = attach; + OMP_CLAUSE_CHAIN (attach) = NULL_TREE; + tail = &OMP_CLAUSE_CHAIN (attach); + } + error_out: if (struct_map_to_clause) delete struct_map_to_clause; @@ -11041,6 +11735,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, enum omp_region_type region_type, enum tree_code code) { + using namespace omp_addr_tokenizer; struct gimplify_omp_ctx *ctx, *outer_ctx; tree c; tree *orig_list_p = list_p; @@ -11086,6 +11781,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, hash_map *grpmap; grpmap = omp_index_mapping_groups (groups); + omp_resolve_clause_dependencies (code, groups, grpmap); omp_build_struct_sibling_lists (code, region_type, groups, &grpmap, list_p); @@ -11182,6 +11878,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, const char *check_non_private = NULL; unsigned int flags; tree decl; + auto_vec addr_tokens; switch (OMP_CLAUSE_CODE (c)) { @@ -11488,6 +12185,13 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OMP_CLAUSE_MAP: decl = OMP_CLAUSE_DECL (c); + + if (!omp_parse_expr (addr_tokens, decl)) + { + 
remove = true; + break; + } + if (error_operand_p (decl)) remove = true; switch (code) @@ -11497,13 +12201,18 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, case OACC_DATA: if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) break; + goto check_firstprivate; + case OACC_ENTER_DATA: + case OACC_EXIT_DATA: + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH + && addr_tokens[0]->type == ARRAY_BASE) + remove = true; /* FALLTHRU */ case OMP_TARGET_DATA: case OMP_TARGET_ENTER_DATA: case OMP_TARGET_EXIT_DATA: - case OACC_ENTER_DATA: - case OACC_EXIT_DATA: case OACC_HOST_DATA: + check_firstprivate: if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER || (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)) @@ -11533,8 +12242,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, } } if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) = DECL_P (decl) ? DECL_SIZE_UNIT (decl) - : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + { + /* Sanity check: attach/detach map kinds use the size as a bias, + and it's never right to use the decl size for such + mappings. */ + gcc_assert (OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FORCE_DETACH + && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_ATTACH_DETACH + && (OMP_CLAUSE_MAP_KIND (c) + != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)); + OMP_CLAUSE_SIZE (c) = DECL_P (decl) ? 
DECL_SIZE_UNIT (decl) + : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + } if (gimplify_expr (&OMP_CLAUSE_SIZE (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) { @@ -11555,26 +12275,22 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, GOVD_FIRSTPRIVATE | GOVD_SEEN); } - if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_STRUCT) + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_STRUCT + && (addr_tokens[0]->type == STRUCTURE_BASE + || addr_tokens[0]->type == ARRAY_BASE) + && addr_tokens[0]->u.structure_base_kind == BASE_DECL) { - tree base = omp_strip_components_and_deref (decl); - if (DECL_P (base)) - { - decl = base; - splay_tree_node n - = splay_tree_lookup (ctx->variables, - (splay_tree_key) decl); - if (seen_error () - && n - && (n->value & (GOVD_MAP | GOVD_FIRSTPRIVATE)) != 0) - { - remove = true; - break; - } - flags = GOVD_MAP | GOVD_EXPLICIT; + gcc_assert (addr_tokens[1]->type == ACCESS_METHOD); + /* If we got to this struct via a chain of pointers, maybe we + want to map it implicitly instead. */ + if (omp_access_chain_p (addr_tokens, 1)) + break; + decl = addr_tokens[1]->expr; + flags = GOVD_MAP | GOVD_EXPLICIT; - goto do_add_decl; - } + gcc_assert (addr_tokens[1]->u.access_kind != ACCESS_DIRECT + || TREE_ADDRESSABLE (decl)); + goto do_add_decl; } if (TREE_CODE (decl) == TARGET_EXPR) @@ -11805,6 +12521,42 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, ? GOMP_MAP_DETACH : GOMP_MAP_ATTACH); OMP_CLAUSE_SET_MAP_KIND (c, map_kind); + + /* If we have attach/detach but the decl we have is a pointer to + pointer, we're probably mapping the "base level" array + implicitly. Make sure we don't add the decl as if we mapped + it explicitly. That is, + + int **arr; + [...] + #pragma omp target map(arr[a][b:c]) + + should *not* map "arr" explicitly. That way we get a + zero-length "alloc" mapping for it, and assuming it's been + mapped by some previous directive, etc., things work as they + should. 
*/ + + tree basetype = TREE_TYPE (addr_tokens[0]->expr); + + if (TREE_CODE (basetype) == REFERENCE_TYPE) + basetype = TREE_TYPE (basetype); + + if (code == OMP_TARGET + && addr_tokens[0]->type == ARRAY_BASE + && addr_tokens[0]->u.structure_base_kind == BASE_DECL + && TREE_CODE (basetype) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (basetype)) == POINTER_TYPE) + break; + } + else if ((code == OACC_ENTER_DATA + || code == OACC_EXIT_DATA + || code == OACC_PARALLEL) + && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ATTACH_DETACH) + { + enum gomp_map_kind map_kind = (code == OACC_EXIT_DATA + ? GOMP_MAP_DETACH + : GOMP_MAP_ATTACH); + OMP_CLAUSE_SET_MAP_KIND (c, map_kind); } goto do_add; @@ -12713,7 +13465,7 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) if (TREE_CODE (TREE_TYPE (decl)) == REFERENCE_TYPE && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == POINTER_TYPE) OMP_CLAUSE_DECL (clause) - = build_simple_mem_ref_loc (input_location, decl); + = build_fold_indirect_ref_loc (input_location, decl); OMP_CLAUSE_DECL (clause) = build2 (MEM_REF, char_type_node, OMP_CLAUSE_DECL (clause), build_int_cst (build_pointer_type (char_type_node), 0)); @@ -12721,7 +13473,16 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data) OMP_CLAUSE_SIZE (nc) = size_zero_node; OMP_CLAUSE_SET_MAP_KIND (clause, GOMP_MAP_ALLOC); OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (clause) = 1; - OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_FIRSTPRIVATE_POINTER); + tree dtype = TREE_TYPE (decl); + if (TREE_CODE (dtype) == REFERENCE_TYPE) + dtype = TREE_TYPE (dtype); + /* FIRSTPRIVATE_POINTER doesn't work well if we have a + multiply-indirected pointer. 
*/ + if (TREE_CODE (dtype) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (dtype)) == POINTER_TYPE) + OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_POINTER); + else + OMP_CLAUSE_SET_MAP_KIND (nc, GOMP_MAP_FIRSTPRIVATE_POINTER); OMP_CLAUSE_CHAIN (nc) = chain; OMP_CLAUSE_CHAIN (clause) = nc; struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp; diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index 8241574651d9..7f1ad0f4e452 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -45,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #include "data-streamer.h" #include "streamer-hooks.h" #include "opts.h" +#include "omp-general.h" +#include "tree-pretty-print.h" enum omp_requires omp_requires_mask; @@ -3155,4 +3157,427 @@ omp_runtime_api_call (const_tree fndecl) return omp_runtime_api_procname (IDENTIFIER_POINTER (declname)); } +namespace omp_addr_tokenizer { + +/* We scan an expression by recursive descent, and build a vector of + "omp_addr_token *" pointers representing a "parsed" version of the + expression. The grammar we use is something like this: + + expr0:: + expr [section-access] + + expr:: + structured-expr access-method + | array-base access-method + + structured-expr:: + structure-base component-selector + + arbitrary-expr:: + (anything else) + + structure-base:: + DECL access-method + | structured-expr access-method + | arbitrary-expr access-method + + array-base:: + DECL + | arbitrary-expr + + access-method:: + DIRECT + | REF + | POINTER + | REF_TO_POINTER + | POINTER_OFFSET + | REF_TO_POINTER_OFFSET + | INDEXED_ARRAY + | INDEXED_REF_TO_ARRAY + | index-expr + + index-expr:: + INDEX_EXPR access-method + + component-selector:: + component-selector COMPONENT_REF + | component-selector ARRAY_REF + | COMPONENT_REF + + This tokenized form is then used both in parsing, for OpenMP clause + expansion (for C and C++) and in gimplify.cc for sibling-list handling + (for C, C++ and Fortran). 
*/ + +omp_addr_token::omp_addr_token (token_type t, tree e) + : type(t), expr(e) +{ +} + +omp_addr_token::omp_addr_token (access_method_kinds k, tree e) + : type(ACCESS_METHOD), expr(e) +{ + u.access_kind = k; +} + +omp_addr_token::omp_addr_token (token_type t, structure_base_kinds k, tree e) + : type(t), expr(e) +{ + u.structure_base_kind = k; +} + +static bool +omp_parse_component_selector (tree *expr0) +{ + tree expr = *expr0; + tree last_component = NULL_TREE; + + while (TREE_CODE (expr) == COMPONENT_REF + || TREE_CODE (expr) == ARRAY_REF) + { + if (TREE_CODE (expr) == COMPONENT_REF) + last_component = expr; + + expr = TREE_OPERAND (expr, 0); + + if (TREE_CODE (TREE_TYPE (expr)) == REFERENCE_TYPE) + break; + } + + if (!last_component) + return false; + + *expr0 = last_component; + return true; +} + +/* This handles references that have had convert_from_reference called on + them, and also those that haven't. */ + +static bool +omp_parse_ref (tree *expr0) +{ + tree expr = *expr0; + + if (TREE_CODE (TREE_TYPE (expr)) == REFERENCE_TYPE) + return true; + else if ((TREE_CODE (expr) == INDIRECT_REF + || (TREE_CODE (expr) == MEM_REF + && integer_zerop (TREE_OPERAND (expr, 1)))) + && TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) == REFERENCE_TYPE) + { + *expr0 = TREE_OPERAND (expr, 0); + return true; + } + + return false; +} + +static bool +omp_parse_pointer (tree *expr0, bool *has_offset) +{ + tree expr = *expr0; + + *has_offset = false; + + if ((TREE_CODE (expr) == INDIRECT_REF + || (TREE_CODE (expr) == MEM_REF + && integer_zerop (TREE_OPERAND (expr, 1)))) + && TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) == POINTER_TYPE) + { + expr = TREE_OPERAND (expr, 0); + + /* The Fortran FE sometimes emits a no-op cast here. 
*/ + STRIP_NOPS (expr); + + while (1) + { + if (TREE_CODE (expr) == COMPOUND_EXPR) + { + expr = TREE_OPERAND (expr, 1); + STRIP_NOPS (expr); + } + else if (TREE_CODE (expr) == SAVE_EXPR) + expr = TREE_OPERAND (expr, 0); + else if (TREE_CODE (expr) == POINTER_PLUS_EXPR) + { + *has_offset = true; + expr = TREE_OPERAND (expr, 0); + } + else + break; + } + + STRIP_NOPS (expr); + + *expr0 = expr; + return true; + } + + return false; +} + +static bool +omp_parse_access_method (tree *expr0, enum access_method_kinds *kind) +{ + tree expr = *expr0; + bool has_offset; + + if (omp_parse_ref (&expr)) + *kind = ACCESS_REF; + else if (omp_parse_pointer (&expr, &has_offset)) + { + if (omp_parse_ref (&expr)) + *kind = has_offset ? ACCESS_REF_TO_POINTER_OFFSET + : ACCESS_REF_TO_POINTER; + else + *kind = has_offset ? ACCESS_POINTER_OFFSET : ACCESS_POINTER; + } + else if (TREE_CODE (expr) == ARRAY_REF) + { + while (TREE_CODE (expr) == ARRAY_REF) + expr = TREE_OPERAND (expr, 0); + if (omp_parse_ref (&expr)) + *kind = ACCESS_INDEXED_REF_TO_ARRAY; + else + *kind = ACCESS_INDEXED_ARRAY; + } + else + *kind = ACCESS_DIRECT; + + STRIP_NOPS (expr); + + *expr0 = expr; + return true; +} + +static bool +omp_parse_access_methods (vec &addr_tokens, tree *expr0) +{ + tree expr = *expr0; + enum access_method_kinds kind; + tree am_expr; + + if (omp_parse_access_method (&expr, &kind)) + am_expr = expr; + + if (TREE_CODE (expr) == INDIRECT_REF + || TREE_CODE (expr) == MEM_REF + || TREE_CODE (expr) == ARRAY_REF) + omp_parse_access_methods (addr_tokens, &expr); + + addr_tokens.safe_push (new omp_addr_token (kind, am_expr)); + + *expr0 = expr; + return true; +} + +static bool omp_parse_structured_expr (vec &, tree *); + +static bool +omp_parse_structure_base (vec &addr_tokens, + tree *expr0, structure_base_kinds *kind, + vec &base_access_tokens, + bool allow_structured = true) +{ + tree expr = *expr0; + + if (allow_structured) + omp_parse_access_methods (base_access_tokens, &expr); + + if (DECL_P (expr)) 
+ { + *kind = BASE_DECL; + return true; + } + + if (allow_structured && omp_parse_structured_expr (addr_tokens, &expr)) + { + *kind = BASE_COMPONENT_EXPR; + *expr0 = expr; + return true; + } + + *kind = BASE_ARBITRARY_EXPR; + *expr0 = expr; + return true; +} + +static bool +omp_parse_structured_expr (vec &addr_tokens, tree *expr0) +{ + tree expr = *expr0; + tree base_component = NULL_TREE; + structure_base_kinds struct_base_kind; + auto_vec base_access_tokens; + + if (omp_parse_component_selector (&expr)) + base_component = expr; + else + return false; + + gcc_assert (TREE_CODE (expr) == COMPONENT_REF); + expr = TREE_OPERAND (expr, 0); + + tree structure_base = expr; + + if (!omp_parse_structure_base (addr_tokens, &expr, &struct_base_kind, + base_access_tokens)) + return false; + + addr_tokens.safe_push (new omp_addr_token (STRUCTURE_BASE, struct_base_kind, + structure_base)); + addr_tokens.safe_splice (base_access_tokens); + addr_tokens.safe_push (new omp_addr_token (COMPONENT_SELECTOR, + base_component)); + + *expr0 = expr; + + return true; +} + +static bool +omp_parse_array_expr (vec &addr_tokens, tree *expr0) +{ + tree expr = *expr0; + structure_base_kinds s_kind; + auto_vec base_access_tokens; + + if (!omp_parse_structure_base (addr_tokens, &expr, &s_kind, + base_access_tokens, false)) + return false; + + addr_tokens.safe_push (new omp_addr_token (ARRAY_BASE, s_kind, expr)); + addr_tokens.safe_splice (base_access_tokens); + + *expr0 = expr; + return true; +} + +/* Return TRUE if the ACCESS_METHOD token at index 'i' has a further + ACCESS_METHOD chained after it (e.g., if we're processing an expression + containing multiple pointer indirections). 
*/ + +bool +omp_access_chain_p (vec &addr_tokens, unsigned i) +{ + gcc_assert (addr_tokens[i]->type == ACCESS_METHOD); + return (i + 1 < addr_tokens.length () + && addr_tokens[i + 1]->type == ACCESS_METHOD); +} + +/* Return the address of the object accessed by the ACCESS_METHOD token + at 'i': either of the next access method's expr, or of EXPR if we're at + the end of the list of tokens. */ + +tree +omp_accessed_addr (vec &addr_tokens, unsigned i, tree expr) +{ + if (i + 1 < addr_tokens.length ()) + return build_fold_addr_expr (addr_tokens[i + 1]->expr); + else + return build_fold_addr_expr (expr); +} + +} /* namespace omp_addr_tokenizer. */ + +bool +omp_parse_expr (vec &addr_tokens, tree expr) +{ + using namespace omp_addr_tokenizer; + auto_vec expr_access_tokens; + + if (!omp_parse_access_methods (expr_access_tokens, &expr)) + return false; + + if (omp_parse_structured_expr (addr_tokens, &expr)) + ; + else if (omp_parse_array_expr (addr_tokens, &expr)) + ; + else + return false; + + addr_tokens.safe_splice (expr_access_tokens); + + return true; +} + +DEBUG_FUNCTION void +debug_omp_tokenized_addr (vec &addr_tokens, + bool with_exprs) +{ + using namespace omp_addr_tokenizer; + const char *sep = with_exprs ? 
" " : ""; + + for (auto e : addr_tokens) + { + const char *pfx = ""; + + fputs (sep, stderr); + + switch (e->type) + { + case COMPONENT_SELECTOR: + fputs ("component_selector", stderr); + break; + case ACCESS_METHOD: + switch (e->u.access_kind) + { + case ACCESS_DIRECT: + fputs ("access_direct", stderr); + break; + case ACCESS_REF: + fputs ("access_ref", stderr); + break; + case ACCESS_POINTER: + fputs ("access_pointer", stderr); + break; + case ACCESS_POINTER_OFFSET: + fputs ("access_pointer_offset", stderr); + break; + case ACCESS_REF_TO_POINTER: + fputs ("access_ref_to_pointer", stderr); + break; + case ACCESS_REF_TO_POINTER_OFFSET: + fputs ("access_ref_to_pointer_offset", stderr); + break; + case ACCESS_INDEXED_ARRAY: + fputs ("access_indexed_array", stderr); + break; + case ACCESS_INDEXED_REF_TO_ARRAY: + fputs ("access_indexed_ref_to_array", stderr); + break; + } + break; + case ARRAY_BASE: + case STRUCTURE_BASE: + pfx = e->type == ARRAY_BASE ? "array_" : "struct_"; + switch (e->u.structure_base_kind) + { + case BASE_DECL: + fprintf (stderr, "%sbase_decl", pfx); + break; + case BASE_COMPONENT_EXPR: + fputs ("base_component_expr", stderr); + break; + case BASE_ARBITRARY_EXPR: + fprintf (stderr, "%sbase_arbitrary_expr", pfx); + break; + } + break; + } + if (with_exprs) + { + fputs (" [", stderr); + print_generic_expr (stderr, e->expr); + fputc (']', stderr); + sep = ",\n "; + } + else + sep = " "; + } + + fputs ("\n", stderr); +} + + #include "gt-omp-general.h" diff --git a/gcc/omp-general.h b/gcc/omp-general.h index 1a52bfdb56b7..759e84738c65 100644 --- a/gcc/omp-general.h +++ b/gcc/omp-general.h @@ -153,4 +153,73 @@ get_openacc_privatization_dump_flags () extern tree omp_build_component_ref (tree obj, tree field); +namespace omp_addr_tokenizer { + +/* These are the ways of accessing a variable that have special-case handling + in the middle end (gimplify, omp-lower, etc.). */ + +/* These are the kinds of access that an ACCESS_METHOD token can represent. 
*/ + +enum access_method_kinds +{ + ACCESS_DIRECT, + ACCESS_REF, + ACCESS_POINTER, + ACCESS_REF_TO_POINTER, + ACCESS_POINTER_OFFSET, + ACCESS_REF_TO_POINTER_OFFSET, + ACCESS_INDEXED_ARRAY, + ACCESS_INDEXED_REF_TO_ARRAY +}; + +/* These are the kinds that a STRUCTURE_BASE or ARRAY_BASE (except + BASE_COMPONENT_EXPR) can represent. */ + +enum structure_base_kinds +{ + BASE_DECL, + BASE_COMPONENT_EXPR, + BASE_ARBITRARY_EXPR +}; + +/* The coarse type for an address token. These can have subtypes for + ARRAY_BASE or STRUCTURE_BASE (structure_base_kinds) or ACCESS_METHOD + (access_method_kinds). */ + +enum token_type +{ + ARRAY_BASE, + STRUCTURE_BASE, + COMPONENT_SELECTOR, + ACCESS_METHOD +}; + +/* The struct that forms a single token of an address expression as parsed by + omp_parse_expr. These are typically held in a vec after parsing. */ + +struct omp_addr_token +{ + enum token_type type; + tree expr; + + union + { + access_method_kinds access_kind; + structure_base_kinds structure_base_kind; + } u; + + omp_addr_token (token_type, tree); + omp_addr_token (access_method_kinds, tree); + omp_addr_token (token_type, structure_base_kinds, tree); +}; + +extern bool omp_access_chain_p (vec &, unsigned); +extern tree omp_accessed_addr (vec &, unsigned, tree); + +} + +typedef omp_addr_tokenizer::omp_addr_token omp_addr_token; + +extern bool omp_parse_expr (vec &, tree); + #endif /* GCC_OMP_GENERAL_H */ diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index dd802ca37a6e..9816590ff8c5 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -1604,10 +1604,13 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) { /* If this is an offloaded region, an attach operation should only exist when the pointer variable is mapped in a prior - clause. + clause. An exception is if we have a reference (to pointer): + in that case we should have mapped "*decl" in a previous + mapping instead of "decl". Skip the assertion in that case. 
If we had an error, we may not have attempted to sort clauses properly, so avoid the test. */ - if (is_gimple_omp_offloaded (ctx->stmt) + if (TREE_CODE (TREE_TYPE (decl)) != REFERENCE_TYPE + && is_gimple_omp_offloaded (ctx->stmt) && !seen_error ()) gcc_assert (maybe_lookup_decl (decl, ctx) diff --git a/gcc/testsuite/c-c++-common/gomp/clauses-2.c b/gcc/testsuite/c-c++-common/gomp/clauses-2.c index bbc8fb4e32bf..8f98d57a312f 100644 --- a/gcc/testsuite/c-c++-common/gomp/clauses-2.c +++ b/gcc/testsuite/c-c++-common/gomp/clauses-2.c @@ -11,7 +11,7 @@ foo (int *p, int q, struct S t, int i, int j, int k, int l) bar (p); #pragma omp target firstprivate (p), map (p[0]) /* { dg-error "appears more than once in data clauses" } */ bar (p); - #pragma omp target map (p[0]) map (p) /* { dg-error "appears both in data and map clauses" } */ + #pragma omp target map (p[0]) map (p) bar (p); #pragma omp target map (p) , map (p[0]) bar (p); diff --git a/gcc/testsuite/c-c++-common/gomp/target-50.c b/gcc/testsuite/c-c++-common/gomp/target-50.c index 41f1d37845cf..a30a25e08936 100644 --- a/gcc/testsuite/c-c++-common/gomp/target-50.c +++ b/gcc/testsuite/c-c++-common/gomp/target-50.c @@ -17,7 +17,7 @@ int main() #pragma omp target map(tofrom: tmp->arr[0:10]) map(to: tmp->arr) { } -/* { dg-final { scan-tree-dump-times {map\(struct:\*tmp \[len: 1\]\) map\(to:tmp[._0-9]*->arr \[len: [0-9]+\]\) map\(tofrom:\*_[0-9]+ \[len: [0-9]+\]\) map\(attach:tmp[._0-9]*->arr \[bias: 0\]\)} 2 "gimple" { target { ! { nvptx*-*-* amdgcn*-*-* } } } } } */ +/* { dg-final { scan-tree-dump-times {map\(struct:\*tmp \[len: 1\]\) map\(alloc:tmp[._0-9]*->arr \[len: [0-9]+\]\) map\(tofrom:\*_[0-9]+ \[len: [0-9]+\]\) map\(attach:tmp[._0-9]*->arr \[bias: 0\]\)} 2 "gimple" { target { ! 
{ nvptx*-*-* amdgcn*-*-* } } } } } */ return 0; } diff --git a/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c b/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c index ce766d29e2dc..4913d338e5f9 100644 --- a/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c +++ b/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c @@ -21,4 +21,5 @@ void func (struct foo *f, int n, int m) #pragma omp target enter data map (to: f->bars[n].vectors[:f->bars[n].num_vectors]) } -/* { dg-final { scan-tree-dump-times "map\\(to:\\*_\[0-9\]+ \\\[len: _\[0-9\]+\\\]\\) map\\(attach:\[^-\]+->vectors \\\[bias: \[^\]\]+\\\]\\)" 3 "gimple" } } */ +/* { dg-final { scan-tree-dump-times {map\(struct:\*f \[len: 1\]\) map\(alloc:[a-z0-9\._]+->vectors \[len: 0\]\) map\(to:\*_[0-9]+ \[len: _[0-9]+\]\) map\(attach:[a-z0-9\._]+->vectors \[bias: [^\]]+\]\) map\(attach:\*_[0-9]+ \[bias: _[0-9]+\]\)} 1 "gimple" } } */ +/* { dg-final { scan-tree-dump-times {map\(struct:\*\(f->bars \+ \(sizetype\) \(\([^\)]+\) n \* 16\)\) \[len: 1\]\) map\(alloc:[a-z0-9\._]+->vectors \[len: 0\]\) map\(to:\*_[0-9]+ \[len: _[0-9]+\]\) map\(attach:[a-z0-9\._]+->vectors \[bias: [^\]]+\]\)} 2 "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c b/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c index 3aa1a8fc55ec..222272df5b1e 100644 --- a/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c +++ b/gcc/testsuite/c-c++-common/gomp/target-implicit-map-2.c @@ -49,4 +49,5 @@ main (void) /* { dg-final { scan-tree-dump {#pragma omp target num_teams.* map\(tofrom:a \[len: [0-9]+\]\[implicit\]\)} "gimple" } } */ -/* { dg-final { scan-tree-dump {#pragma omp target num_teams.* map\(tofrom:a \[len: [0-9]+\]\[implicit\]\) map\(tofrom:\*_[0-9]+ \[len: [0-9]+\]\) map\(attach:a\.ptr \[bias: 0\]\)} "gimple" } } */ +/* { dg-final { scan-tree-dump {#pragma omp target num_teams.* map\(struct:a \[len: 1\]\) map\(alloc:a\.ptr \[len: [0-9]+\]\) map\(tofrom:\*_[0-9]+ \[len: [0-9]+\]\) 
map\(attach:a\.ptr \[bias: 0\]\)} "gimple" } } */ +/* { dg-final { scan-tree-dump-not {map\(struct:a \[len: 1\]\) map\(alloc:a\.ptr \[len: 0\]\)} "gimple" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/static-component-1.C b/gcc/testsuite/g++.dg/gomp/static-component-1.C new file mode 100644 index 000000000000..c2f959335674 --- /dev/null +++ b/gcc/testsuite/g++.dg/gomp/static-component-1.C @@ -0,0 +1,23 @@ +/* { dg-do compile } */ + +/* Types with static members should be mappable. */ + +struct A { + static int x[10]; +}; + +struct B { + A a; +}; + +int +main (int argc, char *argv[]) +{ + B *b = new B; +#pragma omp target map(b->a) + ; + B bb; +#pragma omp target map(bb.a) + ; + delete b; +} diff --git a/gcc/testsuite/gcc.dg/gomp/target-3.c b/gcc/testsuite/gcc.dg/gomp/target-3.c index 3e7921270c92..3d5e05f85710 100644 --- a/gcc/testsuite/gcc.dg/gomp/target-3.c +++ b/gcc/testsuite/gcc.dg/gomp/target-3.c @@ -13,4 +13,4 @@ void foo (struct S *s) #pragma omp target enter data map (alloc: s->a, s->b) } -/* { dg-final { scan-tree-dump-times "map\\(struct:\\*s \\\[len: 2\\\]\\) map\\(alloc:s->a \\\[len: \[0-9\]+\\\]\\) map\\(alloc:s->b \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map\\(struct:\\*s \\\[len: 2\\\]\\) map\\(alloc:s\[\\._0-9\]+->a \\\[len: \[0-9\]+\\\]\\) map\\(alloc:s\[\\._0-9\]+->b \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ diff --git a/gcc/testsuite/gfortran.dg/gomp/map-9.f90 b/gcc/testsuite/gfortran.dg/gomp/map-9.f90 index b770b931beef..f930a49d9fff 100644 --- a/gcc/testsuite/gfortran.dg/gomp/map-9.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/map-9.f90 @@ -2,7 +2,7 @@ ! PR fortran/108545 -! { dg-final { scan-tree-dump "#pragma omp target enter data map\\(struct:x \\\[len: 1\\\]\\) map\\(always,to:x.a \\\[len: \[0-9\]+\\\]\\) map\\(to:MEM \\\[\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\)_\[0-9\]+] \\\[len: _\[0-9\]+\\\]\\) map\\(always_pointer:x.a.data \\\[pointer assign, bias: 0\\\]\\)" "omplower" } } +! 
{ dg-final { scan-tree-dump "#pragma omp target enter data map\\(struct:x \\\[len: 1\\\]\\) map\\(always,to:x\.a \\\[len: \[0-9\]+\\\]\\) map\\(to:MEM \\\[\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\)_\[0-9\]+] \\\[len: _\[0-9\]+\\\]\\) map\\(attach:x\.a\.data \\\[bias: 0\\\]\\)" "omplower" } } program p type t diff --git a/gcc/tree.h b/gcc/tree.h index 59af8920f024..986e7604d931 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1827,6 +1827,10 @@ class auto_suppress_location_wrappers NOTE: this is different than OMP_CLAUSE_MAP_IMPLICIT. */ #define OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)->base.deprecated_flag) +/* Nonzero for an attach/detach node whose decl was explicitly mapped on the + same directive. */ +#define OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED(NODE) \ + TREE_STATIC (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) /* Flag that 'OMP_CLAUSE_DECL (NODE)' is to be made addressable during OMP lowering. */ diff --git a/libgomp/target.c b/libgomp/target.c index 0637d34f1258..f435521c2dd0 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -703,7 +703,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, if (n == NULL) { if (allow_zero_length_array_sections) - cur_node.tgt_offset = 0; + cur_node.tgt_offset = cur_node.host_start; else { gomp_mutex_unlock (&devicep->lock); @@ -742,7 +742,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start + sizes[i]; - splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); + splay_tree_key n2 = gomp_map_0len_lookup (mem_map, &cur_node); kind = get_kind (short_mapkind, kinds, i); implicit = get_implicit (short_mapkind, kinds, i); if (n2 @@ -839,8 +839,20 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if ((void *) target == NULL) { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("attempt to attach null pointer"); + /* As a special case, allow 
attaching NULL host pointers. This + allows e.g. unassociated Fortran pointers to be mapped + properly. */ + data = 0; + + gomp_debug (1, + "%s: attaching NULL host pointer, target %p " + "(struct base %p)\n", __FUNCTION__, (void *) devptr, + (void *) (n->tgt->tgt_start + n->tgt_offset)); + + gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, + sizeof (void *), true, cbufp); + + return; } s.host_start = target + bias; @@ -851,9 +863,8 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, { if (allow_zero_length_array_sections) /* When allowing attachment to zero-length array sections, we - allow attaching to NULL pointers when the target region is not - mapped. */ - data = 0; + copy the host pointer when the target region is not mapped. */ + data = target; else { gomp_mutex_unlock (&devicep->lock); @@ -1097,7 +1108,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].key = NULL; if (!aq && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) - & typemask)) + & typemask) + && sizes[i] != 0) gomp_coalesce_buf_add (&cbuf, tgt_size - cur_node.host_end + (uintptr_t) hostaddrs[i], @@ -1464,7 +1476,17 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, + sizes[last]; if (tgt->list[first].key != NULL) continue; + if (sizes[last] == 0) + cur_node.host_end++; n = splay_tree_lookup (mem_map, &cur_node); + if (sizes[last] == 0) + cur_node.host_end--; + if (n == NULL && cur_node.host_start == cur_node.host_end) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Struct pointer member not mapped (%p)", + (void*) hostaddrs[first]); + } if (n == NULL) { size_t align = (size_t) 1 << (kind >> rshift); diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-3.C b/libgomp/testsuite/libgomp.c++/baseptrs-3.C new file mode 100644 index 000000000000..39a48a40920a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/baseptrs-3.C @@ -0,0 +1,275 @@ +#include +#include +#include + +struct sa0 +{ + int *ptr; +}; + +struct sb0 +{ + int 
arr[10]; +}; + +struct sc0 +{ + sa0 a; + sb0 b; + sc0 (sa0 &my_a, sb0 &my_b) : a(my_a), b(my_b) {} +}; + +void +foo0 () +{ + sa0 my_a; + sb0 my_b; + + my_a.ptr = (int *) malloc (sizeof (int) * 10); + sc0 my_c(my_a, my_b); + + memset (my_c.a.ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c.a.ptr, my_c.a.ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_c.a.ptr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c.a.ptr[i] == i); + + memset (my_c.b.arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c.b.arr[:10]) + { + for (int i = 0; i < 10; i++) + my_c.b.arr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c.b.arr[i] == i); + + free (my_a.ptr); +} + +struct sa +{ + int *ptr; +}; + +struct sb +{ + int arr[10]; +}; + +struct sc +{ + sa &a; + sb &b; + sc (sa &my_a, sb &my_b) : a(my_a), b(my_b) {} +}; + +void +foo () +{ + sa my_a; + sb my_b; + + my_a.ptr = (int *) malloc (sizeof (int) * 10); + sc my_c(my_a, my_b); + + memset (my_c.a.ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c.a.ptr, my_c.a.ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_c.a.ptr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c.a.ptr[i] == i); + + memset (my_c.b.arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c.b.arr[:10]) + { + for (int i = 0; i < 10; i++) + my_c.b.arr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c.b.arr[i] == i); + + free (my_a.ptr); +} + +void +bar () +{ + sa my_a; + sb my_b; + + my_a.ptr = (int *) malloc (sizeof (int) * 10); + sc my_c(my_a, my_b); + sc &my_cref = my_c; + + memset (my_cref.a.ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_cref.a.ptr, my_cref.a.ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_cref.a.ptr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_cref.a.ptr[i] == i); + + memset (my_cref.b.arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_cref.b.arr[:10]) + { + for (int i = 0; i < 10; i++) + my_cref.b.arr[i] = i; + } + + for (int i = 0; i < 10; i++) 
+ assert (my_cref.b.arr[i] == i); + + free (my_a.ptr); +} + +struct scp0 +{ + sa *a; + sb *b; + scp0 (sa *my_a, sb *my_b) : a(my_a), b(my_b) {} +}; + +void +foop0 () +{ + sa *my_a = new sa; + sb *my_b = new sb; + + my_a->ptr = new int[10]; + scp0 *my_c = new scp0(my_a, my_b); + + memset (my_c->a->ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c->a, my_c->a[:1], my_c->a->ptr, my_c->a->ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_c->a->ptr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c->a->ptr[i] == i); + + memset (my_c->b->arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c->b, my_c->b[:1], my_c->b->arr[:10]) + { + for (int i = 0; i < 10; i++) + my_c->b->arr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c->b->arr[i] == i); + + delete[] my_a->ptr; + delete my_a; + delete my_b; +} + +struct scp +{ + sa *&a; + sb *&b; + scp (sa *&my_a, sb *&my_b) : a(my_a), b(my_b) {} +}; + +void +foop () +{ + sa *my_a = new sa; + sb *my_b = new sb; + + my_a->ptr = new int[10]; + scp *my_c = new scp(my_a, my_b); + + memset (my_c->a->ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c->a, my_c->a[:1], my_c->a->ptr, my_c->a->ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_c->a->ptr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c->a->ptr[i] == i); + + memset (my_c->b->arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_c->b, my_c->b[:1], my_c->b->arr[:10]) + { + for (int i = 0; i < 10; i++) + my_c->b->arr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_c->b->arr[i] == i); + + delete[] my_a->ptr; + delete my_a; + delete my_b; +} + +void +barp () +{ + sa *my_a = new sa; + sb *my_b = new sb; + + my_a->ptr = new int[10]; + scp *my_c = new scp(my_a, my_b); + scp *&my_cref = my_c; + + memset (my_cref->a->ptr, 0, sizeof (int) * 10); + + #pragma omp target map (my_cref->a, my_cref->a[:1], my_cref->a->ptr, \ + my_cref->a->ptr[:10]) + { + for (int i = 0; i < 10; i++) + my_cref->a->ptr[i] = i; + } + + for (int 
i = 0; i < 10; i++) + assert (my_cref->a->ptr[i] == i); + + memset (my_cref->b->arr, 0, sizeof (int) * 10); + + #pragma omp target map (my_cref->b, my_cref->b[:1], my_cref->b->arr[:10]) + { + for (int i = 0; i < 10; i++) + my_cref->b->arr[i] = i; + } + + for (int i = 0; i < 10; i++) + assert (my_cref->b->arr[i] == i); + + delete[] my_a->ptr; + delete my_a; + delete my_b; +} + +int main (int argc, char *argv[]) +{ + foo0 (); + foo (); + bar (); + foop0 (); + foop (); + barp (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-4.C b/libgomp/testsuite/libgomp.c++/baseptrs-4.C new file mode 100644 index 000000000000..196029ac1868 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/baseptrs-4.C @@ -0,0 +1,3154 @@ +// { dg-do run } + +#include +#include + +#define MAP_DECLS + +#define NONREF_DECL_BASE +#define REF_DECL_BASE +#define PTR_DECL_BASE +#define REF2PTR_DECL_BASE + +#define ARRAY_DECL_BASE +// Needs map clause "lvalue"-parsing support. +//#define REF2ARRAY_DECL_BASE +#define PTR_OFFSET_DECL_BASE +// Needs map clause "lvalue"-parsing support. +//#define REF2PTR_OFFSET_DECL_BASE + +#define MAP_SECTIONS + +#define NONREF_DECL_MEMBER_SLICE +#define NONREF_DECL_MEMBER_SLICE_BASEPTR +#define REF_DECL_MEMBER_SLICE +#define REF_DECL_MEMBER_SLICE_BASEPTR +#define PTR_DECL_MEMBER_SLICE +#define PTR_DECL_MEMBER_SLICE_BASEPTR +#define REF2PTR_DECL_MEMBER_SLICE +#define REF2PTR_DECL_MEMBER_SLICE_BASEPTR + +#define ARRAY_DECL_MEMBER_SLICE +#define ARRAY_DECL_MEMBER_SLICE_BASEPTR +// Needs map clause "lvalue"-parsing support. +//#define REF2ARRAY_DECL_MEMBER_SLICE +//#define REF2ARRAY_DECL_MEMBER_SLICE_BASEPTR +#define PTR_OFFSET_DECL_MEMBER_SLICE +#define PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +// Needs map clause "lvalue"-parsing support. +//#define REF2PTR_OFFSET_DECL_MEMBER_SLICE +//#define REF2PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + +#define PTRARRAY_DECL_MEMBER_SLICE +#define PTRARRAY_DECL_MEMBER_SLICE_BASEPTR +// Needs map clause "lvalue"-parsing support.
+//#define REF2PTRARRAY_DECL_MEMBER_SLICE +//#define REF2PTRARRAY_DECL_MEMBER_SLICE_BASEPTR +#define PTRPTR_OFFSET_DECL_MEMBER_SLICE +#define PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +// Needs map clause "lvalue"-parsing support. +//#define REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE +//#define REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + +#define NONREF_COMPONENT_BASE +#define NONREF_COMPONENT_MEMBER_SLICE +#define NONREF_COMPONENT_MEMBER_SLICE_BASEPTR + +#define REF_COMPONENT_BASE +#define REF_COMPONENT_MEMBER_SLICE +#define REF_COMPONENT_MEMBER_SLICE_BASEPTR + +#define PTR_COMPONENT_BASE +#define PTR_COMPONENT_MEMBER_SLICE +#define PTR_COMPONENT_MEMBER_SLICE_BASEPTR + +#define REF2PTR_COMPONENT_BASE +#define REF2PTR_COMPONENT_MEMBER_SLICE +#define REF2PTR_COMPONENT_MEMBER_SLICE_BASEPTR + +#ifdef MAP_DECLS +void +map_decls (void) +{ + int x = 0; + int &y = x; + int arr[4]; + int (&arrref)[4] = arr; + int *z = &arr[0]; + int *&t = z; + + memset (arr, 0, sizeof arr); + + #pragma omp target map(x) + { + x++; + } + + #pragma omp target map(y) + { + y++; + } + + assert (x == 2); + assert (y == 2); + + /* "A variable that is of type pointer is treated as if it is the base + pointer of a zero-length array section that appeared as a list item in a + map clause." */ + #pragma omp target map(z) + { + z++; + } + + /* "A variable that is of type reference to pointer is treated as if it had + appeared in a map clause as a zero-length array section." + + The pointer here is *not* associated with a target address, so we're not + disallowed from modifying it. 
*/ + #pragma omp target map(t) + { + t++; + } + + assert (z == &arr[2]); + assert (t == &arr[2]); + + #pragma omp target map(arr) + { + arr[2]++; + } + + #pragma omp target map(arrref) + { + arrref[2]++; + } + + assert (arr[2] == 2); + assert (arrref[2] == 2); +} +#endif + +struct S { + int a; + int &b; + int *c; + int *&d; + int e[4]; + int (&f)[4]; + + S(int a1, int &b1, int *c1, int *&d1) : + a(a1), b(b1), c(c1), d(d1), f(e) + { + memset (e, 0, sizeof e); + } +}; + +#ifdef NONREF_DECL_BASE +void +nonref_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys(a, b, &c, d); + + #pragma omp target map(mys.a) + { + mys.a++; + } + + #pragma omp target map(mys.b) + { + mys.b++; + } + + assert (mys.a == 1); + assert (mys.b == 1); + + #pragma omp target map(mys.c) + { + mys.c++; + } + + #pragma omp target map(mys.d) + { + mys.d++; + } + + assert (mys.c == &c + 1); + assert (mys.d == &c + 1); + + #pragma omp target map(mys.e) + { + mys.e[0]++; + } + + #pragma omp target map(mys.f) + { + mys.f[0]++; + } + + assert (mys.e[0] == 2); + assert (mys.f[0] == 2); +} +#endif + +#ifdef REF_DECL_BASE +void +ref_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig(a, b, &c, d); + S &mys = mys_orig; + + #pragma omp target map(mys.a) + { + mys.a++; + } + + #pragma omp target map(mys.b) + { + mys.b++; + } + + assert (mys.a == 1); + assert (mys.b == 1); + + #pragma omp target map(mys.c) + { + mys.c++; + } + + #pragma omp target map(mys.d) + { + mys.d++; + } + + assert (mys.c == &c + 1); + assert (mys.d == &c + 1); + + #pragma omp target map(mys.e) + { + mys.e[0]++; + } + + #pragma omp target map(mys.f) + { + mys.f[0]++; + } + + assert (mys.e[0] == 2); + assert (mys.f[0] == 2); +} +#endif + +#ifdef PTR_DECL_BASE +void +ptr_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig(a, b, &c, d); + S *mys = &mys_orig; + + #pragma omp target map(mys->a) + { + mys->a++; + } + + #pragma omp target map(mys->b) + { + mys->b++; + } + + assert (mys->a == 1); + assert (mys->b 
== 1); + + #pragma omp target map(mys->c) + { + mys->c++; + } + + #pragma omp target map(mys->d) + { + mys->d++; + } + + assert (mys->c == &c + 1); + assert (mys->d == &c + 1); + + #pragma omp target map(mys->e) + { + mys->e[0]++; + } + + #pragma omp target map(mys->f) + { + mys->f[0]++; + } + + assert (mys->e[0] == 2); + assert (mys->f[0] == 2); +} +#endif + +#ifdef REF2PTR_DECL_BASE +void +ref2ptr_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig(a, b, &c, d); + S *mysp = &mys_orig; + S *&mys = mysp; + + #pragma omp target map(mys->a) + { + mys->a++; + } + + #pragma omp target map(mys->b) + { + mys->b++; + } + + assert (mys->a == 1); + assert (mys->b == 1); + + #pragma omp target map(mys->c) + { + mys->c++; + } + + #pragma omp target map(mys->d) + { + mys->d++; + } + + assert (mys->c == &c + 1); + assert (mys->d == &c + 1); + + #pragma omp target map(mys->e) + { + mys->e[0]++; + } + + #pragma omp target map(mys->f) + { + mys->f[0]++; + } + + assert (mys->e[0] == 2); + assert (mys->f[0] == 2); +} +#endif + +#ifdef ARRAY_DECL_BASE +void +array_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys[4] = + { + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d) + }; + + #pragma omp target map(mys[2].a) + { + mys[2].a++; + } + + #pragma omp target map(mys[2].b) + { + mys[2].b++; + } + + assert (mys[2].a == 1); + assert (mys[2].b == 1); + + #pragma omp target map(mys[2].c) + { + mys[2].c++; + } + + #pragma omp target map(mys[2].d) + { + mys[2].d++; + } + + assert (mys[2].c == &c + 1); + assert (mys[2].d == &c + 1); + + #pragma omp target map(mys[2].e) + { + mys[2].e[0]++; + } + + #pragma omp target map(mys[2].f) + { + mys[2].f[0]++; + } + + assert (mys[2].e[0] == 2); + assert (mys[2].f[0] == 2); +} +#endif + +#ifdef REF2ARRAY_DECL_BASE +void +ref2array_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig[4] = + { + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d) + }; + S (&mys)[4] = mys_orig; + + 
#pragma omp target map(mys[2].a) + { + mys[2].a++; + } + + #pragma omp target map(mys[2].b) + { + mys[2].b++; + } + + assert (mys[2].a == 1); + assert (mys[2].b == 1); + + #pragma omp target map(mys[2].c) + { + mys[2].c++; + } + + #pragma omp target map(mys[2].d) + { + mys[2].d++; + } + + assert (mys[2].c == &c + 1); + assert (mys[2].d == &c + 1); + + #pragma omp target map(mys[2].e) + { + mys[2].e[0]++; + } + + #pragma omp target map(mys[2].f) + { + mys[2].f[0]++; + } + + assert (mys[2].e[0] == 2); + assert (mys[2].f[0] == 2); +} +#endif + +#ifdef PTR_OFFSET_DECL_BASE +void +ptr_offset_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig[4] = + { + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d) + }; + S *mys = &mys_orig[0]; + + #pragma omp target map(mys[2].a) + { + mys[2].a++; + } + + #pragma omp target map(mys[2].b) + { + mys[2].b++; + } + + assert (mys[2].a == 1); + assert (mys[2].b == 1); + + #pragma omp target map(mys[2].c) + { + mys[2].c++; + } + + #pragma omp target map(mys[2].d) + { + mys[2].d++; + } + + assert (mys[2].c == &c + 1); + assert (mys[2].d == &c + 1); + + #pragma omp target map(mys[2].e) + { + mys[2].e[0]++; + } + + #pragma omp target map(mys[2].f) + { + mys[2].f[0]++; + } + + assert (mys[2].e[0] == 2); + assert (mys[2].f[0] == 2); +} +#endif + +#ifdef REF2PTR_OFFSET_DECL_BASE +void +ref2ptr_offset_decl_base (void) +{ + int a = 0, b = 0, c, *d = &c; + S mys_orig[4] = + { + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d), + S(a, b, &c, d) + }; + S *mys_ptr = &mys_orig[0]; + S *&mys = mys_ptr; + + #pragma omp target map(mys[2].a) + { + mys[2].a++; + } + + #pragma omp target map(mys[2].b) + { + mys[2].b++; + } + + assert (mys[2].a == 1); + assert (mys[2].b == 1); + + #pragma omp target map(mys[2].c) + { + mys[2].c++; + } + + #pragma omp target map(mys[2].d) + { + mys[2].d++; + } + + assert (mys[2].c == &c + 1); + assert (mys[2].d == &c + 1); + + #pragma omp target map(mys[2].e) + { + mys[2].e[0]++; + } + + 
#pragma omp target map(mys[2].f) + { + mys[2].f[0]++; + } + + assert (mys[2].e[0] == 2); + assert (mys[2].f[0] == 2); +} +#endif + +#ifdef MAP_SECTIONS +void +map_sections (void) +{ + int arr[10]; + int *ptr; + int (&arrref)[10] = arr; + int *&ptrref = ptr; + + ptr = new int[10]; + memset (ptr, 0, sizeof (int) * 10); + memset (arr, 0, sizeof (int) * 10); + + #pragma omp target map(arr[0:10]) + { + arr[2]++; + } + + #pragma omp target map(ptr[0:10]) + { + ptr[2]++; + } + + #pragma omp target map(arrref[0:10]) + { + arrref[2]++; + } + + #pragma omp target map(ptrref[0:10]) + { + ptrref[2]++; + } + + assert (arr[2] == 2); + assert (ptr[2] == 2); + + delete[] ptr; /* 'ptr' comes from new[], so it must be released with delete[]. */ +} +#endif + +struct T { + int a[10]; + int (&b)[10]; + int *c; + int *&d; + + T(int (&b1)[10], int *c1, int *&d1) : b(b1), c(c1), d(d1) + { + memset (a, 0, sizeof a); + } +}; + +#ifdef NONREF_DECL_MEMBER_SLICE +void +nonref_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt(c, &c[0], d); + + memset (c, 0, sizeof c); + + #pragma omp target map(myt.a[0:10]) + { + myt.a[2]++; + } + + #pragma omp target map(myt.b[0:10]) + { + myt.b[2]++; + } + + #pragma omp target enter data map(to: myt.c) + + #pragma omp target map(myt.c[0:10]) + { + myt.c[2]++; + } + + #pragma omp target exit data map(release: myt.c) + + #pragma omp target enter data map(to: myt.d) + + #pragma omp target map(myt.d[0:10]) + { + myt.d[2]++; + } + + #pragma omp target exit data map(from: myt.d) + + assert (myt.a[2] == 1); + assert (myt.b[2] == 3); + assert (myt.c[2] == 3); + assert (myt.d[2] == 3); +} +#endif + +#ifdef NONREF_DECL_MEMBER_SLICE_BASEPTR +void +nonref_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt(c, &c[0], d); + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt.c) map(myt.c[0:10]) + { + myt.c[2]++; + } + + #pragma omp target map(to:myt.d) map(myt.d[0:10]) + { + myt.d[2]++; + } + + assert (myt.c[2] == 2); + assert (myt.d[2] == 2); +} +#endif + +#ifdef REF_DECL_MEMBER_SLICE +void 
+ref_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T &myt = myt_real; + + memset (c, 0, sizeof c); + + #pragma omp target map(myt.a[0:10]) + { + myt.a[2]++; + } + + #pragma omp target map(myt.b[0:10]) + { + myt.b[2]++; + } + + #pragma omp target enter data map(to: myt.c) + + #pragma omp target map(myt.c[0:10]) + { + myt.c[2]++; + } + + #pragma omp target exit data map(release: myt.c) + + #pragma omp target enter data map(to: myt.d) + + #pragma omp target map(myt.d[0:10]) + { + myt.d[2]++; + } + + #pragma omp target exit data map(release: myt.d) + + assert (myt.a[2] == 1); + assert (myt.b[2] == 3); + assert (myt.c[2] == 3); + assert (myt.d[2] == 3); +} +#endif + +#ifdef REF_DECL_MEMBER_SLICE_BASEPTR +void +ref_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T &myt = myt_real; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt.c) map(myt.c[0:10]) + { + myt.c[2]++; + } + + #pragma omp target map(to:myt.d) map(myt.d[0:10]) + { + myt.d[2]++; + } + + assert (myt.c[2] == 2); + assert (myt.d[2] == 2); +} +#endif + +#ifdef PTR_DECL_MEMBER_SLICE +void +ptr_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt = &myt_real; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt) + + #pragma omp target map(myt->a[0:10]) + { + myt->a[2]++; + } + + #pragma omp target map(myt->b[0:10]) + { + myt->b[2]++; + } + + #pragma omp target enter data map(to: myt->c) + + #pragma omp target map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target exit data map(release: myt->c) + + #pragma omp target enter data map(to: myt->d) + + #pragma omp target map(myt->d[0:10]) + { + myt->d[2]++; + } + + #pragma omp target exit data map(release: myt, myt->d) + + assert (myt->a[2] == 1); + assert (myt->b[2] == 3); + assert (myt->c[2] == 3); + assert (myt->d[2] == 3); +} +#endif + +#ifdef PTR_DECL_MEMBER_SLICE_BASEPTR +void 
+ptr_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt = &myt_real; + + memset (c, 0, sizeof c); + + // These ones have an implicit firstprivate for 'myt'. + #pragma omp target map(to:myt->c) map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target map(to:myt->d) map(myt->d[0:10]) + { + myt->d[2]++; + } + + // These ones have an explicit "TO" mapping for 'myt'. + #pragma omp target map(to:myt) map(to:myt->c) map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target map(to:myt) map(to:myt->d) map(myt->d[0:10]) + { + myt->d[2]++; + } + + assert (myt->c[2] == 4); + assert (myt->d[2] == 4); +} +#endif + +#ifdef REF2PTR_DECL_MEMBER_SLICE +void +ref2ptr_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptr = &myt_real; + T *&myt = myt_ptr; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt) + + #pragma omp target map(myt->a[0:10]) + { + myt->a[2]++; + } + + #pragma omp target map(myt->b[0:10]) + { + myt->b[2]++; + } + + #pragma omp target enter data map(to: myt->c) + + #pragma omp target map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target exit data map(release: myt->c) + + #pragma omp target enter data map(to: myt->d) + + #pragma omp target map(myt->d[0:10]) + { + myt->d[2]++; + } + + #pragma omp target exit data map(from: myt, myt->d) + + assert (myt->a[2] == 1); + assert (myt->b[2] == 3); + assert (myt->c[2] == 3); + assert (myt->d[2] == 3); +} +#endif + +#ifdef REF2PTR_DECL_MEMBER_SLICE_BASEPTR +void +ref2ptr_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptr = &myt_real; + T *&myt = myt_ptr; + + memset (c, 0, sizeof c); + + // These ones have an implicit firstprivate for 'myt'. 
+ #pragma omp target map(to:myt->c) map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target map(to:myt->d) map(myt->d[0:10]) + { + myt->d[2]++; + } + + // These ones have an explicit "TO" mapping for 'myt'. + #pragma omp target map(to:myt) map(to:myt->c) map(myt->c[0:10]) + { + myt->c[2]++; + } + + #pragma omp target map(to:myt) map(to:myt->d) map(myt->d[0:10]) + { + myt->d[2]++; + } + + assert (myt->c[2] == 4); + assert (myt->d[2] == 4); +} +#endif + +#ifdef ARRAY_DECL_MEMBER_SLICE +void +array_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + + memset (c, 0, sizeof c); + + #pragma omp target map(myt[2].a[0:10]) + { + myt[2].a[2]++; + } + + #pragma omp target map(myt[2].b[0:10]) + { + myt[2].b[2]++; + } + + #pragma omp target enter data map(to: myt[2].c) + + #pragma omp target map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target exit data map(release: myt[2].c) + + #pragma omp target enter data map(to: myt[2].d) + + #pragma omp target map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + #pragma omp target exit data map(release: myt[2].d) + + assert (myt[2].a[2] == 1); + assert (myt[2].b[2] == 3); + assert (myt[2].c[2] == 3); + assert (myt[2].d[2] == 3); +} +#endif + +#ifdef ARRAY_DECL_MEMBER_SLICE_BASEPTR +void +array_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + assert (myt[2].c[2] == 2); + assert (myt[2].d[2] == 2); +} +#endif + +#ifdef REF2ARRAY_DECL_MEMBER_SLICE +void +ref2array_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T 
(c, &c[0], d) + }; + T (&myt)[4] = myt_real; + + memset (c, 0, sizeof c); + + #pragma omp target map(myt[2].a[0:10]) + { + myt[2].a[2]++; + } + + #pragma omp target map(myt[2].b[0:10]) + { + myt[2].b[2]++; + } + + #pragma omp target enter data map(to: myt[2].c) + + #pragma omp target map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target exit data map(release: myt[2].c) + + #pragma omp target enter data map(to: myt[2].d) + + #pragma omp target map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + #pragma omp target exit data map(release: myt[2].d) + + assert (myt[2].a[2] == 1); + assert (myt[2].b[2] == 3); + assert (myt[2].c[2] == 3); + assert (myt[2].d[2] == 3); +} +#endif + +#ifdef REF2ARRAY_DECL_MEMBER_SLICE_BASEPTR +void +ref2array_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + T (&myt)[4] = myt_real; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + assert (myt[2].c[2] == 2); + assert (myt[2].d[2] == 2); +} +#endif + +#ifdef PTR_OFFSET_DECL_MEMBER_SLICE +void +ptr_offset_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + T *myt = &myt_real[0]; + + memset (c, 0, sizeof c); + + #pragma omp target map(myt[2].a[0:10]) + { + myt[2].a[2]++; + } + + #pragma omp target map(myt[2].b[0:10]) + { + myt[2].b[2]++; + } + + #pragma omp target enter data map(to: myt[2].c) + + #pragma omp target map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target exit data map(release: myt[2].c) + + #pragma omp target enter data map(to: myt[2].d) + + #pragma omp target map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + #pragma omp target exit data map(release: myt[2].d) + + assert 
(myt[2].a[2] == 1); + assert (myt[2].b[2] == 3); + assert (myt[2].c[2] == 3); + assert (myt[2].d[2] == 3); +} +#endif + +#ifdef PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +void +ptr_offset_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + T *myt = &myt_real[0]; + + memset (c, 0, sizeof c); + + /* Implicit 'myt'. */ + #pragma omp target map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + /* Explicit 'to'-mapped 'myt'. */ + #pragma omp target map(to:myt) map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt) map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + assert (myt[2].c[2] == 4); + assert (myt[2].d[2] == 4); +} +#endif + +#ifdef REF2PTR_OFFSET_DECL_MEMBER_SLICE +void +ref2ptr_offset_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + T *myt_ptr = &myt_real[0]; + T *&myt = myt_ptr; + + memset (c, 0, sizeof c); + + #pragma omp target map(myt[2].a[0:10]) + { + myt[2].a[2]++; + } + + #pragma omp target map(myt[2].b[0:10]) + { + myt[2].b[2]++; + } + + #pragma omp target enter data map(to: myt[2].c) + + #pragma omp target map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target exit data map(release: myt[2].c) + + #pragma omp target enter data map(to: myt[2].d) + + #pragma omp target map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + #pragma omp target exit data map(release: myt[2].d) + + assert (myt[2].a[2] == 1); + assert (myt[2].b[2] == 3); + assert (myt[2].c[2] == 3); + assert (myt[2].d[2] == 3); +} +#endif + +#ifdef REF2PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +void +ref2ptr_offset_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real[4] = + { + T (c, &c[0], 
d), + T (c, &c[0], d), + T (c, &c[0], d), + T (c, &c[0], d) + }; + T *myt_ptr = &myt_real[0]; + T *&myt = myt_ptr; + + memset (c, 0, sizeof c); + + /* Implicit 'myt'. */ + #pragma omp target map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + /* Explicit 'to'-mapped 'myt'. */ + #pragma omp target map(to:myt) map(to:myt[2].c) map(myt[2].c[0:10]) + { + myt[2].c[2]++; + } + + #pragma omp target map(to:myt) map(to:myt[2].d) map(myt[2].d[0:10]) + { + myt[2].d[2]++; + } + + assert (myt[2].c[2] == 4); + assert (myt[2].d[2] == 4); +} +#endif + +#ifdef PTRARRAY_DECL_MEMBER_SLICE +void +ptrarray_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt[4] = + { + &myt_real, + &myt_real, + &myt_real, + &myt_real + }; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt[2]) + + #pragma omp target map(myt[2]->a[0:10]) + { + myt[2]->a[2]++; + } + + #pragma omp target map(myt[2]->b[0:10]) + { + myt[2]->b[2]++; + } + + #pragma omp target enter data map(to: myt[2]->c) + + #pragma omp target map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target exit data map(from: myt[2]->c) + + #pragma omp target enter data map(to: myt[2]->d) + + #pragma omp target map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target exit data map(from: myt[2]->d) + + #pragma omp target exit data map(release: myt[2]) + + assert (myt[2]->a[2] == 1); + assert (myt[2]->b[2] == 3); + assert (myt[2]->c[2] == 3); + assert (myt[2]->d[2] == 3); +} +#endif + +#ifdef PTRARRAY_DECL_MEMBER_SLICE_BASEPTR +void +ptrarray_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt[4] = + { + &myt_real, + &myt_real, + &myt_real, + &myt_real + }; + + memset (c, 0, sizeof c); + + // Implicit 'myt' + #pragma omp target map(to: myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp 
target map(to: myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + // One element of 'myt' + #pragma omp target map(to:myt[2], myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt[2], myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + // Explicit map of all of 'myt' + #pragma omp target map(to:myt, myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt, myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + // Explicit map slice of 'myt' + #pragma omp target map(to:myt[1:3], myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt[1:3], myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + assert (myt[2]->c[2] == 8); + assert (myt[2]->d[2] == 8); +} +#endif + +#ifdef REF2PTRARRAY_DECL_MEMBER_SLICE +void +ref2ptrarray_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptrarr[4] = + { + &myt_real, + &myt_real, + &myt_real, + &myt_real + }; + T *(&myt)[4] = myt_ptrarr; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt[2]) + + #pragma omp target map(myt[2]->a[0:10]) + { + myt[2]->a[2]++; + } + + #pragma omp target map(myt[2]->b[0:10]) + { + myt[2]->b[2]++; + } + + #pragma omp target enter data map(to: myt[2]->c) + + #pragma omp target map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target exit data map(release: myt[2]->c) + + #pragma omp target enter data map(to: myt[2]->d) + + #pragma omp target map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target exit data map(release: myt[2]->d) + + #pragma omp target exit data map(release: myt[2]) + + assert (myt[2]->a[2] == 1); + assert (myt[2]->b[2] == 3); + assert (myt[2]->c[2] == 3); + assert (myt[2]->d[2] == 3); +} +#endif + +#ifdef REF2PTRARRAY_DECL_MEMBER_SLICE_BASEPTR +void +ref2ptrarray_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); 
+ T *myt_ptrarr[4] = + { + &myt_real, + &myt_real, + &myt_real, + &myt_real + }; + T *(&myt)[4] = myt_ptrarr; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt[2], myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt[2], myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target map(to:myt, myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt, myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + assert (myt[2]->c[2] == 4); + assert (myt[2]->d[2] == 4); +} +#endif + +#ifdef PTRPTR_OFFSET_DECL_MEMBER_SLICE +void +ptrptr_offset_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptrarr[4] = + { + &myt_real, + &myt_real, + &myt_real, + &myt_real + }; + T **myt = &myt_ptrarr[0]; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt[0:3]) + + /* NOTE: For the implicit firstprivate 'myt' to work, the zeroth element of + myt[] must be mapped above -- otherwise the zero-length array section + lookup fails. 
*/ + #pragma omp target map(myt[2]->a[0:10]) + { + myt[2]->a[2]++; + } + + #pragma omp target map(myt[2]->b[0:10]) + { + myt[2]->b[2]++; + } + + #pragma omp target enter data map(to: myt[2]->c) + + #pragma omp target map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target exit data map(from: myt[2]->c) + + #pragma omp target enter data map(to: myt[2]->d) + + #pragma omp target map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target exit data map(from: myt[0:3], myt[2]->d) + + assert (myt[2]->a[2] == 1); + assert (myt[2]->b[2] == 3); + assert (myt[2]->c[2] == 3); + assert (myt[2]->d[2] == 3); +} +#endif + +#ifdef PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +void +ptrptr_offset_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptrarr[4] = + { + 0, + 0, + 0, + &myt_real + }; + T **myt = &myt_ptrarr[0]; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt[3], myt[3]->c) map(myt[3]->c[0:10]) + { + myt[3]->c[2]++; + } + + #pragma omp target map(to:myt[3], myt[3]->d) map(myt[3]->d[0:10]) + { + myt[3]->d[2]++; + } + + #pragma omp target map(to:myt, myt[3], myt[3]->c) map(myt[3]->c[0:10]) + { + myt[3]->c[2]++; + } + + #pragma omp target map(to:myt, myt[3], myt[3]->d) map(myt[3]->d[0:10]) + { + myt[3]->d[2]++; + } + + assert (myt[3]->c[2] == 4); + assert (myt[3]->d[2] == 4); +} +#endif + +#ifdef REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE +void +ref2ptrptr_offset_decl_member_slice (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptrarr[4] = + { + 0, + 0, + &myt_real, + 0 + }; + T **myt_ptrptr = &myt_ptrarr[0]; + T **&myt = myt_ptrptr; + + memset (c, 0, sizeof c); + + #pragma omp target enter data map(to: myt[0:3]) + + #pragma omp target map(myt[2]->a[0:10]) + { + myt[2]->a[2]++; + } + + #pragma omp target map(myt[2]->b[0:10]) + { + myt[2]->b[2]++; + } + + #pragma omp target enter data map(to:myt[2]->c) + + #pragma omp target map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + 
} + + #pragma omp target exit data map(release:myt[2]->c) + + #pragma omp target enter data map(to:myt[2]->d) + + #pragma omp target map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target exit data map(release: myt[0:3], myt[2]->d) + + assert (myt[2]->a[2] == 1); + assert (myt[2]->b[2] == 3); + assert (myt[2]->c[2] == 3); + assert (myt[2]->d[2] == 3); +} +#endif + +#ifdef REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR +void +ref2ptrptr_offset_decl_member_slice_baseptr (void) +{ + int c[10]; + int *d = &c[0]; + T myt_real(c, &c[0], d); + T *myt_ptrarr[4] = + { + 0, + 0, + &myt_real, + 0 + }; + T **myt_ptrptr = &myt_ptrarr[0]; + T **&myt = myt_ptrptr; + + memset (c, 0, sizeof c); + + #pragma omp target map(to:myt[2], myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt[2], myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + #pragma omp target map(to:myt, myt[2], myt[2]->c) map(myt[2]->c[0:10]) + { + myt[2]->c[2]++; + } + + #pragma omp target map(to:myt, myt[2], myt[2]->d) map(myt[2]->d[0:10]) + { + myt[2]->d[2]++; + } + + assert (myt[2]->c[2] == 4); + assert (myt[2]->d[2] == 4); +} +#endif + +struct U +{ + S s1; + T t1; + S &s2; + T &t2; + S *s3; + T *t3; + S *&s4; + T *&t4; + + U(S &sptr1, T &tptr1, S &sptr2, T &tptr2, S *sptr3, T *tptr3, + S *&sptr4, T *&tptr4) + : s1(sptr1), t1(tptr1), s2(sptr2), t2(tptr2), s3(sptr3), t3(tptr3), + s4(sptr4), t4(tptr4) + { + } +}; + +#define INIT_S(N) \ + int a##N = 0, b##N = 0, c##N = 0, d##N = 0; \ + int *d##N##ptr = &d##N; \ + S s##N(a##N, b##N, &c##N, d##N##ptr) + +#define INIT_T(N) \ + int arr##N[10]; \ + int *ptr##N = &arr##N[0]; \ + T t##N(arr##N, &arr##N[0], ptr##N); \ + memset (arr##N, 0, sizeof arr##N) + +#define INIT_ST \ + INIT_S(1); \ + INIT_T(1); \ + INIT_S(2); \ + INIT_T(2); \ + INIT_S(3); \ + INIT_T(3); \ + int a4 = 0, b4 = 0, c4 = 0, d4 = 0; \ + int *d4ptr = &d4; \ + S *s4 = new S(a4, b4, &c4, d4ptr); \ + int arr4[10]; \ + int *ptr4 = &arr4[0]; \ + T *t4 
= new T(arr4, &arr4[0], ptr4); \ + memset (arr4, 0, sizeof arr4) + +#ifdef NONREF_COMPONENT_BASE +void +nonref_component_base (void) +{ + INIT_ST; + U myu(s1, t1, s2, t2, &s3, &t3, s4, t4); + + #pragma omp target map(myu.s1.a, myu.s1.b, myu.s1.c, myu.s1.d) + { + myu.s1.a++; + myu.s1.b++; + myu.s1.c++; + myu.s1.d++; + } + + assert (myu.s1.a == 1); + assert (myu.s1.b == 1); + assert (myu.s1.c == &c1 + 1); + assert (myu.s1.d == &d1 + 1); + + #pragma omp target map(myu.s2.a, myu.s2.b, myu.s2.c, myu.s2.d) + { + myu.s2.a++; + myu.s2.b++; + myu.s2.c++; + myu.s2.d++; + } + + assert (myu.s2.a == 1); + assert (myu.s2.b == 1); + assert (myu.s2.c == &c2 + 1); + assert (myu.s2.d == &d2 + 1); + + #pragma omp target map(to:myu.s3) \ + map(myu.s3->a, myu.s3->b, myu.s3->c, myu.s3->d) + { + myu.s3->a++; + myu.s3->b++; + myu.s3->c++; + myu.s3->d++; + } + + assert (myu.s3->a == 1); + assert (myu.s3->b == 1); + assert (myu.s3->c == &c3 + 1); + assert (myu.s3->d == &d3 + 1); + + #pragma omp target map(to:myu.s4) \ + map(myu.s4->a, myu.s4->b, myu.s4->c, myu.s4->d) + { + myu.s4->a++; + myu.s4->b++; + myu.s4->c++; + myu.s4->d++; + } + + assert (myu.s4->a == 1); + assert (myu.s4->b == 1); + assert (myu.s4->c == &c4 + 1); + assert (myu.s4->d == &d4 + 1); + + delete s4; + delete t4; +} +#endif + +#ifdef NONREF_COMPONENT_MEMBER_SLICE +void +nonref_component_member_slice (void) +{ + INIT_ST; + U myu(s1, t1, s2, t2, &s3, &t3, s4, t4); + + #pragma omp target map(myu.t1.a[2:5]) + { + myu.t1.a[2]++; + } + + #pragma omp target map(myu.t1.b[2:5]) + { + myu.t1.b[2]++; + } + + #pragma omp target enter data map(to: myu.t1.c) + + #pragma omp target map(myu.t1.c[2:5]) + { + myu.t1.c[2]++; + } + + #pragma omp target exit data map(release: myu.t1.c) + + #pragma omp target enter data map(to: myu.t1.d) + + #pragma omp target map(myu.t1.d[2:5]) + { + myu.t1.d[2]++; + } + + #pragma omp target exit data map(from: myu.t1.d) + + assert (myu.t1.a[2] == 1); + assert (myu.t1.b[2] == 3); + assert (myu.t1.c[2] == 3); + 
assert (myu.t1.d[2] == 3); + + #pragma omp target map(myu.t2.a[2:5]) + { + myu.t2.a[2]++; + } + + #pragma omp target map(myu.t2.b[2:5]) + { + myu.t2.b[2]++; + } + + #pragma omp target enter data map(to: myu.t2.c) + + #pragma omp target map(myu.t2.c[2:5]) + { + myu.t2.c[2]++; + } + + #pragma omp target exit data map(release: myu.t2.c) + + #pragma omp target enter data map(to: myu.t2.d) + + #pragma omp target map(myu.t2.d[2:5]) + { + myu.t2.d[2]++; + } + + #pragma omp target exit data map(release: myu.t2.d) + + assert (myu.t2.a[2] == 1); + assert (myu.t2.b[2] == 3); + assert (myu.t2.c[2] == 3); + assert (myu.t2.d[2] == 3); + + #pragma omp target enter data map(to: myu.t3) + + #pragma omp target map(myu.t3->a[2:5]) + { + myu.t3->a[2]++; + } + + #pragma omp target map(myu.t3->b[2:5]) + { + myu.t3->b[2]++; + } + + #pragma omp target enter data map(to: myu.t3->c) + + #pragma omp target map(myu.t3->c[2:5]) + { + myu.t3->c[2]++; + } + + #pragma omp target exit data map(release: myu.t3->c) + + #pragma omp target enter data map(to: myu.t3->d) + + #pragma omp target map(myu.t3->d[2:5]) + { + myu.t3->d[2]++; + } + + #pragma omp target exit data map(release: myu.t3, myu.t3->d) + + assert (myu.t3->a[2] == 1); + assert (myu.t3->b[2] == 3); + assert (myu.t3->c[2] == 3); + assert (myu.t3->d[2] == 3); + + #pragma omp target enter data map(to: myu.t4) + + #pragma omp target map(myu.t4->a[2:5]) + { + myu.t4->a[2]++; + } + + #pragma omp target map(myu.t4->b[2:5]) + { + myu.t4->b[2]++; + } + + #pragma omp target enter data map(to: myu.t4->c) + + #pragma omp target map(myu.t4->c[2:5]) + { + myu.t4->c[2]++; + } + + #pragma omp target exit data map(release: myu.t4->c) + + #pragma omp target enter data map(to: myu.t4->d) + + #pragma omp target map(myu.t4->d[2:5]) + { + myu.t4->d[2]++; + } + + #pragma omp target exit data map(release: myu.t4, myu.t4->d) + + assert (myu.t4->a[2] == 1); + assert (myu.t4->b[2] == 3); + assert (myu.t4->c[2] == 3); + assert (myu.t4->d[2] == 3); + + delete s4; + 
delete t4; +} +#endif + +#ifdef NONREF_COMPONENT_MEMBER_SLICE_BASEPTR +void +nonref_component_member_slice_baseptr (void) +{ + INIT_ST; + U myu(s1, t1, s2, t2, &s3, &t3, s4, t4); + + #pragma omp target map(to: myu.t1.c) map(myu.t1.c[2:5]) + { + myu.t1.c[2]++; + } + + #pragma omp target map(to: myu.t1.d) map(myu.t1.d[2:5]) + { + myu.t1.d[2]++; + } + + assert (myu.t1.c[2] == 2); + assert (myu.t1.d[2] == 2); + + #pragma omp target map(to: myu.t2.c) map(myu.t2.c[2:5]) + { + myu.t2.c[2]++; + } + + #pragma omp target map(to: myu.t2.d) map(myu.t2.d[2:5]) + { + myu.t2.d[2]++; + } + + assert (myu.t2.c[2] == 2); + assert (myu.t2.d[2] == 2); + + #pragma omp target map(to: myu.t3, myu.t3->c) map(myu.t3->c[2:5]) + { + myu.t3->c[2]++; + } + + #pragma omp target map(to: myu.t3, myu.t3->d) map(myu.t3->d[2:5]) + { + myu.t3->d[2]++; + } + + assert (myu.t3->c[2] == 2); + assert (myu.t3->d[2] == 2); + + #pragma omp target map(to: myu.t4, myu.t4->c) map(myu.t4->c[2:5]) + { + myu.t4->c[2]++; + } + + #pragma omp target map(to: myu.t4, myu.t4->d) map(myu.t4->d[2:5]) + { + myu.t4->d[2]++; + } + + assert (myu.t4->c[2] == 2); + assert (myu.t4->d[2] == 2); + + delete s4; + delete t4; +} +#endif + +#ifdef REF_COMPONENT_BASE +void +ref_component_base (void) +{ + INIT_ST; + U myu_real(s1, t1, s2, t2, &s3, &t3, s4, t4); + U &myu = myu_real; + + #pragma omp target map(myu.s1.a, myu.s1.b, myu.s1.c, myu.s1.d) + { + myu.s1.a++; + myu.s1.b++; + myu.s1.c++; + myu.s1.d++; + } + + assert (myu.s1.a == 1); + assert (myu.s1.b == 1); + assert (myu.s1.c == &c1 + 1); + assert (myu.s1.d == &d1 + 1); + + #pragma omp target map(myu.s2.a, myu.s2.b, myu.s2.c, myu.s2.d) + { + myu.s2.a++; + myu.s2.b++; + myu.s2.c++; + myu.s2.d++; + } + + assert (myu.s2.a == 1); + assert (myu.s2.b == 1); + assert (myu.s2.c == &c2 + 1); + assert (myu.s2.d == &d2 + 1); + + #pragma omp target map(to:myu.s3) \ + map(myu.s3->a, myu.s3->b, myu.s3->c, myu.s3->d) + { + myu.s3->a++; + myu.s3->b++; + myu.s3->c++; + myu.s3->d++; + } + + assert 
(myu.s3->a == 1); + assert (myu.s3->b == 1); + assert (myu.s3->c == &c3 + 1); + assert (myu.s3->d == &d3 + 1); + + #pragma omp target map(to:myu.s4) \ + map(myu.s4->a, myu.s4->b, myu.s4->c, myu.s4->d) + { + myu.s4->a++; + myu.s4->b++; + myu.s4->c++; + myu.s4->d++; + } + + assert (myu.s4->a == 1); + assert (myu.s4->b == 1); + assert (myu.s4->c == &c4 + 1); + assert (myu.s4->d == &d4 + 1); + + delete s4; + delete t4; +} +#endif + +#ifdef REF_COMPONENT_MEMBER_SLICE +void +ref_component_member_slice (void) +{ + INIT_ST; + U myu_real(s1, t1, s2, t2, &s3, &t3, s4, t4); + U &myu = myu_real; + + #pragma omp target map(myu.t1.a[2:5]) + { + myu.t1.a[2]++; + } + + #pragma omp target map(myu.t1.b[2:5]) + { + myu.t1.b[2]++; + } + + #pragma omp target enter data map(to: myu.t1.c) + + #pragma omp target map(myu.t1.c[2:5]) + { + myu.t1.c[2]++; + } + + #pragma omp target exit data map(release: myu.t1.c) + + #pragma omp target enter data map(to: myu.t1.d) + + #pragma omp target map(myu.t1.d[2:5]) + { + myu.t1.d[2]++; + } + + #pragma omp target exit data map(release: myu.t1.d) + + assert (myu.t1.a[2] == 1); + assert (myu.t1.b[2] == 3); + assert (myu.t1.c[2] == 3); + assert (myu.t1.d[2] == 3); + + #pragma omp target map(myu.t2.a[2:5]) + { + myu.t2.a[2]++; + } + + #pragma omp target map(myu.t2.b[2:5]) + { + myu.t2.b[2]++; + } + + #pragma omp target enter data map(to: myu.t2.c) + + #pragma omp target map(myu.t2.c[2:5]) + { + myu.t2.c[2]++; + } + + #pragma omp target exit data map(release: myu.t2.c) + + #pragma omp target enter data map(to: myu.t2.d) + + #pragma omp target map(myu.t2.d[2:5]) + { + myu.t2.d[2]++; + } + + #pragma omp target exit data map(release: myu.t2.d) + + assert (myu.t2.a[2] == 1); + assert (myu.t2.b[2] == 3); + assert (myu.t2.c[2] == 3); + assert (myu.t2.d[2] == 3); + + #pragma omp target enter data map(to: myu.t3) + + #pragma omp target map(myu.t3->a[2:5]) + { + myu.t3->a[2]++; + } + + #pragma omp target map(myu.t3->b[2:5]) + { + myu.t3->b[2]++; + } + + #pragma omp 
target enter data map(to: myu.t3->c) + + #pragma omp target map(myu.t3->c[2:5]) + { + myu.t3->c[2]++; + } + + #pragma omp target exit data map(release: myu.t3->c) + + #pragma omp target enter data map(to: myu.t3->d) + + #pragma omp target map(myu.t3->d[2:5]) + { + myu.t3->d[2]++; + } + + #pragma omp target exit data map(release: myu.t3, myu.t3->d) + + assert (myu.t3->a[2] == 1); + assert (myu.t3->b[2] == 3); + assert (myu.t3->c[2] == 3); + assert (myu.t3->d[2] == 3); + + #pragma omp target enter data map(to: myu.t4) + + #pragma omp target map(myu.t4->a[2:5]) + { + myu.t4->a[2]++; + } + + #pragma omp target map(myu.t4->b[2:5]) + { + myu.t4->b[2]++; + } + + #pragma omp target enter data map(to: myu.t4->c) + + #pragma omp target map(myu.t4->c[2:5]) + { + myu.t4->c[2]++; + } + + #pragma omp target exit data map(release: myu.t4->c) + + #pragma omp target enter data map(to: myu.t4->d) + + #pragma omp target map(myu.t4->d[2:5]) + { + myu.t4->d[2]++; + } + + #pragma omp target exit data map(release: myu.t4, myu.t4->d) + + assert (myu.t4->a[2] == 1); + assert (myu.t4->b[2] == 3); + assert (myu.t4->c[2] == 3); + assert (myu.t4->d[2] == 3); + + delete s4; + delete t4; +} +#endif + +#ifdef REF_COMPONENT_MEMBER_SLICE_BASEPTR +void +ref_component_member_slice_baseptr (void) +{ + INIT_ST; + U myu_real(s1, t1, s2, t2, &s3, &t3, s4, t4); + U &myu = myu_real; + + #pragma omp target map(to: myu.t1.c) map(myu.t1.c[2:5]) + { + myu.t1.c[2]++; + } + + #pragma omp target map(to: myu.t1.d) map(myu.t1.d[2:5]) + { + myu.t1.d[2]++; + } + + assert (myu.t1.c[2] == 2); + assert (myu.t1.d[2] == 2); + + #pragma omp target map(to: myu.t2.c) map(myu.t2.c[2:5]) + { + myu.t2.c[2]++; + } + + #pragma omp target map(to: myu.t2.d) map(myu.t2.d[2:5]) + { + myu.t2.d[2]++; + } + + assert (myu.t2.c[2] == 2); + assert (myu.t2.d[2] == 2); + + #pragma omp target map(to: myu.t3, myu.t3->c) map(myu.t3->c[2:5]) + { + myu.t3->c[2]++; + } + + #pragma omp target map(to: myu.t3, myu.t3->d) map(myu.t3->d[2:5]) + { + 
myu.t3->d[2]++; + } + + assert (myu.t3->c[2] == 2); + assert (myu.t3->d[2] == 2); + + #pragma omp target map(to: myu.t4, myu.t4->c) map(myu.t4->c[2:5]) + { + myu.t4->c[2]++; + } + + #pragma omp target map(to: myu.t4, myu.t4->d) map(myu.t4->d[2:5]) + { + myu.t4->d[2]++; + } + + assert (myu.t4->c[2] == 2); + assert (myu.t4->d[2] == 2); + + delete s4; + delete t4; +} +#endif + +#ifdef PTR_COMPONENT_BASE +void +ptr_component_base (void) +{ + INIT_ST; + U *myu = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + + #pragma omp target map(myu->s1.a, myu->s1.b, myu->s1.c, myu->s1.d) + { + myu->s1.a++; + myu->s1.b++; + myu->s1.c++; + myu->s1.d++; + } + + assert (myu->s1.a == 1); + assert (myu->s1.b == 1); + assert (myu->s1.c == &c1 + 1); + assert (myu->s1.d == &d1 + 1); + + #pragma omp target map(myu->s2.a, myu->s2.b, myu->s2.c, myu->s2.d) + { + myu->s2.a++; + myu->s2.b++; + myu->s2.c++; + myu->s2.d++; + } + + assert (myu->s2.a == 1); + assert (myu->s2.b == 1); + assert (myu->s2.c == &c2 + 1); + assert (myu->s2.d == &d2 + 1); + + #pragma omp target map(to:myu->s3) \ + map(myu->s3->a, myu->s3->b, myu->s3->c, myu->s3->d) + { + myu->s3->a++; + myu->s3->b++; + myu->s3->c++; + myu->s3->d++; + } + + assert (myu->s3->a == 1); + assert (myu->s3->b == 1); + assert (myu->s3->c == &c3 + 1); + assert (myu->s3->d == &d3 + 1); + + #pragma omp target map(to:myu->s4) \ + map(myu->s4->a, myu->s4->b, myu->s4->c, myu->s4->d) + { + myu->s4->a++; + myu->s4->b++; + myu->s4->c++; + myu->s4->d++; + } + + assert (myu->s4->a == 1); + assert (myu->s4->b == 1); + assert (myu->s4->c == &c4 + 1); + assert (myu->s4->d == &d4 + 1); + + delete s4; + delete t4; + delete myu; +} +#endif + +#ifdef PTR_COMPONENT_MEMBER_SLICE +void +ptr_component_member_slice (void) +{ + INIT_ST; + U *myu = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + + #pragma omp target map(myu->t1.a[2:5]) + { + myu->t1.a[2]++; + } + + #pragma omp target map(myu->t1.b[2:5]) + { + myu->t1.b[2]++; + } + + #pragma omp target enter data map(to: myu->t1.c) 
+ + #pragma omp target map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target exit data map(release: myu->t1.c) + + #pragma omp target enter data map(to: myu->t1.d) + + #pragma omp target map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + #pragma omp target exit data map(release: myu->t1.d) + + assert (myu->t1.a[2] == 1); + assert (myu->t1.b[2] == 3); + assert (myu->t1.c[2] == 3); + assert (myu->t1.d[2] == 3); + + #pragma omp target map(myu->t2.a[2:5]) + { + myu->t2.a[2]++; + } + + #pragma omp target map(myu->t2.b[2:5]) + { + myu->t2.b[2]++; + } + + #pragma omp target enter data map(to: myu->t2.c) + + #pragma omp target map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target exit data map(release: myu->t2.c) + + #pragma omp target enter data map(to: myu->t2.d) + + #pragma omp target map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + #pragma omp target exit data map(release: myu->t2.d) + + assert (myu->t2.a[2] == 1); + assert (myu->t2.b[2] == 3); + assert (myu->t2.c[2] == 3); + assert (myu->t2.d[2] == 3); + + #pragma omp target enter data map(to: myu->t3) + + #pragma omp target map(myu->t3->a[2:5]) + { + myu->t3->a[2]++; + } + + #pragma omp target map(myu->t3->b[2:5]) + { + myu->t3->b[2]++; + } + + #pragma omp target enter data map(to: myu->t3->c) + + #pragma omp target map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target exit data map(release: myu->t3->c) + + #pragma omp target enter data map(to: myu->t3->d) + + #pragma omp target map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + #pragma omp target exit data map(release: myu->t3, myu->t3->d) + + assert (myu->t3->a[2] == 1); + assert (myu->t3->b[2] == 3); + assert (myu->t3->c[2] == 3); + assert (myu->t3->d[2] == 3); + + #pragma omp target enter data map(to: myu->t4) + + #pragma omp target map(myu->t4->a[2:5]) + { + myu->t4->a[2]++; + } + + #pragma omp target map(myu->t4->b[2:5]) + { + myu->t4->b[2]++; + } + + #pragma omp target enter data map(to: myu->t4->c) + + #pragma 
omp target map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target exit data map(release: myu->t4->c) + + #pragma omp target enter data map(to: myu->t4->d) + + #pragma omp target map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + #pragma omp target exit data map(release: myu->t4, myu->t4->d) + + assert (myu->t4->a[2] == 1); + assert (myu->t4->b[2] == 3); + assert (myu->t4->c[2] == 3); + assert (myu->t4->d[2] == 3); + + delete s4; + delete t4; + delete myu; +} +#endif + +#ifdef PTR_COMPONENT_MEMBER_SLICE_BASEPTR +void +ptr_component_member_slice_baseptr (void) +{ + INIT_ST; + U *myu = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t1.c) map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target map(to: myu->t1.d) map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + assert (myu->t1.c[2] == 2); + assert (myu->t1.d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t1.c) map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target map(to: myu, myu->t1.d) map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + assert (myu->t1.c[2] == 4); + assert (myu->t1.d[2] == 4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t2.c) map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target map(to: myu->t2.d) map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + assert (myu->t2.c[2] == 2); + assert (myu->t2.d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t2.c) map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target map(to: myu, myu->t2.d) map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + assert (myu->t2.c[2] == 4); + assert (myu->t2.d[2] == 4); + + /* Implicit firstprivate 'myu'. 
*/ + #pragma omp target map(to: myu->t3, myu->t3->c) map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target map(to: myu->t3, myu->t3->d) map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + assert (myu->t3->c[2] == 2); + assert (myu->t3->d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t3, myu->t3->c) map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target map(to: myu, myu->t3, myu->t3->d) map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + assert (myu->t3->c[2] == 4); + assert (myu->t3->d[2] == 4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t4, myu->t4->c) map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target map(to: myu->t4, myu->t4->d) map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + assert (myu->t4->c[2] == 2); + assert (myu->t4->d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t4, myu->t4->c) map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target map(to: myu, myu->t4, myu->t4->d) map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + assert (myu->t4->c[2] == 4); + assert (myu->t4->d[2] == 4); + + delete s4; + delete t4; + delete myu; +} +#endif + +#ifdef REF2PTR_COMPONENT_BASE +void +ref2ptr_component_base (void) +{ + INIT_ST; + U *myu_ptr = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + U *&myu = myu_ptr; + + #pragma omp target map(myu->s1.a, myu->s1.b, myu->s1.c, myu->s1.d) + { + myu->s1.a++; + myu->s1.b++; + myu->s1.c++; + myu->s1.d++; + } + + assert (myu->s1.a == 1); + assert (myu->s1.b == 1); + assert (myu->s1.c == &c1 + 1); + assert (myu->s1.d == &d1 + 1); + + #pragma omp target map(myu->s2.a, myu->s2.b, myu->s2.c, myu->s2.d) + { + myu->s2.a++; + myu->s2.b++; + myu->s2.c++; + myu->s2.d++; + } + + assert (myu->s2.a == 1); + assert (myu->s2.b == 1); + assert (myu->s2.c == &c2 + 1); + assert (myu->s2.d == &d2 + 1); + + #pragma omp target map(to:myu->s3) \ + map(myu->s3->a, myu->s3->b, myu->s3->c, 
myu->s3->d) + { + myu->s3->a++; + myu->s3->b++; + myu->s3->c++; + myu->s3->d++; + } + + assert (myu->s3->a == 1); + assert (myu->s3->b == 1); + assert (myu->s3->c == &c3 + 1); + assert (myu->s3->d == &d3 + 1); + + #pragma omp target map(to:myu->s4) \ + map(myu->s4->a, myu->s4->b, myu->s4->c, myu->s4->d) + { + myu->s4->a++; + myu->s4->b++; + myu->s4->c++; + myu->s4->d++; + } + + assert (myu->s4->a == 1); + assert (myu->s4->b == 1); + assert (myu->s4->c == &c4 + 1); + assert (myu->s4->d == &d4 + 1); + + delete s4; + delete t4; + delete myu_ptr; +} +#endif + +#ifdef REF2PTR_COMPONENT_MEMBER_SLICE +void +ref2ptr_component_member_slice (void) +{ + INIT_ST; + U *myu_ptr = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + U *&myu = myu_ptr; + + #pragma omp target map(myu->t1.a[2:5]) + { + myu->t1.a[2]++; + } + + #pragma omp target map(myu->t1.b[2:5]) + { + myu->t1.b[2]++; + } + + #pragma omp target enter data map(to: myu->t1.c) + + #pragma omp target map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target exit data map(release: myu->t1.c) + + #pragma omp target enter data map(to: myu->t1.d) + + #pragma omp target map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + #pragma omp target exit data map(release: myu->t1.d) + + assert (myu->t1.a[2] == 1); + assert (myu->t1.b[2] == 3); + assert (myu->t1.c[2] == 3); + assert (myu->t1.d[2] == 3); + + #pragma omp target map(myu->t2.a[2:5]) + { + myu->t2.a[2]++; + } + + #pragma omp target map(myu->t2.b[2:5]) + { + myu->t2.b[2]++; + } + + #pragma omp target enter data map(to: myu->t2.c) + + #pragma omp target map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target exit data map(release: myu->t2.c) + + #pragma omp target enter data map(to: myu->t2.d) + + #pragma omp target map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + #pragma omp target exit data map(release: myu->t2.d) + + assert (myu->t2.a[2] == 1); + assert (myu->t2.b[2] == 3); + assert (myu->t2.c[2] == 3); + assert (myu->t2.d[2] == 3); + + #pragma omp target 
enter data map(to: myu->t3) + + #pragma omp target map(myu->t3->a[2:5]) + { + myu->t3->a[2]++; + } + + #pragma omp target map(myu->t3->b[2:5]) + { + myu->t3->b[2]++; + } + + #pragma omp target enter data map(to: myu->t3->c) + + #pragma omp target map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target exit data map(release: myu->t3->c) + + #pragma omp target enter data map(to: myu->t3->d) + + #pragma omp target map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + #pragma omp target exit data map(release: myu->t3, myu->t3->d) + + assert (myu->t3->a[2] == 1); + assert (myu->t3->b[2] == 3); + assert (myu->t3->c[2] == 3); + assert (myu->t3->d[2] == 3); + + #pragma omp target enter data map(to: myu->t4) + + #pragma omp target map(myu->t4->a[2:5]) + { + myu->t4->a[2]++; + } + + #pragma omp target map(myu->t4->b[2:5]) + { + myu->t4->b[2]++; + } + + #pragma omp target enter data map(to: myu->t4->c) + + #pragma omp target map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target exit data map(release: myu->t4->c) + + #pragma omp target enter data map(to: myu->t4->d) + + #pragma omp target map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + #pragma omp target exit data map(release: myu->t4, myu->t4->d) + + assert (myu->t4->a[2] == 1); + assert (myu->t4->b[2] == 3); + assert (myu->t4->c[2] == 3); + assert (myu->t4->d[2] == 3); + + delete s4; + delete t4; + delete myu_ptr; +} +#endif + +#ifdef REF2PTR_COMPONENT_MEMBER_SLICE_BASEPTR +void +ref2ptr_component_member_slice_baseptr (void) +{ + INIT_ST; + U *myu_ptr = new U(s1, t1, s2, t2, &s3, &t3, s4, t4); + U *&myu = myu_ptr; + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t1.c) map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target map(to: myu->t1.d) map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + assert (myu->t1.c[2] == 2); + assert (myu->t1.d[2] == 2); + + /* Explicitly-mapped 'myu'. 
*/ + #pragma omp target map(to: myu, myu->t1.c) map(myu->t1.c[2:5]) + { + myu->t1.c[2]++; + } + + #pragma omp target map(to: myu, myu->t1.d) map(myu->t1.d[2:5]) + { + myu->t1.d[2]++; + } + + assert (myu->t1.c[2] == 4); + assert (myu->t1.d[2] == 4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t2.c) map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target map(to: myu->t2.d) map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + assert (myu->t2.c[2] == 2); + assert (myu->t2.d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t2.c) map(myu->t2.c[2:5]) + { + myu->t2.c[2]++; + } + + #pragma omp target map(to: myu, myu->t2.d) map(myu->t2.d[2:5]) + { + myu->t2.d[2]++; + } + + assert (myu->t2.c[2] == 4); + assert (myu->t2.d[2] == 4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t3, myu->t3->c) map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target map(to: myu->t3, myu->t3->d) map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + assert (myu->t3->c[2] == 2); + assert (myu->t3->d[2] == 2); + + /* Explicitly-mapped 'myu'. */ + #pragma omp target map(to: myu, myu->t3, myu->t3->c) map(myu->t3->c[2:5]) + { + myu->t3->c[2]++; + } + + #pragma omp target map(to: myu, myu->t3, myu->t3->d) map(myu->t3->d[2:5]) + { + myu->t3->d[2]++; + } + + assert (myu->t3->c[2] == 4); + assert (myu->t3->d[2] == 4); + + /* Implicit firstprivate 'myu'. */ + #pragma omp target map(to: myu->t4, myu->t4->c) map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target map(to: myu->t4, myu->t4->d) map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + assert (myu->t4->c[2] == 2); + assert (myu->t4->d[2] == 2); + + /* Explicitly-mapped 'myu'. 
*/ + #pragma omp target map(to: myu, myu->t4, myu->t4->c) map(myu->t4->c[2:5]) + { + myu->t4->c[2]++; + } + + #pragma omp target map(to: myu, myu->t4, myu->t4->d) map(myu->t4->d[2:5]) + { + myu->t4->d[2]++; + } + + assert (myu->t4->c[2] == 4); + assert (myu->t4->d[2] == 4); + + delete s4; + delete t4; + delete myu_ptr; +} +#endif + +int main (int argc, char *argv[]) +{ +#ifdef MAP_DECLS + map_decls (); +#endif + +#ifdef NONREF_DECL_BASE + nonref_decl_base (); +#endif +#ifdef REF_DECL_BASE + ref_decl_base (); +#endif +#ifdef PTR_DECL_BASE + ptr_decl_base (); +#endif +#ifdef REF2PTR_DECL_BASE + ref2ptr_decl_base (); +#endif + +#ifdef ARRAY_DECL_BASE + array_decl_base (); +#endif +#ifdef REF2ARRAY_DECL_BASE + ref2array_decl_base (); +#endif +#ifdef PTR_OFFSET_DECL_BASE + ptr_offset_decl_base (); +#endif +#ifdef REF2PTR_OFFSET_DECL_BASE + ref2ptr_offset_decl_base (); +#endif + +#ifdef MAP_SECTIONS + map_sections (); +#endif + +#ifdef NONREF_DECL_MEMBER_SLICE + nonref_decl_member_slice (); +#endif +#ifdef NONREF_DECL_MEMBER_SLICE_BASEPTR + nonref_decl_member_slice_baseptr (); +#endif +#ifdef REF_DECL_MEMBER_SLICE + ref_decl_member_slice (); +#endif +#ifdef REF_DECL_MEMBER_SLICE_BASEPTR + ref_decl_member_slice_baseptr (); +#endif +#ifdef PTR_DECL_MEMBER_SLICE + ptr_decl_member_slice (); +#endif +#ifdef PTR_DECL_MEMBER_SLICE_BASEPTR + ptr_decl_member_slice_baseptr (); +#endif +#ifdef REF2PTR_DECL_MEMBER_SLICE + ref2ptr_decl_member_slice (); +#endif +#ifdef REF2PTR_DECL_MEMBER_SLICE_BASEPTR + ref2ptr_decl_member_slice_baseptr (); +#endif + +#ifdef ARRAY_DECL_MEMBER_SLICE + array_decl_member_slice (); +#endif +#ifdef ARRAY_DECL_MEMBER_SLICE_BASEPTR + array_decl_member_slice_baseptr (); +#endif +#ifdef REF2ARRAY_DECL_MEMBER_SLICE + ref2array_decl_member_slice (); +#endif +#ifdef REF2ARRAY_DECL_MEMBER_SLICE_BASEPTR + ref2array_decl_member_slice_baseptr (); +#endif +#ifdef PTR_OFFSET_DECL_MEMBER_SLICE + ptr_offset_decl_member_slice (); +#endif +#ifdef 
PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + ptr_offset_decl_member_slice_baseptr (); +#endif +#ifdef REF2PTR_OFFSET_DECL_MEMBER_SLICE + ref2ptr_offset_decl_member_slice (); +#endif +#ifdef REF2PTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + ref2ptr_offset_decl_member_slice_baseptr (); +#endif + +#ifdef PTRARRAY_DECL_MEMBER_SLICE + ptrarray_decl_member_slice (); +#endif +#ifdef PTRARRAY_DECL_MEMBER_SLICE_BASEPTR + ptrarray_decl_member_slice_baseptr (); +#endif +#ifdef REF2PTRARRAY_DECL_MEMBER_SLICE + ref2ptrarray_decl_member_slice (); +#endif +#ifdef REF2PTRARRAY_DECL_MEMBER_SLICE_BASEPTR + ref2ptrarray_decl_member_slice_baseptr (); +#endif +#ifdef PTRPTR_OFFSET_DECL_MEMBER_SLICE + ptrptr_offset_decl_member_slice (); +#endif +#ifdef PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + ptrptr_offset_decl_member_slice_baseptr (); +#endif +#ifdef REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE + ref2ptrptr_offset_decl_member_slice (); +#endif +#ifdef REF2PTRPTR_OFFSET_DECL_MEMBER_SLICE_BASEPTR + ref2ptrptr_offset_decl_member_slice_baseptr (); +#endif + +#ifdef NONREF_COMPONENT_BASE + nonref_component_base (); +#endif +#ifdef NONREF_COMPONENT_MEMBER_SLICE + nonref_component_member_slice (); +#endif +#ifdef NONREF_COMPONENT_MEMBER_SLICE_BASEPTR + nonref_component_member_slice_baseptr (); +#endif + +#ifdef REF_COMPONENT_BASE + ref_component_base (); +#endif +#ifdef REF_COMPONENT_MEMBER_SLICE + ref_component_member_slice (); +#endif +#ifdef REF_COMPONENT_MEMBER_SLICE_BASEPTR + ref_component_member_slice_baseptr (); +#endif + +#ifdef PTR_COMPONENT_BASE + ptr_component_base (); +#endif +#ifdef PTR_COMPONENT_MEMBER_SLICE + ptr_component_member_slice (); +#endif +#ifdef PTR_COMPONENT_MEMBER_SLICE_BASEPTR + ptr_component_member_slice_baseptr (); +#endif + +#ifdef REF2PTR_COMPONENT_BASE + ref2ptr_component_base (); +#endif +#ifdef REF2PTR_COMPONENT_MEMBER_SLICE + ref2ptr_component_member_slice (); +#endif +#ifdef REF2PTR_COMPONENT_MEMBER_SLICE_BASEPTR + ref2ptr_component_member_slice_baseptr (); +#endif + + return 
0; +} diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-5.C b/libgomp/testsuite/libgomp.c++/baseptrs-5.C new file mode 100644 index 000000000000..16bdfff3ae08 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/baseptrs-5.C @@ -0,0 +1,62 @@ +// { dg-do run } + +#include +#include + +struct sa +{ + int *ptr; + int *ptr2; +}; + +struct sb +{ + int arr[10]; +}; + +struct scp +{ + sa *&a; + sb *&b; + scp (sa *&my_a, sb *&my_b) : a(my_a), b(my_b) {} +}; + +int +main () +{ + sa *my_a = new sa; + sb *my_b = new sb; + + my_a->ptr = new int[10]; + my_a->ptr2 = new int[10]; + scp *my_c = new scp(my_a, my_b); + + memset (my_c->a->ptr, 0, sizeof (int) * 10); + memset (my_c->a->ptr2, 0, sizeof (int) * 10); + + #pragma omp target map (my_c->a, \ + my_c->a->ptr, my_c->a->ptr[:10], \ + my_c->a->ptr2, my_c->a->ptr2[:10]) + { + for (int i = 0; i < 10; i++) + { + my_c->a->ptr[i] = i; + my_c->a->ptr2[i] = i * 2; + } + } + + for (int i = 0; i < 10; i++) + { + assert (my_c->a->ptr[i] == i); + assert (my_c->a->ptr2[i] == i * 2); + } + + delete[] my_a->ptr; + delete[] my_a->ptr2; + delete my_a; + delete my_b; + delete my_c; + + return 0; +} + diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-8.C b/libgomp/testsuite/libgomp.c++/baseptrs-8.C new file mode 100644 index 000000000000..f99918185513 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/baseptrs-8.C @@ -0,0 +1,70 @@ +/* { dg-do run } */ + +#include +#include + +#define N 1024 +#define M 64 + +int main (void) +{ + int *a_orig[N]; + int *(&a)[N] = a_orig; + + for (int i = 0; i < N; i++) + a[i] = (int *) calloc (M, sizeof (int)); + + /* 'target enter data'/'target exit data' with array of pointers. 
*/ +#pragma omp target enter data map(alloc: a[0:N]) + + for (int i = 0; i < N; i++) + { +#pragma omp target enter data map(to: a[i][0:M]) + } + +#pragma omp target map(alloc: a) + { + for (int i = 0; i < N; i++) + for (int j = 0; j < M; j++) + a[i][j] = i + j; + } + +for (int i = 0; i < N; i++) + { +#pragma omp target exit data map(release: a[i]) map(from: a[i][0:M]) + } + +#pragma omp target exit data map(release: a, a[0:N]) + + /* 'target data' with array of pointers. */ +#pragma omp target data map(alloc: a[0:N]) + { +#pragma omp target data map(tofrom: a[5][0:M]) + { +#pragma omp target map(alloc: a) + { + for (int i = 0; i < M; i++) + a[5][i]++; + } + } + } + + /* 'target' with array of pointers. */ +#pragma omp target data map(alloc: a[0:N]) + { +#pragma omp target map(tofrom: a[7][0:M]) + { + for (int i = 0; i < M; i++) + a[7][i] += 2; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < M; j++) + assert (a[i][j] == i + j + (i == 5) + 2 * (i == 7)); + + for (int i = 0; i < N; i++) + free (a[i]); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/baseptrs-9.C b/libgomp/testsuite/libgomp.c++/baseptrs-9.C new file mode 100644 index 000000000000..95e7eebb0ed8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/baseptrs-9.C @@ -0,0 +1,57 @@ +/* { dg-do run } */ + +#include +#include + +int main (void) +{ + int **a_orig,i,j,n; + int **&a = a_orig; + + j = 3; + n = 12; + + a = (int **) calloc (32, sizeof (int *)); + for (int x = 0; x < 32; x++) + a[x] = (int *) calloc (32, sizeof (int)); + + for (int i = 2; i < 32; i++) + { + #pragma omp target enter data map(a, a[2:30]) + #pragma omp target enter data map(a[i][j:n]) + #pragma omp target map(alloc: a) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + #pragma omp target exit data map(a[i][j:n]) + + #pragma omp target data map(a, a[i][j:n]) + { + #pragma omp target map(alloc: a) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + } + #pragma omp target exit data map(a, a[2:30]) + + 
#pragma omp target data map(a, a[2:30]) + { + #pragma omp target map(a[i][j:n]) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + } + } + + for (int i = 0; i < 32; i++) + for (int j = 0; j < 32; j++) + if (i >= 2 && j >= 3 && j < 15) + assert (a[i][j] == 3); + else + assert (a[i][j] == 0); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/class-array-1.C b/libgomp/testsuite/libgomp.c++/class-array-1.C new file mode 100644 index 000000000000..d8d3f7f1f99a --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/class-array-1.C @@ -0,0 +1,59 @@ +/* { dg-do run } */ + +#include + +#define N 1024 + +class M { + int array[N]; + +public: + M () + { + for (int i = 0; i < N; i++) + array[i] = 0; + } + + void incr_with_this (int c) + { +#pragma omp target map(this->array[:N]) + for (int i = 0; i < N; i++) + array[i] += c; + } + + void incr_without_this (int c) + { +#pragma omp target map(array[:N]) + for (int i = 0; i < N; i++) + array[i] += c; + } + + void incr_implicit (int c) + { +#pragma omp target + for (int i = 0; i < N; i++) + array[i] += c; + } + + void check (int c) + { + for (int i = 0; i < N; i++) + assert (array[i] == c); + } +}; + +int +main (int argc, char *argv[]) +{ + M m; + + m.check (0); + m.incr_with_this (3); + m.check (3); + m.incr_without_this (5); + m.check (8); + m.incr_implicit (2); + m.check (10); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/ref-mapping-1.C b/libgomp/testsuite/libgomp.c++/ref-mapping-1.C new file mode 100644 index 000000000000..9aa232f3f672 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/ref-mapping-1.C @@ -0,0 +1,80 @@ +/* { dg-do run } */ + +#include + +void test_ref () +{ + int c_orig = 5; + int &c = c_orig; + +#pragma omp target map(tofrom: c) + { + c++; + } + + assert (c == 6); +} + +void test_ref_to_ptr () +{ + int val = 5; + int *ptr_orig = &val; + int *&ptr_ref = ptr_orig; + +#pragma omp target map(tofrom: ptr_ref[0]) + { + (*ptr_ref)++; + } + + assert (val == 6); +} + +void test_ref_to_array () 
+{ + int arr[1]; + int (&arr_ref)[1] = arr; + + arr_ref[0] = 5; + +#pragma omp target map(tofrom: arr_ref[0:1]) + { + arr_ref[0]++; + } + + assert (arr_ref[0] == 6); + +#pragma omp target map(tofrom: arr_ref[0]) + { + arr_ref[0]++; + } + + assert (arr_ref[0] == 7); +} + +void test_ref_to_ptr_array () +{ + int *arr[1]; + int *(&arr_ref)[1] = arr; + int val = 5; + + arr_ref[0] = &val; + +#pragma omp target data map(alloc: arr_ref, arr_ref[0]) + { +#pragma omp target map(tofrom: arr_ref[0][0:1]) + { + arr_ref[0][0]++; + } + } + + assert (arr_ref[0][0] == 6); +} + +int main () +{ + test_ref (); + test_ref_to_ptr (); + test_ref_to_array (); + test_ref_to_ptr_array (); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-48.C b/libgomp/testsuite/libgomp.c++/target-48.C new file mode 100644 index 000000000000..db171d2f5a3d --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-48.C @@ -0,0 +1,32 @@ +#include +#include + +struct s { + int (&a)[10]; + s(int (&a0)[10]) : a(a0) {} +}; + +int +main (int argc, char *argv[]) +{ + int la[10]; + s v(la); + + memset (la, 0, sizeof la); + + #pragma omp target enter data map(to: v) + + /* This mapping must use GOMP_MAP_ATTACH_DETACH not GOMP_MAP_ALWAYS_POINTER, + else the host reference v.a will be corrupted on copy-out. 
*/ + + #pragma omp target map(v.a[0:10]) + { + v.a[5]++; + } + + #pragma omp target exit data map(from: v) + + assert (v.a[5] == 1); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-49.C b/libgomp/testsuite/libgomp.c++/target-49.C new file mode 100644 index 000000000000..b0746b40c096 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-49.C @@ -0,0 +1,37 @@ +#include +#include + +struct s { + int (&a)[10]; + s(int (&a0)[10]) : a(a0) {} +}; + +int +main (int argc, char *argv[]) +{ + int la[10]; + s v_real(la); + s *v = &v_real; + + memset (la, 0, sizeof la); + + #pragma omp target enter data map(to: v) + + /* Copying the whole v[0] here DOES NOT WORK yet because the reference 'a' is + not copied "as if" it was mapped explicitly as a member. FIXME. */ + #pragma omp target enter data map(to: v[0]) + + #pragma omp target + { + v->a[5]++; + } + + #pragma omp target exit data map(release: v[0]) + #pragma omp target exit data map(from: v) + + assert (v->a[5] == 1); + + return 0; +} + +// { dg-xfail-run-if "TODO" { offload_device_nonshared_as } } diff --git a/libgomp/testsuite/libgomp.c++/target-exit-data-reftoptr-1.C b/libgomp/testsuite/libgomp.c++/target-exit-data-reftoptr-1.C new file mode 100644 index 000000000000..1a66fcb0f7fb --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/target-exit-data-reftoptr-1.C @@ -0,0 +1,34 @@ +#include +#include + +struct S { + int *&ptr; + S(int *&ptr_) : ptr(ptr_) { } +}; + +int main() +{ + int *orig = new int[100]; + + memset (orig, 0, sizeof (int) * 100); + + S svar(orig); + +#pragma omp target enter data map(to: svar.ptr, svar.ptr[10:80]) + +#pragma omp target + { + for (int i = 10; i < 90; i++) + svar.ptr[i]++; + } + +#pragma omp target exit data map(release: svar.ptr) map(from: svar.ptr[10:80]) + + for (int i = 0; i < 100; i++) + assert (i >= 10 && i < 90 && svar.ptr[i] == 1 + || svar.ptr[i] == 0); + + delete orig; + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c++/target-lambda-1.C 
b/libgomp/testsuite/libgomp.c++/target-lambda-1.C index c5acbb8bf30c..fa882d09800f 100644 --- a/libgomp/testsuite/libgomp.c++/target-lambda-1.C +++ b/libgomp/testsuite/libgomp.c++/target-lambda-1.C @@ -2,6 +2,7 @@ #include #include +#include template void @@ -22,9 +23,11 @@ struct S auto fn = [=](void) -> bool { bool mapped; + uintptr_t hostptr = (uintptr_t) ptr; + uintptr_t hostiptr = (uintptr_t) iptr; #pragma omp target map(from:mapped) { - mapped = (ptr != NULL && iptr != NULL); + mapped = (ptr != (int*) hostptr && iptr != (int*) hostiptr); if (mapped) { for (int i = 0; i < len; i++) diff --git a/libgomp/testsuite/libgomp.c++/target-this-3.C b/libgomp/testsuite/libgomp.c++/target-this-3.C index 6049ba8e2013..986582430e24 100644 --- a/libgomp/testsuite/libgomp.c++/target-this-3.C +++ b/libgomp/testsuite/libgomp.c++/target-this-3.C @@ -2,6 +2,7 @@ #include #include +#include extern "C" void abort (); struct S @@ -15,12 +16,13 @@ struct S bool set_ptr (int n) { bool mapped; + uintptr_t hostptr = (uintptr_t) ptr; #pragma omp target map(from:mapped) { - if (ptr != NULL) + if (ptr != (int *) hostptr) for (int i = 0; i < ptr_len; i++) ptr[i] = n; - mapped = (ptr != NULL); + mapped = (ptr != (int *) hostptr); } return mapped; } @@ -28,12 +30,13 @@ struct S bool set_refptr (int n) { bool mapped; + uintptr_t hostrefptr = (uintptr_t) refptr; #pragma omp target map(from:mapped) { - if (refptr != NULL) + if (refptr != (int *) hostrefptr) for (int i = 0; i < refptr_len; i++) refptr[i] = n; - mapped = (refptr != NULL); + mapped = (refptr != (int *) hostrefptr); } return mapped; } diff --git a/libgomp/testsuite/libgomp.c++/target-this-4.C b/libgomp/testsuite/libgomp.c++/target-this-4.C index f0237c9b6b83..b2a593d03afa 100644 --- a/libgomp/testsuite/libgomp.c++/target-this-4.C +++ b/libgomp/testsuite/libgomp.c++/target-this-4.C @@ -4,6 +4,7 @@ #include #include +#include struct T { @@ -18,12 +19,13 @@ struct T auto fn = [=](void) -> bool { bool mapped; + uintptr_t hostptr = 
(uintptr_t) ptr; #pragma omp target map(from:mapped) { - if (ptr) + if (ptr != (int *) hostptr) for (int i = 0; i < ptr_len; i++) ptr[i] = n; - mapped = (ptr != NULL); + mapped = (ptr != (int *) hostptr); } return mapped; }; @@ -35,12 +37,13 @@ struct T auto fn = [=](void) -> bool { bool mapped; + uintptr_t hostrefptr = (uintptr_t) refptr; #pragma omp target map(from:mapped) { - if (refptr) + if (refptr != (int *) hostrefptr) for (int i = 0; i < refptr_len; i++) refptr[i] = n; - mapped = (refptr != NULL); + mapped = (refptr != (int *) hostrefptr); } return mapped; }; diff --git a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-1.c b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-1.c new file mode 100644 index 000000000000..073615625b7f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-1.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +#define N 32 + +typedef struct { + int x2[10][N]; +} x1type; + +typedef struct { + x1type x1[10]; +} p2type; + +typedef struct { + p2type *p2; +} p1type; + +typedef struct { + p1type *p1; +} x0type; + +typedef struct { + x0type x0[10]; +} p0type; + +int main(int argc, char *argv[]) +{ + p0type *p0; + int k1 = 0, k2 = 0, k3 = 0, n = N; + + p0 = (p0type *) malloc (sizeof *p0); + p0->x0[0].p1 = (p1type *) malloc (sizeof *p0->x0[0].p1); + p0->x0[0].p1->p2 = (p2type *) malloc (sizeof *p0->x0[0].p1->p2); + memset (p0->x0[0].p1->p2, 0, sizeof *p0->x0[0].p1->p2); + +#pragma omp target map(tofrom: p0->x0[k1].p1->p2[k2].x1[k3].x2[4][0:n]) \ + map(to: p0->x0[k1].p1, p0->x0[k1].p1->p2) \ + map(to: p0->x0[k1].p1[0]) + { + for (int i = 0; i < n; i++) + p0->x0[k1].p1->p2[k2].x1[k3].x2[4][i] = i; + } + + for (int i = 0; i < n; i++) + assert (i == p0->x0[k1].p1->p2[k2].x1[k3].x2[4][i]); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c new file mode 100644 index 000000000000..e335d7da9669 --- /dev/null +++ 
b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-2.c @@ -0,0 +1,70 @@ +#include +#include +#include + +#define N 32 + +typedef struct { + int arr[N]; + int *ptr; +} sc; + +typedef struct { + sc *c; +} sb; + +typedef struct { + sb *b; + sc *c; +} sa; + +int main (int argc, char *argv[]) +{ + sa *p; + + p = (sa *) malloc (sizeof *p); + p->b = (sb *) malloc (sizeof *p->b); + p->b->c = (sc *) malloc (sizeof *p->b->c); + p->c = (sc *) malloc (sizeof *p->c); + p->b->c->ptr = (int *) malloc (N * sizeof (int)); + p->c->ptr = (int *) malloc (N * sizeof (int)); + + for (int i = 0; i < N; i++) + { + p->b->c->ptr[i] = 0; + p->c->ptr[i] = 0; + p->b->c->arr[i] = 0; + p->c->arr[i] = 0; + } + +#pragma omp target map(to: p->b, p->b[0], p->c, p->c[0], p->b->c, p->b->c[0]) \ + map(to: p->b->c->ptr, p->c->ptr) \ + map(tofrom: p->b->c->ptr[:N], p->c->ptr[:N]) + { + for (int i = 0; i < N; i++) + { + p->b->c->ptr[i] = i; + p->c->ptr[i] = i * 2; + } + } + +#pragma omp target map(to: p->b, p->b[0], p->b->c, p->c) \ + map(tofrom: p->c[0], p->b->c[0]) + { + for (int i = 0; i < N; i++) + { + p->b->c->arr[i] = i * 3; + p->c->arr[i] = i * 4; + } + } + + for (int i = 0; i < N; i++) + { + assert (p->b->c->ptr[i] == i); + assert (p->c->ptr[i] == i * 2); + assert (p->b->c->arr[i] == i * 3); + assert (p->c->arr[i] == i * 4); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-6.c b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-6.c new file mode 100644 index 000000000000..4b6e237471af --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-6.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ + +#include +#include + +#define N 1024 +#define M 64 + +int main (void) +{ + int *a[N]; + + for (int i = 0; i < N; i++) + a[i] = (int *) calloc (M, sizeof (int)); + + /* 'target enter data'/'target exit data' with array of pointers. 
*/ +#pragma omp target enter data map(alloc: a[0:N]) + + for (int i = 0; i < N; i++) + { +#pragma omp target enter data map(to: a[i][0:M]) + } + +#pragma omp target map(alloc: a) + { + for (int i = 0; i < N; i++) + for (int j = 0; j < M; j++) + a[i][j] = i + j; + } + +for (int i = 0; i < N; i++) + { +#pragma omp target exit data map(release: a[i]) map(from: a[i][0:M]) + } + +#pragma omp target exit data map(release: a, a[0:N]) + + /* 'target data' with array of pointers. */ +#pragma omp target data map(alloc: a[0:N]) + { +#pragma omp target data map(tofrom: a[5][0:M]) + { +#pragma omp target map(alloc: a) + { + for (int i = 0; i < M; i++) + a[5][i]++; + } + } + } + + /* 'target' with array of pointers. */ +#pragma omp target data map(alloc: a[0:N]) + { +#pragma omp target map(tofrom: a[7][0:M]) + { + for (int i = 0; i < M; i++) + a[7][i] += 2; + } + } + + for (int i = 0; i < N; i++) + for (int j = 0; j < M; j++) + assert (a[i][j] == i + j + (i == 5) + 2 * (i == 7)); + + for (int i = 0; i < N; i++) + free (a[i]); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/baseptrs-7.c b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-7.c new file mode 100644 index 000000000000..9c6710e4e5b8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/baseptrs-7.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ + +#include +#include + +int main (void) +{ + int **a,i,j,n; + + j = 3; + n = 12; + + a = (int **) calloc (32, sizeof (int *)); + for (int x = 0; x < 32; x++) + a[x] = (int *) calloc (32, sizeof (int)); + + for (int i = 2; i < 32; i++) + { + #pragma omp target enter data map(a, a[2:30]) + #pragma omp target enter data map(a[i][j:n]) + #pragma omp target map(alloc: a) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + #pragma omp target exit data map(a[i][j:n]) + + #pragma omp target data map(a, a[i][j:n]) + { + #pragma omp target map(alloc: a) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + } + #pragma omp target exit data map(a, a[2:30]) + + 
#pragma omp target data map(a, a[0:32]) + { + #pragma omp target map(a[i][j:n]) + { + for (int x = j; x < j + n; x++) + a[i][x]++; + } + } + } + + for (int i = 0; i < 32; i++) + for (int j = 0; j < 32; j++) + if (i >= 2 && j >= 3 && j < 15) + assert (a[i][j] == 3); + else + assert (a[i][j] == 0); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c new file mode 100644 index 000000000000..889a4a253ae8 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/ptr-attach-2.c @@ -0,0 +1,60 @@ +#include + +struct blk { int x, y; }; +struct L +{ + #define N 10 + struct { + int num_blocks[N]; + struct blk * blocks[N]; + } m; +}; + +void foo (struct L *l) +{ + for (int i = 0; i < N; i++) + { + l->m.blocks[i] = (struct blk *) malloc (sizeof (struct blk) * N); + l->m.num_blocks[i] = N; + } + + #pragma omp target enter data map(to:l[:1]) + for (int i = 0; i < N; i++) + { + #pragma omp target enter data map(to:l->m.blocks[i][:l->m.num_blocks[i]]) + } + + #pragma omp target + { + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + l->m.blocks[i][j].x = i + j; + l->m.blocks[i][j].y = i * j; + } + } + + for (int i = 0; i < N; i++) + { + #pragma omp target exit data map(from:l->m.blocks[i][:l->m.num_blocks[i]]) + } + #pragma omp target exit data map(from:l[:1]) + + + for (int i = 0; i < N; i++) + for (int j = 0; j < N; j++) + { + if (l->m.blocks[i][j].x != i + j) + abort (); + if (l->m.blocks[i][j].y != i * j) + abort (); + } + +} + +int main (void) +{ + struct L l; + foo (&l); + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c index 974a9786c3f6..4c49cd091c38 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c +++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-2.c @@ -42,5 +42,7 @@ main (void) #pragma omp target exit data map(from:a.ptr, 
a.ptr[:N]) + free (a.ptr); + return 0; } diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c new file mode 100644 index 000000000000..81a7752685c5 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-implicit-map-5.c @@ -0,0 +1,50 @@ +#include + +#define N 10 + +struct S +{ + int a, b; + int *ptr; + int c, d; +}; + +int +main (void) +{ + struct S a; + a.ptr = (int *) malloc (sizeof (int) * N); + + for (int i = 0; i < N; i++) + a.ptr[i] = 0; + + #pragma omp target enter data map(to: a.ptr) + #pragma omp target enter data map(to: a.ptr[:N]) + + #pragma omp target + for (int i = 0; i < N; i++) + a.ptr[i] += 1; + + #pragma omp target update from(a.ptr[:N]) + + for (int i = 0; i < N; i++) + if (a.ptr[i] != 1) + abort (); + + #pragma omp target map(a.ptr[:N]) + for (int i = 0; i < N; i++) + a.ptr[i] += 1; + + #pragma omp target update from(a.ptr[:N]) + + for (int i = 0; i < N; i++) + if (a.ptr[i] != 2) + abort (); + + #pragma omp target exit data map(release: a.ptr[:N]) + #pragma omp target exit data map(release: a.ptr) + + free (a.ptr); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c b/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c new file mode 100644 index 000000000000..1ec0c9a0d5f9 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-map-zlas-1.c @@ -0,0 +1,36 @@ +#include + +#define N 10 + +struct S +{ + int a, b; + int *ptr; + int c, d; +}; + +int +main (void) +{ + struct S a; + a.ptr = (int *) malloc (sizeof (int) * N); + + for (int i = 0; i < N; i++) + a.ptr[i] = 0; + + #pragma omp target enter data map(to: a.ptr[:N]) + + #pragma omp target map(a, a.ptr[:0]) + for (int i = 0; i < N; i++) + a.ptr[i] += 1; + + #pragma omp target exit data map(from: a.ptr[:N]) + + for (int i = 0; i < N; i++) + if (a.ptr[i] != 1) + abort (); + + free (a.ptr); + + return 0; +} diff --git 
a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 index 58550c79d69c..7f3d8174f97b 100644 --- a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 @@ -409,3 +409,6 @@ contains end subroutine eight end program main + +! Fixed by the "Fortran pointers and member mappings" patch +! { dg-xfail-run-if TODO { offload_device_nonshared_as } } diff --git a/libgomp/testsuite/libgomp.fortran/target-enter-data-6.f90 b/libgomp/testsuite/libgomp.fortran/target-enter-data-6.f90 index 80d30edbfc56..b55d0b268d4a 100644 --- a/libgomp/testsuite/libgomp.fortran/target-enter-data-6.f90 +++ b/libgomp/testsuite/libgomp.fortran/target-enter-data-6.f90 @@ -3,6 +3,16 @@ ! - arrays with array descriptors ! For those, the array descriptor / string length must be mapped with 'to:' +! This test fails without the following additional patches: +! +! "OpenMP: Pointers and member mappings": +! https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627898.html +! +! "OpenMP/OpenACC: Reorganise OMP map clause handling in gimplify.cc": +! https://gcc.gnu.org/pipermail/gcc-patches/2023-August/627900.html +! +! { dg-xfail-run-if TODO { offload_device_nonshared_as } } + program main implicit none From 7362543f00c9d3359d9377b800080fb421414ee5 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Mon, 17 Oct 2022 16:44:31 +0000 Subject: [PATCH 289/311] OpenMP: Pointers and member mappings This patch changes the mapping node arrangement used for array components of derived types in order to accommodate for changes made in the previous patch, particularly the use of "GOMP_MAP_ATTACH_DETACH" for pointer-typed derived-type members instead of "GOMP_MAP_ALWAYS_POINTER". We change the mapping nodes used for a derived-type mapping like this: type T integer, pointer, dimension(:) :: arrptr end type T type(T) :: tvar [...] 
!$omp target map(tofrom: tvar%arrptr) So that the nodes used look like this: 1) map(to: tvar%arrptr) --> GOMP_MAP_TO [implicit] *tvar%arrptr%data (the array data) GOMP_MAP_TO_PSET tvar%arrptr (the descriptor) GOMP_MAP_ATTACH_DETACH tvar%arrptr%data 2) map(tofrom: tvar%arrptr(3:8)) --> GOMP_MAP_TOFROM *tvar%arrptr%data(3) (size 8-3+1, etc.) GOMP_MAP_TO_PSET tvar%arrptr GOMP_MAP_ATTACH_DETACH tvar%arrptr%data (bias 3, etc.) In this case, we can determine in the front-end that the whole-array/pointer mapping (1) is only needed to map the pointer -- so we drop it entirely. (Note also that we set -- early -- the OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P flag for whole-array-via-pointer mappings. See below.) In the middle end, we process mappings using the struct sibling-list handling machinery by moving the "GOMP_MAP_TO_PSET" node from the middle of the group of three mapping nodes to the proper sorted position after the GOMP_MAP_STRUCT mapping: GOMP_MAP_STRUCT tvar (len: 1) GOMP_MAP_TO_PSET tvar%arrptr (size: 64, etc.) <--. moved here [...] | GOMP_MAP_TOFROM *tvar%arrptr%data(3) ___| GOMP_MAP_ATTACH_DETACH tvar%arrptr%data In another case, if we have an array of derived-type values "dtarr", and mappings like: i = 1 j = 1 map(to: dtarr(i)%arrptr) map(tofrom: dtarr(j)%arrptr(3:8)) We still map the same way, but this time we cannot prove that the base expressions "dtarr(i)" and "dtarr(j)" are the same in the front-end. So we keep both mappings, but we move the "[implicit]" mapping of the full-array reference to the end of the clause list in gimplify.cc (by adjusting the topological sorting algorithm): GOMP_MAP_STRUCT dtarr (len: 2) GOMP_MAP_TO_PSET dtarr(i)%arrptr GOMP_MAP_TO_PSET dtarr(j)%arrptr [...] 
GOMP_MAP_TOFROM *dtarr(j)%arrptr%data(3) (size: 8-3+1) GOMP_MAP_ATTACH_DETACH dtarr(j)%arrptr%data GOMP_MAP_TO [implicit] *dtarr(i)%arrptr%data(1) (size: whole array) GOMP_MAP_ATTACH_DETACH dtarr(i)%arrptr%data Always moving "[implicit]" full-array mappings after array-section mappings (without that bit set) means that we'll avoid copying the whole array unnecessarily -- even in cases where we can't prove that the arrays are the same. The patch also fixes some bugs with "enter data" and "exit data" directives with this new mapping arrangement. Also now if you have mappings like this: !$omp target enter data map(to: dv, dv%arr(1:20)) The whole of the derived-type variable "dv" is mapped, so the GOMP_MAP_TO_PSET for the array-section mapping can be dropped: GOMP_MAP_TO dv GOMP_MAP_TO *dv%arr%data GOMP_MAP_TO_PSET dv%arr <-- deleted (array section mapping) GOMP_MAP_ATTACH_DETACH dv%arr%data To accommodate recent changes to mapping nodes made by Tobias, this version of the patch avoids using GOMP_MAP_TO_PSET for "exit data" directives, in favour of using the "correct" GOMP_MAP_RELEASE/GOMP_MAP_DELETE kinds during early expansion. A new flag is introduced so the middle-end knows when the latter two kinds are being used specifically for an array descriptor. This version of the patch fixes "omp target exit data" handling for GOMP_MAP_DELETE, and adds pretty-printing dump output for the OMP_CLAUSE_RELEASE_DESCRIPTOR flag (for a little extra clarity). Also I noticed the handling of descriptors on *OpenACC* exit-data directives was inconsistent, so I've made those use GOMP_MAP_RELEASE/GOMP_MAP_DELETE with the new flag in the same way as OpenMP too. In the end it doesn't actually matter to the runtime, which handles GOMP_MAP_RELEASE/GOMP_MAP_DELETE/GOMP_MAP_TO_PSET for array descriptors on OpenACC "exit data" directives the same, anyway, and doing it this way in the FE avoids needless divergence. 
I've added a couple of new tests (gomp/target-enter-exit-data.f90 and goacc/enter-exit-data-2.f90). 2023-12-07 Julian Brown gcc/fortran/ * dependency.cc (gfc_omp_expr_prefix_same): New function. * dependency.h (gfc_omp_expr_prefix_same): Add prototype. * gfortran.h (gfc_omp_namelist): Add "duplicate_of" field to "u2" union. * trans-openmp.cc (dependency.h): Include. (gfc_trans_omp_array_section): Adjust mapping node arrangement for array descriptors. Use GOMP_MAP_TO_PSET or GOMP_MAP_RELEASE/GOMP_MAP_DELETE with the OMP_CLAUSE_RELEASE_DESCRIPTOR flag set. (get_symbol_rooted_namelist): New function. (gfc_trans_omp_clauses): Check subcomponent and subarray/element accesses elsewhere in the clause list for pointers to derived types or array descriptors, and adjust or drop mapping nodes appropriately. Adjust for changes to mapping node arrangement. (gfc_trans_oacc_executable_directive): Pass code op through. gcc/ * gimplify.cc (omp_map_clause_descriptor_p): New function. (build_omp_struct_comp_nodes, omp_get_attachment, omp_group_last, omp_group_base): Use above function. (omp_tsort_mapping_groups): Process nodes that have OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P set after those that don't. Add enter_exit_data parameter. (omp_resolve_clause_dependencies): Remove GOMP_MAP_TO_PSET mappings if we're mapping the whole containing derived-type variable. (omp_accumulate_sibling_list): Adjust GOMP_MAP_TO_PSET handling. Remove GOMP_MAP_ALWAYS_POINTER handling. (gimplify_scan_omp_clauses): Pass enter_exit argument to omp_tsort_mapping_groups. Don't adjust/remove GOMP_MAP_TO_PSET mappings for derived-type components here. * tree.h (OMP_CLAUSE_RELEASE_DESCRIPTOR): New macro. * tree-pretty-print.cc (dump_omp_clause): Show OMP_CLAUSE_RELEASE_DESCRIPTOR in dump output (with GOMP_MAP_TO_PSET-like syntax). gcc/testsuite/ * gfortran.dg/goacc/enter-exit-data-2.f90: New test. * gfortran.dg/goacc/finalize-1.f: Adjust scan output. * gfortran.dg/gomp/map-9.f90: Adjust scan output. 
* gfortran.dg/gomp/map-subarray-2.f90: New test. * gfortran.dg/gomp/map-subarray.f90: New test. * gfortran.dg/gomp/target-enter-exit-data.f90: New test. libgomp/ * testsuite/libgomp.fortran/map-subarray.f90: New test. * testsuite/libgomp.fortran/map-subarray-2.f90: New test. * testsuite/libgomp.fortran/map-subarray-3.f90: New test. * testsuite/libgomp.fortran/map-subarray-4.f90: New test. * testsuite/libgomp.fortran/map-subarray-6.f90: New test. * testsuite/libgomp.fortran/map-subarray-7.f90: New test. * testsuite/libgomp.fortran/map-subarray-8.f90: New test. * testsuite/libgomp.fortran/map-subcomponents.f90: New test. * testsuite/libgomp.fortran/struct-elem-map-1.f90: Adjust for descriptor-mapping changes. Remove XFAIL. --- gcc/fortran/dependency.cc | 128 +++++++++ gcc/fortran/dependency.h | 1 + gcc/fortran/gfortran.h | 1 + gcc/fortran/trans-openmp.cc | 265 +++++++++++++++--- gcc/gimplify.cc | 163 ++++++++--- .../gfortran.dg/goacc/enter-exit-data-2.f90 | 38 +++ gcc/testsuite/gfortran.dg/goacc/finalize-1.f | 8 +- gcc/testsuite/gfortran.dg/gomp/map-9.f90 | 2 +- .../gfortran.dg/gomp/map-subarray-2.f90 | 57 ++++ .../gfortran.dg/gomp/map-subarray.f90 | 40 +++ .../gomp/target-enter-exit-data.f90 | 39 +++ gcc/tree-pretty-print.cc | 9 + gcc/tree.h | 4 + .../libgomp.fortran/map-subarray-2.f90 | 108 +++++++ .../libgomp.fortran/map-subarray-3.f90 | 62 ++++ .../libgomp.fortran/map-subarray-4.f90 | 35 +++ .../libgomp.fortran/map-subarray-6.f90 | 26 ++ .../libgomp.fortran/map-subarray-7.f90 | 29 ++ .../libgomp.fortran/map-subarray-8.f90 | 47 ++++ .../libgomp.fortran/map-subarray.f90 | 33 +++ .../libgomp.fortran/map-subcomponents.f90 | 32 +++ .../libgomp.fortran/struct-elem-map-1.f90 | 183 +++++++++++- 22 files changed, 1221 insertions(+), 89 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/goacc/enter-exit-data-2.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/map-subarray-2.f90 create mode 100644 gcc/testsuite/gfortran.dg/gomp/map-subarray.f90 create mode 100644 
gcc/testsuite/gfortran.dg/gomp/target-enter-exit-data.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-3.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-4.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-6.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-7.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray-8.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subarray.f90 create mode 100644 libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 diff --git a/gcc/fortran/dependency.cc b/gcc/fortran/dependency.cc index 632b3985723e..94647032ab53 100644 --- a/gcc/fortran/dependency.cc +++ b/gcc/fortran/dependency.cc @@ -2337,3 +2337,131 @@ gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_reverse *reverse, return fin_dep == GFC_DEP_OVERLAP; } + +/* Check if two refs are equal, for the purposes of checking if one might be + the base of the other for OpenMP (target directives). Derived from + gfc_dep_resolver. This function is stricter, e.g. indices arr(i) and + arr(j) compare as non-equal. */ + +bool +gfc_omp_expr_prefix_same (gfc_expr *lexpr, gfc_expr *rexpr) +{ + gfc_ref *lref, *rref; + + if (lexpr->symtree && rexpr->symtree) + { + /* See are_identical_variables above. */ + if (lexpr->symtree->n.sym->attr.dummy + && rexpr->symtree->n.sym->attr.dummy) + { + /* Dummy arguments: Only check for equal names. 
*/ + if (lexpr->symtree->n.sym->name != rexpr->symtree->n.sym->name) + return false; + } + else + { + if (lexpr->symtree->n.sym != rexpr->symtree->n.sym) + return false; + } + } + else if (lexpr->base_expr && rexpr->base_expr) + { + if (gfc_dep_compare_expr (lexpr->base_expr, rexpr->base_expr) != 0) + return false; + } + else + return false; + + lref = lexpr->ref; + rref = rexpr->ref; + + while (lref && rref) + { + gfc_dependency fin_dep = GFC_DEP_EQUAL; + + if (lref && lref->type == REF_COMPONENT && lref->u.c.component + && strcmp (lref->u.c.component->name, "_data") == 0) + lref = lref->next; + + if (rref && rref->type == REF_COMPONENT && rref->u.c.component + && strcmp (rref->u.c.component->name, "_data") == 0) + rref = rref->next; + + gcc_assert (lref->type == rref->type); + + switch (lref->type) + { + case REF_COMPONENT: + if (lref->u.c.component != rref->u.c.component) + return false; + break; + + case REF_ARRAY: + if (ref_same_as_full_array (lref, rref)) + break; + if (ref_same_as_full_array (rref, lref)) + break; + + if (lref->u.ar.dimen != rref->u.ar.dimen) + { + if (lref->u.ar.type == AR_FULL + && gfc_full_array_ref_p (rref, NULL)) + break; + if (rref->u.ar.type == AR_FULL + && gfc_full_array_ref_p (lref, NULL)) + break; + return false; + } + + for (int n = 0; n < lref->u.ar.dimen; n++) + { + if (lref->u.ar.dimen_type[n] == DIMEN_VECTOR + && rref->u.ar.dimen_type[n] == DIMEN_VECTOR + && gfc_dep_compare_expr (lref->u.ar.start[n], + rref->u.ar.start[n]) == 0) + continue; + if (lref->u.ar.dimen_type[n] == DIMEN_RANGE + && rref->u.ar.dimen_type[n] == DIMEN_RANGE) + fin_dep = check_section_vs_section (&lref->u.ar, &rref->u.ar, + n); + else if (lref->u.ar.dimen_type[n] == DIMEN_ELEMENT + && rref->u.ar.dimen_type[n] == DIMEN_RANGE) + fin_dep = gfc_check_element_vs_section (lref, rref, n); + else if (rref->u.ar.dimen_type[n] == DIMEN_ELEMENT + && lref->u.ar.dimen_type[n] == DIMEN_RANGE) + fin_dep = gfc_check_element_vs_section (rref, lref, n); + else if 
(lref->u.ar.dimen_type[n] == DIMEN_ELEMENT + && rref->u.ar.dimen_type[n] == DIMEN_ELEMENT) + { + gfc_array_ref l_ar = lref->u.ar; + gfc_array_ref r_ar = rref->u.ar; + gfc_expr *l_start = l_ar.start[n]; + gfc_expr *r_start = r_ar.start[n]; + int i = gfc_dep_compare_expr (r_start, l_start); + if (i == 0) + fin_dep = GFC_DEP_EQUAL; + else + return false; + } + else + return false; + if (n + 1 < lref->u.ar.dimen + && fin_dep != GFC_DEP_EQUAL) + return false; + } + + if (fin_dep != GFC_DEP_EQUAL + && fin_dep != GFC_DEP_OVERLAP) + return false; + + break; + + default: + gcc_unreachable (); + } + lref = lref->next; + rref = rref->next; + } + + return true; +} diff --git a/gcc/fortran/dependency.h b/gcc/fortran/dependency.h index fbbede8b22ca..a2458f79034b 100644 --- a/gcc/fortran/dependency.h +++ b/gcc/fortran/dependency.h @@ -40,5 +40,6 @@ int gfc_expr_is_one (gfc_expr *, int); bool gfc_dep_resolver (gfc_ref *, gfc_ref *, gfc_reverse *, bool identical = false); bool gfc_are_equivalenced_arrays (gfc_expr *, gfc_expr *); +bool gfc_omp_expr_prefix_same (gfc_expr *, gfc_expr *); gfc_expr * gfc_discard_nops (gfc_expr *); diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index c86a025a3fce..b5e1b4c9d4b9 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -1380,6 +1380,7 @@ typedef struct gfc_omp_namelist gfc_namespace *ns; gfc_expr *allocator; struct gfc_symbol *traits_sym; + struct gfc_omp_namelist *duplicate_of; } u2; struct gfc_omp_namelist *next; locus where; diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index c6061cea62a5..9c51ac4aeeea 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see #include "omp-general.h" #include "omp-low.h" #include "memmodel.h" /* For MEMMODEL_ enums. 
*/ +#include "dependency.h" #undef GCC_DIAG_STYLE #define GCC_DIAG_STYLE __gcc_tdiag__ @@ -2491,36 +2492,24 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) { - tree desc_node; tree type = TREE_TYPE (decl); ptr2 = gfc_conv_descriptor_data_get (decl); - desc_node = build_omp_clause (input_location, OMP_CLAUSE_MAP); - OMP_CLAUSE_DECL (desc_node) = decl; - OMP_CLAUSE_SIZE (desc_node) = TYPE_SIZE_UNIT (type); - if (OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_DELETE) + node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + OMP_CLAUSE_DECL (node2) = decl; + OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); + if (OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_DELETE + || OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_RELEASE + || op == EXEC_OMP_TARGET_EXIT_DATA + || op == EXEC_OACC_EXIT_DATA) { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_DELETE); - node2 = desc_node; - } - else if (OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_RELEASE - || op == EXEC_OMP_TARGET_EXIT_DATA) - { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_RELEASE); - node2 = desc_node; - } - else if (ptr_kind == GOMP_MAP_ALWAYS_POINTER) - { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_TO); - node2 = node; - node = desc_node; /* Needs to come first. */ + gomp_map_kind map_kind + = OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_DELETE ? 
GOMP_MAP_DELETE + : GOMP_MAP_RELEASE; + OMP_CLAUSE_SET_MAP_KIND (node2, map_kind); + OMP_CLAUSE_RELEASE_DESCRIPTOR (node2) = 1; } else - { - OMP_CLAUSE_SET_MAP_KIND (desc_node, GOMP_MAP_TO_PSET); - node2 = desc_node; - } - if (op == EXEC_OMP_TARGET_EXIT_DATA) - return; + OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_TO_PSET); node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, ptr_kind); OMP_CLAUSE_DECL (node3) = gfc_conv_descriptor_data_get (decl); @@ -2624,6 +2613,73 @@ handle_iterator (gfc_namespace *ns, stmtblock_t *iter_block, tree block) return list; } +/* To alleviate quadratic behaviour in checking each entry of a + gfc_omp_namelist against every other entry, we build a hashtable indexed by + gfc_symbol pointer, which we can use in the usual case that a map + expression has a symbol as its root term. Return a namelist based on the + root symbol used by N, building a new table in SYM_ROOTED_NL using the + gfc_omp_namelist N2 (all clauses) if we haven't done so already. */ + +static gfc_omp_namelist * +get_symbol_rooted_namelist (hash_map *&sym_rooted_nl, + gfc_omp_namelist *n, + gfc_omp_namelist *n2, bool *sym_based) +{ + /* Early-out if we have a NULL clause list (e.g. for OpenACC). */ + if (!n2) + return NULL; + + gfc_symbol *use_sym = NULL; + + /* We're only interested in cases where we have an expression, e.g. a + component access. 
*/ + if (n->expr && n->expr->expr_type == EXPR_VARIABLE && n->expr->symtree) + use_sym = n->expr->symtree->n.sym; + + *sym_based = false; + + if (!use_sym) + return n2; + + if (!sym_rooted_nl) + { + sym_rooted_nl = new hash_map (); + + for (; n2 != NULL; n2 = n2->next) + { + if (!n2->expr + || n2->expr->expr_type != EXPR_VARIABLE + || !n2->expr->symtree) + continue; + + gfc_omp_namelist *nl_copy = gfc_get_omp_namelist (); + memcpy (nl_copy, n2, sizeof *nl_copy); + nl_copy->u2.duplicate_of = n2; + nl_copy->next = NULL; + + gfc_symbol *idx_sym = n2->expr->symtree->n.sym; + + bool existed; + gfc_omp_namelist *&entry + = sym_rooted_nl->get_or_insert (idx_sym, &existed); + if (existed) + nl_copy->next = entry; + entry = nl_copy; + } + } + + gfc_omp_namelist **n2_sym = sym_rooted_nl->get (use_sym); + + if (n2_sym) + { + *sym_based = true; + return *n2_sym; + } + + return NULL; +} + static tree gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, locus where, bool declare_simd = false, @@ -2641,6 +2697,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (clauses == NULL) return NULL_TREE; + hash_map *sym_rooted_nl = NULL; + for (list = 0; list < OMP_LIST_NUM; list++) { gfc_omp_namelist *n = clauses->lists[list]; @@ -3664,6 +3722,54 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, goto finalize_map_clause; } + gfc_omp_namelist *n2 + = openacc ? NULL : clauses->lists[OMP_LIST_MAP]; + + bool sym_based; + n2 = get_symbol_rooted_namelist (sym_rooted_nl, n, + n2, &sym_based); + + /* If the last reference is a pointer to a derived + type ("foo%dt_ptr"), check if any subcomponents + of the same derived type member are being mapped + elsewhere in the clause list ("foo%dt_ptr%x", + etc.). If we have such subcomponent mappings, + we only create an ALLOC node for the pointer + itself, and inhibit mapping the whole derived + type. 
*/ + + for (; n2 != NULL; n2 = n2->next) + { + if ((!sym_based && n == n2) + || (sym_based && n == n2->u2.duplicate_of) + || !n2->expr) + continue; + + if (!gfc_omp_expr_prefix_same (n->expr, + n2->expr)) + continue; + + gfc_ref *ref1 = n->expr->ref; + gfc_ref *ref2 = n2->expr->ref; + + while (ref1->next && ref2->next) + { + ref1 = ref1->next; + ref2 = ref2->next; + } + + if (ref2->next) + { + inner = build_fold_addr_expr (inner); + OMP_CLAUSE_SET_MAP_KIND (node, + GOMP_MAP_ALLOC); + OMP_CLAUSE_DECL (node) = inner; + OMP_CLAUSE_SIZE (node) + = TYPE_SIZE_UNIT (TREE_TYPE (inner)); + goto finalize_map_clause; + } + } + tree data, size; if (lastref->u.c.component->ts.type == BT_CLASS) @@ -3719,7 +3825,6 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (inner))) { gomp_map_kind map_kind; - tree desc_node; tree type = TREE_TYPE (inner); tree ptr = gfc_conv_descriptor_data_get (inner); ptr = build_fold_indirect_ref (ptr); @@ -3738,7 +3843,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, else if (n->u.map_op == OMP_MAP_RELEASE || n->u.map_op == OMP_MAP_DELETE) ; - else if (op == EXEC_OMP_TARGET_EXIT_DATA) + else if (op == EXEC_OMP_TARGET_EXIT_DATA + || op == EXEC_OACC_EXIT_DATA) map_kind = GOMP_MAP_RELEASE; else map_kind = GOMP_MAP_ALLOC; @@ -3764,24 +3870,78 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node) = fold_build2 (MULT_EXPR, gfc_array_index_type, OMP_CLAUSE_SIZE (node), elemsz); - desc_node = build_omp_clause (input_location, - OMP_CLAUSE_MAP); - if (openacc) - OMP_CLAUSE_SET_MAP_KIND (desc_node, - GOMP_MAP_TO_PSET); - else - OMP_CLAUSE_SET_MAP_KIND (desc_node, map_kind); - OMP_CLAUSE_DECL (desc_node) = inner; - OMP_CLAUSE_SIZE (desc_node) = TYPE_SIZE_UNIT (type); - if (openacc) - node2 = desc_node; - else + node2 = build_omp_clause (input_location, + OMP_CLAUSE_MAP); + if (map_kind == GOMP_MAP_RELEASE + || map_kind == GOMP_MAP_DELETE) 
{ - node2 = node; - node = desc_node; /* Put first. */ + OMP_CLAUSE_SET_MAP_KIND (node2, map_kind); + OMP_CLAUSE_RELEASE_DESCRIPTOR (node2) = 1; + } + else + OMP_CLAUSE_SET_MAP_KIND (node2, + GOMP_MAP_TO_PSET); + OMP_CLAUSE_DECL (node2) = inner; + OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); + if (!openacc) + { + gfc_omp_namelist *n2 + = clauses->lists[OMP_LIST_MAP]; + + /* If we don't have a mapping of a smaller part + of the array -- or we can't prove that we do + statically -- set this flag. If there is a + mapping of a smaller part of the array after + all, this will turn into a no-op at + runtime. */ + OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (node) = 1; + + bool sym_based; + n2 = get_symbol_rooted_namelist (sym_rooted_nl, + n, n2, + &sym_based); + + bool drop_mapping = false; + + for (; n2 != NULL; n2 = n2->next) + { + if ((!sym_based && n == n2) + || (sym_based && n == n2->u2.duplicate_of) + || !n2->expr) + continue; + + if (!gfc_omp_expr_prefix_same (n->expr, + n2->expr)) + continue; + + gfc_ref *ref1 = n->expr->ref; + gfc_ref *ref2 = n2->expr->ref; + + /* We know ref1 and ref2 overlap. We're + interested in whether ref2 describes a + smaller part of the array than ref1, which + we already know refers to the full + array. */ + + while (ref1->next && ref2->next) + { + ref1 = ref1->next; + ref2 = ref2->next; + } + + if (ref2->next + || (ref2->type == REF_ARRAY + && (ref2->u.ar.type == AR_ELEMENT + || (ref2->u.ar.type + == AR_SECTION)))) + { + drop_mapping = true; + break; + } + } + if (drop_mapping) + continue; } - if (op == EXEC_OMP_TARGET_EXIT_DATA) - goto finalize_map_clause; node3 = build_omp_clause (input_location, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, @@ -3945,6 +4105,23 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, } } + /* Free hashmap if we built it. 
*/ + if (sym_rooted_nl) + { + typedef hash_map::iterator hti; + for (hti it = sym_rooted_nl->begin (); it != sym_rooted_nl->end (); ++it) + { + gfc_omp_namelist *&nl = (*it).second; + while (nl) + { + gfc_omp_namelist *next = nl->next; + free (nl); + nl = next; + } + } + delete sym_rooted_nl; + } + if (clauses->if_expr) { tree if_var; @@ -4787,7 +4964,7 @@ gfc_trans_oacc_executable_directive (gfc_code *code) gfc_start_block (&block); oacc_clauses = gfc_trans_omp_clauses (&block, code->ext.omp_clauses, - code->loc, false, true); + code->loc, false, true, code->op); stmt = build1_loc (input_location, construct_code, void_type_node, oacc_clauses); gfc_add_expr_to_block (&block, stmt); diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 26df5b0a8298..96657468951c 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -9151,6 +9151,25 @@ gimplify_omp_depend (tree *list_p, gimple_seq *pre_p) return 1; } +/* True if mapping node C maps, or unmaps, a (Fortran) array descriptor. */ + +static bool +omp_map_clause_descriptor_p (tree c) +{ + if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP) + return false; + + if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_TO_PSET) + return true; + + if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_RELEASE + || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_DELETE) + && OMP_CLAUSE_RELEASE_DESCRIPTOR (c)) + return true; + + return false; +} + /* For a set of mappings describing an array section pointed to by a struct (or derived type, etc.) component, create an "alloc" or "release" node to insert into a list following a GOMP_MAP_STRUCT node. 
For some types of @@ -9186,9 +9205,7 @@ build_omp_struct_comp_nodes (enum tree_code code, tree grp_start, tree grp_end, if (OMP_CLAUSE_CHAIN (grp_start) != grp_end) grp_mid = OMP_CLAUSE_CHAIN (grp_start); - if (grp_mid - && OMP_CLAUSE_CODE (grp_mid) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (grp_mid) == GOMP_MAP_TO_PSET) + if (grp_mid && omp_map_clause_descriptor_p (grp_mid)) OMP_CLAUSE_SIZE (c2) = OMP_CLAUSE_SIZE (grp_mid); else OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (ptr_type_node); @@ -9374,7 +9391,7 @@ omp_get_attachment (omp_mapping_group *grp) return NULL_TREE; node = OMP_CLAUSE_CHAIN (node); - if (node && OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_TO_PSET) + if (node && omp_map_clause_descriptor_p (node)) { gcc_assert (node != grp->grp_end); node = OMP_CLAUSE_CHAIN (node); @@ -9469,7 +9486,7 @@ omp_group_last (tree *start_p) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION) || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_ALWAYS_POINTER - || OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_TO_PSET)) + || omp_map_clause_descriptor_p (nc))) { tree nc2 = OMP_CLAUSE_CHAIN (nc); if (OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_DETACH) @@ -9636,33 +9653,32 @@ omp_group_base (omp_mapping_group *grp, unsigned int *chained, return node; node = OMP_CLAUSE_CHAIN (node); - if (node && OMP_CLAUSE_MAP_KIND (node) == GOMP_MAP_TO_PSET) + if (!node) + internal_error ("unexpected mapping node"); + if (omp_map_clause_descriptor_p (node)) { if (node == grp->grp_end) return *grp->grp_start; node = OMP_CLAUSE_CHAIN (node); } - if (node) - switch (OMP_CLAUSE_MAP_KIND (node)) - { - case GOMP_MAP_POINTER: - case GOMP_MAP_FIRSTPRIVATE_POINTER: - case GOMP_MAP_FIRSTPRIVATE_REFERENCE: - case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: - *firstprivate = OMP_CLAUSE_DECL (node); - return *grp->grp_start; + switch (OMP_CLAUSE_MAP_KIND (node)) + { + case GOMP_MAP_POINTER: + case GOMP_MAP_FIRSTPRIVATE_POINTER: + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + case 
GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: + *firstprivate = OMP_CLAUSE_DECL (node); + return *grp->grp_start; - case GOMP_MAP_ALWAYS_POINTER: - case GOMP_MAP_ATTACH_DETACH: - case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: - case GOMP_MAP_DETACH: - return *grp->grp_start; + case GOMP_MAP_ALWAYS_POINTER: + case GOMP_MAP_ATTACH_DETACH: + case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DETACH: + return *grp->grp_start; - default: - internal_error ("unexpected mapping node"); - } - else - internal_error ("unexpected mapping node"); + default: + internal_error ("unexpected mapping node"); + } return error_mark_node; case GOMP_MAP_TO_PSET: @@ -10010,18 +10026,45 @@ omp_tsort_mapping_groups_1 (omp_mapping_group ***outlist, static omp_mapping_group * omp_tsort_mapping_groups (vec *groups, hash_map - *grpmap) + *grpmap, + bool enter_exit_data) { omp_mapping_group *grp, *outlist = NULL, **cursor; unsigned int i; + bool saw_runtime_implicit = false; cursor = &outlist; FOR_EACH_VEC_ELT (*groups, i, grp) { if (grp->mark != PERMANENT) - if (!omp_tsort_mapping_groups_1 (&cursor, groups, grpmap, grp)) - return NULL; + { + if (OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (*grp->grp_start)) + { + saw_runtime_implicit = true; + continue; + } + if (!omp_tsort_mapping_groups_1 (&cursor, groups, grpmap, grp)) + return NULL; + } + } + + if (!saw_runtime_implicit) + return outlist; + + FOR_EACH_VEC_ELT (*groups, i, grp) + { + if (grp->mark != PERMANENT + && OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (*grp->grp_start)) + { + /* Clear the flag for enter/exit data because it is currently + meaningless for those operations in libgomp. */ + if (enter_exit_data) + OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P (*grp->grp_start) = 0; + + if (!omp_tsort_mapping_groups_1 (&cursor, groups, grpmap, grp)) + return NULL; + } } return outlist; @@ -10424,6 +10467,11 @@ omp_check_mapping_compatibility (location_t loc, mapping. 
However, if we have a reference to pointer, make other appropriate adjustments to the mapping nodes instead. + If we have an ATTACH_DETACH node with a Fortran pointer-set (array + descriptor) mapping for a derived-type component, and we're also mapping the + whole of the derived-type variable on another clause, the pointer-set + mapping is removed. + If we have a component access but we're also mapping the whole of the containing struct, drop the former access. @@ -10603,6 +10651,17 @@ omp_resolve_clause_dependencies (enum tree_code code, GOMP_MAP_ATTACH_ZLAS for it. */ if (!base_mapped_to && referenced_ptr_node) OMP_CLAUSE_SET_MAP_KIND (referenced_ptr_node, zlas_kind); + + omp_mapping_group *struct_group; + tree desc; + if ((desc = OMP_CLAUSE_CHAIN (*grp->grp_start)) + && omp_map_clause_descriptor_p (desc) + && omp_mapped_by_containing_struct (grpmap, decl, + &struct_group)) + /* If we have a pointer set but we're mapping (or unmapping) + the whole of the containing struct, we can remove the + pointer set mapping. */ + OMP_CLAUSE_CHAIN (*grp->grp_start) = OMP_CLAUSE_CHAIN (desc); } else if (TREE_CODE (TREE_TYPE (base_ptr)) == REFERENCE_TYPE && (TREE_CODE (TREE_TYPE (TREE_TYPE (base_ptr))) @@ -11001,11 +11060,17 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, for the purposes of gathering sibling lists, etc. 
*/ /* gcc_assert (base == addr_tokens[base_token]->expr); */ - bool ptr = (OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ALWAYS_POINTER); bool attach_detach = ((OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ATTACH_DETACH) || (OMP_CLAUSE_MAP_KIND (grp_end) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)); + bool has_descriptor = false; + if (OMP_CLAUSE_CHAIN (*grp_start_p) != grp_end) + { + tree grp_mid = OMP_CLAUSE_CHAIN (*grp_start_p); + if (grp_mid && omp_map_clause_descriptor_p (grp_mid)) + has_descriptor = true; + } if (!struct_map_to_clause || struct_map_to_clause->get (base) == NULL) { @@ -11028,7 +11093,18 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, GOMP_MAP_STRUCT into the middle of the old one. */ tree *insert_node_pos = reprocessing_struct ? *added_tail : grp_start_p; - if (ptr || attach_detach) + if (has_descriptor) + { + tree desc = OMP_CLAUSE_CHAIN (*grp_start_p); + if (code == OMP_TARGET_EXIT_DATA || code == OACC_EXIT_DATA) + OMP_CLAUSE_SET_MAP_KIND (desc, GOMP_MAP_RELEASE); + tree sc = *insert_node_pos; + OMP_CLAUSE_CHAIN (l) = desc; + OMP_CLAUSE_CHAIN (*grp_start_p) = OMP_CLAUSE_CHAIN (desc); + OMP_CLAUSE_CHAIN (desc) = sc; + *insert_node_pos = l; + } + else if (attach_detach) { tree extra_node; tree alloc_node @@ -11259,7 +11335,7 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, || OMP_CLAUSE_MAP_KIND (*sc) == GOMP_MAP_ATTACH_DETACH) sc = &OMP_CLAUSE_CHAIN (*sc); for (i = 0; i < elems; i++, sc = &OMP_CLAUSE_CHAIN (*sc)) - if ((ptr || attach_detach) && sc == grp_start_p) + if (attach_detach && sc == grp_start_p) break; else if (TREE_CODE (OMP_CLAUSE_DECL (*sc)) != COMPONENT_REF && TREE_CODE (OMP_CLAUSE_DECL (*sc)) != INDIRECT_REF @@ -11315,7 +11391,7 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, || (known_eq (coffset, offset) && maybe_lt (cbitpos, bitpos))) { - if (ptr || attach_detach) + if (attach_detach) scp = sc; else break; @@ -11331,7 +11407,9 @@ omp_accumulate_sibling_list (enum omp_region_type 
region_type, the list manipulation below. We only need to handle the (pointer or reference) attach/detach case. */ tree extra_node, alloc_node; - if (attach_detach) + if (has_descriptor) + gcc_unreachable (); + else if (attach_detach) alloc_node = build_omp_struct_comp_nodes (code, *grp_start_p, grp_end, &extra_node); else @@ -11364,7 +11442,17 @@ omp_accumulate_sibling_list (enum omp_region_type region_type, return NULL; } - if (ptr || attach_detach) + if (has_descriptor) + { + tree desc = OMP_CLAUSE_CHAIN (*grp_start_p); + if (code == OMP_TARGET_EXIT_DATA + || code == OACC_EXIT_DATA) + OMP_CLAUSE_SET_MAP_KIND (desc, GOMP_MAP_RELEASE); + omp_siblist_move_node_after (desc, + &OMP_CLAUSE_CHAIN (*grp_start_p), + scp ? scp : sc); + } + else if (attach_detach) { tree cl = NULL_TREE, extra_node; tree alloc_node = build_omp_struct_comp_nodes (code, *grp_start_p, @@ -11509,8 +11597,7 @@ omp_build_struct_sibling_lists (enum tree_code code, as a struct (the GOMP_MAP_POINTER following will have the form "var.data", but such mappings are handled specially). */ tree grpmid = OMP_CLAUSE_CHAIN (*grp_start_p); - if (OMP_CLAUSE_CODE (grpmid) == OMP_CLAUSE_MAP - && OMP_CLAUSE_MAP_KIND (grpmid) == GOMP_MAP_TO_PSET + if (omp_map_clause_descriptor_p (grpmid) && DECL_P (OMP_CLAUSE_DECL (grpmid))) continue; } @@ -11786,6 +11873,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, list_p); omp_mapping_group *outlist = NULL; + bool enter_exit = (code == OMP_TARGET_ENTER_DATA + || code == OMP_TARGET_EXIT_DATA); /* Topological sorting may fail if we have duplicate nodes, which we should have detected and shown an error for already. 
Skip @@ -11800,7 +11889,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p, groups = omp_gather_mapping_groups (list_p); grpmap = omp_index_mapping_groups (groups); - outlist = omp_tsort_mapping_groups (groups, grpmap); + outlist = omp_tsort_mapping_groups (groups, grpmap, enter_exit); outlist = omp_segregate_mapping_groups (outlist); list_p = omp_reorder_mapping_groups (groups, outlist, list_p); diff --git a/gcc/testsuite/gfortran.dg/goacc/enter-exit-data-2.f90 b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data-2.f90 new file mode 100644 index 000000000000..6a16c8a89eab --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/enter-exit-data-2.f90 @@ -0,0 +1,38 @@ +! { dg-additional-options "-fdump-tree-original" } + +type t +integer, pointer :: arr(:) +end type t + +type(t) :: var + +allocate (var%arr(1:100)) + +!$acc enter data copyin(var%arr(10:20)) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc enter data map\(to:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\);$} 1 "original" } } + +!$acc exit data delete(var%arr(10:20)) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\);$} 1 "original" } } + + +!$acc enter data create(var%arr(20:30)) +! 
{ dg-final { scan-tree-dump-times {(?n)#pragma acc enter data map\(alloc:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\);$} 1 "original" } } + +!$acc exit data finalize delete(var%arr(20:30)) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\) finalize;$} 1 "original" } } + + +!$acc enter data copyin(var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc enter data map\(to:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: 0\]\);$} 1 "original" } } + +!$acc exit data delete(var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: 0\]\);$} 1 "original" } } + + +!$acc enter data create(var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma acc enter data map\(alloc:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: 0\]\);$} 1 "original" } } + +!$acc exit data finalize delete(var%arr) +! 
{ dg-final { scan-tree-dump-times {(?n)#pragma acc exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:var\.arr\.data \[bias: 0\]\) finalize;$} 1 "original" } } + +end diff --git a/gcc/testsuite/gfortran.dg/goacc/finalize-1.f b/gcc/testsuite/gfortran.dg/goacc/finalize-1.f index 23f0ffc627e0..63beb4794534 100644 --- a/gcc/testsuite/gfortran.dg/goacc/finalize-1.f +++ b/gcc/testsuite/gfortran.dg/goacc/finalize-1.f @@ -20,8 +20,8 @@ ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(delete:del_f \\\[len: \[0-9\]+\\\]\\) finalize$" 1 "gimple" } } !$ACC EXIT DATA FINALIZE DELETE (del_f_p(2:5)) -! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(release:\\*\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\) parm\\.0\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(release:del_f_p \\\[len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) del_f_p\\.data \\\[pointer assign, bias: \\(.*int.*\\) parm\\.0\\.data - \\(.*int.*\\) del_f_p\\.data\\\]\\) finalize;$" 1 "original" } } -! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(delete:MEM <\[^>\]+> \\\[\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\)_\[0-9\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(delete:del_f_p \\\[len: \[0-9\]+\\\]\\) map\\(alloc:del_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(release:\\*\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\) parm\\.0\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(release:del_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) del_f_p\\.data \\\[pointer assign, bias: \\(.*int.*\\) parm\\.0\\.data - \\(.*int.*\\) del_f_p\\.data\\\]\\) finalize;$" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(delete:MEM <\[^>\]+> \\\[\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\)_\[0-9\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(delete:del_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:del_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } } !$ACC EXIT DATA COPYOUT (cpo_r) ! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(from:cpo_r\\);$" 1 "original" } } @@ -32,6 +32,6 @@ ! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(force_from:cpo_f \\\[len: \[0-9\]+\\\]\\) finalize$" 1 "gimple" } } !$ACC EXIT DATA COPYOUT (cpo_f_p(4:10)) FINALIZE -! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(from:\\*\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\) parm\\.1\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(to:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) cpo_f_p\\.data \\\[pointer assign, bias: \\(.*int.*\\) parm\\.1\\.data - \\(.*int.*\\) cpo_f_p\\.data\\\]\\) finalize;$" 1 "original" } } -! { dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(force_from:MEM <\[^>\]+> \\\[\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\)_\[0-9\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(to:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:cpo_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "(?n)#pragma acc exit data map\\(from:\\*\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\) parm\\.1\\.data \\\[len: \[^\\\]\]+\\\]\\) map\\(release:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:\\(integer\\(kind=1\\)\\\[0:\\\] \\* restrict\\) cpo_f_p\\.data \\\[pointer assign, bias: \\(.*int.*\\) parm\\.1\\.data - \\(.*int.*\\) cpo_f_p\\.data\\\]\\) finalize;$" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "(?n)#pragma omp target oacc_exit_data map\\(force_from:MEM <\[^>\]+> \\\[\\(integer\\(kind=.\\)\\\[0:\\\] \\*\\)_\[0-9\]+\\\] \\\[len: \[^\\\]\]+\\\]\\) map\\(delete:cpo_f_p \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(alloc:cpo_f_p\\.data \\\[pointer assign, bias: \[^\\\]\]+\\\]\\) finalize$" 1 "gimple" } } END SUBROUTINE f diff --git a/gcc/testsuite/gfortran.dg/gomp/map-9.f90 b/gcc/testsuite/gfortran.dg/gomp/map-9.f90 index f930a49d9fff..8c8d4f7c57c4 100644 --- a/gcc/testsuite/gfortran.dg/gomp/map-9.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/map-9.f90 @@ -2,7 +2,7 @@ ! PR fortran/108545 -! { dg-final { scan-tree-dump "#pragma omp target enter data map\\(struct:x \\\[len: 1\\\]\\) map\\(always,to:x\.a \\\[len: \[0-9\]+\\\]\\) map\\(to:MEM \\\[\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\)_\[0-9\]+] \\\[len: _\[0-9\]+\\\]\\) map\\(attach:x\.a\.data \\\[bias: 0\\\]\\)" "omplower" } } +! { dg-final { scan-tree-dump "#pragma omp target enter data map\\(struct:x \\\[len: 1\\\]\\) map\\(to:x\.a \\\[pointer set, len: \[0-9\]+\\\]\\) map\\(to:MEM \\\[\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\)_\[0-9\]+] \\\[len: _\[0-9\]+\\\]\\) map\\(attach:x\.a\.data \\\[bias: 0\\\]\\)" "omplower" } } program p type t diff --git a/gcc/testsuite/gfortran.dg/gomp/map-subarray-2.f90 b/gcc/testsuite/gfortran.dg/gomp/map-subarray-2.f90 new file mode 100644 index 000000000000..26e113f4fdd7 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/map-subarray-2.f90 @@ -0,0 +1,57 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-gimple" } + +type T +integer, pointer :: arr1(:) +integer, pointer :: arr2(:) +integer, pointer :: arr3(:) +integer, pointer :: arr4(:) +end type T + +type(T) :: tv +integer, allocatable, target, dimension(:) :: arr + +allocate(arr(1:20)) + +tv%arr1 => arr +tv%arr2 => arr +tv%arr3 => arr +tv%arr4 => arr + +!$omp target enter data map(to: tv%arr1) + +! 
{ dg-final { scan-tree-dump {(?n)#pragma omp target enter data map\(struct:tv \[len: 1\]\) map\(to:tv\.arr1 \[pointer set, len: [0-9]+\]\) map\(to:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr1\.data \[bias: 0\]\)} "gimple" } } + +!$omp target exit data map(from: tv%arr1) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target exit data map\(release:tv\.arr1 \[pointer set, len: [0-9]+\]\) map\(from:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(detach:tv\.arr1\.data \[bias: 0\]\)} "gimple" } } + + +!$omp target enter data map(to: tv%arr2) map(to: tv%arr2(1:10)) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target enter data map\(struct:tv \[len: 1\]\) map\(to:tv\.arr2 \[pointer set, len: [0-9]+\]\) map\(to:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr2\.data \[bias: [^\]]+\]\)} "gimple" } } + +!$omp target exit data map(from: tv%arr2) map(from: tv%arr2(1:10)) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target exit data map\(release:tv\.arr2 \[pointer set, len: [0-9]+\]\) map\(from:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(detach:tv\.arr2\.data \[bias: [^\]]+\]\)} "gimple" } } + + +!$omp target enter data map(to: tv, tv%arr3(1:10)) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target enter data map\(to:tv \[len: [0-9]+\]\) map\(to:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr3\.data \[bias: [^\]]+\]\)} "gimple" } } + +!$omp target exit data map(from: tv, tv%arr3(1:10)) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target exit data map\(from:tv \[len: [0-9]+\]\) map\(from:MEM \[\(integer\(kind=4\)\[0:\] \*\)[_[0-9]+\] \[len: _[0-9]+\]\) map\(detach:tv\.arr3\.data \[bias: [^\]]+\]\)} "gimple" } } + + +!$omp target enter data map(to: tv%arr4(1:10)) + +! 
{ dg-final { scan-tree-dump {(?n)#pragma omp target enter data map\(struct:tv \[len: 1\]\) map\(to:tv\.arr4 \[pointer set, len: [0-9]+\]\) map\(to:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr4\.data \[bias: [^\]]+\]\)} "gimple" } } + +!$omp target exit data map(from: tv%arr4(1:10)) + +! { dg-final { scan-tree-dump {(?n)#pragma omp target exit data map\(release:tv\.arr4 \[pointer set, len: [0-9]+\]\) map\(from:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(detach:tv\.arr4\.data \[bias: [^\]]+\]\)} "gimple" } } + +end + diff --git a/gcc/testsuite/gfortran.dg/gomp/map-subarray.f90 b/gcc/testsuite/gfortran.dg/gomp/map-subarray.f90 new file mode 100644 index 000000000000..197888a43365 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/map-subarray.f90 @@ -0,0 +1,40 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-gimple" } + +type T +integer, pointer :: arr1(:) +integer, pointer :: arr2(:) +end type T + +type(T) :: tv +integer, allocatable, target, dimension(:) :: arr + +allocate(arr(1:20)) + +tv%arr1 => arr +tv%arr2 => arr + +!$omp target map(tv%arr1) +tv%arr1(1) = tv%arr1(1) + 1 +!$omp end target + +! { dg-final { scan-tree-dump {(?n)#pragma omp target.* map\(struct:tv \[len: 1\]\) map\(to:tv\.arr1 \[pointer set, len: [0-9]+\]\) map\(tofrom:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\[implicit\]\) map\(attach:tv\.arr1\.data \[bias: 0\]\)} "gimple" } } + +!$omp target map(tv%arr2) map(tv%arr2(1:10)) +tv%arr2(1) = tv%arr2(1) + 1 +!$omp end target + +!$omp target map(tv%arr2(1:10)) +tv%arr2(1) = tv%arr2(1) + 1 +!$omp end target + +! 
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target.* map\(struct:tv \[len: 1\]\) map\(to:tv\.arr2 \[pointer set, len: [0-9]+\]\) map\(tofrom:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr2\.data \[bias: [^\]]+\]\)} 2 "gimple" } } + +!$omp target map(tv, tv%arr2(1:10)) +tv%arr2(1) = tv%arr2(1) + 1 +!$omp end target + +! { dg-final { scan-tree-dump {(?n)#pragma omp target.* map\(tofrom:tv \[len: [0-9]+\]\) map\(tofrom:MEM \[\(integer\(kind=4\)\[0:\] \*\)_[0-9]+\] \[len: _[0-9]+\]\) map\(attach:tv\.arr2\.data \[bias: [^\]]+\]\)} "gimple" } } + +end + diff --git a/gcc/testsuite/gfortran.dg/gomp/target-enter-exit-data.f90 b/gcc/testsuite/gfortran.dg/gomp/target-enter-exit-data.f90 new file mode 100644 index 000000000000..c14a11dacd9d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/target-enter-exit-data.f90 @@ -0,0 +1,39 @@ +! { dg-additional-options "-fdump-tree-original" } + +type t +integer, pointer :: arr(:) +end type t + +type(t) :: var + +allocate (var%arr(1:100)) + +!$omp target enter data map(to: var%arr(10:20)) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target enter data map\(to:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\)$} 1 "original" } } + +!$omp target exit data map(release: var%arr(10:20)) +! 
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\)$} 1 "original" } } + + +!$omp target enter data map(alloc: var%arr(20:30)) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target enter data map\(alloc:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\)$} 1 "original" } } + +!$omp target exit data map(delete: var%arr(20:30)) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target exit data map\(delete:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) parm\.[0-9]+\.data \[len: D\.[0-9]+ \* [0-9]+\]\) map\(delete:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: \(integer\(kind=[0-9]+\)\) parm\.[0-9]+\.data - \(integer\(kind=[0-9]+\)\) var\.arr\.data\]\)$} 1 "original" } } + + +!$omp target enter data map(to: var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target enter data map\(to:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\[implicit\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: 0\]\)$} 1 "original" } } + +!$omp target exit data map(release: var%arr) +! 
{ dg-final { scan-tree-dump-times {(?n)#pragma omp target exit data map\(release:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\[implicit\]\) map\(release:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: 0\]\)$} 1 "original" } } + + +!$omp target enter data map(alloc: var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target enter data map\(alloc:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\[implicit\]\) map\(to:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: 0\]\)$} 1 "original" } } + +!$omp target exit data map(delete: var%arr) +! { dg-final { scan-tree-dump-times {(?n)#pragma omp target exit data map\(delete:\*\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[len: D\.[0-9]+ \* [0-9]+\]\[implicit\]\) map\(delete:var\.arr \[pointer set, len: [0-9]+\]\) map\(attach_detach:\(integer\(kind=[0-9]+\)\[0:\] \*\) var\.arr\.data \[bias: 0\]\)$} 1 "original" } } + + +end diff --git a/gcc/tree-pretty-print.cc b/gcc/tree-pretty-print.cc index cab99f9dfb6d..e6d2ce01c601 100644 --- a/gcc/tree-pretty-print.cc +++ b/gcc/tree-pretty-print.cc @@ -1050,6 +1050,15 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: pp_string (pp, " [bias: "); break; + case GOMP_MAP_RELEASE: + case GOMP_MAP_DELETE: + if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP + && OMP_CLAUSE_RELEASE_DESCRIPTOR (clause)) + { + pp_string (pp, " [pointer set, len: "); + break; + } + /* Fallthrough. */ default: pp_string (pp, " [len: "); break; diff --git a/gcc/tree.h b/gcc/tree.h index 986e7604d931..cec9f6c54a7a 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -1831,6 +1831,10 @@ class auto_suppress_location_wrappers same directive. 
*/ #define OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED(NODE) \ TREE_STATIC (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) +/* Nonzero if this is a release/delete node which refers to a (Fortran) array + descriptor. */ +#define OMP_CLAUSE_RELEASE_DESCRIPTOR(NODE) \ + TREE_NOTHROW (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_MAP)) /* Flag that 'OMP_CLAUSE_DECL (NODE)' is to be made addressable during OMP lowering. */ diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 new file mode 100644 index 000000000000..02f08c52a8c3 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-2.f90 @@ -0,0 +1,108 @@ +! { dg-do run } + +program myprog +type u + integer, dimension (:), pointer :: tarr1 + integer, dimension (:), pointer :: tarr2 + integer, dimension (:), pointer :: tarr3 +end type u + +type(u) :: myu1, myu2, myu3 + +integer, dimension (12), target :: myarray1 +integer, dimension (12), target :: myarray2 +integer, dimension (12), target :: myarray3 +integer, dimension (12), target :: myarray4 +integer, dimension (12), target :: myarray5 +integer, dimension (12), target :: myarray6 +integer, dimension (12), target :: myarray7 +integer, dimension (12), target :: myarray8 +integer, dimension (12), target :: myarray9 + +myu1%tarr1 => myarray1 +myu1%tarr2 => myarray2 +myu1%tarr3 => myarray3 +myu2%tarr1 => myarray4 +myu2%tarr2 => myarray5 +myu2%tarr3 => myarray6 +myu3%tarr1 => myarray7 +myu3%tarr2 => myarray8 +myu3%tarr3 => myarray9 + +myu1%tarr1 = 0 +myu1%tarr2 = 0 +myu1%tarr3 = 0 +myu2%tarr1 = 0 +myu2%tarr2 = 0 +myu2%tarr3 = 0 +myu3%tarr1 = 0 +myu3%tarr2 = 0 +myu3%tarr3 = 0 + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(:)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(:)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(:)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(:)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(:)) & +!$omp& map(to:myu2%tarr3) 
map(tofrom:myu2%tarr3(:)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(:)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(:)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(:)) +myu1%tarr1(1) = myu1%tarr1(1) + 1 +myu2%tarr1(1) = myu2%tarr1(1) + 1 +myu3%tarr1(1) = myu3%tarr1(1) + 1 +!$omp end target + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(1:2)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(1:2)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(1:2)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(1:2)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(1:2)) & +!$omp& map(to:myu2%tarr3) map(tofrom:myu2%tarr3(1:2)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(1:2)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(1:2)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(1:2)) +myu1%tarr2(1) = myu1%tarr2(1) + 1 +myu2%tarr2(1) = myu2%tarr2(1) + 1 +myu3%tarr2(1) = myu3%tarr2(1) + 1 +!$omp end target + +!$omp target map(to:myu1%tarr1) map(tofrom:myu1%tarr1(1)) & +!$omp& map(to:myu1%tarr2) map(tofrom:myu1%tarr2(1)) & +!$omp& map(to:myu1%tarr3) map(tofrom:myu1%tarr3(1)) & +!$omp& map(to:myu2%tarr1) map(tofrom:myu2%tarr1(1)) & +!$omp& map(to:myu2%tarr2) map(tofrom:myu2%tarr2(1)) & +!$omp& map(to:myu2%tarr3) map(tofrom:myu2%tarr3(1)) & +!$omp& map(to:myu3%tarr1) map(tofrom:myu3%tarr1(1)) & +!$omp& map(to:myu3%tarr2) map(tofrom:myu3%tarr2(1)) & +!$omp& map(to:myu3%tarr3) map(tofrom:myu3%tarr3(1)) +myu1%tarr3(1) = myu1%tarr3(1) + 1 +myu2%tarr3(1) = myu2%tarr3(1) + 1 +myu3%tarr3(1) = myu3%tarr3(1) + 1 +!$omp end target + +!$omp target map(tofrom:myu1%tarr1) & +!$omp& map(tofrom:myu1%tarr2) & +!$omp& map(tofrom:myu1%tarr3) & +!$omp& map(tofrom:myu2%tarr1) & +!$omp& map(tofrom:myu2%tarr2) & +!$omp& map(tofrom:myu2%tarr3) & +!$omp& map(tofrom:myu3%tarr1) & +!$omp& map(tofrom:myu3%tarr2) & +!$omp& map(tofrom:myu3%tarr3) +myu1%tarr2(1) = myu1%tarr2(1) + 1 +myu2%tarr2(1) = myu2%tarr2(1) + 1 +myu3%tarr2(1) = myu3%tarr2(1) + 1 +!$omp end 
target + +if (myu1%tarr1(1).ne.1) stop 1 +if (myu2%tarr1(1).ne.1) stop 2 +if (myu3%tarr1(1).ne.1) stop 3 +if (myu1%tarr2(1).ne.2) stop 4 +if (myu2%tarr2(1).ne.2) stop 5 +if (myu3%tarr2(1).ne.2) stop 6 +if (myu1%tarr3(1).ne.1) stop 7 +if (myu2%tarr3(1).ne.1) stop 8 +if (myu3%tarr3(1).ne.1) stop 9 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-3.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-3.f90 new file mode 100644 index 000000000000..318e77ea44ff --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-3.f90 @@ -0,0 +1,62 @@ +! { dg-do run } + +module mymod +type G +integer :: x, y +integer, pointer :: arr(:) +integer :: z +end type G +end module mymod + +program myprog +use mymod + +integer, target :: arr1(10) +integer, target :: arr2(10) +integer, target :: arr3(10) +type(G), dimension(3) :: gvar + +integer :: i, j + +gvar(1)%arr => arr1 +gvar(2)%arr => arr2 +gvar(3)%arr => arr3 + +gvar(1)%arr = 0 +gvar(2)%arr = 0 +gvar(3)%arr = 0 + +i = 1 +j = 1 + +! Here 'gvar(i)' and 'gvar(j)' are the same element, so this should work. +! This generates a whole-array mapping for gvar(i)%arr, but with the +! "runtime implicit" bit set so the smaller subarray gvar(j)%arr(1:5) takes +! precedence. + +!$omp target map(gvar(i)%arr, gvar(j)%arr(1:5)) +gvar(i)%arr(1) = gvar(i)%arr(1) + 1 +gvar(j)%arr(1) = gvar(j)%arr(1) + 2 +!$omp end target + +!$omp target map(gvar(i)%arr(1:5), gvar(j)%arr) +gvar(i)%arr(1) = gvar(i)%arr(1) + 3 +gvar(j)%arr(1) = gvar(j)%arr(1) + 4 +!$omp end target + +! For these ones, we know the array index is the same, so we can just +! drop the whole-array mapping. 
+ +!$omp target map(gvar(i)%arr, gvar(i)%arr(1:5)) +gvar(i)%arr(1) = gvar(i)%arr(1) + 1 +gvar(i)%arr(1) = gvar(j)%arr(1) + 2 +!$omp end target + +!$omp target map(gvar(i)%arr(1:5), gvar(i)%arr) +gvar(i)%arr(1) = gvar(i)%arr(1) + 3 +gvar(i)%arr(1) = gvar(j)%arr(1) + 4 +!$omp end target + +if (gvar(1)%arr(1).ne.20) stop 1 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-4.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-4.f90 new file mode 100644 index 000000000000..5d15808f0da7 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-4.f90 @@ -0,0 +1,35 @@ +! { dg-do run } + +type t + integer, pointer :: p(:) +end type t + +type(t) :: var(2) + +allocate (var(1)%p, source=[1,2,3,5]) +allocate (var(2)%p, source=[2,3,5]) + +!$omp target map(var(1)%p, var(2)%p) +var(1)%p(1) = 5 +var(2)%p(2) = 7 +!$omp end target + +!$omp target map(var(1)%p(1:3), var(1)%p, var(2)%p) +var(1)%p(1) = var(1)%p(1) + 1 +var(2)%p(2) = var(2)%p(2) + 1 +!$omp end target + +!$omp target map(var(1)%p, var(2)%p, var(2)%p(1:3)) +var(1)%p(1) = var(1)%p(1) + 1 +var(2)%p(2) = var(2)%p(2) + 1 +!$omp end target + +!$omp target map(var(1)%p, var(1)%p(1:3), var(2)%p, var(2)%p(2)) +var(1)%p(1) = var(1)%p(1) + 1 +var(2)%p(2) = var(2)%p(2) + 1 +!$omp end target + +if (var(1)%p(1).ne.8) stop 1 +if (var(2)%p(2).ne.10) stop 2 + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-6.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-6.f90 new file mode 100644 index 000000000000..9f0edf70890e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-6.f90 @@ -0,0 +1,26 @@ +! { dg-do run } + +type t + integer, pointer :: p(:) + integer, pointer :: p2(:) +end type t + +type(t) :: var +integer, target :: tgt(5), tgt2(1000) +var%p => tgt +var%p2 => tgt2 + +p = 0 +p2 = 0 + +!$omp target map(tgt, tgt2(4:6), var) + var%p(1) = 5 + var%p2(5) = 7 +!$omp end target + +if (var%p(1).ne.5) stop 1 +if (var%p2(5).ne.7) stop 2 + +end + +! 
{ dg-shouldfail "" { offload_device_nonshared_as } } diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-7.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-7.f90 new file mode 100644 index 000000000000..42da72961069 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-7.f90 @@ -0,0 +1,29 @@ +type t +integer, pointer :: p2(:) +end type t + +integer, target :: A(5) +integer, pointer :: p(:), p2(:) +type(t) :: var + +allocate(p2(1:20)) +p => A +var%p2 => p2 + +A = 0 +p2 = 0 + +! These arrays "share original storage", so are unsupported. This will +! (correctly) fail with a non-shared address space. + +!$omp target map(A(3:4), p2(4:8), p, var%p2) +A(3) = A(3) + 1 +p2(4) = p2(4) + 2 +!$omp end target + +if (A(3).ne.1) stop 1 +if (p2(4).ne.2) stop 2 + +end program + +! { dg-shouldfail "" { offload_device_nonshared_as } } diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray-8.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray-8.f90 new file mode 100644 index 000000000000..a47360e10ec3 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray-8.f90 @@ -0,0 +1,47 @@ +! 
{ dg-do run } + +type F +integer, pointer :: mem(:) +end type F + +type(F) :: fv +integer, allocatable, target :: arr(:) + +allocate(arr(1:20)) + +fv%mem => arr +fv%mem = 0 + +!$omp target enter data map(to: fv%mem(1:10)) +!$omp target map(alloc: fv%mem) +fv%mem(1) = fv%mem(1) + 1 +!$omp end target +!$omp target exit data map(from: fv%mem(1:10)) + +if (fv%mem(1).ne.1) stop 1 + +!$omp target enter data map(to: fv, fv%mem(1:10)) +!$omp target +fv%mem(1) = fv%mem(1) + 1 +!$omp end target +!$omp target exit data map(from: fv, fv%mem(1:10)) + +if (fv%mem(1).ne.2) stop 2 + +!$omp target enter data map(to: fv%mem, fv%mem(1:10)) +!$omp target +fv%mem(1) = fv%mem(1) + 1 +!$omp end target +!$omp target exit data map(from: fv%mem, fv%mem(1:10)) + +if (fv%mem(1).ne.3) stop 3 + +!$omp target enter data map(to: fv%mem) +!$omp target +fv%mem(1) = fv%mem(1) + 1 +!$omp end target +!$omp target exit data map(from: fv%mem) + +if (fv%mem(1).ne.4) stop 4 + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-subarray.f90 b/libgomp/testsuite/libgomp.fortran/map-subarray.f90 new file mode 100644 index 000000000000..85f5af3a2a6c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subarray.f90 @@ -0,0 +1,33 @@ +! 
{ dg-do run } + +program myprog +type u + integer, dimension (:), pointer :: tarr +end type u + +type(u) :: myu +integer, dimension (12), target :: myarray + +myu%tarr => myarray + +myu%tarr = 0 + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(:)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(1:2)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(to:myu%tarr) map(tofrom:myu%tarr(1)) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +!$omp target map(tofrom:myu%tarr) +myu%tarr(1) = myu%tarr(1) + 1 +!$omp end target + +if (myu%tarr(1).ne.4) stop 1 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 b/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 new file mode 100644 index 000000000000..c7f90131cbae --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-subcomponents.f90 @@ -0,0 +1,32 @@ +! { dg-do run } + +module mymod +type F +integer :: a, b, c +integer, dimension(10) :: d +end type F + +type G +integer :: x, y +type(F), pointer :: myf +integer :: z +end type G +end module mymod + +program myprog +use mymod + +type(F), target :: ftmp +type(G) :: gvar + +gvar%myf => ftmp + +gvar%myf%d = 0 + +!$omp target map(to:gvar%myf) map(tofrom: gvar%myf%b, gvar%myf%d) +gvar%myf%d(1) = gvar%myf%d(1) + 1 +!$omp end target + +if (gvar%myf%d(1).ne.1) stop 1 + +end program myprog diff --git a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 index 7f3d8174f97b..b1d696656c0f 100644 --- a/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 +++ b/libgomp/testsuite/libgomp.fortran/struct-elem-map-1.f90 @@ -36,6 +36,10 @@ program main call six () call seven () call eight () + call nine () + call ten () + call eleven () + call twelve () contains ! Implicitly mapped – but no pointers are mapped @@ -408,7 +412,180 @@ contains !$omp end target end subroutine eight -end program main + ! 
This is "subroutine four" but with explicit base-pointer mappings + ! (var%f, etc.). + subroutine nine() + type(t2) :: var -! Fixed by the "Fortran pointers and member mappings" patch -! { dg-xfail-run-if TODO { offload_device_nonshared_as } } + print '(g0)', '==== TESTCASE "nine" ====' + + var = t2(a = 1, & + b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), & + d = [(-3*i, i = 1, 10)], & + str1 = "abcde", & + str2 = ["12345", "67890", "ABCDE", "FGHIJ"], & + uni1 = 4_"abcde", & + uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"]) + allocate (var%f, source=[22, 33, 44, 55]) + allocate (var%str4, source=["Let's", "Go!!!"]) + allocate (var%uni4, source=[4_"Let's", 4_"Go!!!"]) + +! !$omp target map(tofrom: var%d(4:7), var%f(2:3), var%str2(2:3)) & +! !$omp& map(tofrom: var%str4(2:2), var%uni2(2:3), var%uni4(2:2)) + !$omp target map(to: var%f) map(tofrom: var%d(4:7), var%f(2:3), & + !$omp& var%str2(2:3), var%uni2(2:3)) + if (any (var%d(4:7) /= [(-3*i, i = 4, 7)])) stop 4 + if (any (var%str2(2:3) /= ["67890", "ABCDE"])) stop 6 + + if (.not. associated (var%f)) stop 9 + if (size (var%f) /= 4) stop 10 + if (any (var%f(2:3) /= [33, 44])) stop 11 +! if (.not. associated (var%str4)) stop 15 +! if (len (var%str4) /= 5) stop 16 +! if (size (var%str4) /= 2) stop 17 +! if (var%str4(2) /= "Go!!!") stop 18 + + if (any (var%uni2(2:3) /= [4_"67890", 4_"ABCDE"])) stop 19 +! if (.not. associated (var%uni4)) stop 20 +! if (len (var%uni4) /= 5) stop 21 +! if (size (var%uni4) /= 2) stop 22 +! if (var%uni4(2) /= "Go!!!") stop 23 + !$omp end target + + deallocate(var%f, var%str4) + end subroutine nine + + ! This is "subroutine five" but with explicit base-pointer mappings. 
+ subroutine ten() + type(t2) :: var + + print '(g0)', '==== TESTCASE "ten" ====' + + var = t2(a = 1, & + b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), & + d = [(-3*i, i = 1, 10)], & + str1 = "abcde", & + str2 = ["12345", "67890", "ABCDE", "FGHIJ"], & + uni1 = 4_"abcde", & + uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"]) + allocate (var%f, source=[22, 33, 44, 55]) + allocate (var%str4, source=["Let's", "Go!!!"]) + + !$omp target map(tofrom: var%d(4:7)) + if (any (var%d(4:7) /= [(-3*i, i = 4, 7)])) stop 4 + !$omp end target + !$omp target map(tofrom: var%str2(2:3)) + if (any (var%str2(2:3) /= ["67890", "ABCDE"])) stop 6 + !$omp end target + + !$omp target map(to: var%f) map(tofrom: var%f(2:3)) + if (.not. associated (var%f)) stop 9 + if (size (var%f) /= 4) stop 10 + if (any (var%f(2:3) /= [33, 44])) stop 11 + !$omp end target +! !$omp target map(tofrom: var%str4(2:2)) +! if (.not. associated (var%str4)) stop 15 +! if (len (var%str4) /= 5) stop 16 +! if (size (var%str4) /= 2) stop 17 +! if (var%str4(2) /= "Go!!!") stop 18 +! !$omp end target +! !$omp target map(tofrom: var%uni4(2:2)) +! if (.not. associated (var%uni4)) stop 15 +! if (len (var%uni4) /= 5) stop 16 +! if (size (var%uni4) /= 2) stop 17 +! if (var%uni4(2) /= 4_"Go!!!") stop 18 +! !$omp end target + + deallocate(var%f, var%str4) + end subroutine ten + + ! This is "subroutine six" but with explicit base pointer mappings. + subroutine eleven() + type(t2) :: var + + print '(g0)', '==== TESTCASE "eleven" ====' + + var = t2(a = 1, & + b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), & + d = [(-3*i, i = 1, 10)], & + str1 = "abcde", & + str2 = ["12345", "67890", "ABCDE", "FGHIJ"], & + uni1 = 4_"abcde", & + uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"]) + allocate (var%f, source=[22, 33, 44, 55]) + allocate (var%str4, source=["Let's", "Go!!!"]) + allocate (var%uni4, source=[4_"Let's", 4_"Go!!!"]) + +! !$omp target map(tofrom: var%d(5), var%f(3), var%str2(3), & +! 
!$omp var%str4(2), var%uni2(3), var%uni4(2)) + !$omp target map(to: var%f) map(tofrom: var%d(5), var%f(3), & + !$omp& var%str2(3), var%uni2(3)) + if (var%d(5) /= -3*5) stop 4 + if (var%str2(3) /= "ABCDE") stop 6 + if (var%uni2(3) /= 4_"ABCDE") stop 7 + + if (.not. associated (var%f)) stop 9 + if (size (var%f) /= 4) stop 10 + if (var%f(3) /= 44) stop 11 +! if (.not. associated (var%str4)) stop 15 +! if (len (var%str4) /= 5) stop 16 +! if (size (var%str4) /= 2) stop 17 +! if (var%str4(2) /= "Go!!!") stop 18 +! if (.not. associated (var%uni4)) stop 19 +! if (len (var%uni4) /= 5) stop 20 +! if (size (var%uni4) /= 2) stop 21 +! if (var%uni4(2) /= 4_"Go!!!") stop 22 + !$omp end target + + deallocate(var%f, var%str4, var%uni4) + end subroutine eleven + + ! This is "subroutine seven" but with explicit base-pointer mappings. + subroutine twelve() + type(t2) :: var + + print '(g0)', '==== TESTCASE "twelve" ====' + + var = t2(a = 1, & + b = 2, c = cmplx(-1.0_8, 2.0_8,kind=8), & + d = [(-3*i, i = 1, 10)], & + str1 = "abcde", & + str2 = ["12345", "67890", "ABCDE", "FGHIJ"], & + uni1 = 4_"abcde", & + uni2 = [4_"12345", 4_"67890", 4_"ABCDE", 4_"FGHIJ"]) + allocate (var%f, source=[22, 33, 44, 55]) + allocate (var%str4, source=["Let's", "Go!!!"]) + allocate (var%uni4, source=[4_"Let's", 4_"Go!!!"]) + + !$omp target map(tofrom: var%d(5)) + if (var%d(5) /= (-3*5)) stop 4 + !$omp end target + !$omp target map(tofrom: var%str2(2:3)) + if (any (var%str2(2:3) /= ["67890", "ABCDE"])) stop 6 + !$omp end target + !$omp target map(tofrom: var%uni2(2:3)) + if (any (var%uni2(2:3) /= [4_"67890", 4_"ABCDE"])) stop 7 + !$omp end target + + !$omp target map(to: var%f) map(tofrom: var%f(2:3)) + if (.not. associated (var%f)) stop 9 + if (size (var%f) /= 4) stop 10 + if (any (var%f(2:3) /= [33, 44])) stop 11 + !$omp end target +! !$omp target map(tofrom: var%str4(2:2)) +! if (.not. associated (var%str4)) stop 15 +! if (len (var%str4) /= 5) stop 16 +! if (size (var%str4) /= 2) stop 17 +! 
if (var%str4(2) /= "Go!!!") stop 18 +! !$omp end target +! !$omp target map(tofrom: var%uni4(2:2)) +! if (.not. associated (var%uni4)) stop 15 +! if (len (var%uni4) /= 5) stop 16 +! if (size (var%uni4) /= 2) stop 17 +! if (var%uni4(2) /= 4_"Go!!!") stop 18 +! !$omp end target + + deallocate(var%f, var%str4, var%uni4) + end subroutine twelve + +end program main From b7e4a4c626eeeb32c291d5bbbaa148c5081b6bfd Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Wed, 22 Nov 2023 13:20:58 -0500 Subject: [PATCH 290/311] c-family: -Waddress-of-packed-member and casts -Waddress-of-packed-member, in addition to the documented warning about actually taking the address of a packed member, also warns about casting from a pointer to a TYPE_PACKED type to a pointer to a type with greater alignment. This wrongly warns if the source is a pointer to enum when -fshort-enums is on, since that is also represented by TYPE_PACKED. And there's already -Wcast-align to catch casting from pointer to less aligned type (packed or otherwise) to pointer to more aligned type; even apart from the enum problem, this seems like a somewhat arbitrary subset of that warning. So, this patch removes the undocumented type-based warning from -Waddress-of-packed-member. Some of the tests where the warning is desirable I changed to use -Wcast-align=strict instead. The ones that require -Wno-incompatible-pointer-types I just removed. gcc/c-family/ChangeLog: * c-warn.cc (check_address_or_pointer_of_packed_member): Remove warning based on TYPE_PACKED. gcc/testsuite/ChangeLog: * c-c++-common/Waddress-of-packed-member-1.c: Don't expect a warning on the cast cases. * c-c++-common/pr51628-35.c: Use -Wcast-align=strict. * g++.dg/warn/Waddress-of-packed-member3.C: Likewise. * gcc.dg/pr88928.c: Likewise. * gcc.dg/pr51628-20.c: Removed. * gcc.dg/pr51628-21.c: Removed. * gcc.dg/pr51628-25.c: Removed. 
--- gcc/c-family/c-warn.cc | 58 +------------------ .../Waddress-of-packed-member-1.c | 12 ++-- gcc/testsuite/c-c++-common/pr51628-35.c | 6 +- .../g++.dg/warn/Waddress-of-packed-member3.C | 8 +-- gcc/testsuite/gcc.dg/pr51628-20.c | 11 ---- gcc/testsuite/gcc.dg/pr51628-21.c | 11 ---- gcc/testsuite/gcc.dg/pr51628-25.c | 9 --- gcc/testsuite/gcc.dg/pr88928.c | 6 +- 8 files changed, 19 insertions(+), 102 deletions(-) delete mode 100644 gcc/testsuite/gcc.dg/pr51628-20.c delete mode 100644 gcc/testsuite/gcc.dg/pr51628-21.c delete mode 100644 gcc/testsuite/gcc.dg/pr51628-25.c diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc index d2938b91043d..2a399ba6d14d 100644 --- a/gcc/c-family/c-warn.cc +++ b/gcc/c-family/c-warn.cc @@ -2991,10 +2991,9 @@ check_alignment_of_packed_member (tree type, tree field, bool rvalue) return NULL_TREE; } -/* Return struct or union type if the right hand value, RHS: - 1. Is a pointer value which isn't aligned to a pointer type TYPE. - 2. Is an address which takes the unaligned address of packed member - of struct or union when assigning to TYPE. +/* Return struct or union type if the right hand value, RHS + is an address which takes the unaligned address of packed member + of struct or union when assigning to TYPE. Otherwise, return NULL_TREE. */ static tree @@ -3021,57 +3020,6 @@ check_address_or_pointer_of_packed_member (tree type, tree rhs) type = TREE_TYPE (type); - if (TREE_CODE (rhs) == PARM_DECL - || VAR_P (rhs) - || TREE_CODE (rhs) == CALL_EXPR) - { - tree rhstype = TREE_TYPE (rhs); - if (TREE_CODE (rhs) == CALL_EXPR) - { - rhs = CALL_EXPR_FN (rhs); /* Pointer expression. */ - if (rhs == NULL_TREE) - return NULL_TREE; - rhs = TREE_TYPE (rhs); /* Pointer type. */ - /* We could be called while processing a template and RHS could be - a functor. In that case it's a class, not a pointer. */ - if (!rhs || !POINTER_TYPE_P (rhs)) - return NULL_TREE; - rhs = TREE_TYPE (rhs); /* Function type. 
*/ - rhstype = TREE_TYPE (rhs); - if (!rhstype || !POINTER_TYPE_P (rhstype)) - return NULL_TREE; - rvalue = true; - } - if (rvalue && POINTER_TYPE_P (rhstype)) - rhstype = TREE_TYPE (rhstype); - while (TREE_CODE (rhstype) == ARRAY_TYPE) - rhstype = TREE_TYPE (rhstype); - if (TYPE_PACKED (rhstype)) - { - unsigned int type_align = min_align_of_type (type); - unsigned int rhs_align = min_align_of_type (rhstype); - if (rhs_align < type_align) - { - auto_diagnostic_group d; - location_t location = EXPR_LOC_OR_LOC (rhs, input_location); - if (warning_at (location, OPT_Waddress_of_packed_member, - "converting a packed %qT pointer (alignment %d) " - "to a %qT pointer (alignment %d) may result in " - "an unaligned pointer value", - rhstype, rhs_align, type, type_align)) - { - tree decl = TYPE_STUB_DECL (rhstype); - if (decl) - inform (DECL_SOURCE_LOCATION (decl), "defined here"); - decl = TYPE_STUB_DECL (type); - if (decl) - inform (DECL_SOURCE_LOCATION (decl), "defined here"); - } - } - } - return NULL_TREE; - } - tree context = NULL_TREE; /* Check alignment of the object. */ diff --git a/gcc/testsuite/c-c++-common/Waddress-of-packed-member-1.c b/gcc/testsuite/c-c++-common/Waddress-of-packed-member-1.c index 95a376664da4..0f5188df70af 100644 --- a/gcc/testsuite/c-c++-common/Waddress-of-packed-member-1.c +++ b/gcc/testsuite/c-c++-common/Waddress-of-packed-member-1.c @@ -52,12 +52,12 @@ void foo (void) f0 = *&__real__ t0.f; /* { dg-bogus "may result in an unaligned pointer value" } */ f0 = *&__imag__ t0.f; /* { dg-bogus "may result in an unaligned pointer value" } */ i1 = (&t0.c, (int*) 0); /* { dg-bogus "may result in an unaligned pointer value" } */ - t2 = (struct t**) t10; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ - t2 = (struct t**) t100; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! 
default_packed } } } */ - t2 = (struct t**) t1; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ - t2 = (struct t**) bar(); /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ - t2 = (struct t**) baz(); /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ - t2 = (struct t**) bazz(); /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ + t2 = (struct t**) t10; /* { dg-bogus "may result in an unaligned pointer value" } */ + t2 = (struct t**) t100; /* { dg-bogus "may result in an unaligned pointer value" } */ + t2 = (struct t**) t1; /* { dg-bogus "may result in an unaligned pointer value" } */ + t2 = (struct t**) bar(); /* { dg-bogus "may result in an unaligned pointer value" } */ + t2 = (struct t**) baz(); /* { dg-bogus "may result in an unaligned pointer value" } */ + t2 = (struct t**) bazz(); /* { dg-bogus "may result in an unaligned pointer value" } */ i1 = &t0.b; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ i1 = &t1->b; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ i1 = &t10[0].b; /* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } } */ diff --git a/gcc/testsuite/c-c++-common/pr51628-35.c b/gcc/testsuite/c-c++-common/pr51628-35.c index fa37d99beb79..a88c19ea0df0 100644 --- a/gcc/testsuite/c-c++-common/pr51628-35.c +++ b/gcc/testsuite/c-c++-common/pr51628-35.c @@ -1,6 +1,6 @@ /* PR c/51628. */ /* { dg-do compile } */ -/* { dg-options "-O" } */ +/* { dg-options "-O -Wcast-align=strict" } */ struct B { int i; }; struct C { struct B b; } __attribute__ ((packed)); @@ -12,12 +12,12 @@ long * foo1 (void) { return (long *) p; -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! 
default_packed } } .-1 } */ +/* { dg-warning "increases required alignment" "" { target { ! default_packed } } .-1 } */ } long * foo2 (void) { return (long *) bar (); -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } .-1 } */ +/* { dg-warning "increases required alignment" "" { target { ! default_packed } } .-1 } */ } diff --git a/gcc/testsuite/g++.dg/warn/Waddress-of-packed-member3.C b/gcc/testsuite/g++.dg/warn/Waddress-of-packed-member3.C index aeffb969c012..28dd05d366c5 100644 --- a/gcc/testsuite/g++.dg/warn/Waddress-of-packed-member3.C +++ b/gcc/testsuite/g++.dg/warn/Waddress-of-packed-member3.C @@ -1,5 +1,5 @@ // { dg-do compile { target { ! default_packed } } } -// Test that -Waddress-of-packed-member works with member functions. +// { dg-additional-options -Wcast-align=strict } struct S { char c; @@ -16,8 +16,8 @@ S** f () { S **s; - s = reinterpret_cast<S**>(foo ()); // { dg-warning "converting a packed" } - s = reinterpret_cast<S**>(x.memfn ()); // { dg-warning "converting a packed" } - s = reinterpret_cast<S**>(X::smemfn ()); // { dg-warning "converting a packed" } + s = reinterpret_cast<S**>(foo ()); // { dg-warning "increases required alignment" } + s = reinterpret_cast<S**>(x.memfn ()); // { dg-warning "increases required alignment" } + s = reinterpret_cast<S**>(X::smemfn ()); // { dg-warning "increases required alignment" } return s; } diff --git a/gcc/testsuite/gcc.dg/pr51628-20.c b/gcc/testsuite/gcc.dg/pr51628-20.c deleted file mode 100644 index 2249d85098b7..000000000000 --- a/gcc/testsuite/gcc.dg/pr51628-20.c +++ /dev/null @@ -1,11 +0,0 @@ -/* PR c/51628. */ -/* { dg-do compile } */ -/* { dg-options "-O -Wno-incompatible-pointer-types" } */ - -struct B { int i; }; -struct C { struct B b; } __attribute__ ((packed)); - -extern struct C *p; - -long* g8 (void) { return p; } -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! 
default_packed } } .-1 } */ diff --git a/gcc/testsuite/gcc.dg/pr51628-21.c b/gcc/testsuite/gcc.dg/pr51628-21.c deleted file mode 100644 index f1adbe640029..000000000000 --- a/gcc/testsuite/gcc.dg/pr51628-21.c +++ /dev/null @@ -1,11 +0,0 @@ -/* PR c/51628. */ -/* { dg-do compile } */ -/* { dg-options "-O -Wno-incompatible-pointer-types" } */ - -struct B { int i; }; -struct C { struct B b; } __attribute__ ((packed)); - -extern struct C p[]; - -long* g8 (void) { return p; } -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } .-1 } */ diff --git a/gcc/testsuite/gcc.dg/pr51628-25.c b/gcc/testsuite/gcc.dg/pr51628-25.c deleted file mode 100644 index f00d9b1bcacf..000000000000 --- a/gcc/testsuite/gcc.dg/pr51628-25.c +++ /dev/null @@ -1,9 +0,0 @@ -/* PR c/51628. */ -/* { dg-do compile } */ -/* { dg-options "-O -Wno-incompatible-pointer-types" } */ - -struct B { int i; }; -struct C { struct B b; } __attribute__ ((packed)); - -long* g8 (struct C *p) { return p; } -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! default_packed } } .-1 } */ diff --git a/gcc/testsuite/gcc.dg/pr88928.c b/gcc/testsuite/gcc.dg/pr88928.c index 0b6c1d70f05a..1d176d6d51db 100644 --- a/gcc/testsuite/gcc.dg/pr88928.c +++ b/gcc/testsuite/gcc.dg/pr88928.c @@ -1,6 +1,6 @@ -/* { dg-do compile } */ -/* { dg-options "-Wno-pedantic -Waddress-of-packed-member" } */ +/* { dg-do compile { target { ! default_packed } } } */ +/* { dg-options "-Wno-pedantic -Waddress-of-packed-member -Wcast-align=strict" } */ struct a { } __attribute__((__packed__)); void c (struct a **); void d (const struct a *b) { c ((struct a **) b); } -/* { dg-warning "may result in an unaligned pointer value" "" { target { ! 
default_packed } } .-1 } */ +/* { dg-warning "increases required alignment" "" { target *-*-* } .-1 } */ From ff35f1d4daa37e74f7a68e87c1a6c180d9a91f10 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Wed, 22 Nov 2023 13:48:45 -0500 Subject: [PATCH 291/311] c-family: rename warn_for_address_or_pointer_of_packed_member Following the last patch, let's rename the functions to reflect the change in behavior. gcc/c-family/ChangeLog: * c-warn.cc (check_address_or_pointer_of_packed_member): Rename to check_address_of_packed_member. (check_and_warn_address_or_pointer_of_packed_member): Rename to check_and_warn_address_of_packed_member. (warn_for_address_or_pointer_of_packed_member): Rename to warn_for_address_of_packed_member. * c-common.h: Adjust. gcc/c/ChangeLog: * c-typeck.cc (convert_for_assignment): Adjust call to warn_for_address_of_packed_member. gcc/cp/ChangeLog: * call.cc (convert_for_arg_passing) * typeck.cc (convert_for_assignment): Adjust call to warn_for_address_of_packed_member. --- gcc/c-family/c-common.h | 2 +- gcc/c-family/c-warn.cc | 32 ++++++++++++++------------------ gcc/c/c-typeck.cc | 4 ++-- gcc/cp/call.cc | 2 +- gcc/cp/typeck.cc | 2 +- 5 files changed, 19 insertions(+), 23 deletions(-) diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 6e7fc1b3aa35..b8bd56c1a4df 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -1572,7 +1572,7 @@ extern void warnings_for_convert_and_check (location_t, tree, tree, tree); extern void c_do_switch_warnings (splay_tree, location_t, tree, tree, bool); extern void warn_for_omitted_condop (location_t, tree); extern bool warn_for_restrict (unsigned, tree *, unsigned); -extern void warn_for_address_or_pointer_of_packed_member (tree, tree); +extern void warn_for_address_of_packed_member (tree, tree); extern void warn_parm_array_mismatch (location_t, tree, tree); extern void maybe_warn_sizeof_array_div (location_t, tree, tree, tree, tree); extern void do_warn_array_compare (location_t, 
tree_code, tree, tree); diff --git a/gcc/c-family/c-warn.cc b/gcc/c-family/c-warn.cc index 2a399ba6d14d..abe66dd3030e 100644 --- a/gcc/c-family/c-warn.cc +++ b/gcc/c-family/c-warn.cc @@ -2991,13 +2991,13 @@ check_alignment_of_packed_member (tree type, tree field, bool rvalue) return NULL_TREE; } -/* Return struct or union type if the right hand value, RHS +/* Return struct or union type if the right hand value, RHS, is an address which takes the unaligned address of packed member of struct or union when assigning to TYPE. Otherwise, return NULL_TREE. */ static tree -check_address_or_pointer_of_packed_member (tree type, tree rhs) +check_address_of_packed_member (tree type, tree rhs) { bool rvalue = true; bool indirect = false; @@ -3042,14 +3042,12 @@ check_address_or_pointer_of_packed_member (tree type, tree rhs) return context; } -/* Check and warn if the right hand value, RHS: - 1. Is a pointer value which isn't aligned to a pointer type TYPE. - 2. Is an address which takes the unaligned address of packed member - of struct or union when assigning to TYPE. - */ +/* Check and warn if the right hand value, RHS, + is an address which takes the unaligned address of packed member + of struct or union when assigning to TYPE. */ static void -check_and_warn_address_or_pointer_of_packed_member (tree type, tree rhs) +check_and_warn_address_of_packed_member (tree type, tree rhs) { bool nop_p = false; tree orig_rhs; @@ -3067,11 +3065,11 @@ check_and_warn_address_or_pointer_of_packed_member (tree type, tree rhs) if (TREE_CODE (rhs) == COND_EXPR) { /* Check the THEN path. */ - check_and_warn_address_or_pointer_of_packed_member + check_and_warn_address_of_packed_member (type, TREE_OPERAND (rhs, 1)); /* Check the ELSE path. 
*/ - check_and_warn_address_or_pointer_of_packed_member + check_and_warn_address_of_packed_member (type, TREE_OPERAND (rhs, 2)); } else @@ -3095,7 +3093,7 @@ check_and_warn_address_or_pointer_of_packed_member (tree type, tree rhs) } tree context - = check_address_or_pointer_of_packed_member (type, rhs); + = check_address_of_packed_member (type, rhs); if (context) { location_t loc = EXPR_LOC_OR_LOC (rhs, input_location); @@ -3107,14 +3105,12 @@ check_and_warn_address_or_pointer_of_packed_member (tree type, tree rhs) } } -/* Warn if the right hand value, RHS: - 1. Is a pointer value which isn't aligned to a pointer type TYPE. - 2. Is an address which takes the unaligned address of packed member - of struct or union when assigning to TYPE. -*/ +/* Warn if the right hand value, RHS, + is an address which takes the unaligned address of packed member + of struct or union when assigning to TYPE. */ void -warn_for_address_or_pointer_of_packed_member (tree type, tree rhs) +warn_for_address_of_packed_member (tree type, tree rhs) { if (!warn_address_of_packed_member) return; @@ -3123,7 +3119,7 @@ warn_for_address_or_pointer_of_packed_member (tree type, tree rhs) if (!POINTER_TYPE_P (type)) return; - check_and_warn_address_or_pointer_of_packed_member (type, rhs); + check_and_warn_address_of_packed_member (type, rhs); } /* Return EXPR + 1. Convenience helper used below. */ diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 18860c2373fb..022e3c6386ba 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -7000,7 +7000,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (rhstype)) { - warn_for_address_or_pointer_of_packed_member (type, orig_rhs); + warn_for_address_of_packed_member (type, orig_rhs); return rhs; } @@ -7658,7 +7658,7 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, /* If RHS isn't an address, check pointer or array of packed struct or union. 
*/ - warn_for_address_or_pointer_of_packed_member (type, orig_rhs); + warn_for_address_of_packed_member (type, orig_rhs); return convert (type, rhs); } diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index aaee34f35b04..13ca9511cc87 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9304,7 +9304,7 @@ convert_for_arg_passing (tree type, tree val, tsubst_flags_t complain) } if (complain & tf_warning) - warn_for_address_or_pointer_of_packed_member (type, val); + warn_for_address_of_packed_member (type, val); return val; } diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 258cfd43114d..a6e2f4ee7da8 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -10384,7 +10384,7 @@ convert_for_assignment (tree type, tree rhs, maybe_warn_unparenthesized_assignment (rhs, complain); if (complain & tf_warning) - warn_for_address_or_pointer_of_packed_member (type, rhs); + warn_for_address_of_packed_member (type, rhs); return perform_implicit_conversion_flags (strip_top_quals (type), rhs, complain, flags); From c822ad86984e752734b9c371f9cfef9330334ec4 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 13 Dec 2023 15:55:01 -0500 Subject: [PATCH 292/311] c++: unifying FUNCTION_DECLs [PR93740] unify currently always returns success when unifying two FUNCTION_DECLs (due to the is_overloaded_fn deferment within the default case), which means for the below testcase we incorrectly unify &A::foo and &A::bar leading to deduction failure for the index_of calls due to a bogus base class ambiguity. This patch makes unify handle FUNCTION_DECL naturally like other decls. PR c++/93740 gcc/cp/ChangeLog: * pt.cc (unify) <case FUNCTION_DECL>: Handle it like FIELD_DECL and TEMPLATE_DECL. gcc/testsuite/ChangeLog: * g++.dg/template/ptrmem34.C: New test. 
--- gcc/cp/pt.cc | 1 + gcc/testsuite/g++.dg/template/ptrmem34.C | 27 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 gcc/testsuite/g++.dg/template/ptrmem34.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 0dd0a9c644c1..9a21467bf219 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -24964,6 +24964,7 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, gcc_unreachable (); case FIELD_DECL: + case FUNCTION_DECL: case TEMPLATE_DECL: /* Matched cases are handled by the ARG == PARM test above. */ return unify_template_argument_mismatch (explain_p, parm, arg); diff --git a/gcc/testsuite/g++.dg/template/ptrmem34.C b/gcc/testsuite/g++.dg/template/ptrmem34.C new file mode 100644 index 000000000000..75c911ee243c --- /dev/null +++ b/gcc/testsuite/g++.dg/template/ptrmem34.C @@ -0,0 +1,27 @@ +// PR c++/93740 +// { dg-do compile { target c++11 } } + +struct A { + void foo(); + void bar(); +}; + +template +struct const_val { }; + +template +struct indexed_elem { }; + +using mem_fun_A_foo = const_val; +using mem_fun_A_bar = const_val; + +struct A_indexed_member_funcs + : indexed_elem<0, mem_fun_A_foo>, + indexed_elem<1, mem_fun_A_bar> +{ }; + +template +constexpr int index_of(indexed_elem) { return N; } + +static_assert(index_of(A_indexed_member_funcs{}) == 0, ""); +static_assert(index_of(A_indexed_member_funcs{}) == 1, ""); From 35ba3add7d0a9fc6ce955ba8ad82b0413e86ad7d Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 13 Dec 2023 15:55:14 -0500 Subject: [PATCH 293/311] c++: unifying constants vs their type [PR99186, PR104867] When unifying constants we need to treat constants of different types but same value as different in light of auto template parameters since otherwise e.g. A<1> will unify with A<1u> (where A's template-head is template). 
This patch fixes this in a minimal way; it seems we could get away with just using template_args_equal here, as we do in the default case, or even just cp_tree_equal since the CONVERT_EXPR_P loop seems to be dead code, but that's a simplification we could consider during next stage 1. PR c++/99186 PR c++/104867 gcc/cp/ChangeLog: * pt.cc (unify) <case INTEGER_CST>: Compare types as well. gcc/testsuite/ChangeLog: * g++.dg/cpp1z/nontype-auto23.C: New test. * g++.dg/cpp1z/nontype-auto24.C: New test. --- gcc/cp/pt.cc | 2 ++ gcc/testsuite/g++.dg/cpp1z/nontype-auto23.C | 22 +++++++++++++++++++++ gcc/testsuite/g++.dg/cpp1z/nontype-auto24.C | 18 +++++++++++++++++ 3 files changed, 42 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp1z/nontype-auto23.C create mode 100644 gcc/testsuite/g++.dg/cpp1z/nontype-auto24.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 9a21467bf219..b6a450c4ad40 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -24706,6 +24706,8 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, /* Type INTEGER_CST can come from ordinary constant template args. 
*/ case INTEGER_CST: case REAL_CST: + if (!same_type_p (TREE_TYPE (parm), TREE_TYPE (arg))) + return unify_template_argument_mismatch (explain_p, parm, arg); while (CONVERT_EXPR_P (arg)) arg = TREE_OPERAND (arg, 0); diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype-auto23.C b/gcc/testsuite/g++.dg/cpp1z/nontype-auto23.C new file mode 100644 index 000000000000..62a571ef84ab --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/nontype-auto23.C @@ -0,0 +1,22 @@ +// PR c++/99186 +// { dg-do compile { target c++17 } } + +template +struct tuple_impl : tuple_impl { }; + +template +struct tuple_impl { }; + +template +struct tuple : tuple_impl<0, T, U> { }; + +template +void get(const tuple_impl&); + +template struct S; + +int main() { + tuple,S<1U>> x; + get>(x); + get>(x); +} diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype-auto24.C b/gcc/testsuite/g++.dg/cpp1z/nontype-auto24.C new file mode 100644 index 000000000000..52e4c134ccdb --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/nontype-auto24.C @@ -0,0 +1,18 @@ +// PR c++/104867 +// { dg-do compile { target c++17 } } + +enum class Foo { A1 }; + +enum class Bar { B1 }; + +template struct enum_; + +template struct list { }; + +template void f(list, V>); + +struct enum_type_map : list, int>, list, double> { }; + +int main() { + f(enum_type_map()); +} From ead2b94d602ce758575aa46ec35a51c3157ff9cd Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 13 Dec 2023 16:45:50 -0500 Subject: [PATCH 294/311] c++: sort candidates according to viability This patch: * changes splice_viable to move the non-viable candidates to the end of the list instead of removing them outright * makes tourney move the best candidate to the front of the candidate list * adjusts print_z_candidates to preserve our behavior of printing only viable candidates when diagnosing ambiguity * adds a parameter to print_z_candidates to control this default behavior (the follow-up patch will want to print all candidates when diagnosing deletedness) Thus after this patch we 
have access to the entire candidate list through the best viable candidate. This change also happens to fix diagnostics for the below testcase where we currently neglect to note the third candidate, since the presence of the two unordered non-strictly viable candidates causes splice_viable to prematurely get rid of the non-viable third candidate. gcc/cp/ChangeLog: * call.cc: Include "tristate.h". (splice_viable): Sort the candidate list according to viability. Don't remove non-viable candidates from the list. (print_z_candidates): Add defaulted only_viable_p parameter. By default only print non-viable candidates if there is no viable candidate. (tourney): Ignore non-viable candidates. Move the true champ to the front of the candidates list, and update 'candidates' to point to the front. Rename champ_compared_to_predecessor to previous_worse_champ. gcc/testsuite/ChangeLog: * g++.dg/overload/error5.C: New test. --- gcc/cp/call.cc | 177 ++++++++++++++----------- gcc/testsuite/g++.dg/overload/error5.C | 12 ++ 2 files changed, 115 insertions(+), 74 deletions(-) create mode 100644 gcc/testsuite/g++.dg/overload/error5.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 13ca9511cc87..1b47d13a72ef 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "attribs.h" #include "decl.h" #include "gcc-rich-location.h" +#include "tristate.h" /* The various kinds of conversion. 
*/ @@ -176,7 +177,8 @@ static void op_error (const op_location_t &, enum tree_code, enum tree_code, static struct z_candidate *build_user_type_conversion_1 (tree, tree, int, tsubst_flags_t); static void print_z_candidate (location_t, const char *, struct z_candidate *); -static void print_z_candidates (location_t, struct z_candidate *); +static void print_z_candidates (location_t, struct z_candidate *, + tristate = tristate::unknown ()); static tree build_this (tree); static struct z_candidate *splice_viable (struct z_candidate *, bool, bool *); static bool any_strictly_viable (struct z_candidate *); @@ -3700,68 +3702,60 @@ add_template_conv_candidate (struct z_candidate **candidates, tree tmpl, } /* The CANDS are the set of candidates that were considered for - overload resolution. Return the set of viable candidates, or CANDS - if none are viable. If any of the candidates were viable, set + overload resolution. Sort CANDS so that the strictly viable + candidates appear first, followed by non-strictly viable candidates, + followed by non-viable candidates. Returns the first candidate + in this sorted list. If any of the candidates were viable, set *ANY_VIABLE_P to true. STRICT_P is true if a candidate should be - considered viable only if it is strictly viable. */ + considered viable only if it is strictly viable when setting + *ANY_VIABLE_P. 
*/ static struct z_candidate* splice_viable (struct z_candidate *cands, bool strict_p, bool *any_viable_p) { - struct z_candidate *viable; - struct z_candidate **last_viable; - struct z_candidate **cand; - bool found_strictly_viable = false; + z_candidate *strictly_viable = nullptr; + z_candidate **strictly_viable_tail = &strictly_viable; + + z_candidate *non_strictly_viable = nullptr; + z_candidate **non_strictly_viable_tail = &non_strictly_viable; + + z_candidate *non_viable = nullptr; + z_candidate **non_viable_tail = &non_viable; /* Be strict inside templates, since build_over_call won't actually do the conversions to get pedwarns. */ if (processing_template_decl) strict_p = true; - viable = NULL; - last_viable = &viable; - *any_viable_p = false; - - cand = &cands; - while (*cand) + for (z_candidate *cand = cands; cand; cand = cand->next) { - struct z_candidate *c = *cand; if (!strict_p - && (c->viable == 1 || TREE_CODE (c->fn) == TEMPLATE_DECL)) - { - /* Be strict in the presence of a viable candidate. Also if - there are template candidates, so that we get deduction errors - for them instead of silently preferring a bad conversion. */ - strict_p = true; - if (viable && !found_strictly_viable) - { - /* Put any spliced near matches back onto the main list so - that we see them if there is no strict match. */ - *any_viable_p = false; - *last_viable = cands; - cands = viable; - viable = NULL; - last_viable = &viable; - } - } + && (cand->viable == 1 || TREE_CODE (cand->fn) == TEMPLATE_DECL)) + /* Be strict in the presence of a viable candidate. Also if + there are template candidates, so that we get deduction errors + for them instead of silently preferring a bad conversion. */ + strict_p = true; - if (strict_p ? 
c->viable == 1 : c->viable) - { - *last_viable = c; - *cand = c->next; - c->next = NULL; - last_viable = &c->next; - *any_viable_p = true; - if (c->viable == 1) - found_strictly_viable = true; - } - else - cand = &c->next; + /* Move this candidate to the appropriate list according to + its viability. */ + auto& tail = (cand->viable == 1 ? strictly_viable_tail + : cand->viable == -1 ? non_strictly_viable_tail + : non_viable_tail); + *tail = cand; + tail = &cand->next; } - return viable ? viable : cands; + *any_viable_p = (strictly_viable != nullptr + || (!strict_p && non_strictly_viable != nullptr)); + + /* Combine the lists. */ + *non_viable_tail = nullptr; + *non_strictly_viable_tail = non_viable; + *strictly_viable_tail = non_strictly_viable; + + return strictly_viable; } static bool @@ -3995,8 +3989,13 @@ print_z_candidate (location_t loc, const char *msgstr, } } +/* Print information about each overload candidate in CANDIDATES, + which is assumed to have gone through splice_viable and tourney + (if splice_viable succeeded). */ + static void -print_z_candidates (location_t loc, struct z_candidate *candidates) +print_z_candidates (location_t loc, struct z_candidate *candidates, + tristate only_viable_p /* = tristate::unknown () */) { struct z_candidate *cand1; struct z_candidate **cand2; @@ -4041,8 +4040,19 @@ print_z_candidates (location_t loc, struct z_candidate *candidates) } } + /* Unless otherwise specified, if there's a (strictly) viable candidate + then we assume we're being called as part of diagnosing ambiguity, in + which case we want to print only viable candidates since non-viable + candidates couldn't have contributed to the ambiguity. 
*/ + if (only_viable_p.is_unknown ()) + only_viable_p = candidates->viable == 1; + for (; candidates; candidates = candidates->next) - print_z_candidate (loc, N_("candidate:"), candidates); + { + if (only_viable_p.is_true () && candidates->viable != 1) + break; + print_z_candidate (loc, N_("candidate:"), candidates); + } } /* USER_SEQ is a user-defined conversion sequence, beginning with a @@ -13204,57 +13214,76 @@ tweak: /* Given a list of candidates for overloading, find the best one, if any. This algorithm has a worst case of O(2n) (winner is last), and a best case of O(n/2) (totally ambiguous); much better than a sorting - algorithm. */ + algorithm. The candidates list is assumed to be sorted according + to viability (via splice_viable). */ static struct z_candidate * tourney (struct z_candidate *candidates, tsubst_flags_t complain) { - struct z_candidate *champ = candidates, *challenger; + struct z_candidate **champ = &candidates, **challenger; int fate; - struct z_candidate *champ_compared_to_predecessor = nullptr; + struct z_candidate *previous_worse_champ = nullptr; /* Walk through the list once, comparing each current champ to the next candidate, knocking out a candidate or two with each comparison. 
*/ - for (challenger = champ->next; challenger; ) + for (challenger = &candidates->next; *challenger && (*challenger)->viable; ) { - fate = joust (champ, challenger, 0, complain); + fate = joust (*champ, *challenger, 0, complain); if (fate == 1) - challenger = challenger->next; + challenger = &(*challenger)->next; + else if (fate == -1) + { + previous_worse_champ = *champ; + champ = challenger; + challenger = &(*challenger)->next; + } else { - if (fate == 0) + previous_worse_champ = nullptr; + champ = &(*challenger)->next; + if (!*champ || !(*champ)->viable) { - champ = challenger->next; - if (champ == 0) - return NULL; - champ_compared_to_predecessor = nullptr; + champ = nullptr; + break; } - else - { - champ_compared_to_predecessor = champ; - champ = challenger; - } - - challenger = champ->next; + challenger = &(*champ)->next; } } /* Make sure the champ is better than all the candidates it hasn't yet been compared to. */ - for (challenger = candidates; - challenger != champ; - challenger = challenger->next) + if (champ) + for (challenger = &candidates; + challenger != champ; + challenger = &(*challenger)->next) + { + if (*challenger == previous_worse_champ) + /* We already know this candidate is worse than the champ. */ + continue; + fate = joust (*champ, *challenger, 0, complain); + if (fate != 1) + { + champ = nullptr; + break; + } + } + + if (!champ) + return nullptr; + + /* Move the champ to the front of the candidate list. */ + + if (champ != &candidates) { - if (challenger == champ_compared_to_predecessor) - continue; - fate = joust (champ, challenger, 0, complain); - if (fate != 1) - return NULL; + z_candidate *saved_champ = *champ; + *champ = saved_champ->next; + saved_champ->next = candidates; + candidates = saved_champ; } - return champ; + return candidates; } /* Returns nonzero if things of type FROM can be converted to TO. 
*/ diff --git a/gcc/testsuite/g++.dg/overload/error5.C b/gcc/testsuite/g++.dg/overload/error5.C new file mode 100644 index 000000000000..6a2f3b5ba35a --- /dev/null +++ b/gcc/testsuite/g++.dg/overload/error5.C @@ -0,0 +1,12 @@ +// Verify we note all three candidates when diagnosing overload +// resolution failure. The presence of the first two (ambiguous) +// non-strictly viable candidates used to make us prune the third +// and not note it. + +void f(int, int*); // { dg-message "candidate" } +void f(int*, int); // { dg-message "candidate" } +void f(int, int, int); // { dg-message "candidate" } + +int main() { + f(1, 2); // { dg-error "no match|invalid conversion" } +} From b24c09bfb626271cda345f5a6f0d3a6b6480593d Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 13 Dec 2023 16:46:01 -0500 Subject: [PATCH 295/311] c++: remember candidates that we ignored During overload resolution, we sometimes outright ignore a function in the overload set and leave no trace of it in the candidates list, for example when we find a perfect non-template candidate we discard all function templates, or when the callee is a template-id we discard all non-template functions. We should still however make note of these non-viable functions when diagnosing overload resolution failure, but that's not possible if they're not present in the returned candidates list. To that end, this patch reworks add_candidates to add such ignored functions to the list. The new rr_ignored rejection reason is somewhat of a catch-all; we could perhaps split it up into more specific rejection reasons, but I leave that as future work. gcc/cp/ChangeLog: * call.cc (enum rejection_reason_code): Add rr_ignored. (add_ignored_candidate): Define. (ignored_candidate_p): Define. (add_template_candidate_real): Do add_ignored_candidate instead of returning NULL. (splice_viable): Put ignored (non-viable) candidates last. (print_z_candidate): Handle ignored candidates. 
(build_new_function_call): Refine shortcut that calls cp_build_function_call_vec now that non-templates can appear in the candidate list for a template-id call. (add_candidates): Replace 'bad_fns' overload with 'bad_cands' candidate list. When not considering a candidate, add it to the list as an ignored candidate. Add all 'bad_cands' to the overload set as well. gcc/testsuite/ChangeLog: * g++.dg/diagnostic/param-type-mismatch-2.C: Rename template function test_7 that (maybe accidentally) shares the same name as its non-template callee. * g++.dg/overload/error6.C: New test. --- gcc/cp/call.cc | 146 ++++++++++++++---- .../g++.dg/diagnostic/param-type-mismatch-2.C | 20 +-- gcc/testsuite/g++.dg/overload/error6.C | 9 ++ 3 files changed, 131 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/g++.dg/overload/error6.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 1b47d13a72ef..aa4111dda5c7 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -441,7 +441,8 @@ enum rejection_reason_code { rr_template_unification, rr_invalid_copy, rr_inherited_ctor, - rr_constraint_failure + rr_constraint_failure, + rr_ignored, }; struct conversion_info { @@ -2224,6 +2225,35 @@ add_candidate (struct z_candidate **candidates, return cand; } +/* FN is a function from the overload set that we outright didn't even + consider (for some reason); add it to the list as a non-viable "ignored" + candidate. */ + +static z_candidate * +add_ignored_candidate (z_candidate **candidates, tree fn) +{ + /* No need to dynamically allocate these. */ + static const rejection_reason reason_ignored = { rr_ignored, {} }; + + struct z_candidate *cand = (struct z_candidate *) + conversion_obstack_alloc (sizeof (struct z_candidate)); + + cand->fn = fn; + cand->reason = const_cast (&reason_ignored); + cand->next = *candidates; + *candidates = cand; + + return cand; +} + +/* True iff CAND is a candidate added by add_ignored_candidate.
*/ + +static bool +ignored_candidate_p (const z_candidate *cand) +{ + return cand->reason && cand->reason->code == rr_ignored; +} + /* Return the number of remaining arguments in the parameter list beginning with ARG. */ @@ -3471,7 +3501,7 @@ add_template_candidate_real (struct z_candidate **candidates, tree tmpl, } if (len < skip_without_in_chrg) - return NULL; + return add_ignored_candidate (candidates, tmpl); if (DECL_CONSTRUCTOR_P (tmpl) && nargs == 2 && same_type_ignoring_top_level_qualifiers_p (TREE_TYPE (first_arg), @@ -3609,7 +3639,7 @@ add_template_candidate_real (struct z_candidate **candidates, tree tmpl, if (((flags & (LOOKUP_ONLYCONVERTING|LOOKUP_LIST_INIT_CTOR)) == LOOKUP_ONLYCONVERTING) && DECL_NONCONVERTING_P (fn)) - return NULL; + return add_ignored_candidate (candidates, fn); if (DECL_CONSTRUCTOR_P (fn) && nargs == 2) { @@ -3724,6 +3754,9 @@ splice_viable (struct z_candidate *cands, z_candidate *non_viable = nullptr; z_candidate **non_viable_tail = &non_viable; + z_candidate *non_viable_ignored = nullptr; + z_candidate **non_viable_ignored_tail = &non_viable_ignored; + /* Be strict inside templates, since build_over_call won't actually do the conversions to get pedwarns. */ if (processing_template_decl) @@ -3742,6 +3775,7 @@ splice_viable (struct z_candidate *cands, its viability. */ auto& tail = (cand->viable == 1 ? strictly_viable_tail : cand->viable == -1 ? non_strictly_viable_tail + : ignored_candidate_p (cand) ? non_viable_ignored_tail : non_viable_tail); *tail = cand; tail = &cand->next; @@ -3751,7 +3785,8 @@ splice_viable (struct z_candidate *cands, || (!strict_p && non_strictly_viable != nullptr)); /* Combine the lists. 
*/ - *non_viable_tail = nullptr; + *non_viable_ignored_tail = nullptr; + *non_viable_tail = non_viable_ignored; *non_strictly_viable_tail = non_viable; *strictly_viable_tail = non_strictly_viable; @@ -3901,6 +3936,8 @@ print_z_candidate (location_t loc, const char *msgstr, inform (cloc, "%s%qT (conversion)", msg, fn); else if (candidate->viable == -1) inform (cloc, "%s%#qD (near match)", msg, fn); + else if (ignored_candidate_p (candidate)) + inform (cloc, "%s%#qD (ignored)", msg, fn); else if (DECL_DELETED_FN (fn)) inform (cloc, "%s%#qD (deleted)", msg, fn); else if (candidate->reversed ()) @@ -3980,6 +4017,8 @@ print_z_candidate (location_t loc, const char *msgstr, "initialization from an expression of the same or derived " "type"); break; + case rr_ignored: + break; case rr_none: default: /* This candidate didn't have any issues or we failed to @@ -5023,7 +5062,12 @@ build_new_function_call (tree fn, vec **args, // If there is a single (non-viable) function candidate, // let the error be diagnosed by cp_build_function_call_vec. if (!any_viable_p && candidates && ! candidates->next - && (TREE_CODE (candidates->fn) == FUNCTION_DECL)) + && TREE_CODE (candidates->fn) == FUNCTION_DECL + /* A template-id callee consisting of a single (ignored) + non-template candidate needs to be diagnosed the + ordinary way. */ + && (TREE_CODE (fn) != TEMPLATE_ID_EXPR + || candidates->template_decl)) return cp_build_function_call_vec (candidates->fn, args, complain); // Otherwise, emit notes for non-viable candidates. @@ -6526,6 +6570,10 @@ add_candidates (tree fns, tree first_arg, const vec *args, else /*if (flags & LOOKUP_DEFAULTED)*/ which = non_templates; + /* Template candidates that we'll potentially ignore if the + perfect candidate optimization succeeds. */ + z_candidate *ignored_template_cands = nullptr; + /* During overload resolution, we first consider each function under the assumption that we'll eventually find a strictly viable candidate. 
This allows us to circumvent our defacto behavior when checking @@ -6536,20 +6584,29 @@ add_candidates (tree fns, tree first_arg, const vec *args, This trick is important for pruning member function overloads according to their const/ref-qualifiers (since all 'this' conversions are at worst bad) without breaking -fpermissive. */ - tree bad_fns = NULL_TREE; + z_candidate *bad_cands = nullptr; bool shortcut_bad_convs = true; again: for (tree fn : lkp_range (fns)) { - if (check_converting && DECL_NONCONVERTING_P (fn)) - continue; - if (check_list_ctor && !is_list_ctor (fn)) - continue; if (which == templates && TREE_CODE (fn) != TEMPLATE_DECL) - continue; + { + if (template_only) + add_ignored_candidate (candidates, fn); + continue; + } if (which == non_templates && TREE_CODE (fn) == TEMPLATE_DECL) - continue; + { + add_ignored_candidate (&ignored_template_cands, fn); + continue; + } + if ((check_converting && DECL_NONCONVERTING_P (fn)) + || (check_list_ctor && !is_list_ctor (fn))) + { + add_ignored_candidate (candidates, fn); + continue; + } tree fn_first_arg = NULL_TREE; const vec *fn_args = args; @@ -6606,22 +6663,19 @@ add_candidates (tree fns, tree first_arg, const vec *args, } if (TREE_CODE (fn) == TEMPLATE_DECL) - { - if (!add_template_candidate (candidates, - fn, - ctype, - explicit_targs, - fn_first_arg, - fn_args, - return_type, - access_path, - conversion_path, - flags, - strict, - shortcut_bad_convs, - complain)) - continue; - } + add_template_candidate (candidates, + fn, + ctype, + explicit_targs, + fn_first_arg, + fn_args, + return_type, + access_path, + conversion_path, + flags, + strict, + shortcut_bad_convs, + complain); else { add_function_candidate (candidates, @@ -6649,13 +6703,14 @@ add_candidates (tree fns, tree first_arg, const vec *args, { /* This candidate has been tentatively marked non-strictly viable, and we didn't compute all argument conversions for it (having - stopped at the first bad conversion). 
Add the function to BAD_FNS + stopped at the first bad conversion). Move it to BAD_CANDS to to fully reconsider later if we don't find any strictly viable candidates. */ if (complain & (tf_error | tf_conv)) { - bad_fns = lookup_add (fn, bad_fns); - *candidates = (*candidates)->next; + *candidates = cand->next; + cand->next = bad_cands; + bad_cands = cand; } else /* But if we're in a SFINAE context, just mark this candidate as @@ -6669,21 +6724,44 @@ add_candidates (tree fns, tree first_arg, const vec *args, if (which == non_templates && !seen_perfect) { which = templates; + ignored_template_cands = nullptr; goto again; } else if (which == templates && !seen_strictly_viable && shortcut_bad_convs - && bad_fns) + && bad_cands) { /* None of the candidates are strictly viable, so consider again those - functions in BAD_FNS, this time without shortcutting bad conversions + functions in BAD_CANDS, this time without shortcutting bad conversions so that all their argument conversions are computed. */ which = either; - fns = bad_fns; + fns = NULL_TREE; + for (z_candidate *cand = bad_cands; cand; cand = cand->next) + { + tree fn = cand->fn; + if (tree ti = cand->template_decl) + fn = TI_TEMPLATE (ti); + fns = ovl_make (fn, fns); + } shortcut_bad_convs = false; + bad_cands = nullptr; goto again; } + + if (complain & tf_error) + { + /* Remember any omitted candidates; we may want to print all candidates + as part of overload resolution failure diagnostics. 
*/ + for (z_candidate *omitted_cands : { ignored_template_cands, bad_cands }) + { + z_candidate **omitted_cands_tail = &omitted_cands; + while (*omitted_cands_tail) + omitted_cands_tail = &(*omitted_cands_tail)->next; + *omitted_cands_tail = *candidates; + *candidates = omitted_cands; + } + } } /* Returns 1 if P0145R2 says that the LHS of operator CODE is evaluated first, diff --git a/gcc/testsuite/g++.dg/diagnostic/param-type-mismatch-2.C b/gcc/testsuite/g++.dg/diagnostic/param-type-mismatch-2.C index de7570a6efac..50c25cd49b73 100644 --- a/gcc/testsuite/g++.dg/diagnostic/param-type-mismatch-2.C +++ b/gcc/testsuite/g++.dg/diagnostic/param-type-mismatch-2.C @@ -129,22 +129,22 @@ int test_6 (int first, const char *second, float third, s6 *ptr) /* Template function. */ template -int test_7 (int one, T two, float three); // { dg-line test_7_decl } +int callee_7 (int one, T two, float three); // { dg-line callee_7_decl } int test_7 (int first, const char *second, float third) { - return test_7 (first, second, third); // { dg-line test_7_usage } - // { dg-message "cannot convert 'const char\\*' to 'const char\\*\\*'" "" { target *-*-* } test_7_usage } + return callee_7 (first, second, third); // { dg-line callee_7_usage } + // { dg-message "cannot convert 'const char\\*' to 'const char\\*\\*'" "" { target *-*-* } callee_7_usage } /* { dg-begin-multiline-output "" } - return test_7 (first, second, third); - ^~~~~~ - | - const char* + return callee_7 (first, second, third); + ^~~~~~ + | + const char* { dg-end-multiline-output "" } */ - // { dg-message "initializing argument 2 of 'int test_7\\(int, T, float\\) .with T = const char\\*\\*.'" "" { target *-*-* } test_7_decl } + // { dg-message "initializing argument 2 of 'int callee_7\\(int, T, float\\) .with T = const char\\*\\*.'" "" { target *-*-* } callee_7_decl } /* { dg-begin-multiline-output "" } - int test_7 (int one, T two, float three); - ~~^~~ + int callee_7 (int one, T two, float three); + ~~^~~ { 
dg-end-multiline-output "" } */ } diff --git a/gcc/testsuite/g++.dg/overload/error6.C b/gcc/testsuite/g++.dg/overload/error6.C new file mode 100644 index 000000000000..86a12eaa8de0 --- /dev/null +++ b/gcc/testsuite/g++.dg/overload/error6.C @@ -0,0 +1,9 @@ +// Verify we note even non-template candidates when diagnosing +// overload resolution failure for a template-id. + +template void f(T); // { dg-message "candidate" } +void f(int); // { dg-message {candidate: 'void f\(int\)' \(ignored\)} } + +int main() { + f(0, 0); // { dg-error "no match" } +} From d6840b3143f617065a070857cb22d826d24c622b Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 13 Dec 2023 16:46:04 -0500 Subject: [PATCH 296/311] c++: note other candidates when diagnosing deletedness With the previous two patches in place, we can now extend our deletedness diagnostic to note the other considered candidates, e.g.: deleted.C: In function 'int main()': deleted.C:10:4: error: use of deleted function 'void f(int)' 10 | f(0); | ~^~~ deleted.C:5:6: note: declared here 5 | void f(int) = delete; | ^ deleted.C:5:6: note: candidate: 'void f(int)' (deleted) deleted.C:6:6: note: candidate: 'void f(...)' 6 | void f(...); | ^ deleted.C:7:6: note: candidate: 'void f(int, int)' 7 | void f(int, int); | ^ deleted.C:7:6: note: candidate expects 2 arguments, 1 provided These notes are controlled by a new command line flag -fdiagnostics-all-candidates which also controls whether we note ignored candidates more generally. gcc/ChangeLog: * doc/invoke.texi (C++ Dialect Options): Document -fdiagnostics-all-candidates. gcc/c-family/ChangeLog: * c.opt: Add -fdiagnostics-all-candidates. gcc/cp/ChangeLog: * call.cc (print_z_candidates): Only print ignored candidates when -fdiagnostics-all-candidates is set, otherwise suggest the flag. (build_over_call): When diagnosing deletedness, note other candidates only if -fdiagnostics-all-candidates is set, otherwise suggest the flag. 
gcc/testsuite/ChangeLog: * g++.dg/overload/error6.C: Pass -fdiagnostics-all-candidates. * g++.dg/cpp0x/deleted16.C: New test. * g++.dg/cpp0x/deleted16a.C: New test. * g++.dg/overload/error6a.C: New test. --- gcc/c-family/c.opt | 4 ++++ gcc/cp/call.cc | 19 ++++++++++++++++++- gcc/doc/invoke.texi | 5 +++++ gcc/testsuite/g++.dg/cpp0x/deleted16.C | 25 +++++++++++++++++++++++++ gcc/testsuite/g++.dg/cpp0x/deleted16a.C | 12 ++++++++++++ gcc/testsuite/g++.dg/overload/error6.C | 1 + gcc/testsuite/g++.dg/overload/error6a.C | 6 ++++++ 7 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/deleted16.C create mode 100644 gcc/testsuite/g++.dg/cpp0x/deleted16a.C create mode 100644 gcc/testsuite/g++.dg/overload/error6a.C diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 3706505f8bf8..03b64d536fa3 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1805,6 +1805,10 @@ fdiagnostics-show-template-tree C++ ObjC++ Var(flag_diagnostics_show_template_tree) Init(0) Print hierarchical comparisons when template types are mismatched. +fdiagnostics-all-candidates +C++ ObjC++ Var(flag_diagnostics_all_candidates) +Note all candidates during overload resolution failure. + fdirectives-only C ObjC C++ ObjC++ Preprocess directives only. 
diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index aa4111dda5c7..6ac87a298b29 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -4090,6 +4090,12 @@ print_z_candidates (location_t loc, struct z_candidate *candidates, { if (only_viable_p.is_true () && candidates->viable != 1) break; + if (ignored_candidate_p (candidates) && !flag_diagnostics_all_candidates) + { + inform (loc, "some candidates omitted; " + "use %<-fdiagnostics-all-candidates%> to display them"); + break; + } print_z_candidate (loc, N_("candidate:"), candidates); } } @@ -9967,7 +9973,18 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) if (DECL_DELETED_FN (fn)) { if (complain & tf_error) - mark_used (fn); + { + mark_used (fn); + if (cand->next) + { + if (flag_diagnostics_all_candidates) + print_z_candidates (input_location, cand, /*only_viable_p=*/false); + else + inform (input_location, + "use %<-fdiagnostics-all-candidates%> to display " + "considered candidates"); + } + } return error_mark_node; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8f885b8c6d69..f89f926a572b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -3328,6 +3328,11 @@ called. If the handler returns, execution continues normally. @item -fcoroutines Enable support for the C++ coroutines extension (experimental). +@opindex fdiagnostics-all-candidates +@item -fdiagnostics-all-candidates +Permit the C++ front end to note all candidates during overload resolution +failure, including when a deleted function is selected. + @opindex fno-elide-constructors @opindex felide-constructors @item -fno-elide-constructors diff --git a/gcc/testsuite/g++.dg/cpp0x/deleted16.C b/gcc/testsuite/g++.dg/cpp0x/deleted16.C new file mode 100644 index 000000000000..d4347942c42a --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/deleted16.C @@ -0,0 +1,25 @@ +// Verify -fdiagnostics-all-candidates makes us note other candidates +// when a deleted function is selected by overload resolution. 
+// { dg-do compile { target c++11 } } +// { dg-additional-options "-fdiagnostics-all-candidates" } + +void f(int) = delete; // { dg-message "declared here" } +void f(...); // { dg-message "candidate" } +void f(int, int); // { dg-message "candidate" } + +// An example where the perfect candidate optimization causes us +// to ignore function templates. +void g(int) = delete; // { dg-message "declared here" } +template void g(T); // { dg-message "candidate" } + +// An example where we have a strictly viable candidate and +// an incompletely considered bad candidate. +template void h(T, T) = delete; // { dg-message "declared here|candidate" } +void h(int*, int) = delete; // { dg-message "candidate" } + +int main() { + f(0); // { dg-error "deleted" } + g(0); // { dg-error "deleted" } + h(1, 1); // { dg-error "deleted" } + // { dg-error "invalid conversion" "" { target *-*-* } .-1 } when noting 2nd cand +} diff --git a/gcc/testsuite/g++.dg/cpp0x/deleted16a.C b/gcc/testsuite/g++.dg/cpp0x/deleted16a.C new file mode 100644 index 000000000000..e62306fa3d18 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/deleted16a.C @@ -0,0 +1,12 @@ +// Verify we suggest -fdiagnostics-all-candidates when diagnosing +// overload resolution selecting a deleted function. 
+// { dg-do compile { target c++11 } } +#include "deleted16.C" + +// { dg-error "deleted" "" { target *-*-* } 21 } +// { dg-error "deleted" "" { target *-*-* } 22 } +// { dg-error "deleted" "" { target *-*-* } 23 } + +// { dg-message "use '-fdiagnostics-all-candidates'" "" { target *-*-* } 21 } +// { dg-message "use '-fdiagnostics-all-candidates'" "" { target *-*-* } 22 } +// { dg-message "use '-fdiagnostics-all-candidates'" "" { target *-*-* } 23 } diff --git a/gcc/testsuite/g++.dg/overload/error6.C b/gcc/testsuite/g++.dg/overload/error6.C index 86a12eaa8de0..3186a297bfc9 100644 --- a/gcc/testsuite/g++.dg/overload/error6.C +++ b/gcc/testsuite/g++.dg/overload/error6.C @@ -1,5 +1,6 @@ // Verify we note even non-template candidates when diagnosing // overload resolution failure for a template-id. +// { dg-additional-options "-fdiagnostics-all-candidates" } template void f(T); // { dg-message "candidate" } void f(int); // { dg-message {candidate: 'void f\(int\)' \(ignored\)} } diff --git a/gcc/testsuite/g++.dg/overload/error6a.C b/gcc/testsuite/g++.dg/overload/error6a.C new file mode 100644 index 000000000000..e86ab5158abc --- /dev/null +++ b/gcc/testsuite/g++.dg/overload/error6a.C @@ -0,0 +1,6 @@ +// Verify we suggest -fdiagnostics-all-candidates when there are +// omitted candidates. +#include "error6.C" + +// { dg-error "no match" "" { target *-*-* } 9 } +// { dg-message "use '-fdiagnostics-all-candidates'" "" { target *-*-* } 9 } From c535360788e142a92e1d8b1db25bf4452e26f5fb Mon Sep 17 00:00:00 2001 From: Richard Ball Date: Wed, 13 Dec 2023 21:34:57 +0000 Subject: [PATCH 297/311] aarch64: SVE/NEON Bridging intrinsics ACLE has added intrinsics to bridge between SVE and Neon. The NEON_SVE Bridge adds intrinsics that allow conversions between NEON and SVE vectors. This patch adds support to GCC for the following 3 intrinsics: svset_neonq, svget_neonq and svdup_neonq gcc/ChangeLog: * config.gcc: Adds new header to config. 
* config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers): Moved to header file. (ENTRY): Likewise. (enum aarch64_simd_type): Likewise. (struct aarch64_simd_type_info): Remove static. (GTY): Likewise. * config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64): Defines pragma for arm_neon_sve_bridge.h. * config/aarch64/aarch64-protos.h: Add handle_arm_neon_sve_bridge_h. * config/aarch64/aarch64-sve-builtins-base.h: New intrinsics. * config/aarch64/aarch64-sve-builtins-base.cc (class svget_neonq_impl): New intrinsic implementation. (class svset_neonq_impl): Likewise. (class svdup_neonq_impl): Likewise. (NEON_SVE_BRIDGE_FUNCTION): New intrinsics. * config/aarch64/aarch64-sve-builtins-functions.h (NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE functions. * config/aarch64/aarch64-sve-builtins-shapes.h: New shapes. * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type): Add NEON element types. (parse_type): Likewise. (struct get_neonq_def): Defines function shape for get_neonq. (struct set_neonq_def): Defines function shape for set_neonq. (struct dup_neonq_def): Defines function shape for dup_neonq. * config/aarch64/aarch64-sve-builtins.cc (DEF_SVE_TYPE_SUFFIX): Changed to be called through SVE_NEON macro. (DEF_SVE_NEON_TYPE_SUFFIX): Defines macro for NEON_SVE_BRIDGE type suffixes. (DEF_NEON_SVE_FUNCTION): Defines macro for NEON_SVE_BRIDGE functions. (function_resolver::infer_neon128_vector_type): Infers type suffix for overloaded functions. (handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h. * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes. (bf16): Replace entry with neon-sve entry. (f16): Likewise. (f32): Likewise. (f64): Likewise. (s8): Likewise. (s16): Likewise. (s32): Likewise. (s64): Likewise. (u8): Likewise. (u16): Likewise. (u32): Likewise. (u64): Likewise.
* config/aarch64/aarch64-sve-builtins.h (GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h. (ENTRY): Add aarch64_simd_type definition. (enum aarch64_simd_type): Add neon information to type_suffix_info. (struct type_suffix_info): New function. * config/aarch64/aarch64-sve.md (@aarch64_sve_get_neonq_<mode>): New intrinsic insn for big endian. (@aarch64_sve_set_neonq_<mode>): Likewise. * config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ. * config/aarch64/aarch64-builtins.h: New file. * config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file. * config/aarch64/arm_neon_sve_bridge.h: New file. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include arm_neon_sve_bridge header file. * gcc.dg/torture/neon-sve-bridge.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test. * gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c: New test.
* gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c: New test. * gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c: New test. * gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c: New test. * gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c: New test. * gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c: New test. * gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c: New test. 
--- gcc/config.gcc | 2 +- gcc/config/aarch64/aarch64-builtins.cc | 80 +------------ gcc/config/aarch64/aarch64-builtins.h | 99 ++++++++++++++++ gcc/config/aarch64/aarch64-c.cc | 2 + .../aarch64-neon-sve-bridge-builtins.def | 28 +++++ gcc/config/aarch64/aarch64-protos.h | 1 + .../aarch64/aarch64-sve-builtins-base.cc | 110 ++++++++++++++++++ .../aarch64/aarch64-sve-builtins-base.h | 6 + .../aarch64/aarch64-sve-builtins-functions.h | 4 + .../aarch64/aarch64-sve-builtins-shapes.cc | 78 +++++++++++++ .../aarch64/aarch64-sve-builtins-shapes.h | 3 + gcc/config/aarch64/aarch64-sve-builtins.cc | 63 +++++++++- gcc/config/aarch64/aarch64-sve-builtins.def | 42 +++++-- gcc/config/aarch64/aarch64-sve-builtins.h | 16 +++ gcc/config/aarch64/aarch64-sve.md | 33 ++++++ gcc/config/aarch64/arm_neon_sve_bridge.h | 38 ++++++ gcc/config/aarch64/iterators.md | 1 + .../gcc.dg/torture/neon-sve-bridge.c | 64 ++++++++++ .../aarch64/sve/acle/asm/dup_neonq_bf16.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_f16.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_f32.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_f64.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_s16.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_s32.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_s64.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_s8.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_u16.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_u32.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_u64.c | 30 +++++ .../aarch64/sve/acle/asm/dup_neonq_u8.c | 30 +++++ .../aarch64/sve/acle/asm/get_neonq_bf16.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_f16.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_f32.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_f64.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_s16.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_s32.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_s64.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_s8.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_u16.c | 33 
++++++ .../aarch64/sve/acle/asm/get_neonq_u32.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_u64.c | 33 ++++++ .../aarch64/sve/acle/asm/get_neonq_u8.c | 33 ++++++ .../aarch64/sve/acle/asm/set_neonq_bf16.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_f16.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_f32.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_f64.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_s16.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_s32.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_s64.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_s8.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_u16.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_u32.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_u64.c | 23 ++++ .../aarch64/sve/acle/asm/set_neonq_u8.c | 23 ++++ .../aarch64/sve/acle/asm/test_sve_acle.h | 24 +++- .../aarch64/sve/acle/general-c/dup_neonq_1.c | 20 ++++ .../aarch64/sve/acle/general-c/get_neonq_1.c | 20 ++++ .../aarch64/sve/acle/general-c/set_neonq_1.c | 27 +++++ 58 files changed, 1697 insertions(+), 96 deletions(-) create mode 100644 gcc/config/aarch64/aarch64-builtins.h create mode 100644 gcc/config/aarch64/aarch64-neon-sve-bridge-builtins.def create mode 100644 gcc/config/aarch64/arm_neon_sve_bridge.h create mode 100644 gcc/testsuite/gcc.dg/torture/neon-sve-bridge.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c diff --git a/gcc/config.gcc b/gcc/config.gcc index 4884aca4dd87..c31e342fc412 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -345,7 +345,7 @@ m32c*-*-*) ;; aarch64*-*-*) cpu_type=aarch64 - extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h" + extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h arm_neon_sve_bridge.h" c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" d_target_objs="aarch64-d.o" diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index b57255ba1c88..23f87d0b7ef4 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -48,6 +48,7 @@ #include "attribs.h" #include "gimple-fold.h" #include "builtins.h" +#include "aarch64-builtins.h" #define v8qi_UP E_V8QImode #define v8di_UP E_V8DImode @@ -184,47 +185,8 @@ #define SIMD_INTR_QUAL(suffix) QUAL_##suffix #define SIMD_INTR_LENGTH_CHAR(length) LENGTH_##length - #define SIMD_MAX_BUILTIN_ARGS 5 -enum aarch64_type_qualifiers -{ - /* T foo. */ - qualifier_none = 0x0, - /* unsigned T foo. */ - qualifier_unsigned = 0x1, /* 1 << 0 */ - /* const T foo. */ - qualifier_const = 0x2, /* 1 << 1 */ - /* T *foo. */ - qualifier_pointer = 0x4, /* 1 << 2 */ - /* Used when expanding arguments if an operand could - be an immediate. 
*/ - qualifier_immediate = 0x8, /* 1 << 3 */ - qualifier_maybe_immediate = 0x10, /* 1 << 4 */ - /* void foo (...). */ - qualifier_void = 0x20, /* 1 << 5 */ - /* 1 << 6 is now unused */ - /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum - rather than using the type of the operand. */ - qualifier_map_mode = 0x80, /* 1 << 7 */ - /* qualifier_pointer | qualifier_map_mode */ - qualifier_pointer_map_mode = 0x84, - /* qualifier_const | qualifier_pointer | qualifier_map_mode */ - qualifier_const_pointer_map_mode = 0x86, - /* Polynomial types. */ - qualifier_poly = 0x100, - /* Lane indices - must be in range, and flipped for bigendian. */ - qualifier_lane_index = 0x200, - /* Lane indices for single lane structure loads and stores. */ - qualifier_struct_load_store_lane_index = 0x400, - /* Lane indices selected in pairs. - must be in range, and flipped for - bigendian. */ - qualifier_lane_pair_index = 0x800, - /* Lane indices selected in quadtuplets. - must be in range, and flipped for - bigendian. */ - qualifier_lane_quadtup_index = 0x1000, -}; - /* Flags that describe what a function might do. */ const unsigned int FLAG_NONE = 0U; const unsigned int FLAG_READ_FPCR = 1U << 0; @@ -901,47 +863,9 @@ const char *aarch64_scalar_builtin_types[] = { NULL }; -#define ENTRY(E, M, Q, G) E, -enum aarch64_simd_type -{ -#include "aarch64-simd-builtin-types.def" - ARM_NEON_H_TYPES_LAST -}; -#undef ENTRY - -struct GTY(()) aarch64_simd_type_info -{ - enum aarch64_simd_type type; - - /* Internal type name. */ - const char *name; - - /* Internal type name(mangled). The mangled names conform to the - AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture", - Appendix A). To qualify for emission with the mangled names defined in - that document, a vector type must not only be of the correct mode but also - be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these - types are registered by aarch64_init_simd_builtin_types (). 
In other - words, vector types defined in other ways e.g. via vector_size attribute - will get default mangled names. */ - const char *mangle; - - /* Internal type. */ - tree itype; - - /* Element type. */ - tree eltype; - - /* Machine mode the internal type maps to. */ - enum machine_mode mode; - - /* Qualifiers. */ - enum aarch64_type_qualifiers q; -}; - #define ENTRY(E, M, Q, G) \ {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q}, -static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = { +GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = { #include "aarch64-simd-builtin-types.def" }; #undef ENTRY diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h new file mode 100644 index 000000000000..2356681e4923 --- /dev/null +++ b/gcc/config/aarch64/aarch64-builtins.h @@ -0,0 +1,99 @@ +/* Builtins' description for AArch64 SIMD architecture. + Copyright (C) 2023 Free Software Foundation, Inc. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ +#ifndef GCC_AARCH64_BUILTINS_H +#define GCC_AARCH64_BUILTINS_H + +enum aarch64_type_qualifiers +{ + /* T foo. */ + qualifier_none = 0x0, + /* unsigned T foo. */ + qualifier_unsigned = 0x1, /* 1 << 0 */ + /* const T foo. */ + qualifier_const = 0x2, /* 1 << 1 */ + /* T *foo. */ + qualifier_pointer = 0x4, /* 1 << 2 */ + /* Used when expanding arguments if an operand could + be an immediate. 
*/ + qualifier_immediate = 0x8, /* 1 << 3 */ + qualifier_maybe_immediate = 0x10, /* 1 << 4 */ + /* void foo (...). */ + qualifier_void = 0x20, /* 1 << 5 */ + /* 1 << 6 is now unused */ + /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum + rather than using the type of the operand. */ + qualifier_map_mode = 0x80, /* 1 << 7 */ + /* qualifier_pointer | qualifier_map_mode */ + qualifier_pointer_map_mode = 0x84, + /* qualifier_const | qualifier_pointer | qualifier_map_mode */ + qualifier_const_pointer_map_mode = 0x86, + /* Polynomial types. */ + qualifier_poly = 0x100, + /* Lane indices - must be in range, and flipped for bigendian. */ + qualifier_lane_index = 0x200, + /* Lane indices for single lane structure loads and stores. */ + qualifier_struct_load_store_lane_index = 0x400, + /* Lane indices selected in pairs. - must be in range, and flipped for + bigendian. */ + qualifier_lane_pair_index = 0x800, + /* Lane indices selected in quadtuplets. - must be in range, and flipped for + bigendian. */ + qualifier_lane_quadtup_index = 0x1000, +}; + +#define ENTRY(E, M, Q, G) E, +enum aarch64_simd_type +{ +#include "aarch64-simd-builtin-types.def" + ARM_NEON_H_TYPES_LAST +}; +#undef ENTRY + +struct GTY(()) aarch64_simd_type_info +{ + enum aarch64_simd_type type; + + /* Internal type name. */ + const char *name; + + /* Internal type name(mangled). The mangled names conform to the + AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture", + Appendix A). To qualify for emission with the mangled names defined in + that document, a vector type must not only be of the correct mode but also + be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these + types are registered by aarch64_init_simd_builtin_types (). In other + words, vector types defined in other ways e.g. via vector_size attribute + will get default mangled names. */ + const char *mangle; + + /* Internal type. */ + tree itype; + + /* Element type. 
*/ + tree eltype; + + /* Machine mode the internal type maps to. */ + enum machine_mode mode; + + /* Qualifiers. */ + enum aarch64_type_qualifiers q; +}; + +extern aarch64_simd_type_info aarch64_simd_types[]; + +#endif \ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index 115a2a8b7568..05ad763b871b 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -351,6 +351,8 @@ aarch64_pragma_aarch64 (cpp_reader *) handle_arm_neon_h (); else if (strcmp (name, "arm_acle.h") == 0) handle_arm_acle_h (); + else if (strcmp (name, "arm_neon_sve_bridge.h") == 0) + aarch64_sve::handle_arm_neon_sve_bridge_h (); else error ("unknown %<#pragma GCC aarch64%> option %qs", name); } diff --git a/gcc/config/aarch64/aarch64-neon-sve-bridge-builtins.def b/gcc/config/aarch64/aarch64-neon-sve-bridge-builtins.def new file mode 100644 index 000000000000..0fcbe446ac58 --- /dev/null +++ b/gcc/config/aarch64/aarch64-neon-sve-bridge-builtins.def @@ -0,0 +1,28 @@ +/* Builtin lists for AArch64 NEON-SVE-Bridge + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . 
*/ + +#ifndef DEF_NEON_SVE_FUNCTION +#define DEF_NEON_SVE_FUNCTION(A, B, C, D, E) +#endif + +DEF_NEON_SVE_FUNCTION (svset_neonq, set_neonq, all_data, none, none) +DEF_NEON_SVE_FUNCTION (svget_neonq, get_neonq, all_data, none, none) +DEF_NEON_SVE_FUNCTION (svdup_neonq, dup_neonq, all_data, none, none) + +#undef DEF_NEON_SVE_FUNCTION \ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index eaf74a725e70..8baae4003fc3 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1012,6 +1012,7 @@ namespace aarch64_sve { void init_builtins (); void handle_arm_sve_h (); void handle_arm_sme_h (); + void handle_arm_neon_sve_bridge_h (); tree builtin_decl (unsigned, bool); bool builtin_type_p (const_tree); bool builtin_type_p (const_tree, unsigned int *, unsigned int *); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 4e5a88aa03a9..783c90159942 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -44,6 +44,7 @@ #include "aarch64-sve-builtins-shapes.h" #include "aarch64-sve-builtins-base.h" #include "aarch64-sve-builtins-functions.h" +#include "aarch64-builtins.h" #include "ssa.h" #include "gimple-fold.h" @@ -1099,6 +1100,112 @@ public: } }; +class svget_neonq_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const override + { + if (BYTES_BIG_ENDIAN) + return NULL; + tree rhs_sve_vector = gimple_call_arg (f.call, 0); + tree rhs_vector = build3 (BIT_FIELD_REF, TREE_TYPE (f.lhs), + rhs_sve_vector, bitsize_int (128), bitsize_int (0)); + return gimple_build_assign (f.lhs, rhs_vector); + } + + rtx + expand (function_expander &e) const override + { + if (BYTES_BIG_ENDIAN) + { + machine_mode mode = e.vector_mode (0); + insn_code icode = code_for_aarch64_sve_get_neonq (mode); + unsigned int nunits = 128 / GET_MODE_UNIT_BITSIZE 
(mode); + rtx indices = aarch64_gen_stepped_int_parallel + (nunits, nunits - 1, -1); + + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + e.add_fixed_operand (indices); + return e.generate_insn (icode); + } + return simplify_gen_subreg (e.result_mode (), e.args[0], + GET_MODE (e.args[0]), 0); + } +}; + +class svset_neonq_impl : public function_base +{ +public: + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vector_mode (0); + rtx_vector_builder builder (VNx16BImode, 16, 2); + for (unsigned int i = 0; i < 16; i++) + builder.quick_push (CONST1_RTX (BImode)); + for (unsigned int i = 0; i < 16; i++) + builder.quick_push (CONST0_RTX (BImode)); + e.args.quick_push (builder.build ()); + if (BYTES_BIG_ENDIAN) + return e.use_exact_insn (code_for_aarch64_sve_set_neonq (mode)); + insn_code icode = code_for_vcond_mask (mode, mode); + e.args[1] = lowpart_subreg (mode, e.args[1], GET_MODE (e.args[1])); + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[1]); + e.add_input_operand (icode, e.args[0]); + e.add_input_operand (icode, e.args[2]); + return e.generate_insn (icode); + } +}; + +class svdup_neonq_impl : public function_base +{ +public: + gimple * + fold (gimple_folder &f) const override + { + if (BYTES_BIG_ENDIAN) + return NULL; + tree rhs_vector = gimple_call_arg (f.call, 0); + unsigned HOST_WIDE_INT neon_nelts + = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs_vector)).to_constant (); + poly_uint64 sve_nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); + vec_perm_builder builder (sve_nelts, neon_nelts, 1); + for (unsigned int i = 0; i < neon_nelts; i++) + builder.quick_push (i); + vec_perm_indices indices (builder, 1, neon_nelts); + tree perm_type = build_vector_type (ssizetype, sve_nelts); + return gimple_build_assign (f.lhs, VEC_PERM_EXPR, + rhs_vector, + rhs_vector, + vec_perm_indices_to_tree (perm_type, indices)); + } + + rtx + expand (function_expander &e) const override + { + machine_mode mode = 
e.vector_mode (0); + if (BYTES_BIG_ENDIAN) + { + insn_code icode = code_for_aarch64_vec_duplicate_vq_be (mode); + unsigned int nunits = 128 / GET_MODE_UNIT_BITSIZE (mode); + rtx indices = aarch64_gen_stepped_int_parallel + (nunits, nunits - 1, -1); + + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + e.add_fixed_operand (indices); + return e.generate_insn (icode); + } + insn_code icode = code_for_aarch64_vec_duplicate_vq_le (mode); + e.add_output_operand (icode); + e.add_input_operand (icode, e.args[0]); + return e.generate_insn (icode); + } +}; + class svindex_impl : public function_base { public: @@ -3122,5 +3229,8 @@ FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, FUNCTION (svzip2, svzip_impl, (1)) FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, UNSPEC_ZIP2Q)) +NEON_SVE_BRIDGE_FUNCTION (svget_neonq, svget_neonq_impl,) +NEON_SVE_BRIDGE_FUNCTION (svset_neonq, svset_neonq_impl,) +NEON_SVE_BRIDGE_FUNCTION (svdup_neonq, svdup_neonq_impl,) } /* end namespace aarch64_sve */ diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index d300e3a85d00..df75e4c1ecf8 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -299,6 +299,12 @@ namespace aarch64_sve extern const function_base *const svzip2; extern const function_base *const svzip2q; } + namespace neon_sve_bridge_functions + { + extern const function_base *const svset_neonq; + extern const function_base *const svget_neonq; + extern const function_base *const svdup_neonq; + } } #endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h index b40640b07634..7aa59568a251 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h @@ -840,4 +840,8 @@ public: namespace { static CONSTEXPR const CLASS NAME##_obj 
ARGS; } \ namespace functions { const function_base *const NAME = &NAME##_obj; } +#define NEON_SVE_BRIDGE_FUNCTION(NAME, CLASS, ARGS) \ + namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \ + namespace neon_sve_bridge_functions { const function_base *const NAME = &NAME##_obj; } + #endif diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 9380cc7db205..d0039c05895f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -29,6 +29,7 @@ #include "optabs.h" #include "aarch64-sve-builtins.h" #include "aarch64-sve-builtins-shapes.h" +#include "aarch64-builtins.h" /* In the comments below, _t0 represents the first type suffix and _t1 represents the second. Square brackets enclose characters that are @@ -178,6 +179,8 @@ parse_element_type (const function_instance &instance, const char *&format) s - a scalar type with the given element suffix t - a vector or tuple type with given element suffix [*1] v - a vector with the given element suffix + D - a 64 bit neon vector + Q - a 128 bit neon vector where has the format described above parse_element_type @@ -261,6 +264,20 @@ parse_type (const function_instance &instance, const char *&format) return acle_vector_types[0][type_suffixes[suffix].vector_type]; } + if (ch == 'D') + { + type_suffix_index suffix = parse_element_type (instance, format); + int neon_index = type_suffixes[suffix].neon64_type; + return aarch64_simd_types[neon_index].itype; + } + + if (ch == 'Q') + { + type_suffix_index suffix = parse_element_type (instance, format); + int neon_index = type_suffixes[suffix].neon128_type; + return aarch64_simd_types[neon_index].itype; + } + gcc_unreachable (); } @@ -2476,6 +2493,67 @@ struct get_def : public overloaded_base<0> }; SHAPE (get) +/* xN_t svfoo[_t0](sv_t). 
*/ +struct get_neonq_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "Q0,v0", group, MODE_none); + } + tree + resolve (function_resolver &r) const override + { + return r.resolve_unary (); + } +}; +SHAPE (get_neonq) + +/* sv_t svfoo[_t0](sv_t, xN_t). */ +struct set_neonq_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,Q0", group, MODE_none); + } + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_neon128_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (set_neonq) + +/* sv_t svfoo[_t0](xN_t). */ +struct dup_neonq_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,Q0", group, MODE_none); + } + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (1, i, nargs) + || (type = r.infer_neon128_vector_type (i)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + return r.resolve_to (r.mode_suffix_id, type); + } +}; +SHAPE (dup_neonq) + /* sv_t svfoo[_t0](sv_t, uint64_t) _t svfoo[_n_t0](_t, uint64_t) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index 88af62df48ba..8e159e666fd7 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -126,10 +126,12 @@ namespace aarch64_sve extern const function_shape *const dot_za_slice_lane; extern const 
function_shape *const dot_za_slice_uint_lane; extern const function_shape *const dupq; + extern const function_shape *const dup_neonq; extern const function_shape *const ext; extern const function_shape *const extract_pred; extern const function_shape *const fold_left; extern const function_shape *const get; + extern const function_shape *const get_neonq; extern const function_shape *const inc_dec; extern const function_shape *const inc_dec_pat; extern const function_shape *const inc_dec_pred; @@ -170,6 +172,7 @@ namespace aarch64_sve extern const function_shape *const select_pred; extern const function_shape *const set; extern const function_shape *const setffr; + extern const function_shape *const set_neonq; extern const function_shape *const shift_left_imm_long; extern const function_shape *const shift_left_imm_to_uint; extern const function_shape *const shift_right_imm; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 15fa5907de5f..9cdac5ebbd20 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -53,6 +53,7 @@ #include "aarch64-sve-builtins-sve2.h" #include "aarch64-sve-builtins-sme.h" #include "aarch64-sve-builtins-shapes.h" +#include "aarch64-builtins.h" namespace aarch64_sve { @@ -129,7 +130,8 @@ CONSTEXPR const mode_suffix_info mode_suffixes[] = { /* Static information about each type_suffix_index. 
*/ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { -#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ +#define DEF_SVE_NEON_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE, \ + NEON64, NEON128) \ { "_" #NAME, \ VECTOR_TYPE_##ACLE_TYPE, \ TYPE_##CLASS, \ @@ -142,7 +144,12 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { TYPE_##CLASS == TYPE_bool, \ false, \ 0, \ - MODE }, + MODE, \ + NEON64, \ + NEON128 }, +#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ + DEF_SVE_NEON_TYPE_SUFFIX (NAME, ACLE_TYPE, CLASS, BITS, MODE, \ + ARM_NEON_H_TYPES_LAST, ARM_NEON_H_TYPES_LAST) #define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ { "_" #NAME, \ NUM_VECTOR_TYPES, \ @@ -156,10 +163,12 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { false, \ true, \ 0, \ - MODE }, + MODE, \ + ARM_NEON_H_TYPES_LAST, \ + ARM_NEON_H_TYPES_LAST }, #include "aarch64-sve-builtins.def" { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, - false, false, 0, VOIDmode } + false, false, 0, VOIDmode, ARM_NEON_H_TYPES_LAST, ARM_NEON_H_TYPES_LAST } }; CONSTEXPR const group_suffix_info group_suffixes[] = { @@ -884,6 +893,14 @@ static CONSTEXPR const function_group_info function_groups[] = { #include "aarch64-sve-builtins.def" }; +/* A list of all NEON-SVE-Bridge ACLE functions. */ +static CONSTEXPR const function_group_info neon_sve_function_groups[] = { +#define DEF_NEON_SVE_FUNCTION(NAME, SHAPE, TYPES, GROUPS, PREDS) \ + { #NAME, &neon_sve_bridge_functions::NAME, &shapes::SHAPE, types_##TYPES, \ + groups_##GROUPS, preds_##PREDS, 0 }, +#include "aarch64-neon-sve-bridge-builtins.def" +}; + /* The scalar type associated with each vector type. 
*/ extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES + 1]; tree scalar_types[NUM_VECTOR_TYPES + 1]; @@ -2092,6 +2109,33 @@ function_resolver::infer_integer_vector_type (unsigned int argno) return type; } +/* Require argument ARGNO to have some form of NEON128 vector type. Return the + associated type suffix on success. + Report an error and return NUM_TYPE_SUFFIXES on failure. */ +type_suffix_index +function_resolver::infer_neon128_vector_type (unsigned int argno) +{ + tree actual = get_argument_type (argno); + if (actual == error_mark_node) + return NUM_TYPE_SUFFIXES; + + for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) + { + int neon_index = type_suffixes[suffix_i].neon128_type; + if (neon_index != ARM_NEON_H_TYPES_LAST) + { + tree type = aarch64_simd_types[neon_index].itype; + if (type && matches_type_p (type, actual)) + return type_suffix_index (suffix_i); + } + } + + error_at (location, "passing %qT to argument %d of %qE, which" + " expects a 128 bit NEON vector type", actual, argno + 1, fndecl); + return NUM_TYPE_SUFFIXES; +} + + /* Like infer_vector_type, but also require the type to be an unsigned integer. */ type_suffix_index @@ -4454,6 +4498,7 @@ init_builtins () { handle_arm_sve_h (); handle_arm_sme_h (); + handle_arm_neon_sve_bridge_h (); } } @@ -4588,6 +4633,16 @@ handle_arm_sve_h () builder.register_function_group (function_groups[i]); } +/* Implement #pragma GCC aarch64 "arm_neon_sve_bridge.h". */ +void +handle_arm_neon_sve_bridge_h () +{ + /* Define the functions. */ + function_builder builder; + for (unsigned int i = 0; i < ARRAY_SIZE (neon_sve_function_groups); ++i) + builder.register_function_group (neon_sve_function_groups[i]); +} + /* Return the function decl with SVE function subcode CODE, or error_mark_node if no such function exists. 
*/ tree diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def index 23ef7889c513..83bf70eb6e49 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.def +++ b/gcc/config/aarch64/aarch64-sve-builtins.def @@ -41,6 +41,11 @@ #define DEF_SVE_FUNCTION_GS(A, B, C, D, E) #endif +#ifndef DEF_SVE_NEON_TYPE_SUFFIX +#define DEF_SVE_NEON_TYPE_SUFFIX(A, B, C, D, E, F, G) \ + DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) +#endif + #ifndef DEF_SVE_FUNCTION #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) @@ -107,23 +112,35 @@ DEF_SVE_TYPE_SUFFIX (b8, svbool_t, bool, 8, VNx16BImode) DEF_SVE_TYPE_SUFFIX (b16, svbool_t, bool, 16, VNx8BImode) DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode) DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode) -DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode) DEF_SVE_TYPE_SUFFIX (c, svcount_t, count, 8, VNx16BImode) DEF_SVE_TYPE_SUFFIX (c8, svcount_t, count, 8, VNx16BImode) DEF_SVE_TYPE_SUFFIX (c16, svcount_t, count, 16, VNx16BImode) DEF_SVE_TYPE_SUFFIX (c32, svcount_t, count, 32, VNx16BImode) DEF_SVE_TYPE_SUFFIX (c64, svcount_t, count, 64, VNx16BImode) -DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode) -DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode) -DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode) -DEF_SVE_TYPE_SUFFIX (s8, svint8_t, signed, 8, VNx16QImode) -DEF_SVE_TYPE_SUFFIX (s16, svint16_t, signed, 16, VNx8HImode) -DEF_SVE_TYPE_SUFFIX (s32, svint32_t, signed, 32, VNx4SImode) -DEF_SVE_TYPE_SUFFIX (s64, svint64_t, signed, 64, VNx2DImode) -DEF_SVE_TYPE_SUFFIX (u8, svuint8_t, unsigned, 8, VNx16QImode) -DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) -DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) -DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) +DEF_SVE_NEON_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode, + Bfloat16x4_t, 
Bfloat16x8_t) +DEF_SVE_NEON_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode, + Float16x4_t, Float16x8_t) +DEF_SVE_NEON_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode, + Float32x2_t, Float32x4_t) +DEF_SVE_NEON_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode, + Float64x1_t, Float64x2_t) +DEF_SVE_NEON_TYPE_SUFFIX (s8, svint8_t, signed, 8, VNx16QImode, + Int8x8_t, Int8x16_t) +DEF_SVE_NEON_TYPE_SUFFIX (s16, svint16_t, signed, 16, VNx8HImode, + Int16x4_t, Int16x8_t) +DEF_SVE_NEON_TYPE_SUFFIX (s32, svint32_t, signed, 32, VNx4SImode, + Int32x2_t, Int32x4_t) +DEF_SVE_NEON_TYPE_SUFFIX (s64, svint64_t, signed, 64, VNx2DImode, + Int64x1_t, Int64x2_t) +DEF_SVE_NEON_TYPE_SUFFIX (u8, svuint8_t, unsigned, 8, VNx16QImode, + Uint8x8_t, Uint8x16_t) +DEF_SVE_NEON_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode, + Uint16x4_t, Uint16x8_t) +DEF_SVE_NEON_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode, + Uint32x2_t, Uint32x4_t) +DEF_SVE_NEON_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode, + Uint64x1_t, Uint64x2_t) /* Associate _za with bytes. This is needed for svldr_vnum_za and svstr_vnum_za, whose ZA offset can be in the range [0, 15], as for za8. */ @@ -159,6 +176,7 @@ DEF_SVE_GROUP_SUFFIX (vg4x4, 4, 4) #undef DEF_SVE_FUNCTION_GS #undef DEF_SVE_GROUP_SUFFIX #undef DEF_SME_ZA_SUFFIX +#undef DEF_SVE_NEON_TYPE_SUFFIX #undef DEF_SVE_TYPE_SUFFIX #undef DEF_SVE_TYPE #undef DEF_SVE_MODE diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index e67c46581f37..e682e14f3b51 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -20,6 +20,8 @@ #ifndef GCC_AARCH64_SVE_BUILTINS_H #define GCC_AARCH64_SVE_BUILTINS_H +#include "aarch64-builtins.h" + /* The full name of an SVE ACLE function is the concatenation of: - the base name ("svadd", etc.) 
@@ -229,6 +231,14 @@ struct mode_suffix_info units_index displacement_units; }; +#define ENTRY(E, M, Q, G) E, +enum aarch64_simd_type +{ +#include "aarch64-simd-builtin-types.def" + ARM_NEON_H_TYPES_LAST +}; +#undef ENTRY + /* Static information about a type suffix. */ struct type_suffix_info { @@ -262,6 +272,11 @@ struct type_suffix_info /* The associated vector or predicate mode. */ machine_mode vector_mode : 16; + + /* The corresponding 64-bit and 128-bit arm_neon.h types, or + ARM_NEON_H_TYPES_LAST if none. */ + aarch64_simd_type neon64_type; + aarch64_simd_type neon128_type; }; /* Static information about a group suffix. */ @@ -498,6 +513,7 @@ public: sve_type infer_vector_or_tuple_type (unsigned int, unsigned int); type_suffix_index infer_vector_type (unsigned int); type_suffix_index infer_integer_vector_type (unsigned int); + type_suffix_index infer_neon128_vector_type (unsigned int); type_suffix_index infer_unsigned_vector_type (unsigned int); type_suffix_index infer_sd_vector_type (unsigned int); sve_type infer_tuple_type (unsigned int); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index fdd14d15096a..32630dbe6b68 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -10950,3 +10950,36 @@ operands[4] = CONSTM1_RTX (mode); } ) + +(define_insn_and_split "@aarch64_sve_get_neonq_" + [(set (match_operand: 0 "register_operand" "=w") + (vec_select: + (match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand 2 "descending_int_parallel")))] + "TARGET_SVE + && BYTES_BIG_ENDIAN + && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), + GET_MODE_NUNITS (mode) - 1)" + "#" + "&& reload_completed" + [(set (match_dup 0) (match_dup 1))] + { + operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); + } +) + +(define_insn "@aarch64_sve_set_neonq_" + [(set (match_operand:SVE_FULL 0 "register_operand" "=w") + (unspec:SVE_FULL + [(match_operand:SVE_FULL 1 "register_operand" "w") + (match_operand: 2 
"register_operand" "w") + (match_operand: 3 "register_operand" "Upl")] + UNSPEC_SET_NEONQ))] + "TARGET_SVE + && BYTES_BIG_ENDIAN" + { + operands[2] = lowpart_subreg (mode, operands[2], + GET_MODE (operands[2])); + return "sel\t%0., %3, %2., %1."; + } +) \ No newline at end of file diff --git a/gcc/config/aarch64/arm_neon_sve_bridge.h b/gcc/config/aarch64/arm_neon_sve_bridge.h new file mode 100644 index 000000000000..8f526eae86b9 --- /dev/null +++ b/gcc/config/aarch64/arm_neon_sve_bridge.h @@ -0,0 +1,38 @@ +/* AArch64 NEON-SVE Bridge intrinsics include file. + Copyright (C) 2023 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _ARM_NEON_SVE_BRIDGE_H_ +#define _ARM_NEON_SVE_BRIDGE_H_ + +#include +#include + +/* NOTE: This implementation of arm_neon_sve_bridge.h is intentionally short. It does + not define the types and intrinsic functions directly in C and C++ + code, but instead uses the following pragma to tell GCC to insert the + necessary type and function definitions itself. The net effect is the + same, and the file is a complete implementation of arm_neon_sve_bridge.h. 
*/ +#pragma GCC aarch64 "arm_neon_sve_bridge.h" + +#endif \ No newline at end of file diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 4377188303c3..a9397fcadc9a 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -806,6 +806,7 @@ UNSPEC_FTSMUL ; Used in aarch64-sve.md. UNSPEC_FTSSEL ; Used in aarch64-sve.md. UNSPEC_SMATMUL ; Used in aarch64-sve.md. + UNSPEC_SET_NEONQ ; Used in aarch64-sve.md. UNSPEC_UMATMUL ; Used in aarch64-sve.md. UNSPEC_USMATMUL ; Used in aarch64-sve.md. UNSPEC_TRN1Q ; Used in aarch64-sve.md. diff --git a/gcc/testsuite/gcc.dg/torture/neon-sve-bridge.c b/gcc/testsuite/gcc.dg/torture/neon-sve-bridge.c new file mode 100644 index 000000000000..4f8f26cec1b4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/neon-sve-bridge.c @@ -0,0 +1,64 @@ +// { dg-options "-march=armv8.2-a+sve" } +// { dg-do run { target aarch64_sve_hw } } + +#include + +extern void abort (void); + +int +svget_neonq_test () +{ + int64_t val1 = 987654321; + int64_t val2 = 123456789; + svint64_t sveInput = svdupq_n_s64 (val1, val2); + int64x2_t neonReturn = svget_neonq_s64 (sveInput); + int64_t val1Return = vgetq_lane_s64 (neonReturn, 0); + int64_t val2Return = vgetq_lane_s64 (neonReturn, 1); + if (val1 == val1Return && val2 == val2Return) + return 0; + return 1; +} + +int +svset_neonq_test () +{ + int64_t val1 = 987654321; + int64_t val2 = 123456789; + int64x2_t NeonInput; + NeonInput = vsetq_lane_s64 (val1, NeonInput, 0); + NeonInput = vsetq_lane_s64 (val2, NeonInput, 1); + svint64_t sveReturn = svset_neonq_s64 (sveReturn, NeonInput); + int64_t val1Return = svlasta_s64 (svptrue_b64(), sveReturn); + int64_t val2Return = svlastb_s64 (svptrue_pat_b8(SV_VL16), sveReturn); + if (val1 == val1Return && val2 == val2Return) + return 0; + return 1; +} + +int +svdup_neonq_test () +{ + int64_t val1 = 987654321; + int64_t val2 = 123456789; + int64x2_t NeonInput; + NeonInput = vsetq_lane_s64 (val1, NeonInput, 0); + NeonInput = 
vsetq_lane_s64 (val2, NeonInput, 1); + svint64_t sveReturn = svdup_neonq_s64 (NeonInput); + int64_t val1Return = svlasta_s64 (svptrue_b64(), sveReturn); + int64_t val2Return = svlastb_s64 (svptrue_b64(), sveReturn); + if (val1 == val1Return && val2 == val2Return) + return 0; + return 1; +} + +int +main () +{ + if (svget_neonq_test () == 1) + abort (); + if (svset_neonq_test () == 1) + abort (); + if (svdup_neonq_test () == 1) + abort (); + return 0; +} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c new file mode 100644 index 000000000000..9b4b762bf133 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_bf16_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_bf16_z0, bfloat16x8_t, svbfloat16_t, + z0 = svdup_neonq_bf16 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_bf16_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_bf16_z4, bfloat16x8_t, svbfloat16_t, + z4_res = svdup_neonq_bf16 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_bf16_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_bf16_z5, bfloat16x8_t, svbfloat16_t, + z5_res = svdup_neonq_bf16 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c new file mode 100644 index 000000000000..699e966c2af1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_f16_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f16_z0, float16x8_t, svfloat16_t, + z0 = svdup_neonq_f16 (z4), + z0 = 
svdup_neonq (z4)) + +/* +** dup_neonq_f16_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f16_z4, float16x8_t, svfloat16_t, + z4_res = svdup_neonq_f16 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_f16_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f16_z5, float16x8_t, svfloat16_t, + z5_res = svdup_neonq_f16 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c new file mode 100644 index 000000000000..cd606ef0d0c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_f32_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f32_z0, float32x4_t, svfloat32_t, + z0 = svdup_neonq_f32 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_f32_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f32_z4, float32x4_t, svfloat32_t, + z4_res = svdup_neonq_f32 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_f32_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f32_z5, float32x4_t, svfloat32_t, + z5_res = svdup_neonq_f32 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c new file mode 100644 index 000000000000..4dd3e507ea33 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_f64_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f64_z0, float64x2_t, svfloat64_t, + z0 = svdup_neonq_f64 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_f64_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ 
(dup_neonq_f64_z4, float64x2_t, svfloat64_t, + z4_res = svdup_neonq_f64 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_f64_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_f64_z5, float64x2_t, svfloat64_t, + z5_res = svdup_neonq_f64 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c new file mode 100644 index 000000000000..e70e454a1602 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_s16_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s16_z0, int16x8_t, svint16_t, + z0 = svdup_neonq_s16 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_s16_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s16_z4, int16x8_t, svint16_t, + z4_res = svdup_neonq_s16 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_s16_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s16_z5, int16x8_t, svint16_t, + z5_res = svdup_neonq_s16 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c new file mode 100644 index 000000000000..9ef9228d5298 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_s32_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s32_z0, int32x4_t, svint32_t, + z0 = svdup_neonq_s32 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_s32_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s32_z4, int32x4_t, svint32_t, + z4_res = svdup_neonq_s32 (z4), + z4_res = svdup_neonq (z4)) + +/* +** 
dup_neonq_s32_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s32_z5, int32x4_t, svint32_t, + z5_res = svdup_neonq_s32 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c new file mode 100644 index 000000000000..d3ed22220fae --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_s64_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s64_z0, int64x2_t, svint64_t, + z0 = svdup_neonq_s64 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_s64_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s64_z4, int64x2_t, svint64_t, + z4_res = svdup_neonq_s64 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_s64_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s64_z5, int64x2_t, svint64_t, + z5_res = svdup_neonq_s64 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c new file mode 100644 index 000000000000..6437f858a4ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_s8_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s8_z0, int8x16_t, svint8_t, + z0 = svdup_neonq_s8 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_s8_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s8_z4, int8x16_t, svint8_t, + z4_res = svdup_neonq_s8 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_s8_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_s8_z5, int8x16_t, svint8_t, + z5_res = svdup_neonq_s8 (z4), + z5_res 
= svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c new file mode 100644 index 000000000000..5e7b2230c19c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_u16_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u16_z0, uint16x8_t, svuint16_t, + z0 = svdup_neonq_u16 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_u16_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u16_z4, uint16x8_t, svuint16_t, + z4_res = svdup_neonq_u16 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_u16_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u16_z5, uint16x8_t, svuint16_t, + z5_res = svdup_neonq_u16 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c new file mode 100644 index 000000000000..ce34d4f213f7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_u32_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u32_z0, uint32x4_t, svuint32_t, + z0 = svdup_neonq_u32 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_u32_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u32_z4, uint32x4_t, svuint32_t, + z4_res = svdup_neonq_u32 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_u32_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u32_z5, uint32x4_t, svuint32_t, + z5_res = svdup_neonq_u32 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c new file mode 100644 index 000000000000..1008740890b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_u64_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u64_z0, uint64x2_t, svuint64_t, + z0 = svdup_neonq_u64 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_u64_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u64_z4, uint64x2_t, svuint64_t, + z4_res = svdup_neonq_u64 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_u64_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u64_z5, uint64x2_t, svuint64_t, + z5_res = svdup_neonq_u64 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c new file mode 100644 index 000000000000..c27ff8f34a75 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c @@ -0,0 +1,30 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** dup_neonq_u8_z0: +** dup z0.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u8_z0, uint8x16_t, svuint8_t, + z0 = svdup_neonq_u8 (z4), + z0 = svdup_neonq (z4)) + +/* +** dup_neonq_u8_z4: +** dup z4.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u8_z4, uint8x16_t, svuint8_t, + z4_res = svdup_neonq_u8 (z4), + z4_res = svdup_neonq (z4)) + +/* +** dup_neonq_u8_z5: +** dup z5.q, z4.q\[0\] +** ret +*/ +TEST_DUP_NEONQ (dup_neonq_u8_z5, uint8x16_t, svuint8_t, + z5_res = svdup_neonq_u8 (z4), + z5_res = svdup_neonq (z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c new file mode 100644 index 000000000000..47d5c8df80d1 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_bf16_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_bf16_z0, svbfloat16_t, bfloat16x8_t, + z0 = svget_neonq_bf16 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_bf16_z4: +** ret +*/ +TEST_GET (get_neonq_bf16_z4, svbfloat16_t, bfloat16x8_t, + z4_res = svget_neonq_bf16 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_bf16_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_bf16_z5, svbfloat16_t, bfloat16x8_t, + z5_res = svget_neonq_bf16 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c new file mode 100644 index 000000000000..89f474a0a4a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_f16_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_f16_z0, svfloat16_t, float16x8_t, + z0 = svget_neonq_f16 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_f16_z4: +** ret +*/ +TEST_GET (get_neonq_f16_z4, svfloat16_t, float16x8_t, + z4_res = svget_neonq_f16 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_f16_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_f16_z5, svfloat16_t, float16x8_t, + z5_res = svget_neonq_f16 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c new file mode 100644 index 000000000000..65126fe71215 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_f32_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_f32_z0, svfloat32_t, float32x4_t, + z0 = svget_neonq_f32 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_f32_z4: +** ret +*/ +TEST_GET (get_neonq_f32_z4, svfloat32_t, float32x4_t, + z4_res = svget_neonq_f32 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_f32_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_f32_z5, svfloat32_t, float32x4_t, + z5_res = svget_neonq_f32 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c new file mode 100644 index 000000000000..6b5621c1152b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_f64_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_f64_z0, svfloat64_t, float64x2_t, + z0 = svget_neonq_f64 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_f64_z4: +** ret +*/ +TEST_GET (get_neonq_f64_z4, svfloat64_t, float64x2_t, + z4_res = svget_neonq_f64 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_f64_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_f64_z5, svfloat64_t, float64x2_t, + z5_res = svget_neonq_f64 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c new file mode 100644 index 000000000000..7e848203af3e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c @@ -0,0 
+1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_s16_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_s16_z0, svint16_t, int16x8_t, + z0 = svget_neonq_s16 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_s16_z4: +** ret +*/ +TEST_GET (get_neonq_s16_z4, svint16_t, int16x8_t, + z4_res = svget_neonq_s16 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_s16_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_s16_z5, svint16_t, int16x8_t, + z5_res = svget_neonq_s16 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c new file mode 100644 index 000000000000..f1e7ab36dd54 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_s32_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_s32_z0, svint32_t, int32x4_t, + z0 = svget_neonq_s32 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_s32_z4: +** ret +*/ +TEST_GET (get_neonq_s32_z4, svint32_t, int32x4_t, + z4_res = svget_neonq_s32 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_s32_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_s32_z5, svint32_t, int32x4_t, + z5_res = svget_neonq_s32 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c new file mode 100644 index 000000000000..4dfd7afe3d9f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include 
"test_sve_acle.h" + +/* +** get_neonq_s64_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_s64_z0, svint64_t, int64x2_t, + z0 = svget_neonq_s64 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_s64_z4: +** ret +*/ +TEST_GET (get_neonq_s64_z4, svint64_t, int64x2_t, + z4_res = svget_neonq_s64 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_s64_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_s64_z5, svint64_t, int64x2_t, + z5_res = svget_neonq_s64 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c new file mode 100644 index 000000000000..8c2c389a7dd7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_s8_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_s8_z0, svint8_t, int8x16_t, + z0 = svget_neonq_s8 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_s8_z4: +** ret +*/ +TEST_GET (get_neonq_s8_z4, svint8_t, int8x16_t, + z4_res = svget_neonq_s8 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_s8_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_s8_z5, svint8_t, int8x16_t, + z5_res = svget_neonq_s8 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c new file mode 100644 index 000000000000..168ef36bf531 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_u16_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_u16_z0, 
svuint16_t, uint16x8_t, + z0 = svget_neonq_u16 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_u16_z4: +** ret +*/ +TEST_GET (get_neonq_u16_z4, svuint16_t, uint16x8_t, + z4_res = svget_neonq_u16 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_u16_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_u16_z5, svuint16_t, uint16x8_t, + z5_res = svget_neonq_u16 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c new file mode 100644 index 000000000000..acabc03ae3dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_u32_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_u32_z0, svuint32_t, uint32x4_t, + z0 = svget_neonq_u32 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_u32_z4: +** ret +*/ +TEST_GET (get_neonq_u32_z4, svuint32_t, uint32x4_t, + z4_res = svget_neonq_u32 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_u32_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_u32_z5, svuint32_t, uint32x4_t, + z5_res = svget_neonq_u32 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c new file mode 100644 index 000000000000..42b45abedc18 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_u64_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_u64_z0, svuint64_t, uint64x2_t, + z0 = svget_neonq_u64 (z4), + z0 = svget_neonq (z4)) + +/* 
+** get_neonq_u64_z4: +** ret +*/ +TEST_GET (get_neonq_u64_z4, svuint64_t, uint64x2_t, + z4_res = svget_neonq_u64 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_u64_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_u64_z5, svuint64_t, uint64x2_t, + z5_res = svget_neonq_u64 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c new file mode 100644 index 000000000000..05d42f2856c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c @@ -0,0 +1,33 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** get_neonq_u8_z0: +** mov v0.16b, v4.16b +** ret +*/ +TEST_GET (get_neonq_u8_z0, svuint8_t, uint8x16_t, + z0 = svget_neonq_u8 (z4), + z0 = svget_neonq (z4)) + +/* +** get_neonq_u8_z4: +** ret +*/ +TEST_GET (get_neonq_u8_z4, svuint8_t, uint8x16_t, + z4_res = svget_neonq_u8 (z4), + z4_res = svget_neonq (z4)) + +/* +** get_neonq_u8_z5: +** ( +** mov z5.d, z4.d +** | +** mov v5.16b, v4.16b +** ) +** ret +*/ +TEST_GET (get_neonq_u8_z5, svuint8_t, uint8x16_t, + z5_res = svget_neonq_u8 (z4), + z5_res = svget_neonq (z4)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c new file mode 100644 index 000000000000..9d0a682416ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_bf16_z24: +** ptrue (p[0-9]+).h, vl8 +** sel z24.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_bf16_z24, svbfloat16_t, bfloat16x8_t, + z24 = svset_neonq_bf16 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_bf16_z4: +** ptrue 
(p[0-9]+).h, vl8 +** sel z4.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_bf16_z4, svbfloat16_t, bfloat16x8_t, + z4_res = svset_neonq_bf16 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c new file mode 100644 index 000000000000..65d59e5638d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_f16_z24: +** ptrue (p[0-9]+).h, vl8 +** sel z24.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_f16_z24, svfloat16_t, float16x8_t, + z24 = svset_neonq_f16 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_f16_z4: +** ptrue (p[0-9]+).h, vl8 +** sel z4.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_f16_z4, svfloat16_t, float16x8_t, + z4_res = svset_neonq_f16 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c new file mode 100644 index 000000000000..5b1d26350661 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_f32_z24: +** ptrue (p[0-9]+).s, vl4 +** sel z24.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_f32_z24, svfloat32_t, float32x4_t, + z24 = svset_neonq_f32 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_f32_z4: +** ptrue (p[0-9]+).s, vl4 +** sel z4.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_f32_z4, svfloat32_t, float32x4_t, + z4_res = svset_neonq_f32 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c new file mode 100644 index 000000000000..618bb43171c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_f64_z24: +** ptrue (p[0-9]+).d, vl2 +** sel z24.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_f64_z24, svfloat64_t, float64x2_t, + z24 = svset_neonq_f64 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_f64_z4: +** ptrue (p[0-9]+).d, vl2 +** sel z4.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_f64_z4, svfloat64_t, float64x2_t, + z4_res = svset_neonq_f64 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c new file mode 100644 index 000000000000..c2a935b2ae56 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_s16_z24: +** ptrue (p[0-9]+).h, vl8 +** sel z24.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_s16_z24, svint16_t, int16x8_t, + z24 = svset_neonq_s16 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_s16_z4: +** ptrue (p[0-9]+).h, vl8 +** sel z4.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_s16_z4, svint16_t, int16x8_t, + z4_res = svset_neonq_s16 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c new file mode 100644 index 000000000000..4dc57689ddba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c @@ 
-0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_s32_z24: +** ptrue (p[0-9]+).s, vl4 +** sel z24.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_s32_z24, svint32_t, int32x4_t, + z24 = svset_neonq_s32 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_s32_z4: +** ptrue (p[0-9]+).s, vl4 +** sel z4.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_s32_z4, svint32_t, int32x4_t, + z4_res = svset_neonq_s32 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c new file mode 100644 index 000000000000..3c130d1d94f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_s64_z24: +** ptrue (p[0-9]+).d, vl2 +** sel z24.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_s64_z24, svint64_t, int64x2_t, + z24 = svset_neonq_s64 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_s64_z4: +** ptrue (p[0-9]+).d, vl2 +** sel z4.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_s64_z4, svint64_t, int64x2_t, + z4_res = svset_neonq_s64 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c new file mode 100644 index 000000000000..76ad7b383623 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_s8_z24: +** ptrue (p[0-9]+).b, vl16 +** sel z24.b, \1, z0.b, z4.b +** ret +*/ +TEST_SET_NEONQ (set_neonq_s8_z24, svint8_t, int8x16_t, + z24 = 
svset_neonq_s8 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_s8_z4: +** ptrue (p[0-9]+).b, vl16 +** sel z4.b, \1, z0.b, z4.b +** ret +*/ +TEST_SET_NEONQ (set_neonq_s8_z4, svint8_t, int8x16_t, + z4_res = svset_neonq_s8 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c new file mode 100644 index 000000000000..530da2c04232 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_u16_z24: +** ptrue (p[0-9]+).h, vl8 +** sel z24.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_u16_z24, svuint16_t, uint16x8_t, + z24 = svset_neonq_u16 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_u16_z4: +** ptrue (p[0-9]+).h, vl8 +** sel z4.h, \1, z0.h, z4.h +** ret +*/ +TEST_SET_NEONQ (set_neonq_u16_z4, svuint16_t, uint16x8_t, + z4_res = svset_neonq_u16 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c new file mode 100644 index 000000000000..fb1a5234741b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_u32_z24: +** ptrue (p[0-9]+).s, vl4 +** sel z24.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_u32_z24, svuint32_t, uint32x4_t, + z24 = svset_neonq_u32 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_u32_z4: +** ptrue (p[0-9]+).s, vl4 +** sel z4.s, \1, z0.s, z4.s +** ret +*/ +TEST_SET_NEONQ (set_neonq_u32_z4, svuint32_t, uint32x4_t, + z4_res = svset_neonq_u32 (z4, z0), + z4_res = svset_neonq (z4, 
z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c new file mode 100644 index 000000000000..af2b83a922e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_u64_z24: +** ptrue (p[0-9]+).d, vl2 +** sel z24.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_u64_z24, svuint64_t, uint64x2_t, + z24 = svset_neonq_u64 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_u64_z4: +** ptrue (p[0-9]+).d, vl2 +** sel z4.d, \1, z0.d, z4.d +** ret +*/ +TEST_SET_NEONQ (set_neonq_u64_z4, svuint64_t, uint64x2_t, + z4_res = svset_neonq_u64 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c new file mode 100644 index 000000000000..29f542b957a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c @@ -0,0 +1,23 @@ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** set_neonq_u8_z24: +** ptrue (p[0-9]+).b, vl16 +** sel z24.b, \1, z0.b, z4.b +** ret +*/ +TEST_SET_NEONQ (set_neonq_u8_z24, svuint8_t, uint8x16_t, + z24 = svset_neonq_u8 (z4, z0), + z24 = svset_neonq (z4, z0)) + +/* +** set_neonq_u8_z4: +** ptrue (p[0-9]+).b, vl16 +** sel z4.b, \1, z0.b, z4.b +** ret +*/ +TEST_SET_NEONQ (set_neonq_u8_z4, svuint8_t, uint8x16_t, + z4_res = svset_neonq_u8 (z4, z0), + z4_res = svset_neonq (z4, z0)) \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h index 756fe4db3856..367024be8635 100644 --- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h @@ -1,7 +1,7 @@ #ifndef TEST_SVE_ACLE_H #define TEST_SVE_ACLE_H 1 -#include <arm_sve.h> +#include <arm_neon_sve_bridge.h> #if defined (TEST_OVERLOADS) #define INVOKE(CODE1, CODE2) CODE2 @@ -615,6 +615,28 @@ __asm volatile ("" :: "Upa" (p4), "Upa" (p8)); \ } +#define TEST_SET_NEONQ(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ + TTYPE z4)) \ + { \ + register TTYPE z24 __asm ("z24"); \ + register TTYPE z4_res __asm ("z4"); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z24), "w" (z4_res)); \ + } + +#define TEST_DUP_NEONQ(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ + PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1, \ + ZTYPE unused2, ZTYPE unused3, TTYPE z4)) \ + { \ + register ZTYPE z0 __asm ("z0"); \ + register ZTYPE z4_res __asm ("z4"); \ + register ZTYPE z5_res __asm ("z5"); \ + INVOKE (CODE1, CODE2); \ + __asm volatile ("" :: "w" (z0), "w" (z4_res), \ + "w" (z5_res)); \ + } + #define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4)) \ { \ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c new file mode 100644 index 000000000000..a07158f6ba8d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.2-a+sve -std=c99 -Wall -Wextra" } */ + +#include <arm_neon_sve_bridge.h> + +float64x2_t +f1 (int8x16_t s8, svint8_t sveS8, int64x2_t s64, int8x8x2_t s8x2) +{ + float64x2_t f64; + + sveS8 = svdup_neonq (s8); + sveS8 = svdup_neonq (); /* { dg-error {too few arguments to function 'svdup_neonq'} } */ + sveS8 = svdup_neonq (s8, 1); /* { dg-error {too many arguments to function 'svdup_neonq'} } */ + sveS8 = svdup_neonq (sveS8); /* { dg-error {passing 'svint8_t' to argument 1 of
'svdup_neonq', which expects a 128 bit NEON vector type} } */ + f64 = svdup_neonq (s8); /* { dg-error {incompatible types when assigning to type 'float64x2_t' from type 'svint8_t'} } */ + sveS8 = svdup_neonq (s8x2); /* { dg-error {passing 'int8x8x2_t' to argument 1 of 'svdup_neonq', which expects a 128 bit NEON vector type} } */ + sveS8 = svdup_neonq (s64); /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint64_t'} } */ + + return f64; +} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c new file mode 100644 index 000000000000..2753e9271ff2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.2-a+sve -std=c99 -Wall -Wextra" } */ + +#include <arm_neon_sve_bridge.h> + +float64x2_t +f1 (int8x16_t s8, svint8_t sveS8, svint8x2_t sveS8x2, svint64_t sveS64) +{ + float64x2_t f64; + + s8 = svget_neonq (sveS8); + s8 = svget_neonq (); /* { dg-error {too few arguments to function 'svget_neonq'} } */ + s8 = svget_neonq (sveS8, 1); /* { dg-error {too many arguments to function 'svget_neonq'} } */ + s8 = svget_neonq (s8); /* { dg-error {passing 'int8x16_t' to argument 1 of 'svget_neonq', which expects an SVE type} } */ + f64 = svget_neonq (sveS8); /* { dg-error {incompatible types when assigning to type 'float64x2_t' from type '__Int8x16_t'} } */ + s8 = svget_neonq (sveS8x2); /* { dg-error {passing 'svint8x2_t' to argument 1 of 'svget_neonq', which expects a single SVE vector rather than a tuple} } */ + s8 = svget_neonq (sveS64); /* { dg-error {incompatible types when assigning to type 'int8x16_t' from type '__Int64x2_t} } */ + + return f64; +} \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c new file mode
100644 index 000000000000..f08172842a71 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.2-a+sve -std=c99 -Wall -Wextra" } */ + +#include <arm_neon_sve_bridge.h> + +float64x2_t +f1 (int8x16_t s8, svint8_t sveS8, svint8x2_t sveS8x2, svint64_t sveS64, + int64x2_t s64, svbfloat16_t sveBF16, bfloat16x8_t bf16, int8x8_t s8_64bit, + svbool_t svbool) +{ + float64x2_t f64; + + sveS8 = svset_neonq (sveS8, s8); + sveS64 = svset_neonq (sveS64, s64); + sveBF16 = svset_neonq (sveBF16, bf16); + sveS8 = svset_neonq (); /* { dg-error {too few arguments to function 'svset_neonq'} } */ + sveS8 = svset_neonq (sveS8, s8, 1); /* { dg-error {too many arguments to function 'svset_neonq'} } */ + sveS8 = svset_neonq (s8, s8); /* { dg-error {incompatible type for argument 1 of 'svset_neonq_s8'} } */ + f64 = svset_neonq (sveS8, s8); /* { dg-error {incompatible types when assigning to type 'float64x2_t' from type 'svint8_t'} } */ + sveS8 = svset_neonq (sveS8x2, s8); /* { dg-error {incompatible type for argument 1 of 'svset_neonq_s8'} } */ + sveS8 = svset_neonq (sveS8, sveS8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svset_neonq', which expects a 128 bit NEON vector type} } */ + sveS8 = svset_neonq (sveS8, s8_64bit); /* { dg-error {passing 'int8x8_t' to argument 2 of 'svset_neonq', which expects a 128 bit NEON vector type} } */ + sveS8 = svset_neonq (sveS64, s64); /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint64_t} } */ + sveS8 = svset_neonq (svbool, svbool); /* { dg-error {passing 'svbool_t' to argument 2 of 'svset_neonq', which expects a 128 bit NEON vector type} } */ + + return f64; +} \ No newline at end of file From 3333a064e4925afa1ad5f2f8c1350c4f57d631ce Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Wed, 13 Dec 2023 18:25:47 -0500 Subject: [PATCH 298/311] c++: fix cpp0x/constexpr-ex1.C in C++23 Since r14-6505 I see: FAIL:
g++.dg/cpp0x/constexpr-ex1.C -std=c++23 at line 91 (test for errors, line 89) FAIL: g++.dg/cpp0x/constexpr-ex1.C -std=c++23 (test for excess errors) FAIL: g++.dg/cpp0x/constexpr-ex1.C -std=c++26 at line 91 (test for errors, line 89) FAIL: g++.dg/cpp0x/constexpr-ex1.C -std=c++26 (test for excess errors) and it wasn't fixed by r14-6511. So I'm fixing it with the below. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/constexpr-ex1.C: Adjust expected diagnostic line. --- gcc/testsuite/g++.dg/cpp0x/constexpr-ex1.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-ex1.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-ex1.C index 383d38a42d45..b26eb5d0c905 100644 --- a/gcc/testsuite/g++.dg/cpp0x/constexpr-ex1.C +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-ex1.C @@ -88,7 +88,7 @@ struct resource { }; constexpr resource f(resource d) { return d; } // { dg-error "non-.constexpr." "" { target { { { ! implicit_constexpr } && c++20_down } || c++11_only } } } -// { dg-error "non-.constexpr." "" { target { c++23 && { ! implicit_constexpr } } } .-2 } +// { dg-error "non-.constexpr." "" { target { c++23 && { ! implicit_constexpr } } } .-1 } constexpr resource d = f(9); // { dg-message ".constexpr." "" { target { { ! implicit_constexpr } || c++11_only } } } // 4.4 floating-point constant expressions From e8018ccff9aa9686d00a4bfe42448d517c699964 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 14 Dec 2023 00:18:00 +0000 Subject: [PATCH 299/311] Daily bump. 
--- ChangeLog | 8 ++ contrib/ChangeLog | 7 + gcc/ChangeLog | 305 ++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 40 ++++++ gcc/c/ChangeLog | 26 ++++ gcc/cp/ChangeLog | 121 ++++++++++++++++ gcc/fortran/ChangeLog | 25 ++++ gcc/m2/ChangeLog | 12 ++ gcc/testsuite/ChangeLog | 272 +++++++++++++++++++++++++++++++++++ libcpp/ChangeLog | 7 + libgm2/ChangeLog | 9 ++ libgomp/ChangeLog | 84 +++++++++++ libstdc++-v3/ChangeLog | 5 + 14 files changed, 922 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 236f420c5d60..f144c37a6226 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2023-12-13 Arsen Arsenović + + * Makefile.def (gettext): Disable (via missing) + {install-,}{pdf,html,info,dvi} and TAGS targets. Set no_install + to true. Add --disable-threads --disable-libasprintf. Drop the + lib_path (as there are no shared libs). + * Makefile.in: Regenerate. + 2023-12-12 Paul Iannetta * MAINTAINERS: Add myself to write after approval diff --git a/contrib/ChangeLog b/contrib/ChangeLog index 5151ade3266f..04b99f1e25d3 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,10 @@ +2023-12-13 Arsen Arsenović + + * download_prerequisites + : Parse --only-gettext. + (echo_archives): Check only_gettext and stop early if true. + (helptext): Document --only-gettext. + 2023-12-02 Richard Sandiford * config-list.mk (OPT_IN_LANGUAGES): New variable. diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bef20da0b6aa..1ebf365d7272 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,308 @@ +2023-12-13 Richard Ball + + * config.gcc: Adds new header to config. + * config/aarch64/aarch64-builtins.cc (enum aarch64_type_qualifiers): + Moved to header file. + (ENTRY): Likewise. + (enum aarch64_simd_type): Likewise. + (struct aarch64_simd_type_info): Remove static. + (GTY): Likewise. + * config/aarch64/aarch64-c.cc (aarch64_pragma_aarch64): + Defines pragma for arm_neon_sve_bridge.h. 
+ * config/aarch64/aarch64-protos.h: + Add handle_arm_neon_sve_bridge_h + * config/aarch64/aarch64-sve-builtins-base.h: New intrinsics. + * config/aarch64/aarch64-sve-builtins-base.cc + (class svget_neonq_impl): New intrinsic implementation. + (class svset_neonq_impl): Likewise. + (class svdup_neonq_impl): Likewise. + (NEON_SVE_BRIDGE_FUNCTION): New intrinsics. + * config/aarch64/aarch64-sve-builtins-functions.h + (NEON_SVE_BRIDGE_FUNCTION): Defines macro for NEON_SVE_BRIDGE + functions. + * config/aarch64/aarch64-sve-builtins-shapes.h: New shapes. + * config/aarch64/aarch64-sve-builtins-shapes.cc + (parse_element_type): Add NEON element types. + (parse_type): Likewise. + (struct get_neonq_def): Defines function shape for get_neonq. + (struct set_neonq_def): Defines function shape for set_neonq. + (struct dup_neonq_def): Defines function shape for dup_neonq. + * config/aarch64/aarch64-sve-builtins.cc + (DEF_SVE_TYPE_SUFFIX): Changed to be called through + SVE_NEON macro. + (DEF_SVE_NEON_TYPE_SUFFIX): Defines + macro for NEON_SVE_BRIDGE type suffixes. + (DEF_NEON_SVE_FUNCTION): Defines + macro for NEON_SVE_BRIDGE functions. + (function_resolver::infer_neon128_vector_type): Infers type suffix + for overloaded functions. + (handle_arm_neon_sve_bridge_h): Handles #pragma arm_neon_sve_bridge.h. + * config/aarch64/aarch64-sve-builtins.def + (DEF_SVE_NEON_TYPE_SUFFIX): Macro for handling neon_sve type suffixes. + (bf16): Replace entry with neon-sve entry. + (f16): Likewise. + (f32): Likewise. + (f64): Likewise. + (s8): Likewise. + (s16): Likewise. + (s32): Likewise. + (s64): Likewise. + (u8): Likewise. + (u16): Likewise. + (u32): Likewise. + (u64): Likewise. + * config/aarch64/aarch64-sve-builtins.h + (GCC_AARCH64_SVE_BUILTINS_H): Include aarch64-builtins.h. + (ENTRY): Add aarch64_simd_type definition. + (enum aarch64_simd_type): Add neon information to type_suffix_info. + (struct type_suffix_info): New function.
+ * config/aarch64/aarch64-sve.md + (@aarch64_sve_get_neonq_): New intrinsic insn for big endian. + (@aarch64_sve_set_neonq_): Likewise. + * config/aarch64/iterators.md: Add UNSPEC_SET_NEONQ. + * config/aarch64/aarch64-builtins.h: New file. + * config/aarch64/aarch64-neon-sve-bridge-builtins.def: New file. + * config/aarch64/arm_neon_sve_bridge.h: New file. + +2023-12-13 Patrick Palka + + * doc/invoke.texi (C++ Dialect Options): Document + -fdiagnostics-all-candidates. + +2023-12-13 Julian Brown + + * gimplify.cc (omp_map_clause_descriptor_p): New function. + (build_omp_struct_comp_nodes, omp_get_attachment, omp_group_base): Use + above function. + (omp_tsort_mapping_groups): Process nodes that have + OMP_CLAUSE_MAP_RUNTIME_IMPLICIT_P set after those that don't. Add + enter_exit_data parameter. + (omp_resolve_clause_dependencies): Remove GOMP_MAP_TO_PSET mappings if + we're mapping the whole containing derived-type variable. + (omp_accumulate_sibling_list): Adjust GOMP_MAP_TO_PSET handling. + Remove GOMP_MAP_ALWAYS_POINTER handling. + (gimplify_scan_omp_clauses): Pass enter_exit argument to + omp_tsort_mapping_groups. Don't adjust/remove GOMP_MAP_TO_PSET + mappings for derived-type components here. + * tree.h (OMP_CLAUSE_RELEASE_DESCRIPTOR): New macro. + * tree-pretty-print.cc (dump_omp_clause): Show + OMP_CLAUSE_RELEASE_DESCRIPTOR in dump output (with + GOMP_MAP_TO_PSET-like syntax). + +2023-12-13 Julian Brown + + * gimplify.cc (build_struct_comp_nodes): Don't process + GOMP_MAP_ATTACH_DETACH "middle" nodes here. + (omp_mapping_group): Add REPROCESS_STRUCT and FRAGILE booleans for + nested struct handling. + (omp_strip_components_and_deref, omp_strip_indirections): Remove + functions. + (omp_get_attachment): Handle GOMP_MAP_DETACH here. + (omp_group_last): Handle GOMP_MAP_*, GOMP_MAP_DETACH, + GOMP_MAP_ATTACH_DETACH groups for "exit data" of reference-to-pointer + component array sections. 
+ (omp_gather_mapping_groups_1): Initialise reprocess_struct and fragile + fields. + (omp_group_base): Handle GOMP_MAP_ATTACH_DETACH after GOMP_MAP_STRUCT. + (omp_index_mapping_groups_1): Skip reprocess_struct groups. + (omp_get_nonfirstprivate_group, omp_directive_maps_explicitly, + omp_resolve_clause_dependencies, omp_first_chained_access_token): New + functions. + (omp_check_mapping_compatibility): Adjust accepted node combinations + for "from" clauses using release instead of alloc. + (omp_accumulate_sibling_list): Add GROUP_MAP, ADDR_TOKENS, FRAGILE_P, + REPROCESSING_STRUCT, ADDED_TAIL parameters. Use OMP address tokenizer + to analyze addresses. Reimplement nested struct handling, and + implement "fragile groups". + (omp_build_struct_sibling_lists): Adjust for changes to + omp_accumulate_sibling_list. Recalculate bias for ATTACH_DETACH nodes + after GOMP_MAP_STRUCT nodes. + (gimplify_scan_omp_clauses): Call omp_resolve_clause_dependencies. Use + OMP address tokenizer. + (gimplify_adjust_omp_clauses_1): Use build_fold_indirect_ref_loc + instead of build_simple_mem_ref_loc. + * omp-general.cc (omp-general.h, tree-pretty-print.h): Include. + (omp_addr_tokenizer): New namespace. + (omp_addr_tokenizer::omp_addr_token): New. + (omp_addr_tokenizer::omp_parse_component_selector, + omp_addr_tokenizer::omp_parse_ref, + omp_addr_tokenizer::omp_parse_pointer, + omp_addr_tokenizer::omp_parse_access_method, + omp_addr_tokenizer::omp_parse_access_methods, + omp_addr_tokenizer::omp_parse_structure_base, + omp_addr_tokenizer::omp_parse_structured_expr, + omp_addr_tokenizer::omp_parse_array_expr, + omp_addr_tokenizer::omp_access_chain_p, + omp_addr_tokenizer::omp_accessed_addr): New functions. + (omp_parse_expr, debug_omp_tokenized_addr): New functions. 
+ * omp-general.h (omp_addr_tokenizer::access_method_kinds, + omp_addr_tokenizer::structure_base_kinds, + omp_addr_tokenizer::token_type, + omp_addr_tokenizer::omp_addr_token, + omp_addr_tokenizer::omp_access_chain_p, + omp_addr_tokenizer::omp_accessed_addr): New. + (omp_addr_token, omp_parse_expr): New. + * omp-low.cc (scan_sharing_clauses): Skip error check for references + to pointers. + * tree.h (OMP_CLAUSE_ATTACHMENT_MAPPING_ERASED): New macro. + +2023-12-13 Andrew Stubbs + + * config/gcn/gcn-hsa.h (NO_XNACK): Change the defaults. + * config/gcn/gcn-opts.h (enum hsaco_attr_type): Add HSACO_ATTR_DEFAULT. + * config/gcn/gcn.cc (gcn_option_override): Set the default flag_xnack. + * config/gcn/gcn.opt: Add -mxnack=default. + * doc/invoke.texi: Document the -mxnack default. + +2023-12-13 Andrew Stubbs + + * config/gcn/gcn-hsa.h (NO_XNACK): Ignore missing -march. + (XNACKOPT): Match on/off; ignore any. + * config/gcn/gcn-valu.md (gather_insn_1offset): + Add xnack compatible alternatives. + (gather_insn_2offsets): Likewise. + * config/gcn/gcn.cc (gcn_option_override): Permit -mxnack for devices + other than Fiji and gfx1030. + (gcn_expand_epilogue): Remove early-clobber problems. + (gcn_hsa_declare_function_name): Obey -mxnack setting. + * config/gcn/gcn.md (xnack): New attribute. + (enabled): Rework to include "xnack" attribute. + (*movbi): Add xnack compatible alternatives. + (*mov_insn): Likewise. + (*mov_insn): Likewise. + (*mov_insn): Likewise. + (*movti_insn): Likewise. + * config/gcn/gcn.opt (-mxnack): Change the default to "any". + * doc/invoke.texi: Remove placeholder notice for -mxnack. + +2023-12-13 Andrew Carlotti + + * config/aarch64/x-aarch64: Add missing dependencies. + +2023-12-13 Roger Sayle + Jeff Law + + * config/arc/arc.md (*extvsi_n_0): New define_insn_and_split to + implement SImode sign extract using a AND, XOR and MINUS sequence. + +2023-12-13 Feng Wang + + * common/config/riscv/riscv-common.cc: Modify implied ISA info. 
+ * config/riscv/arch-canonicalize: Add crypto vector implied info. + +2023-12-13 Juzhe-Zhong + + PR target/112929 + PR target/112988 + * config/riscv/riscv-vsetvl.cc + (pre_vsetvl::compute_lcm_local_properties): Remove full available. + (pre_vsetvl::pre_global_vsetvl_info): Add full available optimization. + +2023-12-13 Juzhe-Zhong + + PR target/111317 + * tree-vect-loop.cc (vect_estimate_min_profitable_iters): Adjust for COST for decrement IV. + +2023-12-13 Jakub Jelinek + + PR tree-optimization/112940 + * gimple-lower-bitint.cc (struct bitint_large_huge): Add another + argument to prepare_data_in_out method defaulted to NULL_TREE. + (bitint_large_huge::handle_operand): Pass another argument to + prepare_data_in_out instead of emitting an assignment to set it. + (bitint_large_huge::prepare_data_in_out): Add VAL_OUT argument. + If non-NULL, use it as PHI argument instead of creating a new + SSA_NAME. + (bitint_large_huge::handle_cast): Pass rext as another argument + to 2 prepare_data_in_out calls instead of emitting assignments + to set them. + +2023-12-13 Jakub Jelinek + + PR middle-end/112953 + * attribs.cc (free_attr_data): Use delete x rather than delete[] x. + +2023-12-13 Jakub Jelinek + + PR target/112962 + * config/i386/i386.cc (ix86_gimple_fold_builtin): For shifts + and abs without lhs replace with nop. + +2023-12-13 Richard Biener + + * emit-rtl.cc (set_mem_attributes_minus_bitpos): Preserve + the offset when rewriting an existing MEM_REF base for + stack slot sharing. + +2023-12-13 Richard Biener + + PR tree-optimization/112991 + PR tree-optimization/112961 + * tree-ssa-sccvn.h (do_rpo_vn): Add skip_entry_phis argument. + * tree-ssa-sccvn.cc (do_rpo_vn): Likewise. + (do_rpo_vn_1): Likewise, merge with auto-processing. + (run_rpo_vn): Adjust. + (pass_fre::execute): Likewise. + * tree-if-conv.cc (tree_if_conversion): Revert last change. + Value-number latch block but disable value-numbering of + entry PHIs.
+ * tree-ssa-uninit.cc (execute_early_warn_uninitialized): Adjust. + +2023-12-13 Richard Biener + + PR tree-optimization/112990 + * match.pd (bit_insert @0 (BIT_FIELD_REF @1 ..) ..): + Restrict to vector modes after lowering. + +2023-12-13 Richard Biener + + PR middle-end/111591 + * cfgexpand.cc (update_alias_info_with_stack_vars): Document + why not adjusting TBAA info on accesses is OK. + +2023-12-13 Alexandre Oliva + + * doc/invoke.texi (multiflags): Drop extraneous period, use + @pxref instead. + +2023-12-13 Victor Do Nascimento + + * config/aarch64/aarch64-builtins.cc: + (AARCH64_PLD): New enum aarch64_builtins entry. + (AARCH64_PLDX): Likewise. + (AARCH64_PLI): Likewise. + (AARCH64_PLIX): Likewise. + (aarch64_init_prefetch_builtin): New. + (aarch64_general_init_builtins): Call prefetch init function. + (aarch64_expand_prefetch_builtin): New. + (aarch64_general_expand_builtin): Add prefetch expansion. + (require_const_argument): New. + * config/aarch64/aarch64.md (UNSPEC_PLDX): New. + (aarch64_pldx): Likewise. + * config/aarch64/arm_acle.h (__pld): Likewise. + (__pli): Likewise. + (__plix): Likewise. + (__pldx): Likewise. + +2023-12-13 Kewen Lin + + PR tree-optimization/112788 + * value-range.h (range_compatible_p): Workaround same type mode but + different type precision issue for rs6000 scalar float types + _Float128 and long double. + +2023-12-13 Jiufu Guo + + * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add code to use + pli for 34bit constant. + +2023-12-13 Jiufu Guo + + * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new + parameter to record number of instructions to build the constant. + (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute + num_insn. + 2023-12-12 Juzhe-Zhong * config/riscv/riscv-vector-costs.cc (costs::analyze_loop_vinfo): New function. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index ffab8353875e..e01b2976fac8 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20231213 +20231214 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index c6768968bd5b..d7667cba6bde 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,43 @@ +2023-12-13 Patrick Palka + + * c.opt: Add -fdiagnostics-all-candidates. + +2023-12-13 Jason Merrill + + * c-warn.cc (check_address_or_pointer_of_packed_member): + Rename to check_address_of_packed_member. + (check_and_warn_address_or_pointer_of_packed_member): + Rename to check_and_warn_address_of_packed_member. + (warn_for_address_or_pointer_of_packed_member): + Rename to warn_for_address_of_packed_member. + * c-common.h: Adjust. + +2023-12-13 Jason Merrill + + * c-warn.cc (check_address_or_pointer_of_packed_member): + Remove warning based on TYPE_PACKED. + +2023-12-13 Julian Brown + + * c-common.h (c_omp_region_type): Add C_ORT_EXIT_DATA, + C_ORT_OMP_EXIT_DATA and C_ORT_ACC_TARGET. + (omp_addr_token): Add forward declaration. + (c_omp_address_inspector): New class. + * c-omp.cc (c_omp_adjust_map_clauses): Mark decls addressable here, but + do not change any mapping node types. + (c_omp_address_inspector::unconverted_ref_origin, + c_omp_address_inspector::component_access_p, + c_omp_address_inspector::check_clause, + c_omp_address_inspector::get_root_term, + c_omp_address_inspector::map_supported_p, + c_omp_address_inspector::get_origin, + c_omp_address_inspector::maybe_unconvert_ref, + c_omp_address_inspector::maybe_zero_length_array_section, + c_omp_address_inspector::expand_array_base, + c_omp_address_inspector::expand_component_selector, + c_omp_address_inspector::expand_map_clause): New methods. + (omp_expand_access_chain): New function. 
+ 2023-12-12 Richard Biener PR ipa/92606 diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index a96e65b6abb6..4cf122bc6467 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,29 @@ +2023-12-13 Jason Merrill + + * c-typeck.cc (convert_for_assignment): Adjust call to + warn_for_address_of_packed_member. + +2023-12-13 Julian Brown + + * c-parser.cc (c_parser_oacc_all_clauses): Add TARGET_P parameter. Use + to select region type for c_finish_omp_clauses call. + (c_parser_oacc_loop): Update calls to c_parser_oacc_all_clauses. + (c_parser_oacc_compute): Likewise. + (c_parser_omp_target_data, c_parser_omp_target_enter_data): Support + ATTACH kind. + (c_parser_omp_target_exit_data): Support DETACH kind. + (check_clauses): Handle GOMP_MAP_POINTER and GOMP_MAP_ATTACH here. + * c-typeck.cc (handle_omp_array_sections_1, + handle_omp_array_sections, c_finish_omp_clauses): Use + c_omp_address_inspector class and OMP address tokenizer to analyze and + expand map clause expressions. Fix some diagnostics. Fix "is OpenACC" + condition for C_ORT_ACC_TARGET addition. + +2023-12-13 Julian Brown + + * c-typeck.cc (c_finish_omp_clauses): Add braces and reindent + OMP_CLAUSE_TO/OMP_CLAUSE_FROM/OMP_CLAUSE__CACHE_ stanza. + 2023-12-11 Martin Uecker PR c/112488 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 5aec6493de4f..88d6047651b4 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,124 @@ +2023-12-13 Patrick Palka + + * call.cc (print_z_candidates): Only print ignored candidates + when -fdiagnostics-all-candidates is set, otherwise suggest + the flag. + (build_over_call): When diagnosing deletedness, note + other candidates only if -fdiagnostics-all-candidates is + set, otherwise suggest the flag. + +2023-12-13 Patrick Palka + + * call.cc (enum rejection_reason_code): Add rr_ignored. + (add_ignored_candidate): Define. + (ignored_candidate_p): Define. + (add_template_candidate_real): Do add_ignored_candidate + instead of returning NULL. 
+ (splice_viable): Put ignored (non-viable) candidates last. + (print_z_candidate): Handle ignored candidates. + (build_new_function_call): Refine shortcut that calls + cp_build_function_call_vec now that non-templates can + appear in the candidate list for a template-id call. + (add_candidates): Replace 'bad_fns' overload with 'bad_cands' + candidate list. When not considering a candidate, add it + to the list as an ignored candidate. Add all 'bad_cands' + to the overload set as well. + +2023-12-13 Patrick Palka + + * call.cc: Include "tristate.h". + (splice_viable): Sort the candidate list according to viability. + Don't remove non-viable candidates from the list. + (print_z_candidates): Add defaulted only_viable_p parameter. + By default only print non-viable candidates if there is no + viable candidate. + (tourney): Ignore non-viable candidates. Move the true champ to + the front of the candidates list, and update 'candidates' to + point to the front. Rename champ_compared_to_predecessor to + previous_worse_champ. + +2023-12-13 Patrick Palka + + PR c++/99186 + PR c++/104867 + * pt.cc (unify) : Compare types as well. + +2023-12-13 Patrick Palka + + PR c++/93740 + * pt.cc (unify) : Handle it like FIELD_DECL + and TEMPLATE_DECL. + +2023-12-13 Jason Merrill + + * call.cc (convert_for_arg_passing) + * typeck.cc (convert_for_assignment): Adjust call to + warn_for_address_of_packed_member. + +2023-12-13 Julian Brown + + * parser.cc (cp_parser_oacc_all_clauses): Add TARGET_P parameter. Use + to select region type for finish_omp_clauses call. + (cp_parser_omp_target_data, cp_parser_omp_target_enter_data): Support + GOMP_MAP_ATTACH kind. + (cp_parser_omp_target_exit_data): Support GOMP_MAP_DETACH kind. + (cp_parser_oacc_declare): Update call to cp_parser_oacc_all_clauses. + (cp_parser_oacc_loop): Update calls to cp_parser_oacc_all_clauses. + (cp_parser_oacc_compute): Likewise. 
+ * pt.cc (tsubst_expr): Use C_ORT_ACC_TARGET for call to + tsubst_omp_clauses for OpenACC compute regions. + * semantics.cc (cp_omp_address_inspector): New class, derived from + c_omp_address_inspector. + (handle_omp_array_sections_1, handle_omp_array_sections, + finish_omp_clauses): Use cp_omp_address_inspector class and OMP address + tokenizer to analyze and expand OpenMP map clause expressions. Fix + some diagnostics. Support C_ORT_ACC_TARGET. + (finish_omp_target): Handle GOMP_MAP_POINTER. + +2023-12-13 Julian Brown + + * semantics.cc (finish_omp_clause): Add braces and reindent + OMP_CLAUSE_TO/OMP_CLAUSE_FROM/OMP_CLAUSE__CACHE_ stanza. + +2023-12-13 Jason Merrill + + PR c++/96997 + * tree.cc (bot_manip): Check data.clear_location for TARGET_EXPR. + +2023-12-13 Nathaniel Shead + + PR c++/71093 + * constexpr.cc (constexpr_global_ctx::get_value_ptr): Don't + return NULL_TREE for objects we're initializing. + (constexpr_global_ctx::destroy_value): Rename from remove_value. + Only mark real variables as outside lifetime. + (constexpr_global_ctx::clear_value): New function. + (destroy_value_checked): New function. + (cxx_eval_call_expression): Defer complaining about non-constant + arg0 for operator delete. Use remove_value_safe. + (cxx_fold_indirect_ref_1): Handle conversion to 'as base' type. + (outside_lifetime_error): Include name of object we're + accessing. + (cxx_eval_store_expression): Handle clobbers. Improve error + messages. + (cxx_eval_constant_expression): Use remove_value_safe. Clear + bind variables before entering body. + +2023-12-13 Jason Merrill + + * constexpr.cc (cxx_eval_call_expression): Handle missing in-charge + argument. + +2023-12-13 Jason Merrill + + PR c++/108243 + * constexpr.cc (cxx_eval_outermost_constant_expr): Turn + a constructor CALL_EXPR into a TARGET_EXPR. + +2023-12-13 Jason Merrill + + * tree.cc (build_aggr_init_expr): Copy EXPR_LOCATION. 
+ 2023-12-12 Jason Merrill Jason Xu diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 0a1eae419bd6..366463ea7fda 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,28 @@ +2023-12-13 Julian Brown + + * dependency.cc (gfc_omp_expr_prefix_same): New function. + * dependency.h (gfc_omp_expr_prefix_same): Add prototype. + * gfortran.h (gfc_omp_namelist): Add "duplicate_of" field to "u2" + union. + * trans-openmp.cc (dependency.h): Include. + (gfc_trans_omp_array_section): Adjust mapping node arrangement for + array descriptors. Use GOMP_MAP_TO_PSET or + GOMP_MAP_RELEASE/GOMP_MAP_DELETE with the OMP_CLAUSE_RELEASE_DESCRIPTOR + flag set. + (gfc_symbol_rooted_namelist): New function. + (gfc_trans_omp_clauses): Check subcomponent and subarray/element + accesses elsewhere in the clause list for pointers to derived types or + array descriptors, and adjust or drop mapping nodes appropriately. + Adjust for changes to mapping node arrangement. + (gfc_trans_oacc_executable_directive): Pass code op through. + +2023-12-13 Julian Brown + + * trans-openmp.cc (gfc_trans_omp_array_section): Add OPENMP parameter. + Use GOMP_MAP_ATTACH_DETACH instead of GOMP_MAP_ALWAYS_POINTER for + derived type components. + (gfc_trans_omp_clauses): Update calls to gfc_trans_omp_array_section. + 2023-12-11 Thomas Schwinge * trans-openmp.cc (gfc_omp_call_is_alloc): Resolve ICE. diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index 2e479e4394a5..ead3c774905e 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,15 @@ +2023-12-13 Gaius Mulley + + PR modula2/112921 + * gm2-libs-iso/ConvStringShort.def: New file. + * gm2-libs-iso/ConvStringShort.mod: New file. + * gm2-libs-iso/ShortConv.def: New file. + * gm2-libs-iso/ShortConv.mod: New file. + * gm2-libs-iso/ShortMath.def: New file. + * gm2-libs-iso/ShortMath.mod: New file. + * gm2-libs-iso/ShortStr.def: New file. + * gm2-libs-iso/ShortStr.mod: New file. 
+ 2023-12-12 Gaius Mulley PR modula2/112984 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 551015f80d35..a3fe36dd1726 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,275 @@ +2023-12-13 Marek Polacek + + * g++.dg/cpp0x/constexpr-ex1.C: Adjust expected diagnostic line. + +2023-12-13 Richard Ball + + * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Add include + arm_neon_sve_bridge header file + * gcc.dg/torture/neon-sve-bridge.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_bf16.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_f16.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_f32.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_f64.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_s16.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_s32.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_s64.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_s8.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_u16.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_u32.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_u64.c: New test. + * gcc.target/aarch64/sve/acle/asm/dup_neonq_u8.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_bf16.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_f16.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_f32.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_f64.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_s16.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_s32.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_s64.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_s8.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_u16.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_u32.c: New test. + * gcc.target/aarch64/sve/acle/asm/get_neonq_u64.c: New test. 
+ * gcc.target/aarch64/sve/acle/asm/get_neonq_u8.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_bf16.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_f16.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_f32.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_f64.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_s16.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_s32.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_s64.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_s8.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_u16.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_u32.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_u64.c: New test. + * gcc.target/aarch64/sve/acle/asm/set_neonq_u8.c: New test. + * gcc.target/aarch64/sve/acle/general-c/dup_neonq_1.c: New test. + * gcc.target/aarch64/sve/acle/general-c/get_neonq_1.c: New test. + * gcc.target/aarch64/sve/acle/general-c/set_neonq_1.c: New test. + +2023-12-13 Patrick Palka + + * g++.dg/overload/error6.C: Pass -fdiagnostics-all-candidates. + * g++.dg/cpp0x/deleted16.C: New test. + * g++.dg/cpp0x/deleted16a.C: New test. + * g++.dg/overload/error6a.C: New test. + +2023-12-13 Patrick Palka + + * g++.dg/diagnostic/param-type-mismatch-2.C: Rename template + function test_7 that (maybe accidentally) shares the same name + as its non-template callee. + * g++.dg/overload/error6.C: New test. + +2023-12-13 Patrick Palka + + * g++.dg/overload/error5.C: New test. + +2023-12-13 Patrick Palka + + PR c++/99186 + PR c++/104867 + * g++.dg/cpp1z/nontype-auto23.C: New test. + * g++.dg/cpp1z/nontype-auto24.C: New test. + +2023-12-13 Patrick Palka + + PR c++/93740 + * g++.dg/template/ptrmem34.C: New test. + +2023-12-13 Jason Merrill + + * c-c++-common/Waddress-of-packed-member-1.c: Don't expect + a warning on the cast cases. + * c-c++-common/pr51628-35.c: Use -Wcast-align=strict. 
+ * g++.dg/warn/Waddress-of-packed-member3.C: Likewise. + * gcc.dg/pr88928.c: Likewise. + * gcc.dg/pr51628-20.c: Removed. + * gcc.dg/pr51628-21.c: Removed. + * gcc.dg/pr51628-25.c: Removed. + +2023-12-13 Julian Brown + + * gfortran.dg/goacc/enter-exit-data-2.f90: New test. + * gfortran.dg/goacc/finalize-1.f: Adjust scan output. + * gfortran.dg/gomp/map-9.f90: Adjust scan output. + * gfortran.dg/gomp/map-subarray-2.f90: New test. + * gfortran.dg/gomp/map-subarray.f90: New test. + * gfortran.dg/gomp/target-enter-exit-data.f90: New test. + +2023-12-13 Julian Brown + + * c-c++-common/gomp/clauses-2.c: Fix error output. + * c-c++-common/gomp/target-implicit-map-2.c: Adjust scan output. + * c-c++-common/gomp/target-50.c: Adjust scan output. + * c-c++-common/gomp/target-enter-data-1.c: Adjust scan output. + * g++.dg/gomp/static-component-1.C: New test. + * gcc.dg/gomp/target-3.c: Adjust scan output. + * gfortran.dg/gomp/map-9.f90: Adjust scan output. + +2023-12-13 Jason Merrill + + PR c++/96997 + * g++.dg/debug/cleanup2.C: New test. + +2023-12-13 Jason Merrill + + Revert: + 2023-12-13 Jason Merrill + + * g++.dg/pr112822.C: Require C++17. + +2023-12-13 Gaius Mulley + + PR modula2/112921 + * gm2/iso/run/pass/shorttest.mod: New test. + +2023-12-13 Nathaniel Shead + + PR c++/71093 + * g++.dg/cpp1y/constexpr-lifetime1.C: Improve error message. + * g++.dg/cpp1y/constexpr-lifetime2.C: Likewise. + * g++.dg/cpp1y/constexpr-lifetime3.C: Likewise. + * g++.dg/cpp1y/constexpr-lifetime4.C: Likewise. + * g++.dg/cpp2a/bitfield2.C: Likewise. + * g++.dg/cpp2a/constexpr-new3.C: Likewise. New check. + * g++.dg/cpp1y/constexpr-lifetime7.C: New test. + * g++.dg/cpp2a/constexpr-lifetime1.C: New test. + * g++.dg/cpp2a/constexpr-lifetime2.C: New test. + +2023-12-13 Jason Merrill + + PR c++/108243 + * g++.dg/cpp2a/consteval-prop6.C: Adjust diagnostic. + * g++.dg/opt/is_constant_evaluated3.C: Remove xfails. + +2023-12-13 Jason Merrill + + * g++.dg/cpp1y/constexpr-nsdmi7b.C: Adjust line. 
+ * g++.dg/template/copy1.C: Likewise. + +2023-12-13 Jason Merrill + + * g++.dg/pr112822.C: Require C++17. + +2023-12-13 Andrew Carlotti + + * gcc.target/aarch64/cpunative/native_cpu_18.c: Add \+nopauth\n + * gcc.target/aarch64/options_set_7.c: Add \+crc\n + * gcc.target/aarch64/options_set_8.c: Add \+crc\+nodotprod\n + * gcc.target/aarch64/cpunative/native_cpu_0.c: Add \n + * gcc.target/aarch64/cpunative/native_cpu_1.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_2.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_3.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_4.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_5.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_6.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_7.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_8.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_9.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_10.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_11.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_12.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_13.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_14.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_15.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_16.c: Ditto. + * gcc.target/aarch64/cpunative/native_cpu_17.c: Ditto. + * gcc.target/aarch64/options_set_1.c: Ditto. + * gcc.target/aarch64/options_set_2.c: Ditto. + * gcc.target/aarch64/options_set_3.c: Ditto. + * gcc.target/aarch64/options_set_5.c: Ditto. + * gcc.target/aarch64/options_set_6.c: Ditto. + * gcc.target/aarch64/options_set_9.c: Ditto. + * gcc.target/aarch64/options_set_11.c: Ditto. + * gcc.target/aarch64/options_set_12.c: Ditto. + * gcc.target/aarch64/options_set_13.c: Ditto. + * gcc.target/aarch64/options_set_14.c: Ditto. + * gcc.target/aarch64/options_set_15.c: Ditto. + * gcc.target/aarch64/options_set_16.c: Ditto. + * gcc.target/aarch64/options_set_17.c: Ditto. 
+ * gcc.target/aarch64/options_set_18.c: Ditto. + * gcc.target/aarch64/options_set_19.c: Ditto. + * gcc.target/aarch64/options_set_20.c: Ditto. + * gcc.target/aarch64/options_set_21.c: Ditto. + * gcc.target/aarch64/options_set_22.c: Ditto. + * gcc.target/aarch64/options_set_23.c: Ditto. + * gcc.target/aarch64/options_set_24.c: Ditto. + * gcc.target/aarch64/options_set_25.c: Ditto. + * gcc.target/aarch64/options_set_26.c: Ditto. + +2023-12-13 Peter Bergner + + PR tree-optimization/112822 + * g++.dg/pr112822.C: Add dg-do compile target c++17 directive. + +2023-12-13 Pan Li + + PR target/112929 + PR target/112988 + * gcc.target/riscv/rvv/vsetvl/pr112929.c: Moved to... + * gcc.target/riscv/rvv/vsetvl/pr112929-1.c: ...here. + * gcc.target/riscv/rvv/vsetvl/pr112988.c: Moved to... + * gcc.target/riscv/rvv/vsetvl/pr112988-1.c: ...here. + * gcc.target/riscv/rvv/vsetvl/pr112929-2.c: New test. + * gcc.target/riscv/rvv/vsetvl/pr112988-2.c: New test. + +2023-12-13 Andrew Carlotti + + * g++.target/aarch64/sve/aarch64-ssve.exp: + +2023-12-13 Roger Sayle + Jeff Law + + * gcc.target/arc/extvsi-1.c: New test case. + * gcc.target/arc/extvsi-2.c: Likewise. + +2023-12-13 Juzhe-Zhong + + PR target/112929 + PR target/112988 + * gcc.target/riscv/rvv/vsetvl/pr112929.c: New test. + * gcc.target/riscv/rvv/vsetvl/pr112988.c: New test. + +2023-12-13 demin.han + + * gcc.dg/vect/costmodel/riscv/rvv/riscv_vector.h: New file. + +2023-12-13 Juzhe-Zhong + + PR target/111317 + * gcc.dg/vect/costmodel/riscv/rvv/pr111317.c: New test. + +2023-12-13 Jakub Jelinek + + PR tree-optimization/112940 + * gcc.dg/bitint-53.c: New test. + +2023-12-13 Jakub Jelinek + + PR target/112962 + * gcc.target/i386/pr112962.c: New test. + +2023-12-13 Richard Biener + + PR tree-optimization/112991 + PR tree-optimization/112961 + * gcc.dg/torture/pr112991.c: New testcase. + +2023-12-13 Victor Do Nascimento + + * gcc.target/aarch64/builtin_pld_pli.c: New. + * gcc.target/aarch64/builtin_pld_pli_illegal.c: New. 
+ +2023-12-13 Haochen Jiang + + * gcc.target/i386/pr110790-2.c: Change scan-assembler from shrq + to shr\[qx\]. + +2023-12-13 Jiufu Guo + + * gcc.target/powerpc/const-build-1.c: New test. + 2023-12-12 Jason Merrill Jason Xu diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index cad3da8064b0..09b5b9c0c038 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,10 @@ +2023-12-13 Jakub Jelinek + + PR preprocessor/112956 + * lex.cc (_cpp_lex_direct): Initialize c to 0. + For CPP_PRAGMA_EOL tokens and if c == 0 also for CPP_EOF + set result->src_loc to highest locus. + 2023-11-28 Lewis Hyatt PR preprocessor/112701 diff --git a/libgm2/ChangeLog b/libgm2/ChangeLog index 42cfcf950442..b242dae5435b 100644 --- a/libgm2/ChangeLog +++ b/libgm2/ChangeLog @@ -1,3 +1,12 @@ +2023-12-13 Gaius Mulley + + PR modula2/112921 + * libm2iso/Makefile.am (M2DEFS): Add ConvStringShort.def, + ShortConv.def, ShortMath.def and ShortStr.def. + (M2MODS): Add ConvStringShort.mod, + ShortConv.mod, ShortMath.mod and ShortStr.mod. + * libm2iso/Makefile.in: Regenerate. + 2023-12-12 Gaius Mulley * libm2iso/Makefile.am (libm2iso_la_M2FLAGS): Added line breaks. diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index b22bd98498fe..f4919e50ebae 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,87 @@ +2023-12-13 Julian Brown + + * testsuite/libgomp.fortran/map-subarray.f90: New test. + * testsuite/libgomp.fortran/map-subarray-2.f90: New test. + * testsuite/libgomp.fortran/map-subarray-3.f90: New test. + * testsuite/libgomp.fortran/map-subarray-4.f90: New test. + * testsuite/libgomp.fortran/map-subarray-6.f90: New test. + * testsuite/libgomp.fortran/map-subarray-7.f90: New test. + * testsuite/libgomp.fortran/map-subarray-8.f90: New test. + * testsuite/libgomp.fortran/map-subcomponents.f90: New test. + * testsuite/libgomp.fortran/struct-elem-map-1.f90: Adjust for + descriptor-mapping changes. Remove XFAIL. 
+ +2023-12-13 Julian Brown + + * target.c (gomp_map_pointer): Modify zero-length array section + pointer handling. + (gomp_attach_pointer): Likewise. + (gomp_map_fields_existing): Use gomp_map_0len_lookup. + (gomp_attach_pointer): Allow attaching null pointers (or Fortran + "unassociated" pointers). + (gomp_map_vars_internal): Handle zero-sized struct members. Add + diagnostic for unmapped struct pointer members. + * testsuite/libgomp.c-c++-common/baseptrs-1.c: New test. + * testsuite/libgomp.c-c++-common/baseptrs-2.c: New test. + * testsuite/libgomp.c-c++-common/baseptrs-6.c: New test. + * testsuite/libgomp.c-c++-common/baseptrs-7.c: New test. + * testsuite/libgomp.c-c++-common/ptr-attach-2.c: New test. + * testsuite/libgomp.c-c++-common/target-implicit-map-2.c: Fix missing + "free". + * testsuite/libgomp.c-c++-common/target-implicit-map-5.c: New test. + * testsuite/libgomp.c-c++-common/target-map-zlas-1.c: New test. + * testsuite/libgomp.c++/class-array-1.C: New test. + * testsuite/libgomp.c++/baseptrs-3.C: New test. + * testsuite/libgomp.c++/baseptrs-4.C: New test. + * testsuite/libgomp.c++/baseptrs-5.C: New test. + * testsuite/libgomp.c++/baseptrs-8.C: New test. + * testsuite/libgomp.c++/baseptrs-9.C: New test. + * testsuite/libgomp.c++/ref-mapping-1.C: New test. + * testsuite/libgomp.c++/target-48.C: New test. + * testsuite/libgomp.c++/target-49.C: New test. + * testsuite/libgomp.c++/target-exit-data-reftoptr-1.C: New test. + * testsuite/libgomp.c++/target-lambda-1.C: Update for OpenMP 5.2 + semantics. + * testsuite/libgomp.c++/target-this-3.C: Likewise. + * testsuite/libgomp.c++/target-this-4.C: Likewise. + * testsuite/libgomp.fortran/struct-elem-map-1.f90: Add temporary XFAIL. + * testsuite/libgomp.fortran/target-enter-data-6.f90: Likewise. + +2023-12-13 Thomas Schwinge + + * config/linux/allocator.c (linux_memspace_alloc): Fix 'size_t' + vs. '%ld' format string mismatch. 
+ +2023-12-13 Andrew Stubbs + Thomas Schwinge + + * allocator.c (MEMSPACE_ALLOC): Add PIN. + (MEMSPACE_CALLOC): Add PIN. + (MEMSPACE_REALLOC): Add PIN. + (MEMSPACE_FREE): Add PIN. + (MEMSPACE_VALIDATE): Add PIN. + (omp_init_allocator): Use MEMSPACE_VALIDATE to check pinning. + (omp_aligned_alloc): Add pinning to all MEMSPACE_* calls. + (omp_aligned_calloc): Likewise. + (omp_realloc): Likewise. + (omp_free): Likewise. + * config/linux/allocator.c: New file. + * config/nvptx/allocator.c (MEMSPACE_ALLOC): Add PIN. + (MEMSPACE_CALLOC): Add PIN. + (MEMSPACE_REALLOC): Add PIN. + (MEMSPACE_FREE): Add PIN. + (MEMSPACE_VALIDATE): Add PIN. + * config/gcn/allocator.c (MEMSPACE_ALLOC): Add PIN. + (MEMSPACE_CALLOC): Add PIN. + (MEMSPACE_REALLOC): Add PIN. + (MEMSPACE_FREE): Add PIN. + * libgomp.texi: Switch pinned trait to supported. + (MEMSPACE_VALIDATE): Add PIN. + * testsuite/libgomp.c/alloc-pinned-1.c: New test. + * testsuite/libgomp.c/alloc-pinned-2.c: New test. + * testsuite/libgomp.c/alloc-pinned-3.c: New test. + * testsuite/libgomp.c/alloc-pinned-4.c: New test. + 2023-12-11 Lipeng Zhu * testsuite/libgomp.fortran/rwlock_1.f90: New file. diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index cdbd3b90d901..53f247e7c6bd 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,8 @@ +2023-12-13 Jonathan Wakely + + * include/bits/chrono_io.h (__formatter_chrono::_M_C_y_Y): Do + not round century down for %Y formats. + 2023-12-12 Jonathan Wakely * include/std/format (__do_vformat_to): Handle char separately From acfd33620af3519b84baecedb0eb6618c2f599a6 Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Wed, 13 Dec 2023 17:24:39 -0700 Subject: [PATCH 300/311] [committed] Minor testsuite fallout from c99 changes The alpha port failed its weekly test due to a lack of a prototype for the syscall() routine. Fixed thusly and pushed to the trunk. gcc/testsuite * gcc.c-torture/execute/20001229-1.c: Prototype syscall(). 
--- gcc/testsuite/gcc.c-torture/execute/20001229-1.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/testsuite/gcc.c-torture/execute/20001229-1.c b/gcc/testsuite/gcc.c-torture/execute/20001229-1.c index 3bf333dd5a8e..a235fdad1385 100644 --- a/gcc/testsuite/gcc.c-torture/execute/20001229-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/20001229-1.c @@ -14,6 +14,8 @@ void exit (int); #include #include +int syscall (int, ...); + static inline int setsysinfo(unsigned long op, void *buffer, unsigned long size, int *start, void *arg, unsigned long flag) From 5e0f67b84a615ba186ab234a9bc43df0df5a50b6 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Thu, 14 Dec 2023 11:23:43 +0800 Subject: [PATCH 301/311] RISC-V: Add RVV builtin vectorization cost model This patch fixes PR111153: ble a1,zero,.L8 addiw a5,a1,-1 li a4,4 addi sp,sp,-16 mv a2,a0 sext.w a3,a1 bleu a5,a4,.L9 srliw a4,a3,2 slli a4,a4,4 mv a5,a0 add a4,a4,a0 vsetivli zero,4,e32,m1,ta,ma vmv.v.i v1,0 vse32.v v1,0(sp) .L4: vle32.v v1,0(a5) ---> This loop always processes 4 elements which is ok for VLEN = 128bits, but wastes a huge amount of computation units when VLEN > 128bits vle32.v v2,0(sp) addi a5,a5,16 vadd.vv v1,v2,v1 vse32.v v1,0(sp) bne a4,a5,.L4 ld a5,0(sp) lw a4,0(sp) andi a1,a1,-4 srai a5,a5,32 addw a5,a4,a5 lw a4,8(sp) addw a5,a5,a4 ld a4,8(sp) srai a4,a4,32 addw a0,a5,a4 beq a3,a1,.L15 .L3: subw a3,a3,a1 slli a5,a1,32 slli a3,a3,32 srli a3,a3,32 srli a5,a5,30 add a2,a2,a5 vsetvli a5,a3,e8,mf4,tu,mu vsetvli a4,zero,e32,m1,ta,ma sub a1,a3,a5 vmv.v.i v1,0 vsetvli zero,a3,e32,m1,tu,ma vle32.v v2,0(a2) vmv.v.v v1,v2 bne a3,a5,.L21 .L7: vsetvli a4,zero,e32,m1,ta,ma vmv.s.x v2,zero vredsum.vs v1,v1,v2 vmv.x.s a5,v1 addw a0,a0,a5 .L15: addi sp,sp,16 jr ra .L21: slli a5,a5,2 add a2,a2,a5 vsetvli zero,a1,e32,m1,tu,ma vle32.v v2,0(a2) vadd.vv v1,v1,v2 j .L7 .L8: li a0,0 ret .L9: li a1,0 li a0,0 j .L3 The root cause is that we were missing an RVV builtin vectorization cost model.
After this patch: ble a1,zero,.L4 vsetvli a5,zero,e32,m1,ta,ma vmv.v.i v1,0 .L3: vsetvli a5,a1,e32,m1,tu,ma vle32.v v2,0(a0) slli a4,a5,2 sub a1,a1,a5 add a0,a0,a4 vadd.vv v1,v2,v1 bne a1,zero,.L3 li a5,0 vsetivli zero,1,e32,m1,ta,ma vmv.s.x v2,a5 vsetvli a5,zero,e32,m1,ta,ma vredsum.vs v1,v1,v2 vmv.x.s a0,v1 ret .L4: li a0,0 ret PR target/111153 gcc/ChangeLog: * config/riscv/riscv-protos.h (struct common_vector_cost): New struct. (struct scalable_vector_cost): Ditto. (struct cpu_vector_cost): Ditto. * config/riscv/riscv-vector-costs.cc (costs::add_stmt_cost): Add RVV builtin vectorization cost * config/riscv/riscv.cc (struct riscv_tune_param): Ditto. (get_common_costs): New function. (riscv_builtin_vectorization_cost): Ditto. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New targethook. gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: New test. --- gcc/config/riscv/riscv-protos.h | 76 ++++++++++ gcc/config/riscv/riscv-vector-costs.cc | 5 +- gcc/config/riscv/riscv.cc | 143 ++++++++++++++++++ .../vect/costmodel/riscv/rvv/pr111153.c | 18 +++ 4 files changed, 239 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 85ab1db2088e..7de0b0310010 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -200,6 +200,82 @@ struct riscv_cpu_info { extern const riscv_cpu_info *riscv_find_cpu (const char *); +/* Common vector costs in any kind of vectorization (e.g VLA and VLS). */ +struct common_vector_cost +{ + /* Cost of any integer vector operation, excluding the ones handled + specially below. */ + const int int_stmt_cost; + + /* Cost of any fp vector operation, excluding the ones handled + specially below. */ + const int fp_stmt_cost; + + /* Gather/scatter vectorization cost. */ + const int gather_load_cost; + const int scatter_store_cost; + + /* Cost of a vector-to-scalar operation. 
*/ + const int vec_to_scalar_cost; + + /* Cost of a scalar-to-vector operation. */ + const int scalar_to_vec_cost; + + /* Cost of a permute operation. */ + const int permute_cost; + + /* Cost of an aligned vector load. */ + const int align_load_cost; + + /* Cost of an aligned vector store. */ + const int align_store_cost; + + /* Cost of an unaligned vector load. */ + const int unalign_load_cost; + + /* Cost of an unaligned vector store. */ + const int unalign_store_cost; +}; + +/* scalable vectorization (VLA) specific cost. */ +struct scalable_vector_cost : common_vector_cost +{ + CONSTEXPR scalable_vector_cost (const common_vector_cost &base) + : common_vector_cost (base) + {} + + /* TODO: We will need more other kinds of vector cost for VLA. + E.g. fold_left reduction cost, lanes load/store cost, ..., etc. */ +}; + +/* Cost for vector insn classes. */ +struct cpu_vector_cost +{ + /* Cost of any integer scalar operation, excluding load and store. */ + const int scalar_int_stmt_cost; + + /* Cost of any fp scalar operation, excluding load and store. */ + const int scalar_fp_stmt_cost; + + /* Cost of a scalar load. */ + const int scalar_load_cost; + + /* Cost of a scalar store. */ + const int scalar_store_cost; + + /* Cost of a taken branch. */ + const int cond_taken_branch_cost; + + /* Cost of a not-taken branch. */ + const int cond_not_taken_branch_cost; + + /* Cost of an VLS modes operations. */ + const common_vector_cost *vls; + + /* Cost of an VLA modes operations. */ + const scalable_vector_cost *vla; +}; + /* Routines implemented in riscv-selftests.cc. 
*/ #if CHECKING_P namespace selftest { diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 7888cef58fe9..e7bc9ed52331 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -750,9 +750,8 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree, tree vectype, int misalign, vect_cost_model_location where) { - /* TODO: Use default STMT cost model. - We will support more accurate STMT cost model later. */ - int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign); + int stmt_cost + = targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign); /* Do one-time initialization based on the vinfo. */ loop_vec_info loop_vinfo = dyn_cast (m_vinfo); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 69a8a503f303..2dc442443093 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -281,6 +281,7 @@ struct riscv_tune_param bool slow_unaligned_access; bool use_divmod_expansion; unsigned int fusible_ops; + const struct cpu_vector_cost *vec_costs; }; @@ -348,6 +349,50 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = { VD_REGS, VD_REGS, VD_REGS, VD_REGS, }; +/* Generic costs for VLS vector operations. */ +static const common_vector_cost generic_vls_vector_cost = { + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 1, /* gather_load_cost */ + 1, /* scatter_store_cost */ + 1, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* permute_cost */ + 3, /* align_load_cost */ + 3, /* align_store_cost */ + 3, /* unalign_load_cost */ + 3, /* unalign_store_cost */ +}; + +/* Generic costs for VLA vector operations. 
*/ +static const scalable_vector_cost generic_vla_vector_cost = { + { + 1, /* int_stmt_cost */ + 1, /* fp_stmt_cost */ + 1, /* gather_load_cost */ + 1, /* scatter_store_cost */ + 1, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* permute_cost */ + 3, /* align_load_cost */ + 3, /* align_store_cost */ + 3, /* unalign_load_cost */ + 3, /* unalign_store_cost */ + }, +}; + +/* Generic costs for vector insn classes. */ +static const struct cpu_vector_cost generic_vector_cost = { + 1, /* scalar_int_stmt_cost */ + 1, /* scalar_fp_stmt_cost */ + 1, /* scalar_load_cost */ + 1, /* scalar_store_cost */ + 3, /* cond_taken_branch_cost */ + 1, /* cond_not_taken_branch_cost */ + &generic_vls_vector_cost, /* vls */ + &generic_vla_vector_cost, /* vla */ +}; + /* Costs to use when optimizing for rocket. */ static const struct riscv_tune_param rocket_tune_info = { {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ @@ -362,6 +407,7 @@ static const struct riscv_tune_param rocket_tune_info = { true, /* slow_unaligned_access */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ }; /* Costs to use when optimizing for Sifive 7 Series. */ @@ -378,6 +424,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { true, /* slow_unaligned_access */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ }; /* Costs to use when optimizing for T-HEAD c906. */ @@ -394,6 +441,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { false, /* slow_unaligned_access */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ }; /* Costs to use when optimizing for a generic ooo profile. 
*/ @@ -410,6 +458,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { false, /* slow_unaligned_access */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ + &generic_vector_cost, /* vector cost */ }; /* Costs to use when optimizing for size. */ @@ -426,6 +475,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { false, /* slow_unaligned_access */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ }; static bool riscv_avoid_shrink_wrapping_separate (); @@ -10192,6 +10242,95 @@ riscv_frame_pointer_required (void) return riscv_save_frame_pointer && !crtl->is_leaf; } +/* Return the appropriate common costs for vectors of type VECTYPE. */ +static const common_vector_cost * +get_common_costs (tree vectype) +{ + const cpu_vector_cost *costs = tune_param->vec_costs; + gcc_assert (costs); + + if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype))) + return costs->vls; + return costs->vla; +} + +/* Implement targetm.vectorize.builtin_vectorization_cost. */ + +static int +riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, int misalign ATTRIBUTE_UNUSED) +{ + unsigned elements; + const cpu_vector_cost *costs = tune_param->vec_costs; + bool fp = false; + + if (vectype != NULL) + fp = FLOAT_TYPE_P (vectype); + + if (costs != NULL) + { + const common_vector_cost *common_costs = get_common_costs (vectype); + gcc_assert (common_costs != NULL); + switch (type_of_cost) + { + case scalar_stmt: + return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost; + + case scalar_load: + return costs->scalar_load_cost; + + case scalar_store: + return costs->scalar_store_cost; + + case vector_stmt: + return fp ? 
common_costs->fp_stmt_cost : common_costs->int_stmt_cost; + + case vector_load: + return common_costs->align_load_cost; + + case vector_store: + return common_costs->align_store_cost; + + case vec_to_scalar: + return common_costs->vec_to_scalar_cost; + + case scalar_to_vec: + return common_costs->scalar_to_vec_cost; + + case unaligned_load: + return common_costs->unalign_load_cost; + case vector_gather_load: + return common_costs->gather_load_cost; + + case unaligned_store: + return common_costs->unalign_store_cost; + case vector_scatter_store: + return common_costs->scatter_store_cost; + + case cond_branch_taken: + return costs->cond_taken_branch_cost; + + case cond_branch_not_taken: + return costs->cond_not_taken_branch_cost; + + case vec_perm: + return common_costs->permute_cost; + + case vec_promote_demote: + return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost; + + case vec_construct: + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); + return elements / 2 + 1; + + default: + gcc_unreachable (); + } + } + + return default_builtin_vectorization_cost (type_of_cost, vectype, misalign); +} + /* Implement targetm.vectorize.create_costs. 
*/ static vector_costs * @@ -10582,6 +10721,10 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset) #undef TARGET_FRAME_POINTER_REQUIRED #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + riscv_builtin_vectorization_cost + #undef TARGET_VECTORIZE_CREATE_COSTS #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c new file mode 100644 index 000000000000..06e08ec5f2e1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mtune=generic-ooo" } */ + +#define DEF_REDUC_PLUS(TYPE) \ + TYPE __attribute__ ((noinline, noclone)) \ + reduc_plus_##TYPE (TYPE *__restrict a, int n) \ + { \ + TYPE r = 0; \ + for (int i = 0; i < n; ++i) \ + r += a[i]; \ + return r; \ + } + +#define TEST_PLUS(T) T (int) + +TEST_PLUS (DEF_REDUC_PLUS) + +/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */ From 7ff33c609a64319583223d6d39a89e971f993ecf Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 14 Dec 2023 07:57:34 +0100 Subject: [PATCH 302/311] c++: Fix tinst_level::to_list [PR112968] With valgrind checking, there are various errors reported on some C++26 libstdc++ tests, like: ==2009913== Conditional jump or move depends on uninitialised value(s) ==2009913== at 0x914C59: gt_ggc_mx_lang_tree_node(void*) (gt-cp-tree.h:107) ==2009913== by 0x8AB7A5: gt_ggc_mx_tinst_level(void*) (gt-cp-pt.h:32) ==2009913== by 0xB89B25: ggc_mark_root_tab(ggc_root_tab const*) (ggc-common.cc:75) ==2009913== by 0xB89DF4: ggc_mark_roots() (ggc-common.cc:104) ==2009913== by 0x9D6311: ggc_collect(ggc_collect) (ggc-page.cc:2227) ==2009913== by 0xDB70F6: execute_one_pass(opt_pass*) (passes.cc:2738) 
==2009913== by 0xDB721F: execute_pass_list_1(opt_pass*) (passes.cc:2755) ==2009913== by 0xDB7258: execute_pass_list(function*, opt_pass*) (passes.cc:2766) ==2009913== by 0xA55525: cgraph_node::analyze() (cgraphunit.cc:695) ==2009913== by 0xA57CC7: analyze_functions(bool) (cgraphunit.cc:1248) ==2009913== by 0xA5890D: symbol_table::finalize_compilation_unit() (cgraphunit.cc:2555) ==2009913== by 0xEB02A1: compile_file() (toplev.cc:473) I think the problem is in the tinst_level::to_list optimization from 2018. That function returns a TREE_LIST with TREE_PURPOSE/TREE_VALUE filled in. Either it freshly allocates using build_tree_list (NULL, NULL); + stores TREE_PURPOSE/TREE_VALUE, that case is fine (the whole tree_list object is zeros, except for TREE_CODE set to TREE_LIST and TREE_PURPOSE/TREE_VALUE modified later; the above also means in particular TREE_TYPE of it is NULL and TREE_CHAIN is NULL and both are accessible/initialized even in valgrind annotations. Or it grabs a TREE_LIST node from a freelist. If defined(ENABLE_GC_CHECKING), the object is still all zeros except for TREE_CODE/TREE_PURPOSE/TREE_VALUE like in the fresh allocation case (but unlike the build_tree_list case in the valgrind annotations TREE_TYPE and TREE_CHAIN are marked as uninitialized). If !defined(ENABLE_GC_CHECKING), I believe the actual memory content is that everything but TREE_CODE/TREE_PURPOSE/TREE_VALUE/TREE_CHAIN is zeros and TREE_CHAIN is something random (whatever next entry is in the freelist, nothing overwrote it) and from valgrind POV again, TREE_TYPE and TREE_CHAIN are marked as uninitialized. When using the other freelist instantiations (pending_template and tinst_level) I believe everything is correct, from valgrind POV it marks the whole pending_template or tinst_level as uninitialized, but the caller initializes it all). 
One way to fix this would be let tinst_level::to_list not store just TREE_PURPOSE (ret) = tldcl; TREE_VALUE (ret) = targs; but also TREE_TYPE (ret) = NULL_TREE; TREE_CHAIN (ret) = NULL_TREE; Though, that seems like wasted effort in the build_tree_list case to me. So, the following patch instead does that TREE_CHAIN = NULL_TREE store only in the case where it isn't already done (and likewise for TREE_TYPE just to be sure) and marks both TREE_CHAIN and TREE_TYPE as initialized (the latter is at that spot, the former is because we never really touch TREE_TYPE of a TREE_LIST anywhere and so the NULL gets stored into the freelist and restored from there (except for ENABLE_GC_CHECKING where it is poisoned and then cleared again). 2023-12-14 Jakub Jelinek PR c++/112968 * pt.cc (freelist::reinit): Make whole obj->common defined for valgrind annotations rather than just obj->base, and do it even for ENABLE_GC_CHECKING. If not ENABLE_GC_CHECKING, clear TREE_CHAIN (obj) and TREE_TYPE (obj). --- gcc/cp/pt.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index b6a450c4ad40..810d33c9c814 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -9525,7 +9525,7 @@ template <> inline void freelist::reinit (tree obj ATTRIBUTE_UNUSED) { - tree_base *b ATTRIBUTE_UNUSED = &obj->base; + tree_common *c ATTRIBUTE_UNUSED = &obj->common; #ifdef ENABLE_GC_CHECKING gcc_checking_assert (TREE_CODE (obj) == TREE_LIST); @@ -9540,8 +9540,10 @@ freelist::reinit (tree obj ATTRIBUTE_UNUSED) #ifdef ENABLE_GC_CHECKING TREE_SET_CODE (obj, TREE_LIST); #else - VALGRIND_DISCARD (VALGRIND_MAKE_MEM_DEFINED (b, sizeof (*b))); + TREE_CHAIN (obj) = NULL_TREE; + TREE_TYPE (obj) = NULL_TREE; #endif + VALGRIND_DISCARD (VALGRIND_MAKE_MEM_DEFINED (c, sizeof (*c))); } /* Point to the first object in the TREE_LIST freelist. 
*/ From 585dc19ef051959d688e6ea89b41998600480ecb Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 14 Dec 2023 08:01:04 +0100 Subject: [PATCH 303/311] testsuite: Fix up pr112904.C test [PR112904] On Fri, Dec 08, 2023 at 03:12:00PM +0800, liuhongt wrote: > * g++.target/i386/pr112904.C: New test. The new test FAILs on i686-linux and even on x86_64-linux I think it doesn't actually test what was reported, unless one performs testing with -march= for some XOP enabled CPU or -mxop. The following patch fixes that, tested on x86_64-linux with make check-g++ RUNTESTFLAGS='--target_board=unix\{-m32,-m32/-mno-sse/-mno-mmx,-m64\} i386.exp=pr112904.C' 2023-12-14 Jakub Jelinek PR target/112904 * g++.target/i386/pr112904.C: Add dg-do compile, dg-options -mxop and for ia32 also dg-additional-options -mmmx. --- gcc/testsuite/g++.target/i386/pr112904.C | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gcc/testsuite/g++.target/i386/pr112904.C b/gcc/testsuite/g++.target/i386/pr112904.C index 556be9211971..3a470ad0afea 100644 --- a/gcc/testsuite/g++.target/i386/pr112904.C +++ b/gcc/testsuite/g++.target/i386/pr112904.C @@ -1,3 +1,8 @@ +// PR target/112904 +// { dg-do compile } +// { dg-options "-mxop" } +// { dg-additional-options "-mmmx" { target ia32 } } + typedef _Float16 v4hf __attribute__((vector_size(8))); typedef short v4hi __attribute__((vector_size(8))); typedef _Float16 v2hf __attribute__((vector_size(4))); From d16479170066b7606dea8dc25a7527a081a90a0b Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 14 Dec 2023 08:10:48 +0100 Subject: [PATCH 304/311] testsuite: Fix up target-enter-data-1.c on 32-bit targets struct bar { int num_vectors; double *vectors; }; is 16 bytes only on 64-bit targets, on 32-bit ones it is just 8 bytes, so the explicit matching of the * 16 multiplication only works on the former. 2023-12-14 Jakub Jelinek * c-c++-common/gomp/target-enter-data-1.c: Match also sizeof bar on 32-bit targets - 8 bytes - rather than just 16 bytes. 
--- gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c b/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c index 4913d338e5f9..3e5d5c76bb0c 100644 --- a/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c +++ b/gcc/testsuite/c-c++-common/gomp/target-enter-data-1.c @@ -22,4 +22,4 @@ void func (struct foo *f, int n, int m) } /* { dg-final { scan-tree-dump-times {map\(struct:\*f \[len: 1\]\) map\(alloc:[a-z0-9\._]+->vectors \[len: 0\]\) map\(to:\*_[0-9]+ \[len: _[0-9]+\]\) map\(attach:[a-z0-9\._]+->vectors \[bias: [^\]]+\]\) map\(attach:\*_[0-9]+ \[bias: _[0-9]+\]\)} 1 "gimple" } } */ -/* { dg-final { scan-tree-dump-times {map\(struct:\*\(f->bars \+ \(sizetype\) \(\([^\)]+\) n \* 16\)\) \[len: 1\]\) map\(alloc:[a-z0-9\._]+->vectors \[len: 0\]\) map\(to:\*_[0-9]+ \[len: _[0-9]+\]\) map\(attach:[a-z0-9\._]+->vectors \[bias: [^\]]+\]\)} 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times {map\(struct:\*\(f->bars \+ \(sizetype\) \(\([^\)]+\) n \* (?:16|8)\)\) \[len: 1\]\) map\(alloc:[a-z0-9\._]+->vectors \[len: 0\]\) map\(to:\*_[0-9]+ \[len: _[0-9]+\]\) map\(attach:[a-z0-9\._]+->vectors \[bias: [^\]]+\]\)} 2 "gimple" } } */ From 9e20779a7d8e14e77df6efaeadc6bf3a001336ef Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 14 Dec 2023 08:40:00 +0100 Subject: [PATCH 305/311] tree-optimization/110640 - testcase for fixed bug PR tree-optimization/110640 * gcc.dg/torture/pr110640.c: New testcase. 
--- gcc/testsuite/gcc.dg/torture/pr110640.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/torture/pr110640.c diff --git a/gcc/testsuite/gcc.dg/torture/pr110640.c b/gcc/testsuite/gcc.dg/torture/pr110640.c new file mode 100644 index 000000000000..7667294dde8f --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr110640.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-require-effective-target int32plus } */ + +unsigned short a = 65535; +int b, f, g; +int *c = &b; +long d; +short e; +static int *h(int); +void i() { h(a); } +int *h(int j) { + unsigned char k; + for (; e != 8; e = e + 4) + k = 0; + for (; (unsigned char)(j-181249535) + k <= 1; k++) { + *c = d; + for (; f; f++) + ; + } + return &g; +} +int main() { i(); } From e9f0af150358d93b4c4c5f65d662748ae87bd3d0 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Thu, 14 Dec 2023 15:47:52 +0800 Subject: [PATCH 306/311] RISC-V: Add failed SLP testcase After recent RVV cost model tweak, I found this PR issue has been fixed. Add testcase and committed. PR target/112387 gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr112387.c: New test. 
--- .../vect/costmodel/riscv/rvv/pr112387.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr112387.c diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr112387.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr112387.c new file mode 100644 index 000000000000..ee44b5f09e83 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr112387.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-tree-vect-details" } */ + +#include + +void +f2 (uint64_t *__restrict y, uint64_t *__restrict x, + uint64_t *__restrict indices, uint64_t n) +{ + for (int64_t i = 0; i < n; ++i) + { + y[i * 2] = x[indices[i * 2]] + 1; + y[i * 2 + 1] = x[indices[i * 2 + 1]] + 2; + } +} + +/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */ +/* { dg-final { scan-assembler-not "vlseg" } } */ +/* { dg-final { scan-assembler-not "vsseg" } } */ From be0ff0866a6f072ccfbbb3a3c2079adf1db51aa1 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 13 Dec 2023 11:20:46 +0800 Subject: [PATCH 307/311] Force broadcast constant to mem for vec_dup{v4di,v8si,v4df,v8df} when TARGET_AVX2 is not available. vpbroadcastd/vpbroadcastq is available under TARGET_AVX2, but vec_dup{v4di,v8si} pattern is available under AVX with memory operand. And it will cause LRA/Reload to generate spill and reload if we put constant in register. gcc/ChangeLog: PR target/112992 * config/i386/i386-expand.cc (ix86_convert_const_wide_int_to_broadcast): Don't convert to broadcast for vec_dup{v4di,v8si} when TARGET_AVX2 is not available. (ix86_broadcast_from_constant): Allow broadcast for V4DI/V8SI when !TARGET_AVX2 since it will be forced to memory later. (ix86_expand_vector_move): Force constant to mem for vec_dup{v8si,v4di} when TARGET_AVX2 is not available. gcc/testsuite/ChangeLog: * gcc.target/i386/pr100865-7a.c: Adjust testcase.
* gcc.target/i386/pr100865-7c.c: Ditto. * gcc.target/i386/pr112992.c: New test. --- gcc/config/i386/i386-expand.cc | 48 +++++++++++++-------- gcc/testsuite/gcc.target/i386/pr100865-7a.c | 3 +- gcc/testsuite/gcc.target/i386/pr100865-7c.c | 3 +- gcc/testsuite/gcc.target/i386/pr112992.c | 30 +++++++++++++ 4 files changed, 62 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr112992.c diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a53d69d54000..fad4f34f9055 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -297,6 +297,12 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op) if (!TARGET_INTER_UNIT_MOVES_TO_VEC) return nullptr; + unsigned int msize = GET_MODE_SIZE (mode); + + /* Only optimized for vpbroadcast[bwsd]/vbroadcastss with xmm/ymm/zmm. */ + if (msize != 16 && msize != 32 && msize != 64) + return nullptr; + /* Convert CONST_WIDE_INT to a non-standard SSE constant integer broadcast only if vector broadcast is available. */ if (!TARGET_AVX @@ -309,18 +315,23 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op) HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0); HOST_WIDE_INT val_broadcast; scalar_int_mode broadcast_mode; - if (TARGET_AVX2 + /* vpbroadcastb zmm requires TARGET_AVX512BW. */ + if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (QImode), val_broadcast)) broadcast_mode = QImode; - else if (TARGET_AVX2 + else if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (HImode), val_broadcast)) broadcast_mode = HImode; - else if (ix86_broadcast (val, GET_MODE_BITSIZE (SImode), + /* vbroadcasts[sd] only support memory operand w/o AVX2. + When msize == 16, pshufs is used for vec_duplicate. + when msize == 64, vpbroadcastd is used, and TARGET_AVX512F must be existed. 
*/ + else if ((msize != 32 || TARGET_AVX2) + && ix86_broadcast (val, GET_MODE_BITSIZE (SImode), val_broadcast)) broadcast_mode = SImode; - else if (TARGET_64BIT + else if (TARGET_64BIT && (msize != 32 || TARGET_AVX2) && ix86_broadcast (val, GET_MODE_BITSIZE (DImode), val_broadcast)) broadcast_mode = DImode; @@ -596,23 +607,17 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op) && INTEGRAL_MODE_P (mode)) return nullptr; + unsigned int msize = GET_MODE_SIZE (mode); + unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode))); + /* Convert CONST_VECTOR to a non-standard SSE constant integer broadcast only if vector broadcast is available. */ - if (!(TARGET_AVX2 - || (TARGET_AVX - && (GET_MODE_INNER (mode) == SImode - || GET_MODE_INNER (mode) == DImode)) - || FLOAT_MODE_P (mode)) - || standard_sse_constant_p (op, mode)) + if (standard_sse_constant_p (op, mode)) return nullptr; - /* Don't broadcast from a 64-bit integer constant in 32-bit mode. - We can still put 64-bit integer constant in memory when - avx512 embed broadcast is available. */ - if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT - && (!TARGET_AVX512F - || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512) - || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL))) + /* vpbroadcast[b,w] is available under TARGET_AVX2. + or TARGET_AVX512BW for zmm. */ + if (inner_size < 4 && !(msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)) return nullptr; if (GET_MODE_INNER (mode) == TImode) @@ -710,7 +715,14 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[]) constant or scalar mem. */ op1 = gen_reg_rtx (mode); if (FLOAT_MODE_P (mode) - || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)) + || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode) + /* vbroadcastss/vbroadcastsd only supports memory operand + w/o AVX2, force them into memory to avoid spill to + memory. 
*/ + || (GET_MODE_SIZE (mode) == 32 + && (GET_MODE_INNER (mode) == DImode + || GET_MODE_INNER (mode) == SImode) + && !TARGET_AVX2)) first = force_const_mem (GET_MODE_INNER (mode), first); bool ok = ix86_expand_vector_init_duplicate (false, mode, op1, first); diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c b/gcc/testsuite/gcc.target/i386/pr100865-7a.c index f6f2be911206..7de7d4a3ce3a 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c @@ -11,7 +11,6 @@ foo (void) array[i] = -45; } -/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */ -/* { dg-final { scan-assembler-not "vpbroadcastq" { target ia32 } } } */ /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c b/gcc/testsuite/gcc.target/i386/pr100865-7c.c index 4d50bb7e2f60..edbfd5b09ed6 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c @@ -11,7 +11,6 @@ foo (void) array[i] = -45; } -/* { dg-final { scan-assembler-times "vbroadcastsd" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vbroadcastsd" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */ -/* { dg-final { scan-assembler-not "vbroadcastsd" { target ia32 } } } */ /* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr112992.c b/gcc/testsuite/gcc.target/i386/pr112992.c new file mode 100644 index 000000000000..743e64dccba2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112992.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-mavx -mno-avx2 -O2 " } */ +/* { dg-final { scan-assembler-not {(?n)(%rsp)} } } */ + +typedef unsigned long long v4di __attribute((vector_size(32))); +typedef unsigned int v8si __attribute((vector_size(32))); +typedef unsigned short v16hi __attribute((vector_size(32))); +typedef unsigned char v32qi __attribute((vector_size(32))); + +#define MASK 0x01010101 +#define MASKL 0x0101010101010101ULL +#define MASKS 0x0101 + +v4di fooq() { + return (v4di){MASKL,MASKL,MASKL,MASKL}; +} + +v8si food() { + return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK}; +} + +v16hi foow() { + return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS, + MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS}; +} + +v32qi foob() { + return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; +} From 96e1978b213482fc4c25693b91ae2ead481af620 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Thu, 14 Dec 2023 11:09:06 +0100 Subject: [PATCH 308/311] SRA: Relax requirements to use build_reconstructed_reference (PR 111807) This patch half-reverts 3aaf704bca3e and replaces it with a fix with relaxed requirements for invoking build_reconstructed_reference in build_ref_for_model. build_ref_for_model/build_ref_for_offset is used in two slightly different contexts. The first is when we are looking at an assignment like p->field_A.field_B = s.field_B; and we have replacements for e.g. s.field_B.field_C.field_D and we want to store them directly to p->field_A.field_B.field_C.field_D (as opposed to going through s or using a MEM_REF based in p->field_A.field_B). In this case, the offset of the "model" (s.field_B.field_C.field_D) within this can be different from the offset within the LHS that we want to reach (field_C.field_D within the "base" p->field_A.field_B). Patch 3aaf704bca3e has caused us to unnecessarily create MEM_REFs for these situations. These uses of build_ref_for_model work with the relaxed condition just fine.
The second, problematic, context is when somewhere in the function we have an assignment s.field_A = t.field_A.field_B; and we are creating an access structure to represent s.field_A.field_B even if it is not actually accessed in the original input. This is done after scanning the entire function body and we need to construct a "universal" reference to s.field_A.field_B. In this case the "base" is "s" and it has to be the DECL itself and not some reference for it because for arbitrary references we need a GSI pointing to a statement which we don't have; the reference is supposed to be universal. But then using build_ref_for_model and within it build_reconstructed_reference misbehaves if the expression contains any ARRAY_REFs. In the first case those are fine because as we eventually reach the aggregate type that matches a real LHS or RHS, we know we can just bolt the rest of the references onto it and end up with the correct overall reference. However when dealing with s.array[1].field_A = s.array[2].field_B; we cannot just bolt on the array[2] reference when we want array[1] but that is exactly what happens when we use build_reconstructed_reference and keep it walking all the way to s. I was considering making all users of the second kind use directly build_ref_for_offset instead of build_ref_for_model but the latter also handles COMPONENT_REFs to bit-fields which the former does not. Therefore I have decided to use the NULL-ness of GSI as an indicator of how strict we need to be. I have changed the function comment to reflect that.
I have been able to observe disambiguation improvements with this patch over current master: we successfully manage a few more aliasing_component_refs_p disambiguations when compiling cc1, going from: Alias oracle query stats: refs_may_alias_p: 94354287 disambiguations, 106279231 queries ref_maybe_used_by_call_p: 1572511 disambiguations, 95618222 queries call_may_clobber_ref_p: 649273 disambiguations, 659371 queries stmt_kills_ref_p: 142342 kills, 8407309 queries nonoverlapping_component_refs_p: 19 disambiguations, 10227 queries nonoverlapping_refs_since_match_p: 15665 disambiguations, 52585 must overlaps, 68893 queries aliasing_component_refs_p: 67090 disambiguations, 3081766 queries TBAA oracle: 22675296 disambiguations 61781978 queries 14045969 are in alias set 0 10997085 queries asked about the same object 153 queries asked about the same alias set 0 access volatile 12485774 are dependent in the DAG 1577701 are aritificially in conflict with void * Modref stats: modref kill: 832 kills, 19399 queries modref use: 50760 disambiguations, 1825109 queries modref clobber: 1371014 disambiguations, 40152535 queries 5190238 tbaa queries (0.129263 per modref query) 1341663 base compares (0.033414 per modref query) PTA query stats: pt_solution_includes: 36784427 disambiguations, 46141175 queries pt_solutions_intersect: 4519387 disambiguations, 17081996 queries to: Alias oracle query stats: refs_may_alias_p: 94354083 disambiguations, 106278948 queries ref_maybe_used_by_call_p: 1572511 disambiguations, 95618018 queries call_may_clobber_ref_p: 649273 disambiguations, 659371 queries stmt_kills_ref_p: 142342 kills, 8407310 queries nonoverlapping_component_refs_p: 19 disambiguations, 10227 queries nonoverlapping_refs_since_match_p: 15665 disambiguations, 52585 must overlaps, 68893 queries aliasing_component_refs_p: 67104 disambiguations, 3081781 queries TBAA oracle: 22676608 disambiguations 61782455 queries 14044948 are in alias set 0 10998619 queries asked about the same
object 153 queries asked about the same alias set 0 access volatile 12484882 are dependent in the DAG 1577245 are aritificially in conflict with void * Modref stats: modref kill: 832 kills, 19399 queries modref use: 50760 disambiguations, 1825106 queries modref clobber: 1371028 disambiguations, 40152504 queries 5190319 tbaa queries (0.129265 per modref query) 1341403 base compares (0.033408 per modref query) PTA query stats: pt_solution_includes: 36784449 disambiguations, 46141210 queries pt_solutions_intersect: 4519320 disambiguations, 17082083 queries gcc/ChangeLog: 2023-12-13 Martin Jambor PR tree-optimization/111807 * tree-sra.cc (build_ref_for_model): Allow offset smaller than model->offset when gsi is non-NULL. Adjust function comment. --- gcc/tree-sra.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc index 99a1b0a6d179..44137b2052bd 100644 --- a/gcc/tree-sra.cc +++ b/gcc/tree-sra.cc @@ -1843,8 +1843,11 @@ build_reconstructed_reference (location_t, tree base, struct access *model) /* Construct a memory reference to a part of an aggregate BASE at the given OFFSET and of the same type as MODEL. In case this is a reference to a bit-field, the function will replicate the last component_ref of model's - expr to access it. GSI and INSERT_AFTER have the same meaning as in - build_ref_for_offset. */ + expr to access it. INSERT_AFTER and GSI have the same meaning as in + build_ref_for_offset, furthermore, when GSI is NULL, the function expects + that it re-builds the entire reference from a DECL to the final access and + so will create a MEM_REF when OFFSET does not exactly match offset of + MODEL. 
*/ static tree build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, @@ -1874,7 +1877,8 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, && !TREE_THIS_VOLATILE (base) && (TYPE_ADDR_SPACE (TREE_TYPE (base)) == TYPE_ADDR_SPACE (TREE_TYPE (model->expr))) - && offset == model->offset + && (offset == model->offset + || (gsi && offset <= model->offset)) /* build_reconstructed_reference can still fail if we have already massaged BASE because of another type incompatibility. */ && (res = build_reconstructed_reference (loc, base, model))) From cd794c3961017703a4d2ca0e854ea23b3d4b6373 Mon Sep 17 00:00:00 2001 From: Filip Kastl Date: Thu, 14 Dec 2023 11:29:31 +0100 Subject: [PATCH 309/311] A new copy propagation and PHI elimination pass This patch adds the strongly-connected copy propagation (SCCOPY) pass. It is a lightweight GIMPLE copy propagation pass that also removes some redundant PHI statements. It handles degenerate PHIs, e.g.: _5 = PHI <_1>; _6 = PHI <_6, _6, _1, _1>; _7 = PHI <16, _7>; // Replaces occurences of _5 and _6 by _1 and _7 by 16 It also handles more complicated situations, e.g.: _8 = PHI <_9, _10>; _9 = PHI <_8, _10>; _10 = PHI <_8, _9, _1>; // Replaces occurences of _8, _9 and _10 by _1 gcc/ChangeLog: * Makefile.in: Added sccopy pass. * passes.def: Added sccopy pass before LTO streaming and before RTL expansion. * tree-pass.h (make_pass_sccopy): Added sccopy pass. * gimple-ssa-sccopy.cc: New file. gcc/testsuite/ChangeLog: * gcc.dg/sccopy-1.c: New test. 
Signed-off-by: Filip Kastl --- gcc/Makefile.in | 1 + gcc/gimple-ssa-sccopy.cc | 680 ++++++++++++++++++++++++++++++++ gcc/passes.def | 2 + gcc/testsuite/gcc.dg/sccopy-1.c | 78 ++++ gcc/tree-pass.h | 1 + 5 files changed, 762 insertions(+) create mode 100644 gcc/gimple-ssa-sccopy.cc create mode 100644 gcc/testsuite/gcc.dg/sccopy-1.c diff --git a/gcc/Makefile.in b/gcc/Makefile.in index f284c1387e27..754eceb23bbc 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1497,6 +1497,7 @@ OBJS = \ gimple-ssa-backprop.o \ gimple-ssa-isolate-paths.o \ gimple-ssa-nonnull-compare.o \ + gimple-ssa-sccopy.o \ gimple-ssa-split-paths.o \ gimple-ssa-store-merging.o \ gimple-ssa-strength-reduction.o \ diff --git a/gcc/gimple-ssa-sccopy.cc b/gcc/gimple-ssa-sccopy.cc new file mode 100644 index 000000000000..ac5ec32eb32b --- /dev/null +++ b/gcc/gimple-ssa-sccopy.cc @@ -0,0 +1,680 @@ +/* Strongly-connected copy propagation pass for the GNU compiler. + Copyright (C) 2023 Free Software Foundation, Inc. + Contributed by Filip Kastl + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "gimple-iterator.h" +#include "vec.h" +#include "hash-set.h" +#include +#include "ssa-iterators.h" +#include "gimple-fold.h" +#include "gimplify.h" +#include "tree-cfg.h" +#include "tree-eh.h" +#include "builtins.h" +#include "tree-ssa-dce.h" +#include "fold-const.h" + +/* Strongly connected copy propagation pass. + + This is a lightweight copy propagation pass that is also able to eliminate + redundant PHI statements. The pass considers the following types of copy + statements: + + 1 An assignment statement with a single argument. + + _3 = _2; + _4 = 5; + + 2 A degenerate PHI statement. A degenerate PHI is a PHI that only refers to + itself or one other value. + + _5 = PHI <_1>; + _6 = PHI <_6, _6, _1, _1>; + _7 = PHI <16, _7>; + + 3 A set of PHI statements that only refer to each other or to one other + value. + + _8 = PHI <_9, _10>; + _9 = PHI <_8, _10>; + _10 = PHI <_8, _9, _1>; + + All of these statements produce copies and can be eliminated from the + program. For a copy statement we identify the value it creates a copy of + and replace references to the statement with the value -- we propagate the + copy. + + _3 = _2; // Replace all occurences of _3 by _2 + + _8 = PHI <_9, _10>; + _9 = PHI <_8, _10>; + _10 = PHI <_8, _9, _1>; // Replace all occurences of _8, _9 and _10 by _1 + + To find all three types of copy statements we use an algorithm based on + strongly-connected components (SCCs) in dataflow graph. The algorithm was + introduced in an article from 2013[1]. We describe the algorithm bellow. + + To identify SCCs we implement the Robert Tarjan's SCC algorithm. For the + SCC computation we wrap potential copy statements in the 'vertex' struct. + To each of these statements we also assign a vertex number ('vxnum'). 
Since + the main algorithm has to be able to compute SCCs of subgraphs of the whole + dataflow graph we use GIMPLE stmt flags to prevent Tarjan's algorithm from + leaving the subgraph. + + References: + + [1] Simple and Efficient Construction of Static Single Assignmemnt Form, + Braun, Buchwald, Hack, Leissa, Mallon, Zwinkau, 2013, LNCS vol. 7791, + Section 3.2. */ + +/* Bitmap tracking statements which were propagated to be removed at the end of + the pass. */ + +static bitmap dead_stmts; + +/* State of vertex during SCC discovery. + + unvisited Vertex hasn't yet been popped from worklist. + vopen DFS has visited vertex for the first time. Vertex has been put + on Tarjan stack. + closed DFS has backtracked through vertex. At this point, vertex + doesn't have any unvisited neighbors. + in_scc Vertex has been popped from Tarjan stack. */ + +enum vstate +{ + unvisited, + vopen, + closed, + in_scc +}; + +/* Information about a vertex. Used by SCC discovery. */ + +struct vertex +{ + bool active; /* scc_discovery::compute_sccs () only considers a subgraph of + the whole dataflow graph. It uses this flag so that it knows + which vertices are part of this subgraph. */ + vstate state; + unsigned index; + unsigned lowlink; +}; + +/* SCC discovery. + + Used to find SCCs in a dataflow graph. Implements Tarjan's SCC + algorithm. */ + +class scc_discovery +{ +public: + scc_discovery (); + ~scc_discovery (); + auto_vec> compute_sccs (vec &stmts); + +private: + unsigned curr_generation = 0; + vertex* vertices; /* Indexed by SSA_NAME_VERSION. */ + auto_vec worklist; /* DFS stack. */ + auto_vec stack; /* Tarjan stack. */ + + void visit_neighbor (tree neigh_tree, unsigned parent_vxnum); +}; + +scc_discovery::scc_discovery () +{ + /* Create vertex struct for each SSA name. 
*/ + vertices = XNEWVEC (struct vertex, num_ssa_names); + unsigned i = 0; + for (i = 0; i < num_ssa_names; i++) + vertices[i].active = false; +} + +scc_discovery::~scc_discovery () +{ + XDELETEVEC (vertices); +} + +/* Part of 'scc_discovery::compute_sccs ()'. */ + +void +scc_discovery::visit_neighbor (tree neigh_tree, unsigned parent_version) +{ + if (TREE_CODE (neigh_tree) != SSA_NAME) + return; /* Skip any neighbor that isn't an SSA name. */ + unsigned neigh_version = SSA_NAME_VERSION (neigh_tree); + + /* Skip neighbors outside the subgraph that Tarjan currently works + with. */ + if (!vertices[neigh_version].active) + return; + + vstate neigh_state = vertices[neigh_version].state; + vstate parent_state = vertices[parent_version].state; + if (parent_state == vopen) /* We're currently opening parent. */ + { + /* Put unvisited neighbors on worklist. Update lowlink of parent + vertex according to indices of neighbors present on stack. */ + switch (neigh_state) + { + case unvisited: + worklist.safe_push (neigh_version); + break; + case vopen: + case closed: + vertices[parent_version].lowlink + = std::min (vertices[parent_version].lowlink, + vertices[neigh_version].index); + break; + case in_scc: + /* Ignore these edges. */ + break; + } + } + else if (parent_state == closed) /* We're currently closing parent. */ + { + /* Update lowlink of parent vertex according to lowlinks of + children of parent (in terms of DFS tree). */ + if (neigh_state == closed) + { + vertices[parent_version].lowlink + = std::min (vertices[parent_version].lowlink, + vertices[neigh_version].lowlink); + } + } +} + +/* Compute SCCs in dataflow graph on given statements 'stmts'. Ignore + statements outside 'stmts'. Return the SCCs in a reverse topological + order. + + stmt_may_generate_copy () must be true for all statements from 'stmts'! 
*/ + +auto_vec> +scc_discovery::compute_sccs (vec &stmts) +{ + auto_vec> sccs; + + for (gimple *stmt : stmts) + { + unsigned i; + switch (gimple_code (stmt)) + { + case GIMPLE_ASSIGN: + i = SSA_NAME_VERSION (gimple_assign_lhs (stmt)); + break; + case GIMPLE_PHI: + i = SSA_NAME_VERSION (gimple_phi_result (stmt)); + break; + default: + gcc_unreachable (); + } + + vertices[i].index = 0; + vertices[i].lowlink = 0; + vertices[i].state = unvisited; + vertices[i].active = true; /* Mark the subgraph we'll be working on so + that we don't leave it. */ + + worklist.safe_push (i); + } + + /* Worklist loop. */ + unsigned curr_index = 0; + while (!worklist.is_empty ()) + { + unsigned i = worklist.pop (); + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name (i)); + vstate state = vertices[i].state; + + if (state == unvisited) + { + vertices[i].state = vopen; + + /* Assign index to this vertex. */ + vertices[i].index = curr_index; + vertices[i].lowlink = curr_index; + curr_index++; + + /* Put vertex on stack and also on worklist to be closed later. */ + stack.safe_push (i); + worklist.safe_push (i); + } + else if (state == vopen) + vertices[i].state = closed; + + /* Visit neighbors of this vertex. */ + tree op; + gphi *phi; + switch (gimple_code (stmt)) + { + case GIMPLE_PHI: + phi = as_a (stmt); + unsigned j; + for (j = 0; j < gimple_phi_num_args (phi); j++) + { + op = gimple_phi_arg_def (phi, j); + visit_neighbor (op, i); + } + break; + case GIMPLE_ASSIGN: + op = gimple_assign_rhs1 (stmt); + visit_neighbor (op, i); + break; + default: + gcc_unreachable (); + } + + /* If we've just closed a root vertex of an scc, pop scc from stack. */ + if (state == vopen && vertices[i].lowlink == vertices[i].index) + { + vec scc = vNULL; + + unsigned j; + do + { + j = stack.pop (); + scc.safe_push (SSA_NAME_DEF_STMT (ssa_name (j))); + vertices[j].state = in_scc; + } + while (j != i); + + sccs.safe_push (scc); + } + } + + if (!stack.is_empty ()) + gcc_unreachable (); + + /* Clear 'active' flags. 
*/ + for (gimple *stmt : stmts) + { + unsigned i; + switch (gimple_code (stmt)) + { + case GIMPLE_ASSIGN: + i = SSA_NAME_VERSION (gimple_assign_lhs (stmt)); + break; + case GIMPLE_PHI: + i = SSA_NAME_VERSION (gimple_phi_result (stmt)); + break; + default: + gcc_unreachable (); + } + + vertices[i].active = false; + } + + return sccs; +} + +/* Could this statement potentially be a copy statement? + + This pass only considers statements for which this function returns 'true'. + Those are basically PHI functions and assignment statements similar to + + _2 = _1; + or + _2 = 5; */ + +static bool +stmt_may_generate_copy (gimple *stmt) +{ + /* A PHI may generate a copy. */ + if (gimple_code (stmt) == GIMPLE_PHI) + { + gphi *phi = as_a (stmt); + + /* No OCCURS_IN_ABNORMAL_PHI SSA names in lhs nor rhs. */ + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (phi))) + return false; + + unsigned i; + for (i = 0; i < gimple_phi_num_args (phi); i++) + { + tree op = gimple_phi_arg_def (phi, i); + if (TREE_CODE (op) == SSA_NAME + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op)) + return false; + } + + /* If PHI has more than one unique non-SSA arguments, it won't generate a + copy. */ + tree const_op = NULL_TREE; + for (i = 0; i < gimple_phi_num_args (phi); i++) + { + tree op = gimple_phi_arg_def (phi, i); + if (TREE_CODE (op) != SSA_NAME) + { + if (const_op && !operand_equal_p (op, const_op)) + return false; + const_op = op; + } + } + + return true; + } + + /* Or a statement of type _2 = _1; OR _2 = 5; may generate a copy. */ + + if (!gimple_assign_single_p (stmt)) + return false; + + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + + if (TREE_CODE (lhs) != SSA_NAME) + return false; + + /* lhs shouldn't flow through any abnormal edges. */ + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)) + return false; + + if (is_gimple_min_invariant (rhs)) + return true; /* A statement of type _2 = 5;. 
*/ + + if (TREE_CODE (rhs) != SSA_NAME) + return false; + + /* rhs shouldn't flow through any abnormal edges. */ + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs)) + return false; + + /* It is possible that lhs has more alignment or value range information. By + propagating we would lose this information. So in the case that alignment + or value range information differs, we are conservative and do not + propagate. + + FIXME: Propagate alignment and value range info the same way copy-prop + does. */ + if (POINTER_TYPE_P (TREE_TYPE (lhs)) + && POINTER_TYPE_P (TREE_TYPE (rhs)) + && SSA_NAME_PTR_INFO (lhs) != SSA_NAME_PTR_INFO (rhs)) + return false; + if (!POINTER_TYPE_P (TREE_TYPE (lhs)) + && !POINTER_TYPE_P (TREE_TYPE (rhs)) + && SSA_NAME_RANGE_INFO (lhs) != SSA_NAME_RANGE_INFO (rhs)) + return false; + + return true; /* A statement of type _2 = _1;. */ +} + +/* Return all statements in cfun that could generate copies. All statements + for which stmt_may_generate_copy returns 'true'. */ + +static auto_vec +get_all_stmt_may_generate_copy (void) +{ + auto_vec result; + + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *s = gsi_stmt (gsi); + if (stmt_may_generate_copy (s)) + result.safe_push (s); + } + + gphi_iterator pi; + for (pi = gsi_start_phis (bb); !gsi_end_p (pi); gsi_next (&pi)) + { + gimple *s = pi.phi (); + if (stmt_may_generate_copy (s)) + result.safe_push (s); + } + } + + return result; +} + +/* For each statement from given SCC, replace its usages by value + VAL. */ + +static void +replace_scc_by_value (vec scc, tree val) +{ + for (gimple *stmt : scc) + { + tree name = gimple_get_lhs (stmt); + replace_uses_by (name, val); + bitmap_set_bit (dead_stmts, SSA_NAME_VERSION (name)); + } + + if (dump_file) + fprintf (dump_file, "Replacing SCC of size %d\n", scc.length ()); +} + +/* Part of 'sccopy_propagate ()'. 
*/ + +static void +sccopy_visit_op (tree op, hash_set &outer_ops, + hash_set &scc_set, bool &is_inner, + tree &last_outer_op) +{ + bool op_in_scc = false; + + if (TREE_CODE (op) == SSA_NAME) + { + gimple *op_stmt = SSA_NAME_DEF_STMT (op); + if (scc_set.contains (op_stmt)) + op_in_scc = true; + } + + if (!op_in_scc) + { + outer_ops.add (op); + last_outer_op = op; + is_inner = false; + } +} + +/* Main function of this pass. Find and propagate all three types of copy + statements (see pass description above). + + This is an implementation of an algorithm from the paper Simple and + Efficient Construction of Static Single Assignment Form[1]. It is based + on strongly-connected components (SCCs) in dataflow graph. The original + algorithm only considers PHI statements. We extend it to also consider + assignment statements of type _2 = _1;. + + The algorithm is based on this definition of a set of redundant PHIs[1]: + + A non-empty set P of PHI functions is redundant iff the PHI functions just + reference each other or one other value + + It uses this lemma[1]: + + Let P be a redundant set of PHI functions. Then there is a + strongly-connected component S subset of P that is also redundant. + + The algorithm works in this way: + + 1 Find SCCs + 2 For each SCC S in topological order: + 3 Construct set 'inner' of statements that only have other statements + from S on their right hand side + 4 Construct set 'outer' of values that originate outside S and appear on + right hand side of some statement from S + 5 If |outer| = 1, outer only contains a value v. Statements in S only + refer to each other or to v -- they are redundant. Propagate v. + Else, recurse on statements in inner. + + The implementation is non-recursive. + + References: + + [1] Simple and Efficient Construction of Static Single Assignment Form, + Braun, Buchwald, Hack, Leissa, Mallon, Zwinkau, 2013, LNCS vol. 7791, + Section 3.2. 
*/ + +static void +sccopy_propagate () +{ + auto_vec useful_stmts = get_all_stmt_may_generate_copy (); + scc_discovery discovery; + + auto_vec> worklist = discovery.compute_sccs (useful_stmts); + + while (!worklist.is_empty ()) + { + vec scc = worklist.pop (); + + auto_vec inner; + hash_set outer_ops; + tree last_outer_op = NULL_TREE; + + /* Prepare hash set of PHIs in scc to query later. */ + hash_set scc_set; + for (gimple *stmt : scc) + scc_set.add (stmt); + + for (gimple *stmt : scc) + { + bool is_inner = true; + + gphi *phi; + tree op; + + switch (gimple_code (stmt)) + { + case GIMPLE_PHI: + phi = as_a (stmt); + unsigned j; + for (j = 0; j < gimple_phi_num_args (phi); j++) + { + op = gimple_phi_arg_def (phi, j); + sccopy_visit_op (op, outer_ops, scc_set, is_inner, + last_outer_op); + } + break; + case GIMPLE_ASSIGN: + op = gimple_assign_rhs1 (stmt); + sccopy_visit_op (op, outer_ops, scc_set, is_inner, + last_outer_op); + break; + default: + gcc_unreachable (); + } + + if (is_inner) + inner.safe_push (stmt); + } + + if (outer_ops.elements () == 1) + { + /* The only operand in outer_ops. */ + tree outer_op = last_outer_op; + replace_scc_by_value (scc, outer_op); + } + else if (outer_ops.elements () > 1) + { + /* Add inner sccs to worklist. */ + auto_vec> inner_sccs + = discovery.compute_sccs (inner); + for (vec inner_scc : inner_sccs) + worklist.safe_push (inner_scc); + } + else + gcc_unreachable (); + + scc.release (); + } +} + +/* Called when pass execution starts. */ + +static void +init_sccopy (void) +{ + /* For propagated statements. */ + dead_stmts = BITMAP_ALLOC (NULL); +} + +/* Called before pass execution ends. */ + +static void +finalize_sccopy (void) +{ + /* Remove all propagated statements. */ + simple_dce_from_worklist (dead_stmts); + BITMAP_FREE (dead_stmts); + + /* Propagating a constant may create dead eh edges. 
*/ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + gimple_purge_dead_eh_edges (bb); +} + +namespace { + +const pass_data pass_data_sccopy = +{ + GIMPLE_PASS, /* type */ + "sccopy", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ +}; + +class pass_sccopy : public gimple_opt_pass +{ +public: + pass_sccopy (gcc::context *ctxt) + : gimple_opt_pass (pass_data_sccopy, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) { return true; } + virtual unsigned int execute (function *); + opt_pass * clone () final override { return new pass_sccopy (m_ctxt); } +}; // class pass_sccopy + +unsigned +pass_sccopy::execute (function *) +{ + init_sccopy (); + sccopy_propagate (); + finalize_sccopy (); + return 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_sccopy (gcc::context *ctxt) +{ + return new pass_sccopy (ctxt); +} diff --git a/gcc/passes.def b/gcc/passes.def index d3fccdf0a4a1..43b416f98f76 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -101,6 +101,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_if_to_switch); NEXT_PASS (pass_convert_switch); NEXT_PASS (pass_cleanup_eh); + NEXT_PASS (pass_sccopy); NEXT_PASS (pass_profile); NEXT_PASS (pass_local_pure_const); NEXT_PASS (pass_modref); @@ -370,6 +371,7 @@ along with GCC; see the file COPYING3. If not see However, this also causes us to misdiagnose cases that should be real warnings (e.g., testsuite/gcc.dg/pr18501.c). */ NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */); + NEXT_PASS (pass_sccopy); NEXT_PASS (pass_tail_calls); /* Split critical edges before late uninit warning to reduce the number of false positives from it. 
*/ diff --git a/gcc/testsuite/gcc.dg/sccopy-1.c b/gcc/testsuite/gcc.dg/sccopy-1.c new file mode 100644 index 000000000000..1e61a6b320e0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/sccopy-1.c @@ -0,0 +1,78 @@ +/* { dg-do compile } */ +/* { dg-options "-fgimple -fdump-tree-sccopy -O2" } */ +/* { dg-final { scan-tree-dump "Replacing SCC of size 2" "sccopy1" } } */ + +int __GIMPLE (ssa, startwith ("sccopy")) +main () +{ + int a; + int y; + int x; + int _1; + int _2; + int _13; + + __BB(2): + if (x_7(D) == 5) + goto __BB3; + else + goto __BB4; + + __BB(3): + a_10 = x_7(D); + goto __BB5; + + __BB(4): + a_9 = y_8(D); + goto __BB5; + + __BB(5): + a_3 = __PHI (__BB3: a_10, __BB4: a_9); + if (x_7(D) == y_8(D)) + goto __BB6; + else + goto __BB11; + + __BB(6): + a_11 = a_3 + 1; + goto __BB7; + + __BB(7): + a_4 = __PHI (__BB6: a_11, __BB11: a_6); +label1: + if (x_7(D) != y_8(D)) + goto __BB8; + else + goto __BB10; + + __BB(8): + goto __BB9; + + __BB(9): + a_12 = __PHI (__BB8: a_4, __BB10: a_5); + goto __BB10; + + __BB(10,loop_header(1)): + a_5 = __PHI (__BB7: a_4, __BB9: a_12); +label2: + _1 = y_8(D) * 2; + if (x_7(D) == _1) + goto __BB9; + else + goto __BB11; + + __BB(11): + a_6 = __PHI (__BB5: a_3, __BB10: a_5); + _2 = x_7(D) * 3; + if (y_8(D) == _2) + goto __BB7; + else + goto __BB12; + + __BB(12): + _13 = 0; + return _13; + +} + + diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index de2820b3a3c7..52fd57fd4c67 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -399,6 +399,7 @@ extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_sccopy (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ccp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_split_paths (gcc::context *ctxt); extern gimple_opt_pass *make_pass_build_ssa 
(gcc::context *ctxt); From 90c9403f89d3c55512ae83dd20e2023c2e4430f4 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 14 Dec 2023 11:55:49 +0100 Subject: [PATCH 310/311] match.pd: Simplify (t * u) / v -> t * (u / v) [PR112994] The following testcase is optimized just on GENERIC (using strict_overflow_p = false; if (TREE_CODE (arg1) == INTEGER_CST && (tem = extract_muldiv (op0, arg1, code, NULL_TREE, &strict_overflow_p)) != 0) { if (strict_overflow_p) fold_overflow_warning (("assuming signed overflow does not occur " "when simplifying division"), WARN_STRICT_OVERFLOW_MISC); return fold_convert_loc (loc, type, tem); } ) but not on GIMPLE. An earlier version of the patch regressed +FAIL: gcc.dg/Wstrict-overflow-3.c correct warning (test for warnings, line 12) test, we are indeed assuming that signed overflow does not occur when simplifying division in there. This version of the patch (which provides the simplification only for GIMPLE) fixes that. And/or we could add the fold_overflow_warning (("assuming signed overflow does not occur " "when simplifying division"), WARN_STRICT_OVERFLOW_MISC); call into the simplification, but in that case IMHO it should go into the (t * u) / u -> t simplification as well, there we assume the exact same thing (of course, in both cases only in the spots where we don't verify it through ranger that it never overflows). Guarding the whole simplification to GIMPLE only IMHO makes sense because the above mentioned folding does it for GENERIC (and extract_muldiv even handles far more cases, dunno how many from that we should be doing on GIMPLE in match.pd and what could be done elsewhere; e.g. extract_muldiv can handle (x * 16 + y * 32) / 8 -> x * 2 + y * 4 etc.). Dunno about the fold_overflow_warning, I always have doubts about why such a warning is useful to users. 2023-12-14 Jakub Jelinek PR tree-optimization/112994 * match.pd ((t * 2) / 2 -> t): Adjust comment to use u instead of 2. 
Punt without range checks if TYPE_OVERFLOW_SANITIZED. ((t * u) / v -> t * (u / v)): New simplification. * gcc.dg/tree-ssa/pr112994-1.c: New test. --- gcc/match.pd | 22 +++++++++++++++++++--- gcc/testsuite/gcc.dg/tree-ssa/pr112994-1.c | 13 +++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr112994-1.c diff --git a/gcc/match.pd b/gcc/match.pd index e3dcff5c29ca..595482865555 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -930,12 +930,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@0))) (bit_and @0 (negate @1)))) -/* Simplify (t * 2) / 2) -> t. */ (for div (trunc_div ceil_div floor_div round_div exact_div) + /* Simplify (t * u) / u -> t. */ (simplify (div (mult:c @0 @1) @1) (if (ANY_INTEGRAL_TYPE_P (type)) - (if (TYPE_OVERFLOW_UNDEFINED (type)) + (if (TYPE_OVERFLOW_UNDEFINED (type) && !TYPE_OVERFLOW_SANITIZED (type)) @0 #if GIMPLE (with {value_range vr0, vr1;} @@ -945,7 +945,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && range_op_handler (MULT_EXPR).overflow_free_p (vr0, vr1)) @0)) #endif - )))) + ))) +#if GIMPLE + /* Simplify (t * u) / v -> t * (u / v) if u is multiple of v. */ + (simplify + (div (mult @0 INTEGER_CST@1) INTEGER_CST@2) + (if (INTEGRAL_TYPE_P (type) + && wi::multiple_of_p (wi::to_widest (@1), wi::to_widest (@2), SIGNED)) + (if (TYPE_OVERFLOW_UNDEFINED (type) && !TYPE_OVERFLOW_SANITIZED (type)) + (mult @0 (div! @1 @2)) + (with {value_range vr0, vr1;} + (if (get_range_query (cfun)->range_of_expr (vr0, @0) + && get_range_query (cfun)->range_of_expr (vr1, @1) + && range_op_handler (MULT_EXPR).overflow_free_p (vr0, vr1)) + (mult @0 (div! 
@1 @2)))) + ))) +#endif +) #if GIMPLE (for div (trunc_div exact_div) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr112994-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr112994-1.c new file mode 100644 index 000000000000..e48d54960730 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr112994-1.c @@ -0,0 +1,13 @@ +/* PR tree-optimization/112994 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-not " / \\\[2389-\\\]" "optimized" } } */ + +int f1 (int x) { return (x * 4) / 2; } +int f2 (int x) { return (x * 56) / 8; } +int f3 (int x) { return (x * 56) / -8; } +int f4 (int x) { int y = x * 4; return y / 2; } +int f5 (int x) { int y = x * 56; return y / 8; } +int f6 (int x) { int y = x * 56; return y / -8; } +unsigned f7 (unsigned x) { if (x > ~0U / 6) __builtin_unreachable (); unsigned y = x * 6; return y / 3; } +unsigned f8 (unsigned x) { if (x > ~0U / 63) __builtin_unreachable (); unsigned y = x * 63; return y / 9; } From 2c92551405bc8616f456e5cbc696ab0292c7ff00 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 14 Dec 2023 12:06:59 +0100 Subject: [PATCH 311/311] match.pd: Simplify (t * u) / (t * v) [PR112994] On top of the previously posted patch, this simplifies say (x * 16) / (x * 4) into 4. Unlike the previous pattern, this is something we didn't fold previously on GENERIC, so I think it shouldn't be all wrapped with #if GIMPLE. The question whether there should be fold_overflow_warning for the TYPE_OVERFLOW_UNDEFINED case remains. 2023-12-14 Jakub Jelinek PR tree-optimization/112994 * match.pd ((t * u) / (t * v) -> (u / v)): New simplification. * gcc.dg/tree-ssa/pr112994-2.c: New test. 
--- gcc/match.pd | 18 +++++++++++++++++- gcc/testsuite/gcc.dg/tree-ssa/pr112994-2.c | 15 +++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr112994-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 595482865555..562880aee4c2 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -961,7 +961,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (mult @0 (div! @1 @2)))) ))) #endif -) + /* Simplify (t * u) / (t * v) -> (u / v) if u is multiple of v. */ + (simplify + (div (mult @0 INTEGER_CST@1) (mult @0 INTEGER_CST@2)) + (if (INTEGRAL_TYPE_P (type) + && wi::multiple_of_p (wi::to_widest (@1), wi::to_widest (@2), SIGNED)) + (if (TYPE_OVERFLOW_UNDEFINED (type) && !TYPE_OVERFLOW_SANITIZED (type)) + (div @1 @2) +#if GIMPLE + (with {value_range vr0, vr1, vr2;} + (if (get_range_query (cfun)->range_of_expr (vr0, @0) + && get_range_query (cfun)->range_of_expr (vr1, @1) + && get_range_query (cfun)->range_of_expr (vr2, @2) + && range_op_handler (MULT_EXPR).overflow_free_p (vr0, vr1) + && range_op_handler (MULT_EXPR).overflow_free_p (vr0, vr2)) + (div @1 @2))) +#endif + )))) #if GIMPLE (for div (trunc_div exact_div) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr112994-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr112994-2.c new file mode 100644 index 000000000000..f78a72f7850b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr112994-2.c @@ -0,0 +1,15 @@ +/* PR tree-optimization/112994 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "return 2;" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "return 7;" 3 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "return -7;" 2 "optimized" } } */ + +int f1 (int x) { return (x * 4) / (x * 2); } +int f2 (int x) { return (x * 56) / (x * 8); } +int f3 (int x) { return (x * 56) / (x * -8); } +int f4 (int x) { int y = x * 4; return y / (x * 2); } +int f5 (int x) { int y = x * 56; return y / (x * 8); } +int f6 (int x) { 
int y = x * 56; return y / (x * -8); } +unsigned f7 (unsigned x) { if (x > ~0U / 4) __builtin_unreachable (); unsigned y = x * 4; return y / (x * 2); } +unsigned f8 (unsigned x) { if (x > ~0U / 56) __builtin_unreachable (); unsigned y = x * 56; return y / (x * 8); }