mirror of git://gcc.gnu.org/git/gcc.git
On-demand locations within string-literals
gcc/c-family/ChangeLog: * c-common.c: Include "substring-locations.h". (get_cpp_ttype_from_string_type): New function. (g_string_concat_db): New global. (substring_loc::get_range): New method. * c-common.h (g_string_concat_db): New declaration. (class substring_loc): New class. * c-lex.c (lex_string): When concatenating strings, capture the locations of all tokens using a new obstack, and record the concatenation locations within g_string_concat_db. * c-opts.c (c_common_init_options): Construct g_string_concat_db on the ggc-heap. gcc/ChangeLog: * input.c (string_concat::string_concat): New constructor. (string_concat_db::string_concat_db): New constructor. (string_concat_db::record_string_concatenation): New method. (string_concat_db::get_string_concatenation): New method. (string_concat_db::get_key_loc): New method. (class auto_cpp_string_vec): New class. (get_substring_ranges_for_loc): New function. (get_source_range_for_substring): New function. (get_num_source_ranges_for_substring): New function. (class selftest::lexer_test_options): New class. (struct selftest::lexer_test): New struct. (class selftest::ebcdic_execution_charset): New class. (selftest::ebcdic_execution_charset::s_singleton): New variable. (selftest::lexer_test::lexer_test): New constructor. (selftest::lexer_test::~lexer_test): New destructor. (selftest::lexer_test::get_token): New method. (selftest::assert_char_at_range): New function. (ASSERT_CHAR_AT_RANGE): New macro. (selftest::assert_num_substring_ranges): New function. (ASSERT_NUM_SUBSTRING_RANGES): New macro. (selftest::assert_has_no_substring_ranges): New function. (ASSERT_HAS_NO_SUBSTRING_RANGES): New macro. (selftest::test_lexer_string_locations_simple): New function. (selftest::test_lexer_string_locations_ebcdic): New function. (selftest::test_lexer_string_locations_hex): New function. (selftest::test_lexer_string_locations_oct): New function. (selftest::test_lexer_string_locations_letter_escape_1): New function. 
(selftest::test_lexer_string_locations_letter_escape_2): New function. (selftest::test_lexer_string_locations_ucn4): New function. (selftest::test_lexer_string_locations_ucn8): New function. (selftest::uint32_from_big_endian): New function. (selftest::test_lexer_string_locations_wide_string): New function. (selftest::uint16_from_big_endian): New function. (selftest::test_lexer_string_locations_string16): New function. (selftest::test_lexer_string_locations_string32): New function. (selftest::test_lexer_string_locations_u8): New function. (selftest::test_lexer_string_locations_utf8_source): New function. (selftest::test_lexer_string_locations_concatenation_1): New function. (selftest::test_lexer_string_locations_concatenation_2): New function. (selftest::test_lexer_string_locations_concatenation_3): New function. (selftest::test_lexer_string_locations_macro): New function. (selftest::test_lexer_string_locations_stringified_macro_argument): New function. (selftest::test_lexer_string_locations_non_string): New function. (selftest::test_lexer_string_locations_long_line): New function. (selftest::test_lexer_char_constants): New function. (selftest::input_c_tests): Call the new test functions once per case within the line_table test matrix. * input.h (struct string_concat): New struct. (struct location_hash): New struct. (class string_concat_db): New class. * substring-locations.h: New header. gcc/testsuite/ChangeLog: * gcc.dg/plugin/diagnostic-test-string-literals-1.c: New file. * gcc.dg/plugin/diagnostic-test-string-literals-2.c: New file. * gcc.dg/plugin/diagnostic_plugin_test_string_literals.c: New file. * gcc.dg/plugin/plugin.exp (plugin_test_list): Add the above new files. libcpp/ChangeLog: * charset.c (cpp_substring_ranges::cpp_substring_ranges): New constructor. (cpp_substring_ranges::~cpp_substring_ranges): New destructor. (cpp_substring_ranges::add_range): New method. (cpp_substring_ranges::add_n_ranges): New method. 
(_cpp_valid_ucn): Add "char_range" and "loc_reader" params; if they are non-NULL, read position information from *loc_reader and update char_range->m_finish accordingly. (convert_ucn): Add "char_range", "loc_reader", and "ranges" params. If loc_reader is non-NULL, read location information from it, and update *ranges accordingly, using char_range. Conditionalize the conversion into tbuf on tbuf being non-NULL. (convert_hex): Likewise, conditionalizing the call to emit_numeric_escape on tbuf. (convert_oct): Likewise. (convert_escape): Add params "loc_reader" and "ranges". If loc_reader is non-NULL, read location information from it, and update *ranges accordingly. Conditionalize the conversion into tbuf on tbuf being non-NULL. (cpp_interpret_string): Rename to... (cpp_interpret_string_1): ...this, adding params "loc_readers" and "out". Use "to" to conditionalize the initialization and usage of "tbuf", such as running the converter. If "loc_readers" is non-NULL, use the instances within it, reading location information from them, and passing them to convert_escape; likewise write to "out" if loc_readers is non-NULL. Check for leading quote and issue an error if it is not present. Update boundary check from "== limit" to ">= limit" to protect against erroneous location values to calls that are not parsing string literals. (cpp_interpret_string): Reimplement in terms of cpp_interpret_string_1. (noop_error_cb): New function. (cpp_interpret_string_ranges): New function. (cpp_string_location_reader::cpp_string_location_reader): New constructor. (cpp_string_location_reader::get_next): New method. * include/cpplib.h (class cpp_string_location_reader): New class. (class cpp_substring_ranges): New class. (cpp_interpret_string_ranges): New prototype. * internal.h (_cpp_valid_ucn): Add params "char_range" and "loc_reader". * lex.c (forms_identifier_p): Pass NULL for new params to _cpp_valid_ucn. From-SVN: r239175
This commit is contained in:
parent
1addb9e62b
commit
88fa5555a3
|
|
@ -1,3 +1,61 @@
|
||||||
|
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||||
|
|
||||||
|
* input.c (string_concat::string_concat): New constructor.
|
||||||
|
(string_concat_db::string_concat_db): New constructor.
|
||||||
|
(string_concat_db::record_string_concatenation): New method.
|
||||||
|
(string_concat_db::get_string_concatenation): New method.
|
||||||
|
(string_concat_db::get_key_loc): New method.
|
||||||
|
(class auto_cpp_string_vec): New class.
|
||||||
|
(get_substring_ranges_for_loc): New function.
|
||||||
|
(get_source_range_for_substring): New function.
|
||||||
|
(get_num_source_ranges_for_substring): New function.
|
||||||
|
(class selftest::lexer_test_options): New class.
|
||||||
|
(struct selftest::lexer_test): New struct.
|
||||||
|
(class selftest::ebcdic_execution_charset): New class.
|
||||||
|
(selftest::ebcdic_execution_charset::s_singleton): New variable.
|
||||||
|
(selftest::lexer_test::lexer_test): New constructor.
|
||||||
|
(selftest::lexer_test::~lexer_test): New destructor.
|
||||||
|
(selftest::lexer_test::get_token): New method.
|
||||||
|
(selftest::assert_char_at_range): New function.
|
||||||
|
(ASSERT_CHAR_AT_RANGE): New macro.
|
||||||
|
(selftest::assert_num_substring_ranges): New function.
|
||||||
|
(ASSERT_NUM_SUBSTRING_RANGES): New macro.
|
||||||
|
(selftest::assert_has_no_substring_ranges): New function.
|
||||||
|
(ASSERT_HAS_NO_SUBSTRING_RANGES): New macro.
|
||||||
|
(selftest::test_lexer_string_locations_simple): New function.
|
||||||
|
(selftest::test_lexer_string_locations_ebcdic): New function.
|
||||||
|
(selftest::test_lexer_string_locations_hex): New function.
|
||||||
|
(selftest::test_lexer_string_locations_oct): New function.
|
||||||
|
(selftest::test_lexer_string_locations_letter_escape_1): New function.
|
||||||
|
(selftest::test_lexer_string_locations_letter_escape_2): New function.
|
||||||
|
(selftest::test_lexer_string_locations_ucn4): New function.
|
||||||
|
(selftest::test_lexer_string_locations_ucn8): New function.
|
||||||
|
(selftest::uint32_from_big_endian): New function.
|
||||||
|
(selftest::test_lexer_string_locations_wide_string): New function.
|
||||||
|
(selftest::uint16_from_big_endian): New function.
|
||||||
|
(selftest::test_lexer_string_locations_string16): New function.
|
||||||
|
(selftest::test_lexer_string_locations_string32): New function.
|
||||||
|
(selftest::test_lexer_string_locations_u8): New function.
|
||||||
|
(selftest::test_lexer_string_locations_utf8_source): New function.
|
||||||
|
(selftest::test_lexer_string_locations_concatenation_1): New
|
||||||
|
function.
|
||||||
|
(selftest::test_lexer_string_locations_concatenation_2): New
|
||||||
|
function.
|
||||||
|
(selftest::test_lexer_string_locations_concatenation_3): New
|
||||||
|
function.
|
||||||
|
(selftest::test_lexer_string_locations_macro): New function.
|
||||||
|
(selftest::test_lexer_string_locations_stringified_macro_argument):
|
||||||
|
New function.
|
||||||
|
(selftest::test_lexer_string_locations_non_string): New function.
|
||||||
|
(selftest::test_lexer_string_locations_long_line): New function.
|
||||||
|
(selftest::test_lexer_char_constants): New function.
|
||||||
|
(selftest::input_c_tests): Call the new test functions once per
|
||||||
|
case within the line_table test matrix.
|
||||||
|
* input.h (struct string_concat): New struct.
|
||||||
|
(struct location_hash): New struct.
|
||||||
|
(class string_concat_db): New class.
|
||||||
|
* substring-locations.h: New header.
|
||||||
|
|
||||||
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
||||||
|
|
||||||
PR tree-optimization/72810
|
PR tree-optimization/72810
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,17 @@
|
||||||
|
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||||
|
|
||||||
|
* c-common.c: Include "substring-locations.h".
|
||||||
|
(get_cpp_ttype_from_string_type): New function.
|
||||||
|
(g_string_concat_db): New global.
|
||||||
|
(substring_loc::get_range): New method.
|
||||||
|
* c-common.h (g_string_concat_db): New declaration.
|
||||||
|
(class substring_loc): New class.
|
||||||
|
* c-lex.c (lex_string): When concatenating strings, capture the
|
||||||
|
locations of all tokens using a new obstack, and record the
|
||||||
|
concatenation locations within g_string_concat_db.
|
||||||
|
* c-opts.c (c_common_init_options): Construct g_string_concat_db
|
||||||
|
on the ggc-heap.
|
||||||
|
|
||||||
2016-07-29 Marek Polacek <polacek@redhat.com>
|
2016-07-29 Marek Polacek <polacek@redhat.com>
|
||||||
|
|
||||||
PR c/71926
|
PR c/71926
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
#include "tree-iterator.h"
|
#include "tree-iterator.h"
|
||||||
#include "opts.h"
|
#include "opts.h"
|
||||||
#include "gimplify.h"
|
#include "gimplify.h"
|
||||||
|
#include "substring-locations.h"
|
||||||
|
|
||||||
cpp_reader *parse_in; /* Declared in c-pragma.h. */
|
cpp_reader *parse_in; /* Declared in c-pragma.h. */
|
||||||
|
|
||||||
|
|
@ -1098,6 +1099,67 @@ fix_string_type (tree value)
|
||||||
TREE_STATIC (value) = 1;
|
TREE_STATIC (value) = 1;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given a string of type STRING_TYPE, determine what kind of string
|
||||||
|
token would give an equivalent execution encoding: CPP_STRING,
|
||||||
|
CPP_STRING16, or CPP_STRING32. Return CPP_OTHER in case of error.
|
||||||
|
This may not be exactly the string token type that initially created
|
||||||
|
the string, since CPP_WSTRING is indistinguishable from the 16/32 bit
|
||||||
|
string type at this point.
|
||||||
|
|
||||||
|
This effectively reverses part of the logic in lex_string and
|
||||||
|
fix_string_type. */
|
||||||
|
|
||||||
|
static enum cpp_ttype
|
||||||
|
get_cpp_ttype_from_string_type (tree string_type)
|
||||||
|
{
|
||||||
|
gcc_assert (string_type);
|
||||||
|
if (TREE_CODE (string_type) != ARRAY_TYPE)
|
||||||
|
return CPP_OTHER;
|
||||||
|
|
||||||
|
tree element_type = TREE_TYPE (string_type);
|
||||||
|
if (TREE_CODE (element_type) != INTEGER_TYPE)
|
||||||
|
return CPP_OTHER;
|
||||||
|
|
||||||
|
int bits_per_character = TYPE_PRECISION (element_type);
|
||||||
|
switch (bits_per_character)
|
||||||
|
{
|
||||||
|
case 8:
|
||||||
|
return CPP_STRING; /* It could have also been CPP_UTF8STRING. */
|
||||||
|
case 16:
|
||||||
|
return CPP_STRING16;
|
||||||
|
case 32:
|
||||||
|
return CPP_STRING32;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CPP_OTHER;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The global record of string concatenations, for use in
|
||||||
|
extracting locations within string literals. */
|
||||||
|
|
||||||
|
GTY(()) string_concat_db *g_string_concat_db;
|
||||||
|
|
||||||
|
/* Attempt to determine the source range of the substring.
|
||||||
|
If successful, return NULL and write the source range to *OUT_RANGE.
|
||||||
|
Otherwise return an error message. Error messages are intended
|
||||||
|
for GCC developers (to help debugging) rather than for end-users. */
|
||||||
|
|
||||||
|
const char *
|
||||||
|
substring_loc::get_range (source_range *out_range) const
|
||||||
|
{
|
||||||
|
gcc_assert (out_range);
|
||||||
|
|
||||||
|
enum cpp_ttype tok_type = get_cpp_ttype_from_string_type (m_string_type);
|
||||||
|
if (tok_type == CPP_OTHER)
|
||||||
|
return "unrecognized string type";
|
||||||
|
|
||||||
|
return get_source_range_for_substring (parse_in, g_string_concat_db,
|
||||||
|
m_fmt_string_loc, tok_type,
|
||||||
|
m_start_idx, m_end_idx,
|
||||||
|
out_range);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Fold X for consideration by one of the warning functions when checking
|
/* Fold X for consideration by one of the warning functions when checking
|
||||||
whether an expression has a constant value. */
|
whether an expression has a constant value. */
|
||||||
|
|
|
||||||
|
|
@ -1110,6 +1110,35 @@ extern time_t cb_get_source_date_epoch (cpp_reader *pfile);
|
||||||
__TIME__ can store. */
|
__TIME__ can store. */
|
||||||
#define MAX_SOURCE_DATE_EPOCH HOST_WIDE_INT_C (253402300799)
|
#define MAX_SOURCE_DATE_EPOCH HOST_WIDE_INT_C (253402300799)
|
||||||
|
|
||||||
|
extern GTY(()) string_concat_db *g_string_concat_db;
|
||||||
|
|
||||||
|
/* libcpp can calculate location information about a range of characters
|
||||||
|
within a string literal, but doing so is non-trivial.
|
||||||
|
|
||||||
|
This class encapsulates such a source location, so that it can be
|
||||||
|
passed around (e.g. within c-format.c). It is effectively a deferred
|
||||||
|
call into libcpp. If needed by a diagnostic, the actual source_range
|
||||||
|
can be calculated by calling the get_range method. */
|
||||||
|
|
||||||
|
class substring_loc
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
substring_loc (location_t fmt_string_loc, tree string_type,
|
||||||
|
int start_idx, int end_idx)
|
||||||
|
: m_fmt_string_loc (fmt_string_loc), m_string_type (string_type),
|
||||||
|
m_start_idx (start_idx), m_end_idx (end_idx) {}
|
||||||
|
|
||||||
|
const char *get_range (source_range *out_range) const;
|
||||||
|
|
||||||
|
location_t get_fmt_string_loc () const { return m_fmt_string_loc; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
location_t m_fmt_string_loc;
|
||||||
|
tree m_string_type;
|
||||||
|
int m_start_idx;
|
||||||
|
int m_end_idx;
|
||||||
|
};
|
||||||
|
|
||||||
/* In c-gimplify.c */
|
/* In c-gimplify.c */
|
||||||
extern void c_genericize (tree);
|
extern void c_genericize (tree);
|
||||||
extern int c_gimplify_expr (tree *, gimple_seq *, gimple_seq *);
|
extern int c_gimplify_expr (tree *, gimple_seq *, gimple_seq *);
|
||||||
|
|
|
||||||
|
|
@ -1097,13 +1097,16 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||||
tree value;
|
tree value;
|
||||||
size_t concats = 0;
|
size_t concats = 0;
|
||||||
struct obstack str_ob;
|
struct obstack str_ob;
|
||||||
|
struct obstack loc_ob;
|
||||||
cpp_string istr;
|
cpp_string istr;
|
||||||
enum cpp_ttype type = tok->type;
|
enum cpp_ttype type = tok->type;
|
||||||
|
|
||||||
/* Try to avoid the overhead of creating and destroying an obstack
|
/* Try to avoid the overhead of creating and destroying an obstack
|
||||||
for the common case of just one string. */
|
for the common case of just one string. */
|
||||||
cpp_string str = tok->val.str;
|
cpp_string str = tok->val.str;
|
||||||
|
location_t init_loc = tok->src_loc;
|
||||||
cpp_string *strs = &str;
|
cpp_string *strs = &str;
|
||||||
|
location_t *locs = NULL;
|
||||||
|
|
||||||
/* objc_at_sign_was_seen is only used when doing Objective-C string
|
/* objc_at_sign_was_seen is only used when doing Objective-C string
|
||||||
concatenation. It is 'true' if we have seen an '@' before the
|
concatenation. It is 'true' if we have seen an '@' before the
|
||||||
|
|
@ -1142,16 +1145,21 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||||
else
|
else
|
||||||
error ("unsupported non-standard concatenation of string literals");
|
error ("unsupported non-standard concatenation of string literals");
|
||||||
}
|
}
|
||||||
|
/* FALLTHROUGH */
|
||||||
|
|
||||||
case CPP_STRING:
|
case CPP_STRING:
|
||||||
if (!concats)
|
if (!concats)
|
||||||
{
|
{
|
||||||
gcc_obstack_init (&str_ob);
|
gcc_obstack_init (&str_ob);
|
||||||
|
gcc_obstack_init (&loc_ob);
|
||||||
obstack_grow (&str_ob, &str, sizeof (cpp_string));
|
obstack_grow (&str_ob, &str, sizeof (cpp_string));
|
||||||
|
obstack_grow (&loc_ob, &init_loc, sizeof (location_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
concats++;
|
concats++;
|
||||||
obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
|
obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
|
||||||
|
obstack_grow (&loc_ob, &tok->src_loc, sizeof (location_t));
|
||||||
|
|
||||||
if (objc_string)
|
if (objc_string)
|
||||||
objc_at_sign_was_seen = false;
|
objc_at_sign_was_seen = false;
|
||||||
goto retry;
|
goto retry;
|
||||||
|
|
@ -1164,7 +1172,10 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||||
/* We have read one more token than we want. */
|
/* We have read one more token than we want. */
|
||||||
_cpp_backup_tokens (parse_in, 1);
|
_cpp_backup_tokens (parse_in, 1);
|
||||||
if (concats)
|
if (concats)
|
||||||
strs = XOBFINISH (&str_ob, cpp_string *);
|
{
|
||||||
|
strs = XOBFINISH (&str_ob, cpp_string *);
|
||||||
|
locs = XOBFINISH (&loc_ob, location_t *);
|
||||||
|
}
|
||||||
|
|
||||||
if (concats && !objc_string && !in_system_header_at (input_location))
|
if (concats && !objc_string && !in_system_header_at (input_location))
|
||||||
warning (OPT_Wtraditional,
|
warning (OPT_Wtraditional,
|
||||||
|
|
@ -1176,6 +1187,12 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||||
{
|
{
|
||||||
value = build_string (istr.len, (const char *) istr.text);
|
value = build_string (istr.len, (const char *) istr.text);
|
||||||
free (CONST_CAST (unsigned char *, istr.text));
|
free (CONST_CAST (unsigned char *, istr.text));
|
||||||
|
if (concats)
|
||||||
|
{
|
||||||
|
gcc_assert (locs);
|
||||||
|
gcc_assert (g_string_concat_db);
|
||||||
|
g_string_concat_db->record_string_concatenation (concats + 1, locs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
@ -1227,7 +1244,10 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||||
*valp = fix_string_type (value);
|
*valp = fix_string_type (value);
|
||||||
|
|
||||||
if (concats)
|
if (concats)
|
||||||
obstack_free (&str_ob, 0);
|
{
|
||||||
|
obstack_free (&str_ob, 0);
|
||||||
|
obstack_free (&loc_ob, 0);
|
||||||
|
}
|
||||||
|
|
||||||
return objc_string ? CPP_OBJC_STRING : type;
|
return objc_string ? CPP_OBJC_STRING : type;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -216,6 +216,9 @@ c_common_init_options (unsigned int decoded_options_count,
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
struct cpp_callbacks *cb;
|
struct cpp_callbacks *cb;
|
||||||
|
|
||||||
|
g_string_concat_db
|
||||||
|
= new (ggc_alloc <string_concat_db> ()) string_concat_db ();
|
||||||
|
|
||||||
parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89,
|
parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89,
|
||||||
ident_hash, line_table);
|
ident_hash, line_table);
|
||||||
cb = cpp_get_callbacks (parse_in);
|
cb = cpp_get_callbacks (parse_in);
|
||||||
|
|
|
||||||
1547
gcc/input.c
1547
gcc/input.c
File diff suppressed because it is too large
Load Diff
35
gcc/input.h
35
gcc/input.h
|
|
@ -95,4 +95,39 @@ void dump_location_info (FILE *stream);
|
||||||
|
|
||||||
void diagnostics_file_cache_fini (void);
|
void diagnostics_file_cache_fini (void);
|
||||||
|
|
||||||
|
struct GTY(()) string_concat
|
||||||
|
{
|
||||||
|
string_concat (int num, location_t *locs);
|
||||||
|
|
||||||
|
int m_num;
|
||||||
|
location_t * GTY ((atomic)) m_locs;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct location_hash : int_hash <location_t, UNKNOWN_LOCATION> { };
|
||||||
|
|
||||||
|
class GTY(()) string_concat_db
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
string_concat_db ();
|
||||||
|
void record_string_concatenation (int num, location_t *locs);
|
||||||
|
|
||||||
|
bool get_string_concatenation (location_t loc,
|
||||||
|
int *out_num,
|
||||||
|
location_t **out_locs);
|
||||||
|
|
||||||
|
private:
|
||||||
|
static location_t get_key_loc (location_t loc);
|
||||||
|
|
||||||
|
/* For the fields to be private, we must grant access to the
|
||||||
|
generated code in gtype-desc.c. */
|
||||||
|
|
||||||
|
friend void ::gt_ggc_mx_string_concat_db (void *x_p);
|
||||||
|
friend void ::gt_pch_nx_string_concat_db (void *x_p);
|
||||||
|
friend void ::gt_pch_p_16string_concat_db (void *this_obj, void *x_p,
|
||||||
|
gt_pointer_operator op,
|
||||||
|
void *cookie);
|
||||||
|
|
||||||
|
hash_map <location_hash, string_concat *> *m_table;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
/* Source locations within string literals.
|
||||||
|
Copyright (C) 2016 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of GCC.
|
||||||
|
|
||||||
|
GCC is free software; you can redistribute it and/or modify it under
|
||||||
|
the terms of the GNU General Public License as published by the Free
|
||||||
|
Software Foundation; either version 3, or (at your option) any later
|
||||||
|
version.
|
||||||
|
|
||||||
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with GCC; see the file COPYING3. If not see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#ifndef GCC_SUBSTRING_LOCATIONS_H
|
||||||
|
#define GCC_SUBSTRING_LOCATIONS_H
|
||||||
|
|
||||||
|
extern const char *get_source_range_for_substring (cpp_reader *pfile,
|
||||||
|
string_concat_db *concats,
|
||||||
|
location_t strloc,
|
||||||
|
enum cpp_ttype type,
|
||||||
|
int start_idx, int end_idx,
|
||||||
|
source_range *out_range);
|
||||||
|
|
||||||
|
#endif /* ! GCC_SUBSTRING_LOCATIONS_H */
|
||||||
|
|
@ -1,3 +1,10 @@
|
||||||
|
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||||
|
|
||||||
|
* gcc.dg/plugin/diagnostic-test-string-literals-1.c: New file.
|
||||||
|
* gcc.dg/plugin/diagnostic-test-string-literals-2.c: New file.
|
||||||
|
* gcc.dg/plugin/diagnostic_plugin_test_string_literals.c: New file.
|
||||||
|
* gcc.dg/plugin/plugin.exp (plugin_test_list): Add the above new files.
|
||||||
|
|
||||||
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
||||||
|
|
||||||
PR tree-optimization/72810
|
PR tree-optimization/72810
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,211 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O -fdiagnostics-show-caret" } */
|
||||||
|
|
||||||
|
/* This is a collection of unittests for ranges within string literals,
|
||||||
|
using diagnostic_plugin_test_string_literals, which handles
|
||||||
|
"__emit_string_literal_range" by generating a warning at the given
|
||||||
|
subset of a string literal.
|
||||||
|
|
||||||
|
The indices are 0-based. It's easiest to verify things using string
|
||||||
|
literals that are runs of 0-based digits (to avoid having to count
|
||||||
|
characters).
|
||||||
|
|
||||||
|
LITERAL is a const void * to allow testing the various kinds of wide
|
||||||
|
string literal, rather than just const char *. */
|
||||||
|
|
||||||
|
extern void __emit_string_literal_range (const void *literal,
|
||||||
|
int start_idx, int end_idx);
|
||||||
|
|
||||||
|
void
|
||||||
|
test_simple_string_literal (void)
|
||||||
|
{
|
||||||
|
__emit_string_literal_range ("0123456789", /* { dg-warning "range" } */
|
||||||
|
6, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("0123456789",
|
||||||
|
^~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_concatenated_string_literal (void)
|
||||||
|
{
|
||||||
|
__emit_string_literal_range ("01234" "56789", /* { dg-warning "range" } */
|
||||||
|
3, 6);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234" "56789",
|
||||||
|
^~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_multiline_string_literal (void)
|
||||||
|
{
|
||||||
|
__emit_string_literal_range ("01234" /* { dg-warning "range" } */
|
||||||
|
"56789",
|
||||||
|
3, 6);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234"
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
"56789",
|
||||||
|
~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
/* FIXME: why does the above need two trailing spaces? */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tests of various unicode encodings.
|
||||||
|
|
||||||
|
Digits 0 through 9 are unicode code points:
|
||||||
|
U+0030 DIGIT ZERO
|
||||||
|
...
|
||||||
|
U+0039 DIGIT NINE
|
||||||
|
However, these are not always valid as UCN (see the comment in
|
||||||
|
libcpp/charset.c:_cpp_valid_ucn).
|
||||||
|
|
||||||
|
Hence we need to test UCN using an alternative unicode
|
||||||
|
representation of numbers; let's use Roman numerals,
|
||||||
|
(though these start at one, not zero):
|
||||||
|
U+2170 SMALL ROMAN NUMERAL ONE
|
||||||
|
...
|
||||||
|
U+2174 SMALL ROMAN NUMERAL FIVE ("v")
|
||||||
|
U+2175 SMALL ROMAN NUMERAL SIX ("vi")
|
||||||
|
...
|
||||||
|
U+2178 SMALL ROMAN NUMERAL NINE. */
|
||||||
|
|
||||||
|
void
|
||||||
|
test_hex (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9, expressing digit 5 in ASCII as "\x35"
|
||||||
|
and with a space in place of digit 6, to terminate the escaped
|
||||||
|
hex code. */
|
||||||
|
__emit_string_literal_range ("01234\x35 789", /* { dg-warning "range" } */
|
||||||
|
3, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234\x35 789"
|
||||||
|
^~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_oct (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9, expressing digit 5 in ASCII as "\065"
|
||||||
|
and with a space in place of digit 6, to terminate the escaped
|
||||||
|
octal code. */
|
||||||
|
__emit_string_literal_range ("01234\065 789", /* { dg-warning "range" } */
|
||||||
|
3, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234\065 789"
|
||||||
|
^~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_multiple (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
|
||||||
|
digit 6 in ASCII as octal "\066", concatenating multiple strings. */
|
||||||
|
__emit_string_literal_range ("01234" "\x35" "\066" "789", /* { dg-warning "range" } */
|
||||||
|
3, 8);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234" "\x35" "\066" "789",
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_ucn4 (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
|
||||||
|
as UCN 4.
|
||||||
|
The resulting string is encoded as UTF-8. Most of the digits are 1 byte
|
||||||
|
each, but digits 5 and 6 are encoded with 3 bytes each.
|
||||||
|
Hence to underline digits 4-7 we need to underline using bytes 4-11 in
|
||||||
|
the UTF-8 encoding. */
|
||||||
|
__emit_string_literal_range ("01234\u2174\u2175789", /* { dg-warning "range" } */
|
||||||
|
4, 11);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234\u2174\u2175789",
|
||||||
|
^~~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_ucn8 (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
|
||||||
|
The resulting string is the same as in test_ucn4 above, and hence
|
||||||
|
has the same UTF-8 encoding, and so we again need to underline bytes
|
||||||
|
4-11 in the UTF-8 encoding in order to underline digits 4-7. */
|
||||||
|
__emit_string_literal_range ("01234\U00002174\U00002175789", /* { dg-warning "range" } */
|
||||||
|
4, 11);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range ("01234\U00002174\U00002175789",
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_u8 (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9. */
|
||||||
|
__emit_string_literal_range (u8"0123456789", /* { dg-warning "range" } */
|
||||||
|
4, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range (u8"0123456789",
|
||||||
|
^~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_u (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9. */
|
||||||
|
__emit_string_literal_range (u"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||||
|
4, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range (u"0123456789",
|
||||||
|
^~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_U (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9. */
|
||||||
|
__emit_string_literal_range (U"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||||
|
4, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range (U"0123456789",
|
||||||
|
^~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_L (void)
|
||||||
|
{
|
||||||
|
/* Digits 0-9. */
|
||||||
|
__emit_string_literal_range (L"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||||
|
4, 7);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
__emit_string_literal_range (L"0123456789",
|
||||||
|
^~~~~~~~~~~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test_macro (void)
|
||||||
|
{
|
||||||
|
#define START "01234" /* { dg-warning "range" } */
|
||||||
|
__emit_string_literal_range (START
|
||||||
|
"56789",
|
||||||
|
3, 6);
|
||||||
|
/* { dg-begin-multiline-output "" }
|
||||||
|
#define START "01234"
|
||||||
|
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
__emit_string_literal_range (START
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
"56789",
|
||||||
|
~~~
|
||||||
|
{ dg-end-multiline-output "" } */
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
|
||||||
|
/* See the notes in diagnostic-test-string-literals-1.c.
|
||||||
|
This test case has caret-printing disabled. */
|
||||||
|
|
||||||
|
extern void __emit_string_literal_range (const void *literal,
|
||||||
|
int start_idx, int end_idx);
|
||||||
|
/* Test of a stringified macro argument, by itself. */
|
||||||
|
|
||||||
|
void
|
||||||
|
test_stringified_token_1 (int x)
|
||||||
|
{
|
||||||
|
#define STRINGIFY(EXPR) #EXPR
|
||||||
|
|
||||||
|
__emit_string_literal_range (STRINGIFY(x > 0), /* { dg-error "unable to read substring range: macro expansion" } */
|
||||||
|
0, 4);
|
||||||
|
|
||||||
|
#undef STRINGIFY
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test of a stringified token within a concatenation. */
|
||||||
|
|
||||||
|
void
|
||||||
|
test_stringized_token_2 (int x)
|
||||||
|
{
|
||||||
|
#define EXAMPLE(EXPR, START_IDX, END_IDX) \
|
||||||
|
do { \
|
||||||
|
__emit_string_literal_range (" before " #EXPR " after \n", \
|
||||||
|
START_IDX, END_IDX); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
EXAMPLE(x > 0, 1, 6);
|
||||||
|
/* { dg-error "unable to read substring range: cpp_interpret_string_1 failed" "" { target *-*-* } 28 } */
|
||||||
|
|
||||||
|
#undef EXAMPLE
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test of a doubly-stringified macro argument (by itself). */
|
||||||
|
|
||||||
|
void
|
||||||
|
test_stringified_token_3 (int x)
|
||||||
|
{
|
||||||
|
#define XSTR(s) STR(s)
|
||||||
|
#define STR(s) #s
|
||||||
|
#define FOO 123456789
|
||||||
|
__emit_string_literal_range (XSTR (FOO), /* { dg-error "unable to read substring range: macro expansion" } */
|
||||||
|
2, 3);
|
||||||
|
|
||||||
|
#undef XSTR
|
||||||
|
#undef STR
|
||||||
|
#undef FOO
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,212 @@
|
||||||
|
/* This plugin uses the diagnostics code to verify tracking of source code
|
||||||
|
locations within string literals. */
|
||||||
|
/* { dg-options "-O" } */
|
||||||
|
|
||||||
|
#include "gcc-plugin.h"
|
||||||
|
#include "config.h"
|
||||||
|
#include "system.h"
|
||||||
|
#include "coretypes.h"
|
||||||
|
#include "tm.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#include "stringpool.h"
|
||||||
|
#include "toplev.h"
|
||||||
|
#include "basic-block.h"
|
||||||
|
#include "hash-table.h"
|
||||||
|
#include "vec.h"
|
||||||
|
#include "ggc.h"
|
||||||
|
#include "basic-block.h"
|
||||||
|
#include "tree-ssa-alias.h"
|
||||||
|
#include "internal-fn.h"
|
||||||
|
#include "gimple-fold.h"
|
||||||
|
#include "tree-eh.h"
|
||||||
|
#include "gimple-expr.h"
|
||||||
|
#include "is-a.h"
|
||||||
|
#include "gimple.h"
|
||||||
|
#include "gimple-iterator.h"
|
||||||
|
#include "tree.h"
|
||||||
|
#include "tree-pass.h"
|
||||||
|
#include "intl.h"
|
||||||
|
#include "plugin-version.h"
|
||||||
|
#include "c-family/c-common.h"
|
||||||
|
#include "diagnostic.h"
|
||||||
|
#include "context.h"
|
||||||
|
#include "print-tree.h"
|
||||||
|
#include "cpplib.h"
|
||||||
|
#include "c-family/c-pragma.h"
|
||||||
|
|
||||||
|
int plugin_is_GPL_compatible;
|
||||||
|
|
||||||
|
/* A custom pass for printing string literal location information. */
|
||||||
|
|
||||||
|
const pass_data pass_data_test_string_literals =
|
||||||
|
{
|
||||||
|
GIMPLE_PASS, /* type */
|
||||||
|
"test_string_literals", /* name */
|
||||||
|
OPTGROUP_NONE, /* optinfo_flags */
|
||||||
|
TV_NONE, /* tv_id */
|
||||||
|
PROP_ssa, /* properties_required */
|
||||||
|
0, /* properties_provided */
|
||||||
|
0, /* properties_destroyed */
|
||||||
|
0, /* todo_flags_start */
|
||||||
|
0, /* todo_flags_finish */
|
||||||
|
};
|
||||||
|
|
||||||
|
class pass_test_string_literals : public gimple_opt_pass
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
pass_test_string_literals(gcc::context *ctxt)
|
||||||
|
: gimple_opt_pass(pass_data_test_string_literals, ctxt)
|
||||||
|
{}
|
||||||
|
|
||||||
|
/* opt_pass methods: */
|
||||||
|
bool gate (function *) { return true; }
|
||||||
|
virtual unsigned int execute (function *);
|
||||||
|
|
||||||
|
}; // class pass_test_string_literals
|
||||||
|
|
||||||
|
/* Determine if STMT is a call with NUM_ARGS arguments to a function
|
||||||
|
named FUNCNAME.
|
||||||
|
If so, return STMT as a gcall *. Otherwise return NULL. */
|
||||||
|
|
||||||
|
static gcall *
|
||||||
|
check_for_named_call (gimple *stmt,
|
||||||
|
const char *funcname, unsigned int num_args)
|
||||||
|
{
|
||||||
|
gcc_assert (funcname);
|
||||||
|
|
||||||
|
gcall *call = dyn_cast <gcall *> (stmt);
|
||||||
|
if (!call)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
tree fndecl = gimple_call_fndecl (call);
|
||||||
|
if (!fndecl)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (gimple_call_num_args (call) != num_args)
|
||||||
|
{
|
||||||
|
error_at (stmt->location, "expected number of args: %i (got %i)",
|
||||||
|
num_args, gimple_call_num_args (call));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return call;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Emit a warning covering SRC_RANGE, with the caret at the start of
|
||||||
|
SRC_RANGE. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_warning (source_range src_range)
|
||||||
|
{
|
||||||
|
location_t loc
|
||||||
|
= make_location (src_range.m_start, src_range.m_start, src_range.m_finish);
|
||||||
|
warning_at (loc, 0, "range %i:%i-%i:%i",
|
||||||
|
LOCATION_LINE (src_range.m_start),
|
||||||
|
LOCATION_COLUMN (src_range.m_start),
|
||||||
|
LOCATION_LINE (src_range.m_finish),
|
||||||
|
LOCATION_COLUMN (src_range.m_finish));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Support code for verifying that we are correctly tracking ranges
|
||||||
|
within string literals, for use by diagnostic-test-string-literals-*.c.
|
||||||
|
Emit a warning showing the range of a string literal, for each call to
|
||||||
|
a function named "__emit_string_literal_range".
|
||||||
|
The initial argument should be a string literal; arguments 2 and 3
|
||||||
|
should be integer constants, giving the range within the string
|
||||||
|
to be printed. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
test_string_literals (gimple *stmt)
|
||||||
|
{
|
||||||
|
gcall *call = check_for_named_call (stmt, "__emit_string_literal_range", 3);
|
||||||
|
if (!call)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* We expect an ADDR_EXPR with a STRING_CST inside it for the
|
||||||
|
initial arg. */
|
||||||
|
tree t_addr_string = gimple_call_arg (call, 0);
|
||||||
|
if (TREE_CODE (t_addr_string) != ADDR_EXPR)
|
||||||
|
{
|
||||||
|
error_at (call->location, "string literal required for arg 1");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tree t_string = TREE_OPERAND (t_addr_string, 0);
|
||||||
|
if (TREE_CODE (t_string) != STRING_CST)
|
||||||
|
{
|
||||||
|
error_at (call->location, "string literal required for arg 1");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tree t_start_idx = gimple_call_arg (call, 1);
|
||||||
|
if (TREE_CODE (t_start_idx) != INTEGER_CST)
|
||||||
|
{
|
||||||
|
error_at (call->location, "integer constant required for arg 2");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int start_idx = TREE_INT_CST_LOW (t_start_idx);
|
||||||
|
|
||||||
|
tree t_end_idx = gimple_call_arg (call, 2);
|
||||||
|
if (TREE_CODE (t_end_idx) != INTEGER_CST)
|
||||||
|
{
|
||||||
|
error_at (call->location, "integer constant required for arg 3");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int end_idx = TREE_INT_CST_LOW (t_end_idx);
|
||||||
|
|
||||||
|
/* A STRING_CST doesn't have a location, but the ADDR_EXPR does. */
|
||||||
|
location_t strloc = EXPR_LOCATION (t_addr_string);
|
||||||
|
source_range src_range;
|
||||||
|
substring_loc substr_loc (strloc, TREE_TYPE (t_string),
|
||||||
|
start_idx, end_idx);
|
||||||
|
const char *err = substr_loc.get_range (&src_range);
|
||||||
|
if (err)
|
||||||
|
error_at (strloc, "unable to read substring range: %s", err);
|
||||||
|
else
|
||||||
|
emit_warning (src_range);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Call test_string_literals on every statement within FUN. */
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
pass_test_string_literals::execute (function *fun)
|
||||||
|
{
|
||||||
|
gimple_stmt_iterator gsi;
|
||||||
|
basic_block bb;
|
||||||
|
|
||||||
|
FOR_EACH_BB_FN (bb, fun)
|
||||||
|
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||||
|
{
|
||||||
|
gimple *stmt = gsi_stmt (gsi);
|
||||||
|
test_string_literals (stmt);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Entrypoint for the plugin. Create and register the custom pass. */
|
||||||
|
|
||||||
|
int
|
||||||
|
plugin_init (struct plugin_name_args *plugin_info,
|
||||||
|
struct plugin_gcc_version *version)
|
||||||
|
{
|
||||||
|
struct register_pass_info pass_info;
|
||||||
|
const char *plugin_name = plugin_info->base_name;
|
||||||
|
int argc = plugin_info->argc;
|
||||||
|
struct plugin_argument *argv = plugin_info->argv;
|
||||||
|
|
||||||
|
if (!plugin_default_version_check (version, &gcc_version))
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
pass_info.pass = new pass_test_string_literals (g);
|
||||||
|
pass_info.reference_pass_name = "ssa";
|
||||||
|
pass_info.ref_pass_instance_number = 1;
|
||||||
|
pass_info.pos_op = PASS_POS_INSERT_AFTER;
|
||||||
|
register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
|
||||||
|
&pass_info);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -70,6 +70,9 @@ set plugin_test_list [list \
|
||||||
diagnostic-test-expressions-1.c } \
|
diagnostic-test-expressions-1.c } \
|
||||||
{ diagnostic_plugin_show_trees.c \
|
{ diagnostic_plugin_show_trees.c \
|
||||||
diagnostic-test-show-trees-1.c } \
|
diagnostic-test-show-trees-1.c } \
|
||||||
|
{ diagnostic_plugin_test_string_literals.c \
|
||||||
|
diagnostic-test-string-literals-1.c \
|
||||||
|
diagnostic-test-string-literals-2.c } \
|
||||||
{ location_overflow_plugin.c \
|
{ location_overflow_plugin.c \
|
||||||
location-overflow-test-1.c \
|
location-overflow-test-1.c \
|
||||||
location-overflow-test-2.c } \
|
location-overflow-test-2.c } \
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,49 @@
|
||||||
|
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||||
|
|
||||||
|
* charset.c (cpp_substring_ranges::cpp_substring_ranges): New
|
||||||
|
constructor.
|
||||||
|
(cpp_substring_ranges::~cpp_substring_ranges): New destructor.
|
||||||
|
(cpp_substring_ranges::add_range): New method.
|
||||||
|
(cpp_substring_ranges::add_n_ranges): New method.
|
||||||
|
(_cpp_valid_ucn): Add "char_range" and "loc_reader" params; if
|
||||||
|
they are non-NULL, read position information from *loc_reader
|
||||||
|
and update char_range->m_finish accordingly.
|
||||||
|
(convert_ucn): Add "char_range", "loc_reader", and "ranges"
|
||||||
|
params. If loc_reader is non-NULL, read location information from
|
||||||
|
it, and update *ranges accordingly, using char_range.
|
||||||
|
Conditionalize the conversion into tbuf on tbuf being non-NULL.
|
||||||
|
(convert_hex): Likewise, conditionalizing the call to
|
||||||
|
emit_numeric_escape on tbuf.
|
||||||
|
(convert_oct): Likewise.
|
||||||
|
(convert_escape): Add params "loc_reader" and "ranges". If
|
||||||
|
loc_reader is non-NULL, read location information from it, and
|
||||||
|
update *ranges accordingly. Conditionalize the conversion into
|
||||||
|
tbuf on tbuf being non-NULL.
|
||||||
|
(cpp_interpret_string): Rename to...
|
||||||
|
(cpp_interpret_string_1): ...this, adding params "loc_readers" and
|
||||||
|
"out". Use "to" to conditionalize the initialization and usage of
|
||||||
|
"tbuf", such as running the converter. If "loc_readers" is
|
||||||
|
non-NULL, use the instances within it, reading location
|
||||||
|
information from them, and passing them to convert_escape; likewise
|
||||||
|
write to "out" if loc_readers is non-NULL. Check for leading
|
||||||
|
quote and issue an error if it is not present. Update boundary
|
||||||
|
check from "== limit" to ">= limit" to protect against erroneous
|
||||||
|
location values to calls that are not parsing string literals.
|
||||||
|
(cpp_interpret_string): Reimplement in terms of
|
||||||
|
cpp_interpret_string_1.
|
||||||
|
(noop_error_cb): New function.
|
||||||
|
(cpp_interpret_string_ranges): New function.
|
||||||
|
(cpp_string_location_reader::cpp_string_location_reader): New
|
||||||
|
constructor.
|
||||||
|
(cpp_string_location_reader::get_next): New method.
|
||||||
|
* include/cpplib.h (class cpp_string_location_reader): New class.
|
||||||
|
(class cpp_substring_ranges): New class.
|
||||||
|
(cpp_interpret_string_ranges): New prototype.
|
||||||
|
* internal.h (_cpp_valid_ucn): Add params "char_range" and
|
||||||
|
"loc_reader".
|
||||||
|
* lex.c (forms_identifier_p): Pass NULL for new params to
|
||||||
|
_cpp_valid_ucn.
|
||||||
|
|
||||||
2016-08-01 Andreas Schwab <schwab@suse.de>
|
2016-08-01 Andreas Schwab <schwab@suse.de>
|
||||||
|
|
||||||
* include/cpplib.h: Fix comment typo.
|
* include/cpplib.h: Fix comment typo.
|
||||||
|
|
|
||||||
432
libcpp/charset.c
432
libcpp/charset.c
|
|
@ -812,6 +812,51 @@ cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* cpp_substring_ranges's constructor. */
|
||||||
|
|
||||||
|
cpp_substring_ranges::cpp_substring_ranges () :
|
||||||
|
m_ranges (NULL),
|
||||||
|
m_num_ranges (0),
|
||||||
|
m_alloc_ranges (8)
|
||||||
|
{
|
||||||
|
m_ranges = XNEWVEC (source_range, m_alloc_ranges);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* cpp_substring_ranges's destructor. */
|
||||||
|
|
||||||
|
cpp_substring_ranges::~cpp_substring_ranges ()
|
||||||
|
{
|
||||||
|
free (m_ranges);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add RANGE to the vector of source_range information. */
|
||||||
|
|
||||||
|
void
|
||||||
|
cpp_substring_ranges::add_range (source_range range)
|
||||||
|
{
|
||||||
|
if (m_num_ranges >= m_alloc_ranges)
|
||||||
|
{
|
||||||
|
m_alloc_ranges *= 2;
|
||||||
|
m_ranges
|
||||||
|
= (source_range *)xrealloc (m_ranges,
|
||||||
|
sizeof (source_range) * m_alloc_ranges);
|
||||||
|
}
|
||||||
|
m_ranges[m_num_ranges++] = range;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read NUM ranges from LOC_READER, adding them to the vector of source_range
|
||||||
|
information. */
|
||||||
|
|
||||||
|
void
|
||||||
|
cpp_substring_ranges::add_n_ranges (int num,
|
||||||
|
cpp_string_location_reader &loc_reader)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < num; i++)
|
||||||
|
add_range (loc_reader.get_next ());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Utility routine that computes a mask of the form 0000...111... with
|
/* Utility routine that computes a mask of the form 0000...111... with
|
||||||
WIDTH 1-bits. */
|
WIDTH 1-bits. */
|
||||||
static inline size_t
|
static inline size_t
|
||||||
|
|
@ -980,18 +1025,27 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
||||||
one beyond the UCN, or to the syntactically invalid character.
|
one beyond the UCN, or to the syntactically invalid character.
|
||||||
|
|
||||||
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
|
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
|
||||||
an identifier, or 2 otherwise. */
|
an identifier, or 2 otherwise.
|
||||||
|
|
||||||
|
If CHAR_RANGE and LOC_READER are non-NULL, then position information is
|
||||||
|
read from *LOC_READER and CHAR_RANGE->m_finish is updated accordingly. */
|
||||||
|
|
||||||
bool
|
bool
|
||||||
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||||
const uchar *limit, int identifier_pos,
|
const uchar *limit, int identifier_pos,
|
||||||
struct normalize_state *nst, cppchar_t *cp)
|
struct normalize_state *nst, cppchar_t *cp,
|
||||||
|
source_range *char_range,
|
||||||
|
cpp_string_location_reader *loc_reader)
|
||||||
{
|
{
|
||||||
cppchar_t result, c;
|
cppchar_t result, c;
|
||||||
unsigned int length;
|
unsigned int length;
|
||||||
const uchar *str = *pstr;
|
const uchar *str = *pstr;
|
||||||
const uchar *base = str - 2;
|
const uchar *base = str - 2;
|
||||||
|
|
||||||
|
/* char_range and loc_reader must either be both NULL, or both be
|
||||||
|
non-NULL. */
|
||||||
|
gcc_assert ((char_range != NULL) == (loc_reader != NULL));
|
||||||
|
|
||||||
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
|
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
|
||||||
cpp_error (pfile, CPP_DL_WARNING,
|
cpp_error (pfile, CPP_DL_WARNING,
|
||||||
"universal character names are only valid in C++ and C99");
|
"universal character names are only valid in C++ and C99");
|
||||||
|
|
@ -1021,6 +1075,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||||
if (!ISXDIGIT (c))
|
if (!ISXDIGIT (c))
|
||||||
break;
|
break;
|
||||||
str++;
|
str++;
|
||||||
|
if (loc_reader)
|
||||||
|
char_range->m_finish = loc_reader->get_next ().m_finish;
|
||||||
result = (result << 4) + hex_value (c);
|
result = (result << 4) + hex_value (c);
|
||||||
}
|
}
|
||||||
while (--length && str < limit);
|
while (--length && str < limit);
|
||||||
|
|
@ -1086,11 +1142,18 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
|
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
|
||||||
it to the execution character set and write the result into TBUF.
|
it to the execution character set and write the result into TBUF,
|
||||||
An advanced pointer is returned. Issues all relevant diagnostics. */
|
if TBUF is non-NULL.
|
||||||
|
An advanced pointer is returned. Issues all relevant diagnostics.
|
||||||
|
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||||
|
contains the location of the character so far: location information
|
||||||
|
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||||
static const uchar *
|
static const uchar *
|
||||||
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||||
|
source_range char_range,
|
||||||
|
cpp_string_location_reader *loc_reader,
|
||||||
|
cpp_substring_ranges *ranges)
|
||||||
{
|
{
|
||||||
cppchar_t ucn;
|
cppchar_t ucn;
|
||||||
uchar buf[6];
|
uchar buf[6];
|
||||||
|
|
@ -1099,8 +1162,17 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
int rval;
|
int rval;
|
||||||
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
||||||
|
|
||||||
|
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||||
|
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||||
|
|
||||||
from++; /* Skip u/U. */
|
from++; /* Skip u/U. */
|
||||||
_cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
|
|
||||||
|
if (loc_reader)
|
||||||
|
/* The u/U is part of the spelling of this character. */
|
||||||
|
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||||
|
|
||||||
|
_cpp_valid_ucn (pfile, &from, limit, 0, &nst,
|
||||||
|
&ucn, &char_range, loc_reader);
|
||||||
|
|
||||||
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
|
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
|
||||||
if (rval)
|
if (rval)
|
||||||
|
|
@ -1109,9 +1181,20 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
cpp_errno (pfile, CPP_DL_ERROR,
|
cpp_errno (pfile, CPP_DL_ERROR,
|
||||||
"converting UCN to source character set");
|
"converting UCN to source character set");
|
||||||
}
|
}
|
||||||
else if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
|
else
|
||||||
cpp_errno (pfile, CPP_DL_ERROR,
|
{
|
||||||
"converting UCN to execution character set");
|
if (tbuf)
|
||||||
|
if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
|
||||||
|
cpp_errno (pfile, CPP_DL_ERROR,
|
||||||
|
"converting UCN to execution character set");
|
||||||
|
|
||||||
|
if (loc_reader)
|
||||||
|
{
|
||||||
|
int num_encoded_bytes = 6 - bytesleft;
|
||||||
|
for (int i = 0; i < num_encoded_bytes; i++)
|
||||||
|
ranges->add_range (char_range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return from;
|
return from;
|
||||||
}
|
}
|
||||||
|
|
@ -1167,31 +1250,48 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert a hexadecimal escape, pointed to by FROM, to the execution
|
/* Convert a hexadecimal escape, pointed to by FROM, to the execution
|
||||||
character set and write it into the string buffer TBUF. Returns an
|
character set and write it into the string buffer TBUF (if non-NULL).
|
||||||
advanced pointer, and issues diagnostics as necessary.
|
Returns an advanced pointer, and issues diagnostics as necessary.
|
||||||
No character set translation occurs; this routine always produces the
|
No character set translation occurs; this routine always produces the
|
||||||
execution-set character with numeric value equal to the given hex
|
execution-set character with numeric value equal to the given hex
|
||||||
number. You can, e.g. generate surrogate pairs this way. */
|
number. You can, e.g. generate surrogate pairs this way.
|
||||||
|
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||||
|
contains the location of the character so far: location information
|
||||||
|
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||||
static const uchar *
|
static const uchar *
|
||||||
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||||
|
source_range char_range,
|
||||||
|
cpp_string_location_reader *loc_reader,
|
||||||
|
cpp_substring_ranges *ranges)
|
||||||
{
|
{
|
||||||
cppchar_t c, n = 0, overflow = 0;
|
cppchar_t c, n = 0, overflow = 0;
|
||||||
int digits_found = 0;
|
int digits_found = 0;
|
||||||
size_t width = cvt.width;
|
size_t width = cvt.width;
|
||||||
size_t mask = width_to_mask (width);
|
size_t mask = width_to_mask (width);
|
||||||
|
|
||||||
|
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||||
|
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||||
|
|
||||||
if (CPP_WTRADITIONAL (pfile))
|
if (CPP_WTRADITIONAL (pfile))
|
||||||
cpp_warning (pfile, CPP_W_TRADITIONAL,
|
cpp_warning (pfile, CPP_W_TRADITIONAL,
|
||||||
"the meaning of '\\x' is different in traditional C");
|
"the meaning of '\\x' is different in traditional C");
|
||||||
|
|
||||||
from++; /* Skip 'x'. */
|
/* Skip 'x'. */
|
||||||
|
from++;
|
||||||
|
|
||||||
|
/* The 'x' is part of the spelling of this character. */
|
||||||
|
if (loc_reader)
|
||||||
|
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||||
|
|
||||||
while (from < limit)
|
while (from < limit)
|
||||||
{
|
{
|
||||||
c = *from;
|
c = *from;
|
||||||
if (! hex_p (c))
|
if (! hex_p (c))
|
||||||
break;
|
break;
|
||||||
from++;
|
from++;
|
||||||
|
if (loc_reader)
|
||||||
|
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||||
overflow |= n ^ (n << 4 >> 4);
|
overflow |= n ^ (n << 4 >> 4);
|
||||||
n = (n << 4) + hex_value (c);
|
n = (n << 4) + hex_value (c);
|
||||||
digits_found = 1;
|
digits_found = 1;
|
||||||
|
|
@ -1211,7 +1311,10 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
n &= mask;
|
n &= mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
if (tbuf)
|
||||||
|
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||||
|
if (ranges)
|
||||||
|
ranges->add_range (char_range);
|
||||||
|
|
||||||
return from;
|
return from;
|
||||||
}
|
}
|
||||||
|
|
@ -1221,10 +1324,16 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
advanced pointer, and issues diagnostics as necessary.
|
advanced pointer, and issues diagnostics as necessary.
|
||||||
No character set translation occurs; this routine always produces the
|
No character set translation occurs; this routine always produces the
|
||||||
execution-set character with numeric value equal to the given octal
|
execution-set character with numeric value equal to the given octal
|
||||||
number. */
|
number.
|
||||||
|
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||||
|
contains the location of the character so far: location information
|
||||||
|
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||||
static const uchar *
|
static const uchar *
|
||||||
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||||
|
source_range char_range,
|
||||||
|
cpp_string_location_reader *loc_reader,
|
||||||
|
cpp_substring_ranges *ranges)
|
||||||
{
|
{
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
cppchar_t c, n = 0;
|
cppchar_t c, n = 0;
|
||||||
|
|
@ -1232,12 +1341,17 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
size_t mask = width_to_mask (width);
|
size_t mask = width_to_mask (width);
|
||||||
bool overflow = false;
|
bool overflow = false;
|
||||||
|
|
||||||
|
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||||
|
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||||
|
|
||||||
while (from < limit && count++ < 3)
|
while (from < limit && count++ < 3)
|
||||||
{
|
{
|
||||||
c = *from;
|
c = *from;
|
||||||
if (c < '0' || c > '7')
|
if (c < '0' || c > '7')
|
||||||
break;
|
break;
|
||||||
from++;
|
from++;
|
||||||
|
if (loc_reader)
|
||||||
|
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||||
overflow |= n ^ (n << 3 >> 3);
|
overflow |= n ^ (n << 3 >> 3);
|
||||||
n = (n << 3) + c - '0';
|
n = (n << 3) + c - '0';
|
||||||
}
|
}
|
||||||
|
|
@ -1249,18 +1363,26 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
n &= mask;
|
n &= mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
if (tbuf)
|
||||||
|
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||||
|
if (ranges)
|
||||||
|
ranges->add_range (char_range);
|
||||||
|
|
||||||
return from;
|
return from;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert an escape sequence (pointed to by FROM) to its value on
|
/* Convert an escape sequence (pointed to by FROM) to its value on
|
||||||
the target, and to the execution character set. Do not scan past
|
the target, and to the execution character set. Do not scan past
|
||||||
LIMIT. Write the converted value into TBUF. Returns an advanced
|
LIMIT. Write the converted value into TBUF, if TBUF is non-NULL.
|
||||||
pointer. Handles all relevant diagnostics. */
|
Returns an advanced pointer. Handles all relevant diagnostics.
|
||||||
|
If LOC_READER is non-NULL, then RANGES must be non-NULL: location
|
||||||
|
information is read from *LOC_READER, and *RANGES is updated
|
||||||
|
accordingly. */
|
||||||
static const uchar *
|
static const uchar *
|
||||||
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||||
|
cpp_string_location_reader *loc_reader,
|
||||||
|
cpp_substring_ranges *ranges)
|
||||||
{
|
{
|
||||||
/* Values of \a \b \e \f \n \r \t \v respectively. */
|
/* Values of \a \b \e \f \n \r \t \v respectively. */
|
||||||
#if HOST_CHARSET == HOST_CHARSET_ASCII
|
#if HOST_CHARSET == HOST_CHARSET_ASCII
|
||||||
|
|
@ -1273,20 +1395,28 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
|
|
||||||
uchar c;
|
uchar c;
|
||||||
|
|
||||||
|
/* Record the location of the backslash. */
|
||||||
|
source_range char_range;
|
||||||
|
if (loc_reader)
|
||||||
|
char_range = loc_reader->get_next ();
|
||||||
|
|
||||||
c = *from;
|
c = *from;
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
/* UCNs, hex escapes, and octal escapes are processed separately. */
|
/* UCNs, hex escapes, and octal escapes are processed separately. */
|
||||||
case 'u': case 'U':
|
case 'u': case 'U':
|
||||||
return convert_ucn (pfile, from, limit, tbuf, cvt);
|
return convert_ucn (pfile, from, limit, tbuf, cvt,
|
||||||
|
char_range, loc_reader, ranges);
|
||||||
|
|
||||||
case 'x':
|
case 'x':
|
||||||
return convert_hex (pfile, from, limit, tbuf, cvt);
|
return convert_hex (pfile, from, limit, tbuf, cvt,
|
||||||
|
char_range, loc_reader, ranges);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '0': case '1': case '2': case '3':
|
case '0': case '1': case '2': case '3':
|
||||||
case '4': case '5': case '6': case '7':
|
case '4': case '5': case '6': case '7':
|
||||||
return convert_oct (pfile, from, limit, tbuf, cvt);
|
return convert_oct (pfile, from, limit, tbuf, cvt,
|
||||||
|
char_range, loc_reader, ranges);
|
||||||
|
|
||||||
/* Various letter escapes. Get the appropriate host-charset
|
/* Various letter escapes. Get the appropriate host-charset
|
||||||
value into C. */
|
value into C. */
|
||||||
|
|
@ -1338,10 +1468,17 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now convert what we have to the execution character set. */
|
if (tbuf)
|
||||||
if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
|
/* Now convert what we have to the execution character set. */
|
||||||
cpp_errno (pfile, CPP_DL_ERROR,
|
if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
|
||||||
"converting escape sequence to execution character set");
|
cpp_errno (pfile, CPP_DL_ERROR,
|
||||||
|
"converting escape sequence to execution character set");
|
||||||
|
|
||||||
|
if (loc_reader)
|
||||||
|
{
|
||||||
|
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||||
|
ranges->add_range (char_range);
|
||||||
|
}
|
||||||
|
|
||||||
return from + 1;
|
return from + 1;
|
||||||
}
|
}
|
||||||
|
|
@ -1374,28 +1511,52 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
|
||||||
are to be converted from the source to the execution character set,
|
are to be converted from the source to the execution character set,
|
||||||
escape sequences translated, and finally all are to be
|
escape sequences translated, and finally all are to be
|
||||||
concatenated. WIDE indicates whether or not to produce a wide
|
concatenated. WIDE indicates whether or not to produce a wide
|
||||||
string. The result is written into TO. Returns true for success,
|
string. If TO is non-NULL, the result is written into TO.
|
||||||
false for failure. */
|
If LOC_READERS and OUT are non-NULL, then location information
|
||||||
bool
|
is read from LOC_READERS (which must be an array of length COUNT),
|
||||||
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
and location information is written to *OUT.
|
||||||
cpp_string *to, enum cpp_ttype type)
|
|
||||||
|
Returns true for success, false for failure. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||||
|
cpp_string *to, enum cpp_ttype type,
|
||||||
|
cpp_string_location_reader *loc_readers,
|
||||||
|
cpp_substring_ranges *out)
|
||||||
{
|
{
|
||||||
struct _cpp_strbuf tbuf;
|
struct _cpp_strbuf tbuf;
|
||||||
const uchar *p, *base, *limit;
|
const uchar *p, *base, *limit;
|
||||||
size_t i;
|
size_t i;
|
||||||
struct cset_converter cvt = converter_for_type (pfile, type);
|
struct cset_converter cvt = converter_for_type (pfile, type);
|
||||||
|
|
||||||
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
|
/* loc_readers and out must either be both NULL, or both be non-NULL. */
|
||||||
tbuf.text = XNEWVEC (uchar, tbuf.asize);
|
gcc_assert ((loc_readers != NULL) == (out != NULL));
|
||||||
tbuf.len = 0;
|
|
||||||
|
if (to)
|
||||||
|
{
|
||||||
|
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
|
||||||
|
tbuf.text = XNEWVEC (uchar, tbuf.asize);
|
||||||
|
tbuf.len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < count; i++)
|
for (i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
|
cpp_string_location_reader *loc_reader = NULL;
|
||||||
|
if (loc_readers)
|
||||||
|
loc_reader = &loc_readers[i];
|
||||||
|
|
||||||
p = from[i].text;
|
p = from[i].text;
|
||||||
if (*p == 'u')
|
if (*p == 'u')
|
||||||
{
|
{
|
||||||
if (*++p == '8')
|
p++;
|
||||||
p++;
|
if (loc_reader)
|
||||||
|
loc_reader->get_next ();
|
||||||
|
if (*p == '8')
|
||||||
|
{
|
||||||
|
p++;
|
||||||
|
if (loc_reader)
|
||||||
|
loc_reader->get_next ();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (*p == 'L' || *p == 'U') p++;
|
else if (*p == 'L' || *p == 'U') p++;
|
||||||
if (*p == 'R')
|
if (*p == 'R')
|
||||||
|
|
@ -1414,13 +1575,43 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||||
|
|
||||||
/* Raw strings are all normal characters; these can be fed
|
/* Raw strings are all normal characters; these can be fed
|
||||||
directly to convert_cset. */
|
directly to convert_cset. */
|
||||||
if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
|
if (to)
|
||||||
goto fail;
|
if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
if (loc_reader)
|
||||||
|
{
|
||||||
|
/* If generating source ranges, assume we have a 1:1
|
||||||
|
correspondence between bytes in the source encoding and bytes
|
||||||
|
in the execution encoding (e.g. if we have a UTF-8 to UTF-8
|
||||||
|
conversion), so that this run of bytes in the source file
|
||||||
|
corresponds to a run of bytes in the execution string.
|
||||||
|
This requirement is guaranteed by an early-reject in
|
||||||
|
cpp_interpret_string_ranges. */
|
||||||
|
gcc_assert (cvt.func == convert_no_conversion);
|
||||||
|
out->add_n_ranges (limit - p, *loc_reader);
|
||||||
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
p++; /* Skip leading quote. */
|
/* If we don't now have a leading quote, something has gone wrong.
|
||||||
|
This can occur if cpp_interpret_string_ranges is handling a
|
||||||
|
stringified macro argument, but should not be possible otherwise. */
|
||||||
|
if (*p != '"' && *p != '\'')
|
||||||
|
{
|
||||||
|
gcc_assert (out != NULL);
|
||||||
|
cpp_error (pfile, CPP_DL_ERROR, "missing open quote");
|
||||||
|
if (to)
|
||||||
|
free (tbuf.text);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip leading quote. */
|
||||||
|
p++;
|
||||||
|
if (loc_reader)
|
||||||
|
loc_reader->get_next ();
|
||||||
|
|
||||||
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
|
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
|
|
@ -1432,29 +1623,130 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||||
{
|
{
|
||||||
/* We have a run of normal characters; these can be fed
|
/* We have a run of normal characters; these can be fed
|
||||||
directly to convert_cset. */
|
directly to convert_cset. */
|
||||||
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
|
if (to)
|
||||||
goto fail;
|
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
|
||||||
|
goto fail;
|
||||||
|
/* Similar to above: assumes we have a 1:1 correspondence
|
||||||
|
between bytes in the source encoding and bytes in the
|
||||||
|
execution encoding. */
|
||||||
|
if (loc_reader)
|
||||||
|
{
|
||||||
|
gcc_assert (cvt.func == convert_no_conversion);
|
||||||
|
out->add_n_ranges (p - base, *loc_reader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (p == limit)
|
if (p >= limit)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
|
struct _cpp_strbuf *tbuf_ptr = to ? &tbuf : NULL;
|
||||||
|
p = convert_escape (pfile, p + 1, limit, tbuf_ptr, cvt,
|
||||||
|
loc_reader, out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
|
|
||||||
structure. */
|
if (to)
|
||||||
emit_numeric_escape (pfile, 0, &tbuf, cvt);
|
{
|
||||||
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
|
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
|
||||||
to->text = tbuf.text;
|
structure. */
|
||||||
to->len = tbuf.len;
|
emit_numeric_escape (pfile, 0, &tbuf, cvt);
|
||||||
|
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
|
||||||
|
to->text = tbuf.text;
|
||||||
|
to->len = tbuf.len;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
|
cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
|
||||||
free (tbuf.text);
|
if (to)
|
||||||
|
free (tbuf.text);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FROM is an array of cpp_string structures of length COUNT. These
|
||||||
|
are to be converted from the source to the execution character set,
|
||||||
|
escape sequences translated, and finally all are to be
|
||||||
|
concatenated. TYPE indicates whether or not to produce a wide
|
||||||
|
string. The result is written into TO. Returns true for success,
|
||||||
|
false for failure. */
|
||||||
|
bool
|
||||||
|
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||||
|
cpp_string *to, enum cpp_ttype type)
|
||||||
|
{
|
||||||
|
return cpp_interpret_string_1 (pfile, from, count, to, type, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A "do nothing" error-handling callback for use by
|
||||||
|
cpp_interpret_string_ranges, so that it can temporarily suppress
|
||||||
|
error-handling. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
noop_error_cb (cpp_reader *, int, int, rich_location *,
|
||||||
|
const char *, va_list *)
|
||||||
|
{
|
||||||
|
/* no-op. */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This function mimics the behavior of cpp_interpret_string, but
|
||||||
|
rather than generating a string in the execution character set,
|
||||||
|
*OUT is written to with the source code ranges of the characters
|
||||||
|
in such a string.
|
||||||
|
FROM and LOC_READERS should both be arrays of length COUNT.
|
||||||
|
Returns NULL for success, or an error message for failure. */
|
||||||
|
|
||||||
|
const char *
|
||||||
|
cpp_interpret_string_ranges (cpp_reader *pfile, const cpp_string *from,
|
||||||
|
cpp_string_location_reader *loc_readers,
|
||||||
|
size_t count,
|
||||||
|
cpp_substring_ranges *out,
|
||||||
|
enum cpp_ttype type)
|
||||||
|
{
|
||||||
|
/* There are a couple of cases in the range-handling in
|
||||||
|
cpp_interpret_string_1 that rely on there being a 1:1 correspondence
|
||||||
|
between bytes in the source encoding and bytes in the execution
|
||||||
|
encoding, so that each byte in the execution string can correspond
|
||||||
|
to the location of a byte in the source string.
|
||||||
|
|
||||||
|
This holds for the typical case of a UTF-8 to UTF-8 conversion.
|
||||||
|
Enforce this requirement by only attempting to track substring
|
||||||
|
locations if we have source encoding == execution encoding.
|
||||||
|
|
||||||
|
This is a stronger condition than we need, since we could e.g.
|
||||||
|
have ASCII to EBCDIC (with 1 byte per character before and after),
|
||||||
|
but it seems to be a reasonable restriction. */
|
||||||
|
struct cset_converter cvt = converter_for_type (pfile, type);
|
||||||
|
if (cvt.func != convert_no_conversion)
|
||||||
|
return "execution character set != source character set";
|
||||||
|
|
||||||
|
/* For on-demand strings we have already lexed the strings, so there
|
||||||
|
should be no errors. However, if we have bogus source location
|
||||||
|
data (or stringified macro arguments), the attempt to lex the
|
||||||
|
strings could fail with an error. Temporarily install an
|
||||||
|
error-handler to catch the error, so that it can lead to this call
|
||||||
|
failing, rather than being emitted as a user-visible diagnostic.
|
||||||
|
If an error does occur, we should see it via the return value of
|
||||||
|
cpp_interpret_string_1. */
|
||||||
|
bool (*saved_error_handler) (cpp_reader *, int, int, rich_location *,
|
||||||
|
const char *, va_list *)
|
||||||
|
ATTRIBUTE_FPTR_PRINTF(5,0);
|
||||||
|
|
||||||
|
saved_error_handler = pfile->cb.error;
|
||||||
|
pfile->cb.error = noop_error_cb;
|
||||||
|
|
||||||
|
bool result = cpp_interpret_string_1 (pfile, from, count, NULL, type,
|
||||||
|
loc_readers, out);
|
||||||
|
|
||||||
|
/* Restore the saved error-handler. */
|
||||||
|
pfile->cb.error = saved_error_handler;
|
||||||
|
|
||||||
|
if (!result)
|
||||||
|
return "cpp_interpret_string_1 failed";
|
||||||
|
|
||||||
|
/* Success. */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* Subroutine of do_line and do_linemarker. Convert escape sequences
|
/* Subroutine of do_line and do_linemarker. Convert escape sequences
|
||||||
in a string, but do not perform character set conversion. */
|
in a string, but do not perform character set conversion. */
|
||||||
bool
|
bool
|
||||||
|
|
@ -1818,3 +2110,39 @@ _cpp_default_encoding (void)
|
||||||
|
|
||||||
return current_encoding;
|
return current_encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Implementation of class cpp_string_location_reader. */
|
||||||
|
|
||||||
|
/* Constructor for cpp_string_location_reader. */
|
||||||
|
|
||||||
|
cpp_string_location_reader::
|
||||||
|
cpp_string_location_reader (source_location src_loc,
|
||||||
|
line_maps *line_table)
|
||||||
|
: m_line_table (line_table)
|
||||||
|
{
|
||||||
|
src_loc = get_range_from_loc (line_table, src_loc).m_start;
|
||||||
|
|
||||||
|
/* SRC_LOC might be a macro location. It only makes sense to do
|
||||||
|
column-by-column calculations on ordinary maps, so get the
|
||||||
|
corresponding location in an ordinary map. */
|
||||||
|
m_loc
|
||||||
|
= linemap_resolve_location (line_table, src_loc,
|
||||||
|
LRK_SPELLING_LOCATION, NULL);
|
||||||
|
|
||||||
|
const line_map_ordinary *map
|
||||||
|
= linemap_check_ordinary (linemap_lookup (line_table, m_loc));
|
||||||
|
m_offset_per_column = (1 << map->m_range_bits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the range of the next source byte. */
|
||||||
|
|
||||||
|
source_range
|
||||||
|
cpp_string_location_reader::get_next ()
|
||||||
|
{
|
||||||
|
source_range result;
|
||||||
|
result.m_start = m_loc;
|
||||||
|
result.m_finish = m_loc;
|
||||||
|
if (m_loc <= LINE_MAP_MAX_LOCATION_WITH_COLS)
|
||||||
|
m_loc += m_offset_per_column;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -743,6 +743,51 @@ struct GTY(()) cpp_hashnode {
|
||||||
union _cpp_hashnode_value GTY ((desc ("CPP_HASHNODE_VALUE_IDX (%1)"))) value;
|
union _cpp_hashnode_value GTY ((desc ("CPP_HASHNODE_VALUE_IDX (%1)"))) value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* A class for iterating through the source locations within a
|
||||||
|
string token (before escapes are interpreted, and before
|
||||||
|
concatenation). */
|
||||||
|
|
||||||
|
class cpp_string_location_reader {
|
||||||
|
public:
|
||||||
|
cpp_string_location_reader (source_location src_loc,
|
||||||
|
line_maps *line_table);
|
||||||
|
|
||||||
|
source_range get_next ();
|
||||||
|
|
||||||
|
private:
|
||||||
|
source_location m_loc;
|
||||||
|
int m_offset_per_column;
|
||||||
|
line_maps *m_line_table;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* A class for storing the source ranges of all of the characters within
|
||||||
|
a string literal, after escapes are interpreted, and after
|
||||||
|
concatenation.
|
||||||
|
|
||||||
|
This is not GTY-marked, as instances are intended to be temporary. */
|
||||||
|
|
||||||
|
class cpp_substring_ranges
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
cpp_substring_ranges ();
|
||||||
|
~cpp_substring_ranges ();
|
||||||
|
|
||||||
|
int get_num_ranges () const { return m_num_ranges; }
|
||||||
|
source_range get_range (int idx) const
|
||||||
|
{
|
||||||
|
linemap_assert (idx < m_num_ranges);
|
||||||
|
return m_ranges[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_range (source_range range);
|
||||||
|
void add_n_ranges (int num, cpp_string_location_reader &loc_reader);
|
||||||
|
|
||||||
|
private:
|
||||||
|
source_range *m_ranges;
|
||||||
|
int m_num_ranges;
|
||||||
|
int m_alloc_ranges;
|
||||||
|
};
|
||||||
|
|
||||||
/* Call this first to get a handle to pass to other functions.
|
/* Call this first to get a handle to pass to other functions.
|
||||||
|
|
||||||
If you want cpplib to manage its own hashtable, pass in a NULL
|
If you want cpplib to manage its own hashtable, pass in a NULL
|
||||||
|
|
@ -829,6 +874,12 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
|
||||||
extern bool cpp_interpret_string (cpp_reader *,
|
extern bool cpp_interpret_string (cpp_reader *,
|
||||||
const cpp_string *, size_t,
|
const cpp_string *, size_t,
|
||||||
cpp_string *, enum cpp_ttype);
|
cpp_string *, enum cpp_ttype);
|
||||||
|
extern const char *cpp_interpret_string_ranges (cpp_reader *pfile,
|
||||||
|
const cpp_string *from,
|
||||||
|
cpp_string_location_reader *,
|
||||||
|
size_t count,
|
||||||
|
cpp_substring_ranges *out,
|
||||||
|
enum cpp_ttype type);
|
||||||
extern bool cpp_interpret_string_notranslate (cpp_reader *,
|
extern bool cpp_interpret_string_notranslate (cpp_reader *,
|
||||||
const cpp_string *, size_t,
|
const cpp_string *, size_t,
|
||||||
cpp_string *, enum cpp_ttype);
|
cpp_string *, enum cpp_ttype);
|
||||||
|
|
|
||||||
|
|
@ -754,7 +754,9 @@ struct normalize_state
|
||||||
extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
|
extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
|
||||||
const unsigned char *, int,
|
const unsigned char *, int,
|
||||||
struct normalize_state *state,
|
struct normalize_state *state,
|
||||||
cppchar_t *);
|
cppchar_t *,
|
||||||
|
source_range *char_range,
|
||||||
|
cpp_string_location_reader *loc_reader);
|
||||||
extern void _cpp_destroy_iconv (cpp_reader *);
|
extern void _cpp_destroy_iconv (cpp_reader *);
|
||||||
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
|
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
|
||||||
unsigned char *, size_t, size_t,
|
unsigned char *, size_t, size_t,
|
||||||
|
|
|
||||||
|
|
@ -1247,7 +1247,7 @@ forms_identifier_p (cpp_reader *pfile, int first,
|
||||||
cppchar_t s;
|
cppchar_t s;
|
||||||
buffer->cur += 2;
|
buffer->cur += 2;
|
||||||
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
|
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
|
||||||
state, &s))
|
state, &s, NULL, NULL))
|
||||||
return true;
|
return true;
|
||||||
buffer->cur -= 2;
|
buffer->cur -= 2;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue