re PR preprocessor/53690 ([C++11] \u0000 and \U00000000 are wrongly encoded as U+0001.)

/libcpp
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
	return type to bool.  Fix encoding of \u0000 and \U00000000 in C++.
	(convert_ucn): Adjust call.
	* lex.c (forms_identifier_p): Likewise.
	* internal.h (_cpp_valid_ucn): Adjust declaration.

/gcc/testsuite
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* g++.dg/cpp/pr53690.C: New.

From-SVN: r225353
This commit is contained in:
Paolo Carlini 2015-07-02 18:54:41 +00:00
parent a05d02b293
commit fbb22910cf
6 changed files with 43 additions and 20 deletions

View File

@ -1,3 +1,8 @@
2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/53690
* g++.dg/cpp/pr53690.C: New.
2015-07-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-cmp.c: New test.

View File

@ -0,0 +1,7 @@
// PR c++/53690
// { dg-do compile { target c++11 } }
int array1[U'\U00000000' == 0 ? 1 : -1];
int array2[U'\u0000' == 0 ? 1 : -1];
int array3[u'\U00000000' == 0 ? 1 : -1];
int array4[u'\u0000' == 0 ? 1 : -1];

View File

@ -1,3 +1,12 @@
2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/53690
* charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
return type to bool. Fix encoding of \u0000 and \U00000000 in C++.
(convert_ucn): Adjust call.
* lex.c (forms_identifier_p): Likewise.
* internal.h (_cpp_valid_ucn): Adjust declaration.
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals

View File

@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
or 0060 (`), nor one in the range D800 through DFFF inclusive.
*PSTR must be preceded by "\u" or "\U"; it is assumed that the
-buffer end is delimited by a non-hex digit. Returns zero if the
-UCN has not been consumed.
-Otherwise the nonzero value of the UCN, whether valid or invalid,
-is returned. Diagnostics are emitted for invalid values. PSTR
-is updated to point one beyond the UCN, or to the syntactically
-invalid character.
+buffer end is delimited by a non-hex digit. Returns false if the
+UCN has not been consumed, true otherwise.
+The value of the UCN, whether valid or invalid, is returned in *CP.
+Diagnostics are emitted for invalid values. PSTR is updated to point
+one beyond the UCN, or to the syntactically invalid character.
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
an identifier, or 2 otherwise. */
-cppchar_t
+bool
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
const uchar *limit, int identifier_pos,
-struct normalize_state *nst)
+struct normalize_state *nst, cppchar_t *cp)
{
cppchar_t result, c;
unsigned int length;
@ -1030,7 +1029,10 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
multiple tokens in identifiers, so we can't give a helpful
error message in that case. */
if (length && identifier_pos)
-return 0;
+{
+*cp = 0;
+return false;
+}
*pstr = str; *pstr = str;
if (length) if (length)
@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
(int) (str - base), base);
}
-if (result == 0)
-result = 1;
-return result;
+*cp = result;
+return true;
}
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate /* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
from++; /* Skip u/U. */
-ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);
+_cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft); rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
if (rval) if (rval)

View File

@ -744,9 +744,10 @@ struct normalize_state
#define NORMALIZE_STATE_UPDATE_IDNUM(st, c) \
((st)->previous = (c), (st)->prev_class = 0)
-extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
+extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
const unsigned char *, int,
-struct normalize_state *state);
+struct normalize_state *state,
+cppchar_t *);
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,

View File

@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first,
&& *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
+cppchar_t s;
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
-state))
+state, &s))
return true;
buffer->cur -= 2;
}