mirror of git://gcc.gnu.org/git/gcc.git
re PR libstdc++/77356 (regex error for an ECMAScript syntax string)
PR libstdc++/77356 * include/bits/regex_compiler.tcc(_M_insert_bracket_matcher, _M_expression_term): Modify to support dash literal. * include/bits/regex_scanner.h: Add dash as a token type to make it different from the dash literal mandated by escaping. * include/bits/regex_scanner.tcc(_M_scan_in_bracket): Emit dash token in bracket expression parsing. * testsuite/28_regex/regression.cc: Add new testcases. From-SVN: r239794
This commit is contained in:
parent
d8921e81e9
commit
4aebb4e4a6
|
|
@ -1,3 +1,14 @@
|
||||||
|
2016-08-27 Tim Shen <timshen@google.com>
|
||||||
|
|
||||||
|
PR libstdc++/77356
|
||||||
|
* include/bits/regex_compiler.tcc(_M_insert_bracket_matcher,
|
||||||
|
_M_expression_term): Modify to support dash literal.
|
||||||
|
* include/bits/regex_scanner.h: Add dash as a token type to make
|
||||||
|
it different from the dash literal mandated by escaping.
|
||||||
|
* include/bits/regex_scanner.tcc(_M_scan_in_bracket): Emit dash
|
||||||
|
token in bracket expression parsing.
|
||||||
|
* testsuite/28_regex/regression.cc: Add new testcases.
|
||||||
|
|
||||||
2016-08-26 Jonathan Wakely <jwakely@redhat.com>
|
2016-08-26 Jonathan Wakely <jwakely@redhat.com>
|
||||||
|
|
||||||
PR libstdc++/51960
|
PR libstdc++/51960
|
||||||
|
|
|
||||||
|
|
@ -426,13 +426,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
pair<bool, _CharT> __last_char; // Optional<_CharT>
|
pair<bool, _CharT> __last_char; // Optional<_CharT>
|
||||||
__last_char.first = false;
|
__last_char.first = false;
|
||||||
if (!(_M_flags & regex_constants::ECMAScript))
|
if (!(_M_flags & regex_constants::ECMAScript))
|
||||||
if (_M_try_char())
|
{
|
||||||
{
|
if (_M_try_char())
|
||||||
__matcher._M_add_char(_M_value[0]);
|
{
|
||||||
__last_char.first = true;
|
__last_char.first = true;
|
||||||
__last_char.second = _M_value[0];
|
__last_char.second = _M_value[0];
|
||||||
}
|
}
|
||||||
|
else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
|
||||||
|
{
|
||||||
|
__last_char.first = true;
|
||||||
|
__last_char.second = '-';
|
||||||
|
}
|
||||||
|
}
|
||||||
while (_M_expression_term(__last_char, __matcher));
|
while (_M_expression_term(__last_char, __matcher));
|
||||||
|
if (__last_char.first)
|
||||||
|
__matcher._M_add_char(__last_char.second);
|
||||||
__matcher._M_ready();
|
__matcher._M_ready();
|
||||||
_M_stack.push(_StateSeqT(
|
_M_stack.push(_StateSeqT(
|
||||||
*_M_nfa,
|
*_M_nfa,
|
||||||
|
|
@ -449,19 +457,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
const auto __push_char = [&](_CharT __ch)
|
||||||
|
{
|
||||||
|
if (__last_char.first)
|
||||||
|
__matcher._M_add_char(__last_char.second);
|
||||||
|
else
|
||||||
|
__last_char.first = true;
|
||||||
|
__last_char.second = __ch;
|
||||||
|
};
|
||||||
|
const auto __flush = [&]
|
||||||
|
{
|
||||||
|
if (__last_char.first)
|
||||||
|
{
|
||||||
|
__matcher._M_add_char(__last_char.second);
|
||||||
|
__last_char.first = false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
||||||
{
|
{
|
||||||
auto __symbol = __matcher._M_add_collate_element(_M_value);
|
auto __symbol = __matcher._M_add_collate_element(_M_value);
|
||||||
if (__symbol.size() == 1)
|
if (__symbol.size() == 1)
|
||||||
{
|
__push_char(__symbol[0]);
|
||||||
__last_char.first = true;
|
else
|
||||||
__last_char.second = __symbol[0];
|
__flush();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
|
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
|
||||||
__matcher._M_add_equivalence_class(_M_value);
|
{
|
||||||
|
__flush();
|
||||||
|
__matcher._M_add_equivalence_class(_M_value);
|
||||||
|
}
|
||||||
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
||||||
__matcher._M_add_character_class(_M_value, false);
|
{
|
||||||
|
__flush();
|
||||||
|
__matcher._M_add_character_class(_M_value, false);
|
||||||
|
}
|
||||||
|
else if (_M_try_char())
|
||||||
|
__push_char(_M_value[0]);
|
||||||
// POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
|
// POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
|
||||||
// except when the '-' is the first or last character in the bracket
|
// except when the '-' is the first or last character in the bracket
|
||||||
// expression ([--0]). ECMAScript treats all '-' after a range as a
|
// expression ([--0]). ECMAScript treats all '-' after a range as a
|
||||||
|
|
@ -472,55 +504,55 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
|
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
|
||||||
//
|
//
|
||||||
// It turns out that no one reads BNFs ;)
|
// It turns out that no one reads BNFs ;)
|
||||||
else if (_M_try_char())
|
else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
|
||||||
{
|
{
|
||||||
if (!__last_char.first)
|
if (!__last_char.first)
|
||||||
{
|
{
|
||||||
__matcher._M_add_char(_M_value[0]);
|
if (!(_M_flags & regex_constants::ECMAScript))
|
||||||
if (_M_value[0] == '-'
|
|
||||||
&& !(_M_flags & regex_constants::ECMAScript))
|
|
||||||
{
|
{
|
||||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||||
return false;
|
{
|
||||||
|
__push_char('-');
|
||||||
|
return false;
|
||||||
|
}
|
||||||
__throw_regex_error(
|
__throw_regex_error(
|
||||||
regex_constants::error_range,
|
regex_constants::error_range,
|
||||||
"Unexpected dash in bracket expression. For POSIX syntax, "
|
"Unexpected dash in bracket expression. For POSIX syntax, "
|
||||||
"a dash is not treated literally only when it is at "
|
"a dash is not treated literally only when it is at "
|
||||||
"beginning or end.");
|
"beginning or end.");
|
||||||
}
|
}
|
||||||
__last_char.first = true;
|
__push_char('-');
|
||||||
__last_char.second = _M_value[0];
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (_M_value[0] == '-')
|
if (_M_try_char())
|
||||||
{
|
{
|
||||||
if (_M_try_char())
|
__matcher._M_make_range(__last_char.second, _M_value[0]);
|
||||||
{
|
__last_char.first = false;
|
||||||
__matcher._M_make_range(__last_char.second , _M_value[0]);
|
}
|
||||||
__last_char.first = false;
|
else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
|
||||||
}
|
{
|
||||||
else
|
__matcher._M_make_range(__last_char.second, '-');
|
||||||
{
|
__last_char.first = false;
|
||||||
if (_M_scanner._M_get_token()
|
|
||||||
!= _ScannerT::_S_token_bracket_end)
|
|
||||||
__throw_regex_error(
|
|
||||||
regex_constants::error_range,
|
|
||||||
"Unexpected end of bracket expression.");
|
|
||||||
__matcher._M_add_char(_M_value[0]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
__matcher._M_add_char(_M_value[0]);
|
if (_M_scanner._M_get_token()
|
||||||
__last_char.second = _M_value[0];
|
!= _ScannerT::_S_token_bracket_end)
|
||||||
|
__throw_regex_error(
|
||||||
|
regex_constants::error_range,
|
||||||
|
"Character is expected after a dash.");
|
||||||
|
__push_char('-');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
|
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
|
||||||
__matcher._M_add_character_class(_M_value,
|
{
|
||||||
_M_ctype.is(_CtypeT::upper,
|
__flush();
|
||||||
_M_value[0]));
|
__matcher._M_add_character_class(_M_value,
|
||||||
|
_M_ctype.is(_CtypeT::upper,
|
||||||
|
_M_value[0]));
|
||||||
|
}
|
||||||
else
|
else
|
||||||
__throw_regex_error(regex_constants::error_brack,
|
__throw_regex_error(regex_constants::error_brack,
|
||||||
"Unexpected character in bracket expression.");
|
"Unexpected character in bracket expression.");
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/// Token types returned from the scanner.
|
/// Token types returned from the scanner.
|
||||||
enum _TokenT
|
enum _TokenT : unsigned
|
||||||
{
|
{
|
||||||
_S_token_anychar,
|
_S_token_anychar,
|
||||||
_S_token_ord_char,
|
_S_token_ord_char,
|
||||||
|
|
@ -73,7 +73,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_S_token_comma,
|
_S_token_comma,
|
||||||
_S_token_dup_count,
|
_S_token_dup_count,
|
||||||
_S_token_eof,
|
_S_token_eof,
|
||||||
_S_token_unknown
|
_S_token_bracket_dash,
|
||||||
|
_S_token_unknown = -1u
|
||||||
};
|
};
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
||||||
|
|
@ -210,7 +210,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
|
|
||||||
auto __c = *_M_current++;
|
auto __c = *_M_current++;
|
||||||
|
|
||||||
if (__c == '[')
|
if (__c == '-')
|
||||||
|
_M_token = _S_token_bracket_dash;
|
||||||
|
else if (__c == '[')
|
||||||
{
|
{
|
||||||
if (_M_current == _M_end)
|
if (_M_current == _M_end)
|
||||||
__throw_regex_error(regex_constants::error_brack,
|
__throw_regex_error(regex_constants::error_brack,
|
||||||
|
|
|
||||||
|
|
@ -61,12 +61,35 @@ test03()
|
||||||
VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow));
|
VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PR libstdc++/77356
|
||||||
|
void
|
||||||
|
test04()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
static const char* kNumericAnchor ="(\\$|usd)(usd|\\$|to|and|up to|[0-9,\\.\\-\\sk])+";
|
||||||
|
const std::regex re(kNumericAnchor);
|
||||||
|
(void)re;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test05()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
VERIFY(regex_match_debug("!", std::regex("[![:alnum:]]")));
|
||||||
|
VERIFY(regex_match_debug("-", std::regex("[a-]", regex_constants::basic)));
|
||||||
|
VERIFY(regex_match_debug("-", std::regex("[a-]")));
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main()
|
main()
|
||||||
{
|
{
|
||||||
test01();
|
test01();
|
||||||
test02();
|
test02();
|
||||||
test03();
|
test03();
|
||||||
|
test04();
|
||||||
|
test05();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue