mirror of git://gcc.gnu.org/git/gcc.git
re PR libstdc++/63775 ([C++11] Regex range with leading dash (-) not working)
PR libstdc++/63775 * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term, _BracketMatcher<>::_M_make_range): Throw regex_error on invalid range like [z-a]. Change _M_expression_term interface. * include/bits/regex_compiler.tcc ( _Compiler<>::_M_insert_bracket_matcher, _Compiler<>::_M_expression_term): Rewrite bracket expression parsing. * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc: Add testcases and move file out of extended. From-SVN: r217461
This commit is contained in:
parent
0a134b2aa3
commit
79b576cc38
|
|
@ -1,3 +1,15 @@
|
||||||
|
2014-11-13 Tim Shen <timshen@google.com>
|
||||||
|
|
||||||
|
PR libstdc++/63775
|
||||||
|
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
|
||||||
|
_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
|
||||||
|
like [z-a]. Change _M_expression_term interface.
|
||||||
|
* include/bits/regex_compiler.tcc (
|
||||||
|
_Compiler<>::_M_insert_bracket_matcher,
|
||||||
|
_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
|
||||||
|
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
|
||||||
|
Add testcases and move file out of extended.
|
||||||
|
|
||||||
2014-11-12 Jonathan Wakely <jwakely@redhat.com>
|
2014-11-12 Jonathan Wakely <jwakely@redhat.com>
|
||||||
|
|
||||||
PR libstdc++/57250
|
PR libstdc++/57250
|
||||||
|
|
|
||||||
|
|
@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
|
|
||||||
template<bool __icase, bool __collate>
|
template<bool __icase, bool __collate>
|
||||||
void
|
void
|
||||||
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
|
_M_expression_term(pair<bool, _CharT>& __last_char,
|
||||||
|
_BracketMatcher<_TraitsT, __icase, __collate>&
|
||||||
__matcher);
|
__matcher);
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
void
|
void
|
||||||
_M_make_range(_CharT __l, _CharT __r)
|
_M_make_range(_CharT __l, _CharT __r)
|
||||||
{
|
{
|
||||||
|
if (__l > __r)
|
||||||
|
__throw_regex_error(regex_constants::error_range);
|
||||||
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
|
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
|
||||||
_M_translator._M_transform(__r)));
|
_M_translator._M_transform(__r)));
|
||||||
#ifdef _GLIBCXX_DEBUG
|
#ifdef _GLIBCXX_DEBUG
|
||||||
|
|
|
||||||
|
|
@ -415,8 +415,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
_M_insert_bracket_matcher(bool __neg)
|
_M_insert_bracket_matcher(bool __neg)
|
||||||
{
|
{
|
||||||
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
|
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
|
||||||
|
pair<bool, _CharT> __last_char; // Optional<_CharT>
|
||||||
|
__last_char.first = false;
|
||||||
|
if (!(_M_flags & regex_constants::ECMAScript))
|
||||||
|
if (_M_try_char())
|
||||||
|
{
|
||||||
|
__matcher._M_add_char(_M_value[0]);
|
||||||
|
__last_char.first = true;
|
||||||
|
__last_char.second = _M_value[0];
|
||||||
|
}
|
||||||
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
|
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||||
_M_expression_term(__matcher);
|
_M_expression_term(__last_char, __matcher);
|
||||||
__matcher._M_ready();
|
__matcher._M_ready();
|
||||||
_M_stack.push(_StateSeqT(
|
_M_stack.push(_StateSeqT(
|
||||||
*_M_nfa,
|
*_M_nfa,
|
||||||
|
|
@ -427,7 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
template<bool __icase, bool __collate>
|
template<bool __icase, bool __collate>
|
||||||
void
|
void
|
||||||
_Compiler<_TraitsT>::
|
_Compiler<_TraitsT>::
|
||||||
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
|
_M_expression_term(pair<bool, _CharT>& __last_char,
|
||||||
|
_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
|
||||||
{
|
{
|
||||||
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
||||||
__matcher._M_add_collating_element(_M_value);
|
__matcher._M_add_collating_element(_M_value);
|
||||||
|
|
@ -435,27 +445,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||||
__matcher._M_add_equivalence_class(_M_value);
|
__matcher._M_add_equivalence_class(_M_value);
|
||||||
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
||||||
__matcher._M_add_character_class(_M_value, false);
|
__matcher._M_add_character_class(_M_value, false);
|
||||||
else if (_M_try_char()) // [a
|
// POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
|
||||||
|
// except when the '-' is the first character in the bracket expression
|
||||||
|
// ([--0]). ECMAScript treats all '-' after a range as a normal character.
|
||||||
|
// Also see above, where _M_expression_term gets called.
|
||||||
|
//
|
||||||
|
// As a result, POSIX rejects [-----], but ECMAScript doesn't.
|
||||||
|
// Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
|
||||||
|
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
|
||||||
|
//
|
||||||
|
// It turns out that no one reads BNFs ;)
|
||||||
|
else if (_M_try_char())
|
||||||
{
|
{
|
||||||
auto __ch = _M_value[0];
|
if (!__last_char.first)
|
||||||
if (_M_try_char())
|
|
||||||
{
|
{
|
||||||
if (_M_value[0] == '-') // [a-
|
if (_M_value[0] == '-'
|
||||||
{
|
&& !(_M_flags & regex_constants::ECMAScript))
|
||||||
if (_M_try_char()) // [a-z]
|
__throw_regex_error(regex_constants::error_range);
|
||||||
{
|
|
||||||
__matcher._M_make_range(__ch, _M_value[0]);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// If the dash is the last character in the bracket
|
|
||||||
// expression, it is not special.
|
|
||||||
if (_M_scanner._M_get_token()
|
|
||||||
!= _ScannerT::_S_token_bracket_end)
|
|
||||||
__throw_regex_error(regex_constants::error_range);
|
|
||||||
}
|
|
||||||
__matcher._M_add_char(_M_value[0]);
|
__matcher._M_add_char(_M_value[0]);
|
||||||
|
__last_char.first = true;
|
||||||
|
__last_char.second = _M_value[0];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (_M_value[0] == '-')
|
||||||
|
{
|
||||||
|
if (_M_try_char())
|
||||||
|
{
|
||||||
|
__matcher._M_make_range(__last_char.second , _M_value[0]);
|
||||||
|
__last_char.first = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (_M_scanner._M_get_token()
|
||||||
|
!= _ScannerT::_S_token_bracket_end)
|
||||||
|
__throw_regex_error(regex_constants::error_range);
|
||||||
|
__matcher._M_add_char(_M_value[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
__matcher._M_add_char(_M_value[0]);
|
||||||
|
__last_char.second = _M_value[0];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
__matcher._M_add_char(__ch);
|
|
||||||
}
|
}
|
||||||
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
|
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
|
||||||
__matcher._M_add_character_class(_M_value,
|
__matcher._M_add_character_class(_M_value,
|
||||||
|
|
|
||||||
|
|
@ -67,9 +67,60 @@ test01()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test02()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::regex re("[-----]", std::regex::extended);
|
||||||
|
VERIFY(false);
|
||||||
|
}
|
||||||
|
catch (const std::regex_error& e)
|
||||||
|
{
|
||||||
|
VERIFY(e.code() == std::regex_constants::error_range);
|
||||||
|
}
|
||||||
|
std::regex re("[-----]", std::regex::ECMAScript);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test03()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::regex re("[z-a]", std::regex::extended);
|
||||||
|
VERIFY(false);
|
||||||
|
}
|
||||||
|
catch (const std::regex_error& e)
|
||||||
|
{
|
||||||
|
VERIFY(e.code() == std::regex_constants::error_range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
test04()
|
||||||
|
{
|
||||||
|
bool test __attribute__((unused)) = true;
|
||||||
|
|
||||||
|
std::regex re("[-0-9a-z]");
|
||||||
|
VERIFY(regex_match_debug("-", re));
|
||||||
|
VERIFY(regex_match_debug("1", re));
|
||||||
|
VERIFY(regex_match_debug("w", re));
|
||||||
|
re.assign("[-0-9a-z]", regex_constants::basic);
|
||||||
|
VERIFY(regex_match_debug("-", re));
|
||||||
|
VERIFY(regex_match_debug("1", re));
|
||||||
|
VERIFY(regex_match_debug("w", re));
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
main()
|
main()
|
||||||
{
|
{
|
||||||
test01();
|
test01();
|
||||||
|
test02();
|
||||||
|
test03();
|
||||||
|
test04();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue