mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			re PR libstdc++/77356 (regex error for an ECMAScript syntax string)
PR libstdc++/77356 * include/bits/regex_compiler.tcc (_M_insert_bracket_matcher, _M_expression_term): Modify to support dash literal. * include/bits/regex_scanner.h: Add dash as a token type to make it different from the mandated dash literal by escaping. * include/bits/regex_scanner.tcc (_M_scan_in_bracket): Emit dash token in bracket expression parsing. * testsuite/28_regex/regression.cc: Add new testcases. From-SVN: r239794
This commit is contained in:
		
							parent
							
								
									d8921e81e9
								
							
						
					
					
						commit
						4aebb4e4a6
					
				|  | @ -1,3 +1,14 @@ | ||||||
|  | 2016-08-27  Tim Shen  <timshen@google.com> | ||||||
|  | 
 | ||||||
|  | 	PR libstdc++/77356 | ||||||
|  | 	* include/bits/regex_compiler.tcc(_M_insert_bracket_matcher, | ||||||
|  | 	_M_expression_term): Modify to support dash literal. | ||||||
|  | 	* include/bits/regex_scanner.h: Add dash as a token type to make | ||||||
|  | 	it different from the mandated dash literal by escaping. | ||||||
|  | 	* include/bits/regex_scanner.tcc(_M_scan_in_bracket): Emit dash | ||||||
|  | 	token in bracket expression parsing. | ||||||
|  | 	* testsuite/28_regex/regression.cc: Add new testcases. | ||||||
|  | 
 | ||||||
| 2016-08-26  Jonathan Wakely  <jwakely@redhat.com> | 2016-08-26  Jonathan Wakely  <jwakely@redhat.com> | ||||||
| 
 | 
 | ||||||
| 	PR libstdc++/51960 | 	PR libstdc++/51960 | ||||||
|  |  | ||||||
|  | @ -426,13 +426,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
|       pair<bool, _CharT> __last_char; // Optional<_CharT> |       pair<bool, _CharT> __last_char; // Optional<_CharT> | ||||||
|       __last_char.first = false; |       __last_char.first = false; | ||||||
|       if (!(_M_flags & regex_constants::ECMAScript)) |       if (!(_M_flags & regex_constants::ECMAScript)) | ||||||
| 	if (_M_try_char()) | 	{ | ||||||
| 	  { | 	  if (_M_try_char()) | ||||||
| 	    __matcher._M_add_char(_M_value[0]); | 	    { | ||||||
| 	    __last_char.first = true; | 	      __last_char.first = true; | ||||||
| 	    __last_char.second = _M_value[0]; | 	      __last_char.second = _M_value[0]; | ||||||
| 	  } | 	    } | ||||||
|  | 	  else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||||
|  | 	    { | ||||||
|  | 	      __last_char.first = true; | ||||||
|  | 	      __last_char.second = '-'; | ||||||
|  | 	    } | ||||||
|  | 	} | ||||||
|       while (_M_expression_term(__last_char, __matcher)); |       while (_M_expression_term(__last_char, __matcher)); | ||||||
|  |       if (__last_char.first) | ||||||
|  | 	__matcher._M_add_char(__last_char.second); | ||||||
|       __matcher._M_ready(); |       __matcher._M_ready(); | ||||||
|       _M_stack.push(_StateSeqT( |       _M_stack.push(_StateSeqT( | ||||||
| 		      *_M_nfa, | 		      *_M_nfa, | ||||||
|  | @ -449,19 +457,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
|       if (_M_match_token(_ScannerT::_S_token_bracket_end)) |       if (_M_match_token(_ScannerT::_S_token_bracket_end)) | ||||||
| 	return false; | 	return false; | ||||||
| 
 | 
 | ||||||
|  |       const auto __push_char = [&](_CharT __ch) | ||||||
|  |       { | ||||||
|  | 	if (__last_char.first) | ||||||
|  | 	  __matcher._M_add_char(__last_char.second); | ||||||
|  | 	else | ||||||
|  | 	  __last_char.first = true; | ||||||
|  | 	__last_char.second = __ch; | ||||||
|  |       }; | ||||||
|  |       const auto __flush = [&] | ||||||
|  |       { | ||||||
|  | 	if (__last_char.first) | ||||||
|  | 	  { | ||||||
|  | 	    __matcher._M_add_char(__last_char.second); | ||||||
|  | 	    __last_char.first = false; | ||||||
|  | 	  } | ||||||
|  |       }; | ||||||
|  | 
 | ||||||
|       if (_M_match_token(_ScannerT::_S_token_collsymbol)) |       if (_M_match_token(_ScannerT::_S_token_collsymbol)) | ||||||
| 	{ | 	{ | ||||||
| 	  auto __symbol = __matcher._M_add_collate_element(_M_value); | 	  auto __symbol = __matcher._M_add_collate_element(_M_value); | ||||||
| 	  if (__symbol.size() == 1) | 	  if (__symbol.size() == 1) | ||||||
| 	    { | 	    __push_char(__symbol[0]); | ||||||
| 	      __last_char.first = true; | 	  else | ||||||
| 	      __last_char.second = __symbol[0]; | 	    __flush(); | ||||||
| 	    } |  | ||||||
| 	} | 	} | ||||||
|       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) |       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) | ||||||
| 	__matcher._M_add_equivalence_class(_M_value); | 	{ | ||||||
|  | 	  __flush(); | ||||||
|  | 	  __matcher._M_add_equivalence_class(_M_value); | ||||||
|  | 	} | ||||||
|       else if (_M_match_token(_ScannerT::_S_token_char_class_name)) |       else if (_M_match_token(_ScannerT::_S_token_char_class_name)) | ||||||
| 	__matcher._M_add_character_class(_M_value, false); | 	{ | ||||||
|  | 	  __flush(); | ||||||
|  | 	  __matcher._M_add_character_class(_M_value, false); | ||||||
|  | 	} | ||||||
|  |       else if (_M_try_char()) | ||||||
|  | 	__push_char(_M_value[0]); | ||||||
|       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), |       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), | ||||||
|       // except when the '-' is the first or last character in the bracket |       // except when the '-' is the first or last character in the bracket | ||||||
|       // expression ([--0]). ECMAScript treats all '-' after a range as a |       // expression ([--0]). ECMAScript treats all '-' after a range as a | ||||||
|  | @ -472,55 +504,55 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
|       // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. |       // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. | ||||||
|       // |       // | ||||||
|       // It turns out that no one reads BNFs ;) |       // It turns out that no one reads BNFs ;) | ||||||
|       else if (_M_try_char()) |       else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||||
| 	{ | 	{ | ||||||
| 	  if (!__last_char.first) | 	  if (!__last_char.first) | ||||||
| 	    { | 	    { | ||||||
| 	      __matcher._M_add_char(_M_value[0]); | 	      if (!(_M_flags & regex_constants::ECMAScript)) | ||||||
| 	      if (_M_value[0] == '-' |  | ||||||
| 		  && !(_M_flags & regex_constants::ECMAScript)) |  | ||||||
| 		{ | 		{ | ||||||
| 		  if (_M_match_token(_ScannerT::_S_token_bracket_end)) | 		  if (_M_match_token(_ScannerT::_S_token_bracket_end)) | ||||||
| 		    return false; | 		    { | ||||||
|  | 		      __push_char('-'); | ||||||
|  | 		      return false; | ||||||
|  | 		    } | ||||||
| 		  __throw_regex_error( | 		  __throw_regex_error( | ||||||
| 		    regex_constants::error_range, | 		    regex_constants::error_range, | ||||||
| 		    "Unexpected dash in bracket expression. For POSIX syntax, " | 		    "Unexpected dash in bracket expression. For POSIX syntax, " | ||||||
| 		    "a dash is not treated literally only when it is at " | 		    "a dash is not treated literally only when it is at " | ||||||
| 		    "beginning or end."); | 		    "beginning or end."); | ||||||
| 		} | 		} | ||||||
| 	      __last_char.first = true; | 	      __push_char('-'); | ||||||
| 	      __last_char.second = _M_value[0]; |  | ||||||
| 	    } | 	    } | ||||||
| 	  else | 	  else | ||||||
| 	    { | 	    { | ||||||
| 	      if (_M_value[0] == '-') | 	      if (_M_try_char()) | ||||||
| 		{ | 		{ | ||||||
| 		  if (_M_try_char()) | 		  __matcher._M_make_range(__last_char.second, _M_value[0]); | ||||||
| 		    { | 		  __last_char.first = false; | ||||||
| 		      __matcher._M_make_range(__last_char.second , _M_value[0]); | 		} | ||||||
| 		      __last_char.first = false; | 	      else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||||
| 		    } | 		{ | ||||||
| 		  else | 		  __matcher._M_make_range(__last_char.second, '-'); | ||||||
| 		    { | 		  __last_char.first = false; | ||||||
| 		      if (_M_scanner._M_get_token() |  | ||||||
| 			  != _ScannerT::_S_token_bracket_end) |  | ||||||
| 			__throw_regex_error( |  | ||||||
| 			  regex_constants::error_range, |  | ||||||
| 			  "Unexpected end of bracket expression."); |  | ||||||
| 		      __matcher._M_add_char(_M_value[0]); |  | ||||||
| 		    } |  | ||||||
| 		} | 		} | ||||||
| 	      else | 	      else | ||||||
| 		{ | 		{ | ||||||
| 		  __matcher._M_add_char(_M_value[0]); | 		  if (_M_scanner._M_get_token() | ||||||
| 		  __last_char.second = _M_value[0]; | 		      != _ScannerT::_S_token_bracket_end) | ||||||
|  | 		    __throw_regex_error( | ||||||
|  | 		      regex_constants::error_range, | ||||||
|  | 		      "Character is expected after a dash."); | ||||||
|  | 		  __push_char('-'); | ||||||
| 		} | 		} | ||||||
| 	    } | 	    } | ||||||
| 	} | 	} | ||||||
|       else if (_M_match_token(_ScannerT::_S_token_quoted_class)) |       else if (_M_match_token(_ScannerT::_S_token_quoted_class)) | ||||||
| 	__matcher._M_add_character_class(_M_value, | 	{ | ||||||
| 					 _M_ctype.is(_CtypeT::upper, | 	  __flush(); | ||||||
| 						     _M_value[0])); | 	  __matcher._M_add_character_class(_M_value, | ||||||
|  | 					   _M_ctype.is(_CtypeT::upper, | ||||||
|  | 						       _M_value[0])); | ||||||
|  | 	} | ||||||
|       else |       else | ||||||
| 	__throw_regex_error(regex_constants::error_brack, | 	__throw_regex_error(regex_constants::error_brack, | ||||||
| 			    "Unexpected character in bracket expression."); | 			    "Unexpected character in bracket expression."); | ||||||
|  |  | ||||||
|  | @ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
|   { |   { | ||||||
|   public: |   public: | ||||||
|     /// Token types returned from the scanner.
 |     /// Token types returned from the scanner.
 | ||||||
|     enum _TokenT |     enum _TokenT : unsigned | ||||||
|     { |     { | ||||||
|       _S_token_anychar, |       _S_token_anychar, | ||||||
|       _S_token_ord_char, |       _S_token_ord_char, | ||||||
|  | @ -73,7 +73,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
|       _S_token_comma, |       _S_token_comma, | ||||||
|       _S_token_dup_count, |       _S_token_dup_count, | ||||||
|       _S_token_eof, |       _S_token_eof, | ||||||
|       _S_token_unknown |       _S_token_bracket_dash, | ||||||
|  |       _S_token_unknown = -1u | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|   protected: |   protected: | ||||||
|  |  | ||||||
|  | @ -210,7 +210,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | ||||||
| 
 | 
 | ||||||
|       auto __c = *_M_current++; |       auto __c = *_M_current++; | ||||||
| 
 | 
 | ||||||
|       if (__c == '[') |       if (__c == '-') | ||||||
|  | 	_M_token = _S_token_bracket_dash; | ||||||
|  |       else if (__c == '[') | ||||||
| 	{ | 	{ | ||||||
| 	  if (_M_current == _M_end) | 	  if (_M_current == _M_end) | ||||||
| 	    __throw_regex_error(regex_constants::error_brack, | 	    __throw_regex_error(regex_constants::error_brack, | ||||||
|  |  | ||||||
|  | @ -61,12 +61,35 @@ test03() | ||||||
|   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow)); |   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | // PR libstdc++/77356
 | ||||||
|  | void | ||||||
|  | test04() | ||||||
|  | { | ||||||
|  |   bool test __attribute__((unused)) = true; | ||||||
|  | 
 | ||||||
|  |   static const char* kNumericAnchor ="(\\$|usd)(usd|\\$|to|and|up to|[0-9,\\.\\-\\sk])+"; | ||||||
|  |   const std::regex re(kNumericAnchor); | ||||||
|  |   (void)re; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | test05() | ||||||
|  | { | ||||||
|  |   bool test __attribute__((unused)) = true; | ||||||
|  | 
 | ||||||
|  |   VERIFY(regex_match_debug("!", std::regex("[![:alnum:]]"))); | ||||||
|  |   VERIFY(regex_match_debug("-", std::regex("[a-]", regex_constants::basic))); | ||||||
|  |   VERIFY(regex_match_debug("-", std::regex("[a-]"))); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| int | int | ||||||
| main() | main() | ||||||
| { | { | ||||||
|   test01(); |   test01(); | ||||||
|   test02(); |   test02(); | ||||||
|   test03(); |   test03(); | ||||||
|  |   test04(); | ||||||
|  |   test05(); | ||||||
|   return 0; |   return 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Tim Shen
						Tim Shen