mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			re PR libstdc++/77356 (regex error for an ECMAScript syntax string)
PR libstdc++/77356 * include/bits/regex_compiler.tcc(_M_insert_bracket_matcher, _M_expression_term): Modify to support dash literal. * include/bits/regex_scanner.h: Add dash as a token type to make it different from the mandated dash literal by escaping. * include/bits/regex_scanner.tcc(_M_scan_in_bracket): Emit dash token in bracket expression parsing. * testsuite/28_regex/regression.cc: Add new testcases. From-SVN: r239794
This commit is contained in:
		
							parent
							
								
									d8921e81e9
								
							
						
					
					
						commit
						4aebb4e4a6
					
				|  | @ -1,3 +1,14 @@ | |||
| 2016-08-27  Tim Shen  <timshen@google.com> | ||||
| 
 | ||||
| 	PR libstdc++/77356 | ||||
| 	* include/bits/regex_compiler.tcc(_M_insert_bracket_matcher, | ||||
| 	_M_expression_term): Modify to support dash literal. | ||||
| 	* include/bits/regex_scanner.h: Add dash as a token type to make | ||||
| 	it different from the mandated dash literal by escaping. | ||||
| 	* include/bits/regex_scanner.tcc(_M_scan_in_bracket): Emit dash | ||||
| 	token in bracket expression parsing. | ||||
| 	* testsuite/28_regex/regression.cc: Add new testcases. | ||||
| 
 | ||||
| 2016-08-26  Jonathan Wakely  <jwakely@redhat.com> | ||||
| 
 | ||||
| 	PR libstdc++/51960 | ||||
|  |  | |||
|  | @ -426,13 +426,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
|       pair<bool, _CharT> __last_char; // Optional<_CharT> | ||||
|       __last_char.first = false; | ||||
|       if (!(_M_flags & regex_constants::ECMAScript)) | ||||
| 	{ | ||||
| 	  if (_M_try_char()) | ||||
| 	    { | ||||
| 	    __matcher._M_add_char(_M_value[0]); | ||||
| 	      __last_char.first = true; | ||||
| 	      __last_char.second = _M_value[0]; | ||||
| 	    } | ||||
| 	  else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||
| 	    { | ||||
| 	      __last_char.first = true; | ||||
| 	      __last_char.second = '-'; | ||||
| 	    } | ||||
| 	} | ||||
|       while (_M_expression_term(__last_char, __matcher)); | ||||
|       if (__last_char.first) | ||||
| 	__matcher._M_add_char(__last_char.second); | ||||
|       __matcher._M_ready(); | ||||
|       _M_stack.push(_StateSeqT( | ||||
| 		      *_M_nfa, | ||||
|  | @ -449,19 +457,43 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
|       if (_M_match_token(_ScannerT::_S_token_bracket_end)) | ||||
| 	return false; | ||||
| 
 | ||||
|       const auto __push_char = [&](_CharT __ch) | ||||
|       { | ||||
| 	if (__last_char.first) | ||||
| 	  __matcher._M_add_char(__last_char.second); | ||||
| 	else | ||||
| 	  __last_char.first = true; | ||||
| 	__last_char.second = __ch; | ||||
|       }; | ||||
|       const auto __flush = [&] | ||||
|       { | ||||
| 	if (__last_char.first) | ||||
| 	  { | ||||
| 	    __matcher._M_add_char(__last_char.second); | ||||
| 	    __last_char.first = false; | ||||
| 	  } | ||||
|       }; | ||||
| 
 | ||||
|       if (_M_match_token(_ScannerT::_S_token_collsymbol)) | ||||
| 	{ | ||||
| 	  auto __symbol = __matcher._M_add_collate_element(_M_value); | ||||
| 	  if (__symbol.size() == 1) | ||||
| 	    { | ||||
| 	      __last_char.first = true; | ||||
| 	      __last_char.second = __symbol[0]; | ||||
| 	    } | ||||
| 	    __push_char(__symbol[0]); | ||||
| 	  else | ||||
| 	    __flush(); | ||||
| 	} | ||||
|       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) | ||||
| 	{ | ||||
| 	  __flush(); | ||||
| 	  __matcher._M_add_equivalence_class(_M_value); | ||||
| 	} | ||||
|       else if (_M_match_token(_ScannerT::_S_token_char_class_name)) | ||||
| 	{ | ||||
| 	  __flush(); | ||||
| 	  __matcher._M_add_character_class(_M_value, false); | ||||
| 	} | ||||
|       else if (_M_try_char()) | ||||
| 	__push_char(_M_value[0]); | ||||
|       // POSIX doesn't allow '-' as a start-range char (say [a-z--0]), | ||||
|       // except when the '-' is the first or last character in the bracket | ||||
|       // expression ([--0]). ECMAScript treats all '-' after a range as a | ||||
|  | @ -472,32 +504,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
|       // Clang (3.5) always uses ECMAScript style even in its POSIX syntax. | ||||
|       // | ||||
|       // It turns out that no one reads BNFs ;) | ||||
|       else if (_M_try_char()) | ||||
|       else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||
| 	{ | ||||
| 	  if (!__last_char.first) | ||||
| 	    { | ||||
| 	      __matcher._M_add_char(_M_value[0]); | ||||
| 	      if (_M_value[0] == '-' | ||||
| 		  && !(_M_flags & regex_constants::ECMAScript)) | ||||
| 	      if (!(_M_flags & regex_constants::ECMAScript)) | ||||
| 		{ | ||||
| 		  if (_M_match_token(_ScannerT::_S_token_bracket_end)) | ||||
| 		    { | ||||
| 		      __push_char('-'); | ||||
| 		      return false; | ||||
| 		    } | ||||
| 		  __throw_regex_error( | ||||
| 		    regex_constants::error_range, | ||||
| 		    "Unexpected dash in bracket expression. For POSIX syntax, " | ||||
| 		    "a dash is not treated literally only when it is at " | ||||
| 		    "beginning or end."); | ||||
| 		} | ||||
| 	      __last_char.first = true; | ||||
| 	      __last_char.second = _M_value[0]; | ||||
| 	      __push_char('-'); | ||||
| 	    } | ||||
| 	  else | ||||
| 	    { | ||||
| 	      if (_M_value[0] == '-') | ||||
| 	    { | ||||
| 	      if (_M_try_char()) | ||||
| 		{ | ||||
| 		      __matcher._M_make_range(__last_char.second , _M_value[0]); | ||||
| 		  __matcher._M_make_range(__last_char.second, _M_value[0]); | ||||
| 		  __last_char.first = false; | ||||
| 		} | ||||
| 	      else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) | ||||
| 		{ | ||||
| 		  __matcher._M_make_range(__last_char.second, '-'); | ||||
| 		  __last_char.first = false; | ||||
| 		} | ||||
| 	      else | ||||
|  | @ -506,21 +541,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
| 		      != _ScannerT::_S_token_bracket_end) | ||||
| 		    __throw_regex_error( | ||||
| 		      regex_constants::error_range, | ||||
| 			  "Unexpected end of bracket expression."); | ||||
| 		      __matcher._M_add_char(_M_value[0]); | ||||
| 		    } | ||||
| 		} | ||||
| 	      else | ||||
| 		{ | ||||
| 		  __matcher._M_add_char(_M_value[0]); | ||||
| 		  __last_char.second = _M_value[0]; | ||||
| 		      "Character is expected after a dash."); | ||||
| 		  __push_char('-'); | ||||
| 		} | ||||
| 	    } | ||||
| 	} | ||||
|       else if (_M_match_token(_ScannerT::_S_token_quoted_class)) | ||||
| 	{ | ||||
| 	  __flush(); | ||||
| 	  __matcher._M_add_character_class(_M_value, | ||||
| 					   _M_ctype.is(_CtypeT::upper, | ||||
| 						       _M_value[0])); | ||||
| 	} | ||||
|       else | ||||
| 	__throw_regex_error(regex_constants::error_brack, | ||||
| 			    "Unexpected character in bracket expression."); | ||||
|  |  | |||
|  | @ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
|   { | ||||
|   public: | ||||
|     /// Token types returned from the scanner.
 | ||||
|     enum _TokenT | ||||
|     enum _TokenT : unsigned | ||||
|     { | ||||
|       _S_token_anychar, | ||||
|       _S_token_ord_char, | ||||
|  | @ -73,7 +73,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
|       _S_token_comma, | ||||
|       _S_token_dup_count, | ||||
|       _S_token_eof, | ||||
|       _S_token_unknown | ||||
|       _S_token_bracket_dash, | ||||
|       _S_token_unknown = -1u | ||||
|     }; | ||||
| 
 | ||||
|   protected: | ||||
|  |  | |||
|  | @ -210,7 +210,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION | |||
| 
 | ||||
|       auto __c = *_M_current++; | ||||
| 
 | ||||
|       if (__c == '[') | ||||
|       if (__c == '-') | ||||
| 	_M_token = _S_token_bracket_dash; | ||||
|       else if (__c == '[') | ||||
| 	{ | ||||
| 	  if (_M_current == _M_end) | ||||
| 	    __throw_regex_error(regex_constants::error_brack, | ||||
|  |  | |||
|  | @ -61,12 +61,35 @@ test03() | |||
|   VERIFY(!regex_search_debug("a", regex(R"(\b$)"), regex_constants::match_not_eow)); | ||||
| } | ||||
| 
 | ||||
| // PR libstdc++/77356: constructing a regex whose bracket expression holds an escaped dash ("\-") directly followed by a class escape ("\s") must not throw.
 | ||||
| void | ||||
| test04() | ||||
| { | ||||
|   bool test __attribute__((unused)) = true; | ||||
| 
 | ||||
|   static const char* kNumericAnchor ="(\\$|usd)(usd|\\$|to|and|up to|[0-9,\\.\\-\\sk])+"; | ||||
|   const std::regex re(kNumericAnchor); | ||||
|   (void)re; | ||||
| } | ||||
| 
 | ||||
// Bracket-expression checks exercised by this test:
//  - "[![:alnum:]]": an ordinary character placed directly before a character
//    class must still be matched on its own ("!").
//  - "[a-]": a dash that is the last character of the bracket expression is
//    matched literally, both with regex_constants::basic and with the default
//    syntax flags.
| void | ||||
| test05() | ||||
| { | ||||
|   bool test __attribute__((unused)) = true; | ||||
| 
 | ||||
|   VERIFY(regex_match_debug("!", std::regex("[![:alnum:]]"))); | ||||
|   VERIFY(regex_match_debug("-", std::regex("[a-]", regex_constants::basic))); | ||||
|   VERIFY(regex_match_debug("-", std::regex("[a-]"))); | ||||
| } | ||||
| 
 | ||||
// Test driver: calls test01 through test05 in order and returns 0.
| int | ||||
| main() | ||||
| { | ||||
|   test01(); | ||||
|   test02(); | ||||
|   test03(); | ||||
|   test04(); | ||||
|   test05(); | ||||
|   return 0; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Tim Shen
						Tim Shen