mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			152 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			152 lines
		
	
	
		
			4.0 KiB
		
	
	
	
		
			Go
		
	
	
	
// Copyright 2011 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
package syntax
 | 
						|
 | 
						|
import "testing"
 | 
						|
 | 
						|
var simplifyTests = []struct {
 | 
						|
	Regexp string
 | 
						|
	Simple string
 | 
						|
}{
 | 
						|
	// Already-simple constructs
 | 
						|
	{`a`, `a`},
 | 
						|
	{`ab`, `ab`},
 | 
						|
	{`a|b`, `[a-b]`},
 | 
						|
	{`ab|cd`, `ab|cd`},
 | 
						|
	{`(ab)*`, `(ab)*`},
 | 
						|
	{`(ab)+`, `(ab)+`},
 | 
						|
	{`(ab)?`, `(ab)?`},
 | 
						|
	{`.`, `(?s:.)`},
 | 
						|
	{`^`, `(?m:^)`},
 | 
						|
	{`$`, `(?m:$)`},
 | 
						|
	{`[ac]`, `[ac]`},
 | 
						|
	{`[^ac]`, `[^ac]`},
 | 
						|
 | 
						|
	// Posix character classes
 | 
						|
	{`[[:alnum:]]`, `[0-9A-Za-z]`},
 | 
						|
	{`[[:alpha:]]`, `[A-Za-z]`},
 | 
						|
	{`[[:blank:]]`, `[\t ]`},
 | 
						|
	{`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
 | 
						|
	{`[[:digit:]]`, `[0-9]`},
 | 
						|
	{`[[:graph:]]`, `[!-~]`},
 | 
						|
	{`[[:lower:]]`, `[a-z]`},
 | 
						|
	{`[[:print:]]`, `[ -~]`},
 | 
						|
	{`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
 | 
						|
	{`[[:space:]]`, `[\t-\r ]`},
 | 
						|
	{`[[:upper:]]`, `[A-Z]`},
 | 
						|
	{`[[:xdigit:]]`, `[0-9A-Fa-f]`},
 | 
						|
 | 
						|
	// Perl character classes
 | 
						|
	{`\d`, `[0-9]`},
 | 
						|
	{`\s`, `[\t-\n\f-\r ]`},
 | 
						|
	{`\w`, `[0-9A-Z_a-z]`},
 | 
						|
	{`\D`, `[^0-9]`},
 | 
						|
	{`\S`, `[^\t-\n\f-\r ]`},
 | 
						|
	{`\W`, `[^0-9A-Z_a-z]`},
 | 
						|
	{`[\d]`, `[0-9]`},
 | 
						|
	{`[\s]`, `[\t-\n\f-\r ]`},
 | 
						|
	{`[\w]`, `[0-9A-Z_a-z]`},
 | 
						|
	{`[\D]`, `[^0-9]`},
 | 
						|
	{`[\S]`, `[^\t-\n\f-\r ]`},
 | 
						|
	{`[\W]`, `[^0-9A-Z_a-z]`},
 | 
						|
 | 
						|
	// Posix repetitions
 | 
						|
	{`a{1}`, `a`},
 | 
						|
	{`a{2}`, `aa`},
 | 
						|
	{`a{5}`, `aaaaa`},
 | 
						|
	{`a{0,1}`, `a?`},
 | 
						|
	// The next three are illegible because Simplify inserts (?:)
 | 
						|
	// parens instead of () parens to avoid creating extra
 | 
						|
	// captured subexpressions. The comments show a version with fewer parens.
 | 
						|
	{`(a){0,2}`, `(?:(a)(a)?)?`},                       //       (aa?)?
 | 
						|
	{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},       //   (a(a(aa?)?)?)?
 | 
						|
	{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
 | 
						|
	{`a{0,2}`, `(?:aa?)?`},                             //       (aa?)?
 | 
						|
	{`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`},                 //   (a(a(aa?)?)?)?
 | 
						|
	{`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`},               // aa(a(a(aa?)?)?)?
 | 
						|
	{`a{0,}`, `a*`},
 | 
						|
	{`a{1,}`, `a+`},
 | 
						|
	{`a{2,}`, `aa+`},
 | 
						|
	{`a{5,}`, `aaaaa+`},
 | 
						|
 | 
						|
	// Test that operators simplify their arguments.
 | 
						|
	{`(?:a{1,}){1,}`, `a+`},
 | 
						|
	{`(a{1,}b{1,})`, `(a+b+)`},
 | 
						|
	{`a{1,}|b{1,}`, `a+|b+`},
 | 
						|
	{`(?:a{1,})*`, `(?:a+)*`},
 | 
						|
	{`(?:a{1,})+`, `a+`},
 | 
						|
	{`(?:a{1,})?`, `(?:a+)?`},
 | 
						|
	{``, `(?:)`},
 | 
						|
	{`a{0}`, `(?:)`},
 | 
						|
 | 
						|
	// Character class simplification
 | 
						|
	{`[ab]`, `[a-b]`},
 | 
						|
	{`[a-za-za-z]`, `[a-z]`},
 | 
						|
	{`[A-Za-zA-Za-z]`, `[A-Za-z]`},
 | 
						|
	{`[ABCDEFGH]`, `[A-H]`},
 | 
						|
	{`[AB-CD-EF-GH]`, `[A-H]`},
 | 
						|
	{`[W-ZP-XE-R]`, `[E-Z]`},
 | 
						|
	{`[a-ee-gg-m]`, `[a-m]`},
 | 
						|
	{`[a-ea-ha-m]`, `[a-m]`},
 | 
						|
	{`[a-ma-ha-e]`, `[a-m]`},
 | 
						|
	{`[a-zA-Z0-9 -~]`, `[ -~]`},
 | 
						|
 | 
						|
	// Empty character classes
 | 
						|
	{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
 | 
						|
 | 
						|
	// Full character classes
 | 
						|
	{`[[:cntrl:][:^cntrl:]]`, `(?s:.)`},
 | 
						|
 | 
						|
	// Unicode case folding.
 | 
						|
	{`(?i)A`, `(?i:A)`},
 | 
						|
	{`(?i)a`, `(?i:A)`},
 | 
						|
	{`(?i)[A]`, `(?i:A)`},
 | 
						|
	{`(?i)[a]`, `(?i:A)`},
 | 
						|
	{`(?i)K`, `(?i:K)`},
 | 
						|
	{`(?i)k`, `(?i:K)`},
 | 
						|
	{`(?i)\x{212a}`, "(?i:K)"},
 | 
						|
	{`(?i)[K]`, "[Kk\u212A]"},
 | 
						|
	{`(?i)[k]`, "[Kk\u212A]"},
 | 
						|
	{`(?i)[\x{212a}]`, "[Kk\u212A]"},
 | 
						|
	{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
 | 
						|
	{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
 | 
						|
	{`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`},
 | 
						|
 | 
						|
	// Empty string as a regular expression.
 | 
						|
	// The empty string must be preserved inside parens in order
 | 
						|
	// to make submatches work right, so these tests are less
 | 
						|
	// interesting than they might otherwise be. String inserts
 | 
						|
	// explicit (?:) in place of non-parenthesized empty strings,
 | 
						|
	// to make them easier to spot for other parsers.
 | 
						|
	{`(a|b|)`, `([a-b]|(?:))`},
 | 
						|
	{`(|)`, `()`},
 | 
						|
	{`a()`, `a()`},
 | 
						|
	{`(()|())`, `(()|())`},
 | 
						|
	{`(a|)`, `(a|(?:))`},
 | 
						|
	{`ab()cd()`, `ab()cd()`},
 | 
						|
	{`()`, `()`},
 | 
						|
	{`()*`, `()*`},
 | 
						|
	{`()+`, `()+`},
 | 
						|
	{`()?`, `()?`},
 | 
						|
	{`(){0}`, `(?:)`},
 | 
						|
	{`(){1}`, `()`},
 | 
						|
	{`(){1,}`, `()+`},
 | 
						|
	{`(){0,2}`, `(?:()()?)?`},
 | 
						|
}
 | 
						|
 | 
						|
func TestSimplify(t *testing.T) {
 | 
						|
	for _, tt := range simplifyTests {
 | 
						|
		re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine)
 | 
						|
		if err != nil {
 | 
						|
			t.Errorf("Parse(%#q) = error %v", tt.Regexp, err)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		s := re.Simplify().String()
 | 
						|
		if s != tt.Simple {
 | 
						|
			t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple)
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 |