mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			716 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			716 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright 2010 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package regexp
 | |
| 
 | |
| import (
 | |
| 	"bufio"
 | |
| 	"compress/bzip2"
 | |
| 	"fmt"
 | |
| 	"io"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"regexp/syntax"
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"testing"
 | |
| 	"unicode/utf8"
 | |
| )
 | |
| 
 | |
| // TestRE2 tests this package's regexp API against test cases
 | |
| // considered during RE2's exhaustive tests, which run all possible
 | |
| // regexps over a given set of atoms and operators, up to a given
 | |
| // complexity, over all possible strings over a given alphabet,
 | |
| // up to a given size.  Rather than try to link with RE2, we read a
 | |
| // log file containing the test cases and the expected matches.
 | |
| // The log file, re2.txt, is generated by running 'make exhaustive-log'
 | |
| // in the open source RE2 distribution.  http://code.google.com/p/re2/
 | |
| //
 | |
| // The test file format is a sequence of stanzas like:
 | |
| //
 | |
| //	strings
 | |
| //	"abc"
 | |
| //	"123x"
 | |
| //	regexps
 | |
| //	"[a-z]+"
 | |
| //	0-3;0-3
 | |
| //	-;-
 | |
| //	"([0-9])([0-9])([0-9])"
 | |
| //	-;-
 | |
| //	-;0-3 0-1 1-2 2-3
 | |
| //
 | |
| // The stanza begins by defining a set of strings, quoted
 | |
| // using Go double-quote syntax, one per line.  Then the
 | |
| // regexps section gives a sequence of regexps to run on
 | |
| // the strings.  In the block that follows a regexp, each line
 | |
| // gives the semicolon-separated match results of running
 | |
| // the regexp on the corresponding string.
 | |
| // Each match result is either a single -, meaning no match, or a
 | |
| // space-separated sequence of pairs giving the match and
 | |
| // submatch indices.  An unmatched subexpression formats
 | |
| // its pair as a single - (not illustrated above).  For now
 | |
| // each regexp run produces two match results, one for a
 | |
| // ``full match'' that restricts the regexp to matching the entire
 | |
| // string or nothing, and one for a ``partial match'' that gives
 | |
| // the leftmost first match found in the string.
 | |
| //
 | |
| // Lines beginning with # are comments.  Lines beginning with
 | |
| // a capital letter are test names printed during RE2's test suite
 | |
| // and are echoed into t but otherwise ignored.
 | |
| //
 | |
| // At time of writing, re2.txt is 32 MB but compresses to 760 kB,
 | |
| // so we store re2.txt.gz in the repository and decompress it on the fly.
 | |
| //
 | |
| func TestRE2Search(t *testing.T) {
 | |
| 	testRE2(t, "testdata/re2-search.txt")
 | |
| }
 | |
| 
 | |
| func testRE2(t *testing.T, file string) {
 | |
| 	f, err := os.Open(file)
 | |
| 	if err != nil {
 | |
| 		t.Fatal(err)
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 	var txt io.Reader
 | |
| 	if strings.HasSuffix(file, ".bz2") {
 | |
| 		z := bzip2.NewReader(f)
 | |
| 		txt = z
 | |
| 		file = file[:len(file)-len(".bz2")] // for error messages
 | |
| 	} else {
 | |
| 		txt = f
 | |
| 	}
 | |
| 	lineno := 0
 | |
| 	scanner := bufio.NewScanner(txt)
 | |
| 	var (
 | |
| 		str       []string
 | |
| 		input     []string
 | |
| 		inStrings bool
 | |
| 		re        *Regexp
 | |
| 		refull    *Regexp
 | |
| 		nfail     int
 | |
| 		ncase     int
 | |
| 	)
 | |
| 	for lineno := 1; scanner.Scan(); lineno++ {
 | |
| 		line := scanner.Text()
 | |
| 		switch {
 | |
| 		case line == "":
 | |
| 			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
 | |
| 		case line[0] == '#':
 | |
| 			continue
 | |
| 		case 'A' <= line[0] && line[0] <= 'Z':
 | |
| 			// Test name.
 | |
| 			t.Logf("%s\n", line)
 | |
| 			continue
 | |
| 		case line == "strings":
 | |
| 			str = str[:0]
 | |
| 			inStrings = true
 | |
| 		case line == "regexps":
 | |
| 			inStrings = false
 | |
| 		case line[0] == '"':
 | |
| 			q, err := strconv.Unquote(line)
 | |
| 			if err != nil {
 | |
| 				// Fatal because we'll get out of sync.
 | |
| 				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
 | |
| 			}
 | |
| 			if inStrings {
 | |
| 				str = append(str, q)
 | |
| 				continue
 | |
| 			}
 | |
| 			// Is a regexp.
 | |
| 			if len(input) != 0 {
 | |
| 				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
 | |
| 			}
 | |
| 			re, err = tryCompile(q)
 | |
| 			if err != nil {
 | |
| 				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
 | |
| 					// We don't and likely never will support \C; keep going.
 | |
| 					continue
 | |
| 				}
 | |
| 				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
 | |
| 				if nfail++; nfail >= 100 {
 | |
| 					t.Fatalf("stopping after %d errors", nfail)
 | |
| 				}
 | |
| 				continue
 | |
| 			}
 | |
| 			full := `\A(?:` + q + `)\z`
 | |
| 			refull, err = tryCompile(full)
 | |
| 			if err != nil {
 | |
| 				// Fatal because q worked, so this should always work.
 | |
| 				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
 | |
| 			}
 | |
| 			input = str
 | |
| 		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
 | |
| 			// A sequence of match results.
 | |
| 			ncase++
 | |
| 			if re == nil {
 | |
| 				// Failed to compile: skip results.
 | |
| 				continue
 | |
| 			}
 | |
| 			if len(input) == 0 {
 | |
| 				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
 | |
| 			}
 | |
| 			var text string
 | |
| 			text, input = input[0], input[1:]
 | |
| 			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
 | |
| 				// RE2's \B considers every byte position,
 | |
| 				// so it sees 'not word boundary' in the
 | |
| 				// middle of UTF-8 sequences.  This package
 | |
| 				// only considers the positions between runes,
 | |
| 				// so it disagrees.  Skip those cases.
 | |
| 				continue
 | |
| 			}
 | |
| 			res := strings.Split(line, ";")
 | |
| 			if len(res) != len(run) {
 | |
| 				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
 | |
| 			}
 | |
| 			for i := range res {
 | |
| 				have, suffix := run[i](re, refull, text)
 | |
| 				want := parseResult(t, file, lineno, res[i])
 | |
| 				if !same(have, want) {
 | |
| 					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
 | |
| 					if nfail++; nfail >= 100 {
 | |
| 						t.Fatalf("stopping after %d errors", nfail)
 | |
| 					}
 | |
| 					continue
 | |
| 				}
 | |
| 				b, suffix := match[i](re, refull, text)
 | |
| 				if b != (want != nil) {
 | |
| 					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
 | |
| 					if nfail++; nfail >= 100 {
 | |
| 						t.Fatalf("stopping after %d errors", nfail)
 | |
| 					}
 | |
| 					continue
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 		default:
 | |
| 			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
 | |
| 		}
 | |
| 	}
 | |
| 	if err := scanner.Err(); err != nil {
 | |
| 		t.Fatalf("%s:%d: %v", file, lineno, err)
 | |
| 	}
 | |
| 	if len(input) != 0 {
 | |
| 		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
 | |
| 	}
 | |
| 	t.Logf("%d cases tested", ncase)
 | |
| }
 | |
| 
 | |
| var run = []func(*Regexp, *Regexp, string) ([]int, string){
 | |
| 	runFull,
 | |
| 	runPartial,
 | |
| 	runFullLongest,
 | |
| 	runPartialLongest,
 | |
| }
 | |
| 
 | |
| func runFull(re, refull *Regexp, text string) ([]int, string) {
 | |
| 	refull.longest = false
 | |
| 	return refull.FindStringSubmatchIndex(text), "[full]"
 | |
| }
 | |
| 
 | |
| func runPartial(re, refull *Regexp, text string) ([]int, string) {
 | |
| 	re.longest = false
 | |
| 	return re.FindStringSubmatchIndex(text), ""
 | |
| }
 | |
| 
 | |
| func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
 | |
| 	refull.longest = true
 | |
| 	return refull.FindStringSubmatchIndex(text), "[full,longest]"
 | |
| }
 | |
| 
 | |
| func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
 | |
| 	re.longest = true
 | |
| 	return re.FindStringSubmatchIndex(text), "[longest]"
 | |
| }
 | |
| 
 | |
| var match = []func(*Regexp, *Regexp, string) (bool, string){
 | |
| 	matchFull,
 | |
| 	matchPartial,
 | |
| 	matchFullLongest,
 | |
| 	matchPartialLongest,
 | |
| }
 | |
| 
 | |
| func matchFull(re, refull *Regexp, text string) (bool, string) {
 | |
| 	refull.longest = false
 | |
| 	return refull.MatchString(text), "[full]"
 | |
| }
 | |
| 
 | |
| func matchPartial(re, refull *Regexp, text string) (bool, string) {
 | |
| 	re.longest = false
 | |
| 	return re.MatchString(text), ""
 | |
| }
 | |
| 
 | |
| func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
 | |
| 	refull.longest = true
 | |
| 	return refull.MatchString(text), "[full,longest]"
 | |
| }
 | |
| 
 | |
| func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
 | |
| 	re.longest = true
 | |
| 	return re.MatchString(text), "[longest]"
 | |
| }
 | |
| 
 | |
| func isSingleBytes(s string) bool {
 | |
| 	for _, c := range s {
 | |
| 		if c >= utf8.RuneSelf {
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| func tryCompile(s string) (re *Regexp, err error) {
 | |
| 	// Protect against panic during Compile.
 | |
| 	defer func() {
 | |
| 		if r := recover(); r != nil {
 | |
| 			err = fmt.Errorf("panic: %v", r)
 | |
| 		}
 | |
| 	}()
 | |
| 	return Compile(s)
 | |
| }
 | |
| 
 | |
| func parseResult(t *testing.T, file string, lineno int, res string) []int {
 | |
| 	// A single - indicates no match.
 | |
| 	if res == "-" {
 | |
| 		return nil
 | |
| 	}
 | |
| 	// Otherwise, a space-separated list of pairs.
 | |
| 	n := 1
 | |
| 	for j := 0; j < len(res); j++ {
 | |
| 		if res[j] == ' ' {
 | |
| 			n++
 | |
| 		}
 | |
| 	}
 | |
| 	out := make([]int, 2*n)
 | |
| 	i := 0
 | |
| 	n = 0
 | |
| 	for j := 0; j <= len(res); j++ {
 | |
| 		if j == len(res) || res[j] == ' ' {
 | |
| 			// Process a single pair.  - means no submatch.
 | |
| 			pair := res[i:j]
 | |
| 			if pair == "-" {
 | |
| 				out[n] = -1
 | |
| 				out[n+1] = -1
 | |
| 			} else {
 | |
| 				k := strings.Index(pair, "-")
 | |
| 				if k < 0 {
 | |
| 					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 | |
| 				}
 | |
| 				lo, err1 := strconv.Atoi(pair[:k])
 | |
| 				hi, err2 := strconv.Atoi(pair[k+1:])
 | |
| 				if err1 != nil || err2 != nil || lo > hi {
 | |
| 					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
 | |
| 				}
 | |
| 				out[n] = lo
 | |
| 				out[n+1] = hi
 | |
| 			}
 | |
| 			n += 2
 | |
| 			i = j + 1
 | |
| 		}
 | |
| 	}
 | |
| 	return out
 | |
| }
 | |
| 
 | |
| func same(x, y []int) bool {
 | |
| 	if len(x) != len(y) {
 | |
| 		return false
 | |
| 	}
 | |
| 	for i, xi := range x {
 | |
| 		if xi != y[i] {
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| // TestFowler runs this package's regexp API against the
 | |
| // POSIX regular expression tests collected by Glenn Fowler
 | |
| // at http://www2.research.att.com/~gsf/testregex/.
 | |
| func TestFowler(t *testing.T) {
 | |
| 	files, err := filepath.Glob("testdata/*.dat")
 | |
| 	if err != nil {
 | |
| 		t.Fatal(err)
 | |
| 	}
 | |
| 	for _, file := range files {
 | |
| 		t.Log(file)
 | |
| 		testFowler(t, file)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| var notab = MustCompilePOSIX(`[^\t]+`)
 | |
| 
 | |
| func testFowler(t *testing.T, file string) {
 | |
| 	f, err := os.Open(file)
 | |
| 	if err != nil {
 | |
| 		t.Error(err)
 | |
| 		return
 | |
| 	}
 | |
| 	defer f.Close()
 | |
| 	b := bufio.NewReader(f)
 | |
| 	lineno := 0
 | |
| 	lastRegexp := ""
 | |
| Reading:
 | |
| 	for {
 | |
| 		lineno++
 | |
| 		line, err := b.ReadString('\n')
 | |
| 		if err != nil {
 | |
| 			if err != io.EOF {
 | |
| 				t.Errorf("%s:%d: %v", file, lineno, err)
 | |
| 			}
 | |
| 			break Reading
 | |
| 		}
 | |
| 
 | |
| 		// http://www2.research.att.com/~gsf/man/man1/testregex.html
 | |
| 		//
 | |
| 		// INPUT FORMAT
 | |
| 		//   Input lines may be blank, a comment beginning with #, or a test
 | |
| 		//   specification. A specification is five fields separated by one
 | |
| 		//   or more tabs. NULL denotes the empty string and NIL denotes the
 | |
| 		//   0 pointer.
 | |
| 		if line[0] == '#' || line[0] == '\n' {
 | |
| 			continue Reading
 | |
| 		}
 | |
| 		line = line[:len(line)-1]
 | |
| 		field := notab.FindAllString(line, -1)
 | |
| 		for i, f := range field {
 | |
| 			if f == "NULL" {
 | |
| 				field[i] = ""
 | |
| 			}
 | |
| 			if f == "NIL" {
 | |
| 				t.Logf("%s:%d: skip: %s", file, lineno, line)
 | |
| 				continue Reading
 | |
| 			}
 | |
| 		}
 | |
| 		if len(field) == 0 {
 | |
| 			continue Reading
 | |
| 		}
 | |
| 
 | |
| 		//   Field 1: the regex(3) flags to apply, one character per REG_feature
 | |
| 		//   flag. The test is skipped if REG_feature is not supported by the
 | |
| 		//   implementation. If the first character is not [BEASKLP] then the
 | |
| 		//   specification is a global control line. One or more of [BEASKLP] may be
 | |
| 		//   specified; the test will be repeated for each mode.
 | |
| 		//
 | |
| 		//     B 	basic			BRE	(grep, ed, sed)
 | |
| 		//     E 	REG_EXTENDED		ERE	(egrep)
 | |
| 		//     A	REG_AUGMENTED		ARE	(egrep with negation)
 | |
| 		//     S	REG_SHELL		SRE	(sh glob)
 | |
| 		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
 | |
| 		//     L	REG_LITERAL		LRE	(fgrep)
 | |
| 		//
 | |
| 		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
 | |
| 		//     b	REG_NOTBOL		lhs does not match ^
 | |
| 		//     c	REG_COMMENT		ignore space and #...\n
 | |
| 		//     d	REG_SHELL_DOT		explicit leading . match
 | |
| 		//     e	REG_NOTEOL		rhs does not match $
 | |
| 		//     f	REG_MULTIPLE		multiple \n separated patterns
 | |
| 		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
 | |
| 		//     h	REG_MULTIREF		multiple digit backref
 | |
| 		//     i	REG_ICASE		ignore case
 | |
| 		//     j	REG_SPAN		. matches \n
 | |
| 		//     k	REG_ESCAPE		\ to ecape [...] delimiter
 | |
| 		//     l	REG_LEFT		implicit ^...
 | |
| 		//     m	REG_MINIMAL		minimal match
 | |
| 		//     n	REG_NEWLINE		explicit \n match
 | |
| 		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
 | |
| 		//     p	REG_SHELL_PATH		explicit / match
 | |
| 		//     q	REG_DELIMITED		delimited pattern
 | |
| 		//     r	REG_RIGHT		implicit ...$
 | |
| 		//     s	REG_SHELL_ESCAPED	\ not special
 | |
| 		//     t	REG_MUSTDELIM		all delimiters must be specified
 | |
| 		//     u	standard unspecified behavior -- errors not counted
 | |
| 		//     v	REG_CLASS_ESCAPE	\ special inside [...]
 | |
| 		//     w	REG_NOSUB		no subexpression match array
 | |
| 		//     x	REG_LENIENT		let some errors slide
 | |
| 		//     y	REG_LEFT		regexec() implicit ^...
 | |
| 		//     z	REG_NULL		NULL subexpressions ok
 | |
| 		//     $	                        expand C \c escapes in fields 2 and 3
 | |
| 		//     /	                        field 2 is a regsubcomp() expression
 | |
| 		//     =	                        field 3 is a regdecomp() expression
 | |
| 		//
 | |
| 		//   Field 1 control lines:
 | |
| 		//
 | |
| 		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
 | |
| 		//
 | |
| 		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
 | |
| 		//     &test ...	output field 5 if current and previous passed
 | |
| 		//     |test ...	output field 5 if current passed and previous failed
 | |
| 		//     ; ...	output field 2 if previous failed
 | |
| 		//     {test ...	skip if failed until }
 | |
| 		//     }		end of skip
 | |
| 		//
 | |
| 		//     : comment		comment copied as output NOTE
 | |
| 		//     :comment:test	:comment: ignored
 | |
| 		//     N[OTE] comment	comment copied as output NOTE
 | |
| 		//     T[EST] comment	comment
 | |
| 		//
 | |
| 		//     number		use number for nmatch (20 by default)
 | |
| 		flag := field[0]
 | |
| 		switch flag[0] {
 | |
| 		case '?', '&', '|', ';', '{', '}':
 | |
| 			// Ignore all the control operators.
 | |
| 			// Just run everything.
 | |
| 			flag = flag[1:]
 | |
| 			if flag == "" {
 | |
| 				continue Reading
 | |
| 			}
 | |
| 		case ':':
 | |
| 			i := strings.Index(flag[1:], ":")
 | |
| 			if i < 0 {
 | |
| 				t.Logf("skip: %s", line)
 | |
| 				continue Reading
 | |
| 			}
 | |
| 			flag = flag[1+i+1:]
 | |
| 		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 | |
| 			t.Logf("skip: %s", line)
 | |
| 			continue Reading
 | |
| 		}
 | |
| 
 | |
| 		// Can check field count now that we've handled the myriad comment formats.
 | |
| 		if len(field) < 4 {
 | |
| 			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
 | |
| 			continue Reading
 | |
| 		}
 | |
| 
 | |
| 		// Expand C escapes (a.k.a. Go escapes).
 | |
| 		if strings.Contains(flag, "$") {
 | |
| 			f := `"` + field[1] + `"`
 | |
| 			if field[1], err = strconv.Unquote(f); err != nil {
 | |
| 				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 | |
| 			}
 | |
| 			f = `"` + field[2] + `"`
 | |
| 			if field[2], err = strconv.Unquote(f); err != nil {
 | |
| 				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		//   Field 2: the regular expression pattern; SAME uses the pattern from
 | |
| 		//     the previous specification.
 | |
| 		//
 | |
| 		if field[1] == "SAME" {
 | |
| 			field[1] = lastRegexp
 | |
| 		}
 | |
| 		lastRegexp = field[1]
 | |
| 
 | |
| 		//   Field 3: the string to match.
 | |
| 		text := field[2]
 | |
| 
 | |
| 		//   Field 4: the test outcome...
 | |
| 		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
 | |
| 		if !ok {
 | |
| 			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
 | |
| 			continue Reading
 | |
| 		}
 | |
| 
 | |
| 		//   Field 5: optional comment appended to the report.
 | |
| 
 | |
| 	Testing:
 | |
| 		// Run test once for each specified capital letter mode that we support.
 | |
| 		for _, c := range flag {
 | |
| 			pattern := field[1]
 | |
| 			syn := syntax.POSIX | syntax.ClassNL
 | |
| 			switch c {
 | |
| 			default:
 | |
| 				continue Testing
 | |
| 			case 'E':
 | |
| 				// extended regexp (what we support)
 | |
| 			case 'L':
 | |
| 				// literal
 | |
| 				pattern = QuoteMeta(pattern)
 | |
| 			}
 | |
| 
 | |
| 			for _, c := range flag {
 | |
| 				switch c {
 | |
| 				case 'i':
 | |
| 					syn |= syntax.FoldCase
 | |
| 				}
 | |
| 			}
 | |
| 
 | |
| 			re, err := compile(pattern, syn, true)
 | |
| 			if err != nil {
 | |
| 				if shouldCompile {
 | |
| 					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
 | |
| 				}
 | |
| 				continue Testing
 | |
| 			}
 | |
| 			if !shouldCompile {
 | |
| 				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
 | |
| 				continue Testing
 | |
| 			}
 | |
| 			match := re.MatchString(text)
 | |
| 			if match != shouldMatch {
 | |
| 				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
 | |
| 				continue Testing
 | |
| 			}
 | |
| 			have := re.FindStringSubmatchIndex(text)
 | |
| 			if (len(have) > 0) != match {
 | |
| 				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
 | |
| 				continue Testing
 | |
| 			}
 | |
| 			if len(have) > len(pos) {
 | |
| 				have = have[:len(pos)]
 | |
| 			}
 | |
| 			if !same(have, pos) {
 | |
| 				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
 | |
| 	//   Field 4: the test outcome. This is either one of the posix error
 | |
| 	//     codes (with REG_ omitted) or the match array, a list of (m,n)
 | |
| 	//     entries with m and n being first and last+1 positions in the
 | |
| 	//     field 3 string, or NULL if REG_NOSUB is in effect and success
 | |
| 	//     is expected. BADPAT is acceptable in place of any regcomp(3)
 | |
| 	//     error code. The match[] array is initialized to (-2,-2) before
 | |
| 	//     each test. All array elements from 0 to nmatch-1 must be specified
 | |
| 	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
 | |
| 	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
 | |
| 	//     matched (?{...}) expression, where x is the text enclosed by {...},
 | |
| 	//     o is the expression ordinal counting from 1, and n is the length of
 | |
| 	//     the unmatched portion of the subject string. If x starts with a
 | |
| 	//     number then that is the return value of re_execf(), otherwise 0 is
 | |
| 	//     returned.
 | |
| 	switch {
 | |
| 	case s == "":
 | |
| 		// Match with no position information.
 | |
| 		ok = true
 | |
| 		compiled = true
 | |
| 		matched = true
 | |
| 		return
 | |
| 	case s == "NOMATCH":
 | |
| 		// Match failure.
 | |
| 		ok = true
 | |
| 		compiled = true
 | |
| 		matched = false
 | |
| 		return
 | |
| 	case 'A' <= s[0] && s[0] <= 'Z':
 | |
| 		// All the other error codes are compile errors.
 | |
| 		ok = true
 | |
| 		compiled = false
 | |
| 		return
 | |
| 	}
 | |
| 	compiled = true
 | |
| 
 | |
| 	var x []int
 | |
| 	for s != "" {
 | |
| 		var end byte = ')'
 | |
| 		if len(x)%2 == 0 {
 | |
| 			if s[0] != '(' {
 | |
| 				ok = false
 | |
| 				return
 | |
| 			}
 | |
| 			s = s[1:]
 | |
| 			end = ','
 | |
| 		}
 | |
| 		i := 0
 | |
| 		for i < len(s) && s[i] != end {
 | |
| 			i++
 | |
| 		}
 | |
| 		if i == 0 || i == len(s) {
 | |
| 			ok = false
 | |
| 			return
 | |
| 		}
 | |
| 		var v = -1
 | |
| 		var err error
 | |
| 		if s[:i] != "?" {
 | |
| 			v, err = strconv.Atoi(s[:i])
 | |
| 			if err != nil {
 | |
| 				ok = false
 | |
| 				return
 | |
| 			}
 | |
| 		}
 | |
| 		x = append(x, v)
 | |
| 		s = s[i+1:]
 | |
| 	}
 | |
| 	if len(x)%2 != 0 {
 | |
| 		ok = false
 | |
| 		return
 | |
| 	}
 | |
| 	ok = true
 | |
| 	matched = true
 | |
| 	pos = x
 | |
| 	return
 | |
| }
 | |
| 
 | |
| var text []byte
 | |
| 
 | |
| func makeText(n int) []byte {
 | |
| 	if len(text) >= n {
 | |
| 		return text[:n]
 | |
| 	}
 | |
| 	text = make([]byte, n)
 | |
| 	x := ^uint32(0)
 | |
| 	for i := range text {
 | |
| 		x += x
 | |
| 		x ^= 1
 | |
| 		if int32(x) < 0 {
 | |
| 			x ^= 0x88888eef
 | |
| 		}
 | |
| 		if x%31 == 0 {
 | |
| 			text[i] = '\n'
 | |
| 		} else {
 | |
| 			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
 | |
| 		}
 | |
| 	}
 | |
| 	return text
 | |
| }
 | |
| 
 | |
| func benchmark(b *testing.B, re string, n int) {
 | |
| 	r := MustCompile(re)
 | |
| 	t := makeText(n)
 | |
| 	b.ResetTimer()
 | |
| 	b.SetBytes(int64(n))
 | |
| 	for i := 0; i < b.N; i++ {
 | |
| 		if r.Match(t) {
 | |
| 			b.Fatal("match!")
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| const (
 | |
| 	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 | |
| 	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
 | |
| 	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 | |
| 	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
 | |
| 	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
 | |
| 		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
 | |
| )
 | |
| 
 | |
| func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
 | |
| func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
 | |
| func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
 | |
| func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
 | |
| func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
 | |
| func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
 | |
| func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
 | |
| func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
 | |
| func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
 | |
| func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
 | |
| func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
 | |
| func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
 | |
| func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
 | |
| func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
 | |
| func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
 | |
| func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
 | |
| func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
 | |
| func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
 | |
| func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
 | |
| func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
 | |
| 
 | |
| func TestLongest(t *testing.T) {
 | |
| 	re, err := Compile(`a(|b)`)
 | |
| 	if err != nil {
 | |
| 		t.Fatal(err)
 | |
| 	}
 | |
| 	if g, w := re.FindString("ab"), "a"; g != w {
 | |
| 		t.Errorf("first match was %q, want %q", g, w)
 | |
| 	}
 | |
| 	re.Longest()
 | |
| 	if g, w := re.FindString("ab"), "ab"; g != w {
 | |
| 		t.Errorf("longest match was %q, want %q", g, w)
 | |
| 	}
 | |
| }
 |