mirror of git://gcc.gnu.org/git/gcc.git
				
				
				
			
		
			
				
	
	
		
			321 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			321 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			Go
		
	
	
	
| // Copyright 2011 The Go Authors. All rights reserved.
 | |
| // Use of this source code is governed by a BSD-style
 | |
| // license that can be found in the LICENSE file.
 | |
| 
 | |
| package syntax
 | |
| 
 | |
| // Note to implementers:
 | |
| // In this package, re is always a *Regexp and r is always a rune.
 | |
| 
 | |
| import (
 | |
| 	"strconv"
 | |
| 	"strings"
 | |
| 	"unicode"
 | |
| )
 | |
| 
 | |
| // A Regexp is a node in a regular expression syntax tree.
 | |
| type Regexp struct {
 | |
| 	Op       Op // operator
 | |
| 	Flags    Flags
 | |
| 	Sub      []*Regexp  // subexpressions, if any
 | |
| 	Sub0     [1]*Regexp // storage for short Sub
 | |
| 	Rune     []rune     // matched runes, for OpLiteral, OpCharClass
 | |
| 	Rune0    [2]rune    // storage for short Rune
 | |
| 	Min, Max int        // min, max for OpRepeat
 | |
| 	Cap      int        // capturing index, for OpCapture
 | |
| 	Name     string     // capturing name, for OpCapture
 | |
| }
 | |
| 
 | |
| //go:generate stringer -type Op -trimprefix Op
 | |
| 
 | |
| // An Op is a single regular expression operator.
 | |
| type Op uint8
 | |
| 
 | |
| // Operators are listed in precedence order, tightest binding to weakest.
 | |
| // Character class operators are listed simplest to most complex
 | |
| // (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
 | |
| 
 | |
| const (
 | |
| 	OpNoMatch        Op = 1 + iota // matches no strings
 | |
| 	OpEmptyMatch                   // matches empty string
 | |
| 	OpLiteral                      // matches Runes sequence
 | |
| 	OpCharClass                    // matches Runes interpreted as range pair list
 | |
| 	OpAnyCharNotNL                 // matches any character except newline
 | |
| 	OpAnyChar                      // matches any character
 | |
| 	OpBeginLine                    // matches empty string at beginning of line
 | |
| 	OpEndLine                      // matches empty string at end of line
 | |
| 	OpBeginText                    // matches empty string at beginning of text
 | |
| 	OpEndText                      // matches empty string at end of text
 | |
| 	OpWordBoundary                 // matches word boundary `\b`
 | |
| 	OpNoWordBoundary               // matches word non-boundary `\B`
 | |
| 	OpCapture                      // capturing subexpression with index Cap, optional name Name
 | |
| 	OpStar                         // matches Sub[0] zero or more times
 | |
| 	OpPlus                         // matches Sub[0] one or more times
 | |
| 	OpQuest                        // matches Sub[0] zero or one times
 | |
| 	OpRepeat                       // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
 | |
| 	OpConcat                       // matches concatenation of Subs
 | |
| 	OpAlternate                    // matches alternation of Subs
 | |
| )
 | |
| 
 | |
| const opPseudo Op = 128 // where pseudo-ops start
 | |
| 
 | |
| // Equal returns true if x and y have identical structure.
 | |
| func (x *Regexp) Equal(y *Regexp) bool {
 | |
| 	if x == nil || y == nil {
 | |
| 		return x == y
 | |
| 	}
 | |
| 	if x.Op != y.Op {
 | |
| 		return false
 | |
| 	}
 | |
| 	switch x.Op {
 | |
| 	case OpEndText:
 | |
| 		// The parse flags remember whether this is \z or \Z.
 | |
| 		if x.Flags&WasDollar != y.Flags&WasDollar {
 | |
| 			return false
 | |
| 		}
 | |
| 
 | |
| 	case OpLiteral, OpCharClass:
 | |
| 		if len(x.Rune) != len(y.Rune) {
 | |
| 			return false
 | |
| 		}
 | |
| 		for i, r := range x.Rune {
 | |
| 			if r != y.Rune[i] {
 | |
| 				return false
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	case OpAlternate, OpConcat:
 | |
| 		if len(x.Sub) != len(y.Sub) {
 | |
| 			return false
 | |
| 		}
 | |
| 		for i, sub := range x.Sub {
 | |
| 			if !sub.Equal(y.Sub[i]) {
 | |
| 				return false
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	case OpStar, OpPlus, OpQuest:
 | |
| 		if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
 | |
| 			return false
 | |
| 		}
 | |
| 
 | |
| 	case OpRepeat:
 | |
| 		if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
 | |
| 			return false
 | |
| 		}
 | |
| 
 | |
| 	case OpCapture:
 | |
| 		if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
 | |
| 			return false
 | |
| 		}
 | |
| 	}
 | |
| 	return true
 | |
| }
 | |
| 
 | |
| // writeRegexp writes the Perl syntax for the regular expression re to b.
 | |
| func writeRegexp(b *strings.Builder, re *Regexp) {
 | |
| 	switch re.Op {
 | |
| 	default:
 | |
| 		b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
 | |
| 	case OpNoMatch:
 | |
| 		b.WriteString(`[^\x00-\x{10FFFF}]`)
 | |
| 	case OpEmptyMatch:
 | |
| 		b.WriteString(`(?:)`)
 | |
| 	case OpLiteral:
 | |
| 		if re.Flags&FoldCase != 0 {
 | |
| 			b.WriteString(`(?i:`)
 | |
| 		}
 | |
| 		for _, r := range re.Rune {
 | |
| 			escape(b, r, false)
 | |
| 		}
 | |
| 		if re.Flags&FoldCase != 0 {
 | |
| 			b.WriteString(`)`)
 | |
| 		}
 | |
| 	case OpCharClass:
 | |
| 		if len(re.Rune)%2 != 0 {
 | |
| 			b.WriteString(`[invalid char class]`)
 | |
| 			break
 | |
| 		}
 | |
| 		b.WriteRune('[')
 | |
| 		if len(re.Rune) == 0 {
 | |
| 			b.WriteString(`^\x00-\x{10FFFF}`)
 | |
| 		} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
 | |
| 			// Contains 0 and MaxRune. Probably a negated class.
 | |
| 			// Print the gaps.
 | |
| 			b.WriteRune('^')
 | |
| 			for i := 1; i < len(re.Rune)-1; i += 2 {
 | |
| 				lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
 | |
| 				escape(b, lo, lo == '-')
 | |
| 				if lo != hi {
 | |
| 					b.WriteRune('-')
 | |
| 					escape(b, hi, hi == '-')
 | |
| 				}
 | |
| 			}
 | |
| 		} else {
 | |
| 			for i := 0; i < len(re.Rune); i += 2 {
 | |
| 				lo, hi := re.Rune[i], re.Rune[i+1]
 | |
| 				escape(b, lo, lo == '-')
 | |
| 				if lo != hi {
 | |
| 					b.WriteRune('-')
 | |
| 					escape(b, hi, hi == '-')
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 		b.WriteRune(']')
 | |
| 	case OpAnyCharNotNL:
 | |
| 		b.WriteString(`(?-s:.)`)
 | |
| 	case OpAnyChar:
 | |
| 		b.WriteString(`(?s:.)`)
 | |
| 	case OpBeginLine:
 | |
| 		b.WriteString(`(?m:^)`)
 | |
| 	case OpEndLine:
 | |
| 		b.WriteString(`(?m:$)`)
 | |
| 	case OpBeginText:
 | |
| 		b.WriteString(`\A`)
 | |
| 	case OpEndText:
 | |
| 		if re.Flags&WasDollar != 0 {
 | |
| 			b.WriteString(`(?-m:$)`)
 | |
| 		} else {
 | |
| 			b.WriteString(`\z`)
 | |
| 		}
 | |
| 	case OpWordBoundary:
 | |
| 		b.WriteString(`\b`)
 | |
| 	case OpNoWordBoundary:
 | |
| 		b.WriteString(`\B`)
 | |
| 	case OpCapture:
 | |
| 		if re.Name != "" {
 | |
| 			b.WriteString(`(?P<`)
 | |
| 			b.WriteString(re.Name)
 | |
| 			b.WriteRune('>')
 | |
| 		} else {
 | |
| 			b.WriteRune('(')
 | |
| 		}
 | |
| 		if re.Sub[0].Op != OpEmptyMatch {
 | |
| 			writeRegexp(b, re.Sub[0])
 | |
| 		}
 | |
| 		b.WriteRune(')')
 | |
| 	case OpStar, OpPlus, OpQuest, OpRepeat:
 | |
| 		if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
 | |
| 			b.WriteString(`(?:`)
 | |
| 			writeRegexp(b, sub)
 | |
| 			b.WriteString(`)`)
 | |
| 		} else {
 | |
| 			writeRegexp(b, sub)
 | |
| 		}
 | |
| 		switch re.Op {
 | |
| 		case OpStar:
 | |
| 			b.WriteRune('*')
 | |
| 		case OpPlus:
 | |
| 			b.WriteRune('+')
 | |
| 		case OpQuest:
 | |
| 			b.WriteRune('?')
 | |
| 		case OpRepeat:
 | |
| 			b.WriteRune('{')
 | |
| 			b.WriteString(strconv.Itoa(re.Min))
 | |
| 			if re.Max != re.Min {
 | |
| 				b.WriteRune(',')
 | |
| 				if re.Max >= 0 {
 | |
| 					b.WriteString(strconv.Itoa(re.Max))
 | |
| 				}
 | |
| 			}
 | |
| 			b.WriteRune('}')
 | |
| 		}
 | |
| 		if re.Flags&NonGreedy != 0 {
 | |
| 			b.WriteRune('?')
 | |
| 		}
 | |
| 	case OpConcat:
 | |
| 		for _, sub := range re.Sub {
 | |
| 			if sub.Op == OpAlternate {
 | |
| 				b.WriteString(`(?:`)
 | |
| 				writeRegexp(b, sub)
 | |
| 				b.WriteString(`)`)
 | |
| 			} else {
 | |
| 				writeRegexp(b, sub)
 | |
| 			}
 | |
| 		}
 | |
| 	case OpAlternate:
 | |
| 		for i, sub := range re.Sub {
 | |
| 			if i > 0 {
 | |
| 				b.WriteRune('|')
 | |
| 			}
 | |
| 			writeRegexp(b, sub)
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func (re *Regexp) String() string {
 | |
| 	var b strings.Builder
 | |
| 	writeRegexp(&b, re)
 | |
| 	return b.String()
 | |
| }
 | |
| 
 | |
| const meta = `\.+*?()|[]{}^$`
 | |
| 
 | |
| func escape(b *strings.Builder, r rune, force bool) {
 | |
| 	if unicode.IsPrint(r) {
 | |
| 		if strings.ContainsRune(meta, r) || force {
 | |
| 			b.WriteRune('\\')
 | |
| 		}
 | |
| 		b.WriteRune(r)
 | |
| 		return
 | |
| 	}
 | |
| 
 | |
| 	switch r {
 | |
| 	case '\a':
 | |
| 		b.WriteString(`\a`)
 | |
| 	case '\f':
 | |
| 		b.WriteString(`\f`)
 | |
| 	case '\n':
 | |
| 		b.WriteString(`\n`)
 | |
| 	case '\r':
 | |
| 		b.WriteString(`\r`)
 | |
| 	case '\t':
 | |
| 		b.WriteString(`\t`)
 | |
| 	case '\v':
 | |
| 		b.WriteString(`\v`)
 | |
| 	default:
 | |
| 		if r < 0x100 {
 | |
| 			b.WriteString(`\x`)
 | |
| 			s := strconv.FormatInt(int64(r), 16)
 | |
| 			if len(s) == 1 {
 | |
| 				b.WriteRune('0')
 | |
| 			}
 | |
| 			b.WriteString(s)
 | |
| 			break
 | |
| 		}
 | |
| 		b.WriteString(`\x{`)
 | |
| 		b.WriteString(strconv.FormatInt(int64(r), 16))
 | |
| 		b.WriteString(`}`)
 | |
| 	}
 | |
| }
 | |
| 
 | |
| // MaxCap walks the regexp to find the maximum capture index.
 | |
| func (re *Regexp) MaxCap() int {
 | |
| 	m := 0
 | |
| 	if re.Op == OpCapture {
 | |
| 		m = re.Cap
 | |
| 	}
 | |
| 	for _, sub := range re.Sub {
 | |
| 		if n := sub.MaxCap(); m < n {
 | |
| 			m = n
 | |
| 		}
 | |
| 	}
 | |
| 	return m
 | |
| }
 | |
| 
 | |
| // CapNames walks the regexp to find the names of capturing groups.
 | |
| func (re *Regexp) CapNames() []string {
 | |
| 	names := make([]string, re.MaxCap()+1)
 | |
| 	re.capNames(names)
 | |
| 	return names
 | |
| }
 | |
| 
 | |
| func (re *Regexp) capNames(names []string) {
 | |
| 	if re.Op == OpCapture {
 | |
| 		names[re.Cap] = re.Name
 | |
| 	}
 | |
| 	for _, sub := range re.Sub {
 | |
| 		sub.capNames(names)
 | |
| 	}
 | |
| }
 |