match.go

  1// Package match provides a simple pattern matcher with unicode support.
  2package match
  3
  4import (
  5	"unicode/utf8"
  6)
  7
  8// Match returns true if str matches pattern. This is a very
  9// simple wildcard match where '*' matches on any number characters
 10// and '?' matches on any one character.
 11//
 12// pattern:
 13// 	{ term }
 14// term:
 15// 	'*'         matches any sequence of non-Separator characters
 16// 	'?'         matches any single non-Separator character
 17// 	c           matches character c (c != '*', '?', '\\')
 18// 	'\\' c      matches character c
 19//
 20func Match(str, pattern string) bool {
 21	if pattern == "*" {
 22		return true
 23	}
 24	return match(str, pattern, 0, nil, -1) == rMatch
 25}
 26
 27// MatchLimit is the same as Match but will limit the complexity of the match
 28// operation. This is to avoid long running matches, specifically to avoid ReDos
 29// attacks from arbritary inputs.
 30//
 31// How it works:
 32// The underlying match routine is recursive and may call itself when it
 33// encounters a sandwiched wildcard pattern, such as: `user:*:name`.
 34// Everytime it calls itself a counter is incremented.
 35// The operation is stopped when counter > maxcomp*len(str).
 36func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
 37	if pattern == "*" {
 38		return true, false
 39	}
 40	counter := 0
 41	r := match(str, pattern, len(str), &counter, maxcomp)
 42	if r == rStop {
 43		return false, true
 44	}
 45	return r == rMatch, false
 46}
 47
 48type result int
 49
 50const (
 51	rNoMatch result = iota
 52	rMatch
 53	rStop
 54)
 55
 56func match(str, pat string, slen int, counter *int, maxcomp int) result {
 57	// check complexity limit
 58	if maxcomp > -1 {
 59		if *counter > slen*maxcomp {
 60			return rStop
 61		}
 62		*counter++
 63	}
 64
 65	for len(pat) > 0 {
 66		var wild bool
 67		pc, ps := rune(pat[0]), 1
 68		if pc > 0x7f {
 69			pc, ps = utf8.DecodeRuneInString(pat)
 70		}
 71		var sc rune
 72		var ss int
 73		if len(str) > 0 {
 74			sc, ss = rune(str[0]), 1
 75			if sc > 0x7f {
 76				sc, ss = utf8.DecodeRuneInString(str)
 77			}
 78		}
 79		switch pc {
 80		case '?':
 81			if ss == 0 {
 82				return rNoMatch
 83			}
 84		case '*':
 85			// Ignore repeating stars.
 86			for len(pat) > 1 && pat[1] == '*' {
 87				pat = pat[1:]
 88			}
 89
 90			// If this star is the last character then it must be a match.
 91			if len(pat) == 1 {
 92				return rMatch
 93			}
 94
 95			// Match and trim any non-wildcard suffix characters.
 96			var ok bool
 97			str, pat, ok = matchTrimSuffix(str, pat)
 98			if !ok {
 99				return rNoMatch
100			}
101
102			// Check for single star again.
103			if len(pat) == 1 {
104				return rMatch
105			}
106
107			// Perform recursive wildcard search.
108			r := match(str, pat[1:], slen, counter, maxcomp)
109			if r != rNoMatch {
110				return r
111			}
112			if len(str) == 0 {
113				return rNoMatch
114			}
115			wild = true
116		default:
117			if ss == 0 {
118				return rNoMatch
119			}
120			if pc == '\\' {
121				pat = pat[ps:]
122				pc, ps = utf8.DecodeRuneInString(pat)
123				if ps == 0 {
124					return rNoMatch
125				}
126			}
127			if sc != pc {
128				return rNoMatch
129			}
130		}
131		str = str[ss:]
132		if !wild {
133			pat = pat[ps:]
134		}
135	}
136	if len(str) == 0 {
137		return rMatch
138	}
139	return rNoMatch
140}
141
142// matchTrimSuffix matches and trims any non-wildcard suffix characters.
143// Returns the trimed string and pattern.
144//
145// This is called because the pattern contains extra data after the wildcard
146// star. Here we compare any suffix characters in the pattern to the suffix of
147// the target string. Basically a reverse match that stops when a wildcard
148// character is reached. This is a little trickier than a forward match because
149// we need to evaluate an escaped character in reverse.
150//
151// Any matched characters will be trimmed from both the target
152// string and the pattern.
153func matchTrimSuffix(str, pat string) (string, string, bool) {
154	// It's expected that the pattern has at least two bytes and the first byte
155	// is a wildcard star '*'
156	match := true
157	for len(str) > 0 && len(pat) > 1 {
158		pc, ps := utf8.DecodeLastRuneInString(pat)
159		var esc bool
160		for i := 0; ; i++ {
161			if pat[len(pat)-ps-i-1] != '\\' {
162				if i&1 == 1 {
163					esc = true
164					ps++
165				}
166				break
167			}
168		}
169		if pc == '*' && !esc {
170			match = true
171			break
172		}
173		sc, ss := utf8.DecodeLastRuneInString(str)
174		if !((pc == '?' && !esc) || pc == sc) {
175			match = false
176			break
177		}
178		str = str[:len(str)-ss]
179		pat = pat[:len(pat)-ps]
180	}
181	return str, pat, match
182}
183
184var maxRuneBytes = [...]byte{244, 143, 191, 191}
185
186// Allowable parses the pattern and determines the minimum and maximum allowable
187// values that the pattern can represent.
188// When the max cannot be determined, 'true' will be returned
189// for infinite.
190func Allowable(pattern string) (min, max string) {
191	if pattern == "" || pattern[0] == '*' {
192		return "", ""
193	}
194
195	minb := make([]byte, 0, len(pattern))
196	maxb := make([]byte, 0, len(pattern))
197	var wild bool
198	for i := 0; i < len(pattern); i++ {
199		if pattern[i] == '*' {
200			wild = true
201			break
202		}
203		if pattern[i] == '?' {
204			minb = append(minb, 0)
205			maxb = append(maxb, maxRuneBytes[:]...)
206		} else {
207			minb = append(minb, pattern[i])
208			maxb = append(maxb, pattern[i])
209		}
210	}
211	if wild {
212		r, n := utf8.DecodeLastRune(maxb)
213		if r != utf8.RuneError {
214			if r < utf8.MaxRune {
215				r++
216				if r > 0x7f {
217					b := make([]byte, 4)
218					nn := utf8.EncodeRune(b, r)
219					maxb = append(maxb[:len(maxb)-n], b[:nn]...)
220				} else {
221					maxb = append(maxb[:len(maxb)-n], byte(r))
222				}
223			}
224		}
225	}
226	return string(minb), string(maxb)
227}
228
229// IsPattern returns true if the string is a pattern.
230func IsPattern(str string) bool {
231	for i := 0; i < len(str); i++ {
232		if str[i] == '*' || str[i] == '?' {
233			return true
234		}
235	}
236	return false
237}