match.go

  1package doublestar
  2
  3import (
  4	"path/filepath"
  5	"unicode/utf8"
  6)
  7
  8// Match reports whether name matches the shell pattern.
  9// The pattern syntax is:
 10//
 11//  pattern:
 12//    { term }
 13//  term:
 14//    '*'         matches any sequence of non-path-separators
 15//    '/**/'      matches zero or more directories
 16//    '?'         matches any single non-path-separator character
 17//    '[' [ '^' '!' ] { character-range } ']'
 18//                character class (must be non-empty)
 19//                starting with `^` or `!` negates the class
 20//    '{' { term } [ ',' { term } ... ] '}'
 21//                alternatives
 22//    c           matches character c (c != '*', '?', '\\', '[')
 23//    '\\' c      matches character c
 24//
 25//  character-range:
 26//    c           matches character c (c != '\\', '-', ']')
 27//    '\\' c      matches character c
 28//    lo '-' hi   matches character c for lo <= c <= hi
 29//
 30// Match returns true if `name` matches the file name `pattern`. `name` and
 31// `pattern` are split on forward slash (`/`) characters and may be relative or
 32// absolute.
 33//
 34// Match requires pattern to match all of name, not just a substring.
 35// The only possible returned error is ErrBadPattern, when pattern
 36// is malformed.
 37//
 38// A doublestar (`**`) should appear surrounded by path separators such as
 39// `/**/`.  A mid-pattern doublestar (`**`) behaves like bash's globstar
 40// option: a pattern such as `path/to/**.txt` would return the same results as
 41// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
 42//
 43// Note: this is meant as a drop-in replacement for path.Match() which
 44// always uses '/' as the path separator. If you want to support systems
 45// which use a different path separator (such as Windows), what you want
 46// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
 47// pattern and name and then use this function.
 48//
 49// Note: users should _not_ count on the returned error,
 50// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
 51//
 52func Match(pattern, name string) (bool, error) {
 53	return matchWithSeparator(pattern, name, '/', true)
 54}
 55
 56// MatchUnvalidated can provide a small performance improvement if you don't
 57// care about whether or not the pattern is valid (perhaps because you already
 58// ran `ValidatePattern`). Note that there's really only one case where this
 59// performance improvement is realized: when pattern matching reaches the end
 60// of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
 61// "a")`.
 62func MatchUnvalidated(pattern, name string) bool {
 63	matched, _ := matchWithSeparator(pattern, name, '/', false)
 64	return matched
 65}
 66
 67// PathMatch returns true if `name` matches the file name `pattern`. The
 68// difference between Match and PathMatch is that PathMatch will automatically
 69// use your system's path separator to split `name` and `pattern`. On systems
 70// where the path separator is `'\'`, escaping will be disabled.
 71//
 72// Note: this is meant as a drop-in replacement for filepath.Match(). It
 73// assumes that both `pattern` and `name` are using the system's path
 74// separator. If you can't be sure of that, use filepath.ToSlash() on both
 75// `pattern` and `name`, and then use the Match() function instead.
 76//
 77func PathMatch(pattern, name string) (bool, error) {
 78	return matchWithSeparator(pattern, name, filepath.Separator, true)
 79}
 80
 81// PathMatchUnvalidated can provide a small performance improvement if you
 82// don't care about whether or not the pattern is valid (perhaps because you
 83// already ran `ValidatePattern`). Note that there's really only one case where
 84// this performance improvement is realized: when pattern matching reaches the
 85// end of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
 86// "a")`.
 87func PathMatchUnvalidated(pattern, name string) bool {
 88	matched, _ := matchWithSeparator(pattern, name, filepath.Separator, false)
 89	return matched
 90}
 91
 92func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
 93	return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
 94}
 95
 96func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
 97	patLen := len(pattern)
 98	nameLen := len(name)
 99	startOfSegment := true
100MATCH:
101	for nameIdx < nameLen {
102		if patIdx < patLen {
103			switch pattern[patIdx] {
104			case '*':
105				if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
106					// doublestar - must begin with a path separator, otherwise we'll
107					// treat it like a single star like bash
108					patIdx++
109					if startOfSegment {
110						if patIdx >= patLen {
111							// pattern ends in `/**`: return true
112							return true, nil
113						}
114
115						// doublestar must also end with a path separator, otherwise we're
116						// just going to treat the doublestar as a single star like bash
117						patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
118						if patRune == separator {
119							patIdx += patRuneLen
120
121							doublestarPatternBacktrack = patIdx
122							doublestarNameBacktrack = nameIdx
123							starPatternBacktrack = -1
124							starNameBacktrack = -1
125							continue
126						}
127					}
128				}
129				startOfSegment = false
130
131				starPatternBacktrack = patIdx
132				starNameBacktrack = nameIdx
133				continue
134
135			case '?':
136				startOfSegment = false
137				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
138				if nameRune == separator {
139					// `?` cannot match the separator
140					break
141				}
142
143				patIdx++
144				nameIdx += nameRuneLen
145				continue
146
147			case '[':
148				startOfSegment = false
149				if patIdx++; patIdx >= patLen {
150					// class didn't end
151					return false, ErrBadPattern
152				}
153				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
154
155				matched := false
156				negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
157				if negate {
158					patIdx++
159				}
160
161				if patIdx >= patLen || pattern[patIdx] == ']' {
162					// class didn't end or empty character class
163					return false, ErrBadPattern
164				}
165
166				last := utf8.MaxRune
167				for patIdx < patLen && pattern[patIdx] != ']' {
168					patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
169					patIdx += patRuneLen
170
171					// match a range
172					if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
173						if pattern[patIdx] == '\\' {
174							// next character is escaped
175							patIdx++
176						}
177						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
178						patIdx += patRuneLen
179
180						if last <= nameRune && nameRune <= patRune {
181							matched = true
182							break
183						}
184
185						// didn't match range - reset `last`
186						last = utf8.MaxRune
187						continue
188					}
189
190					// not a range - check if the next rune is escaped
191					if patRune == '\\' {
192						patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
193						patIdx += patRuneLen
194					}
195
196					// check if the rune matches
197					if patRune == nameRune {
198						matched = true
199						break
200					}
201
202					// no matches yet
203					last = patRune
204				}
205
206				if matched == negate {
207					// failed to match - if we reached the end of the pattern, that means
208					// we never found a closing `]`
209					if patIdx >= patLen {
210						return false, ErrBadPattern
211					}
212					break
213				}
214
215				closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
216				if closingIdx == -1 {
217					// no closing `]`
218					return false, ErrBadPattern
219				}
220
221				patIdx += closingIdx + 1
222				nameIdx += nameRuneLen
223				continue
224
225			case '{':
226				startOfSegment = false
227				beforeIdx := patIdx
228				patIdx++
229				closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
230				if closingIdx == -1 {
231					// no closing `}`
232					return false, ErrBadPattern
233				}
234				closingIdx += patIdx
235
236				for {
237					commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
238					if commaIdx == -1 {
239						break
240					}
241					commaIdx += patIdx
242
243					result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
244					if result || err != nil {
245						return result, err
246					}
247
248					patIdx = commaIdx + 1
249				}
250				return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
251
252			case '\\':
253				if separator != '\\' {
254					// next rune is "escaped" in the pattern - literal match
255					if patIdx++; patIdx >= patLen {
256						// pattern ended
257						return false, ErrBadPattern
258					}
259				}
260				fallthrough
261
262			default:
263				patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
264				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
265				if patRune != nameRune {
266					if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
267						// if this rune was meant to be escaped, we need to move patIdx
268						// back to the backslash before backtracking or validating below
269						patIdx--
270					}
271					break
272				}
273
274				patIdx += patRuneLen
275				nameIdx += nameRuneLen
276				startOfSegment = patRune == separator
277				continue
278			}
279		}
280
281		if starPatternBacktrack >= 0 {
282			// `*` backtrack, but only if the `name` rune isn't the separator
283			nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
284			if nameRune != separator {
285				starNameBacktrack += nameRuneLen
286				patIdx = starPatternBacktrack
287				nameIdx = starNameBacktrack
288				startOfSegment = false
289				continue
290			}
291		}
292
293		if doublestarPatternBacktrack >= 0 {
294			// `**` backtrack, advance `name` past next separator
295			nameIdx = doublestarNameBacktrack
296			for nameIdx < nameLen {
297				nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
298				nameIdx += nameRuneLen
299				if nameRune == separator {
300					doublestarNameBacktrack = nameIdx
301					patIdx = doublestarPatternBacktrack
302					startOfSegment = true
303					continue MATCH
304				}
305			}
306		}
307
308		if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
309			return false, ErrBadPattern
310		}
311		return false, nil
312	}
313
314	if nameIdx < nameLen {
315		// we reached the end of `pattern` before the end of `name`
316		return false, nil
317	}
318
319	// we've reached the end of `name`; we've successfully matched if we've also
320	// reached the end of `pattern`, or if the rest of `pattern` can match a
321	// zero-length string
322	return isZeroLengthPattern(pattern[patIdx:], separator, validate)
323}
324
325func isZeroLengthPattern(pattern string, separator rune, validate bool) (ret bool, err error) {
326	// `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
327	// *should* match `path/to/a` because `a` might be a directory
328	if pattern == "" ||
329		pattern == "*" ||
330		pattern == "**" ||
331		pattern == string(separator)+"**" ||
332		pattern == "**"+string(separator) ||
333		pattern == string(separator)+"**"+string(separator) {
334		return true, nil
335	}
336
337	if pattern[0] == '{' {
338		closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
339		if closingIdx == -1 {
340			// no closing '}'
341			return false, ErrBadPattern
342		}
343		closingIdx += 1
344
345		patIdx := 1
346		for {
347			commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
348			if commaIdx == -1 {
349				break
350			}
351			commaIdx += patIdx
352
353			ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator, validate)
354			if ret || err != nil {
355				return
356			}
357
358			patIdx = commaIdx + 1
359		}
360		return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator, validate)
361	}
362
363	// no luck - validate the rest of the pattern
364	if validate && !doValidatePattern(pattern, separator) {
365		return false, ErrBadPattern
366	}
367	return false, nil
368}
369
370// Finds the index of the first unescaped byte `c`, or negative 1.
371func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
372	l := len(s)
373	for i := 0; i < l; i++ {
374		if allowEscaping && s[i] == '\\' {
375			// skip next byte
376			i++
377		} else if s[i] == c {
378			return i
379		}
380	}
381	return -1
382}
383
384// Assuming the byte before the beginning of `s` is an opening `{`, this
385// function will find the index of the matching `}`. That is, it'll skip over
386// any nested `{}` and account for escaping
387func indexMatchedClosingAlt(s string, allowEscaping bool) int {
388	alts := 1
389	l := len(s)
390	for i := 0; i < l; i++ {
391		if allowEscaping && s[i] == '\\' {
392			// skip next byte
393			i++
394		} else if s[i] == '{' {
395			alts++
396		} else if s[i] == '}' {
397			if alts--; alts == 0 {
398				return i
399			}
400		}
401	}
402	return -1
403}