1package doublestar
2
3import (
4 "path/filepath"
5 "unicode/utf8"
6)
7
8// Match reports whether name matches the shell pattern.
9// The pattern syntax is:
10//
11// pattern:
12// { term }
13// term:
14// '*' matches any sequence of non-path-separators
15// '/**/' matches zero or more directories
16// '?' matches any single non-path-separator character
17// '[' [ '^' '!' ] { character-range } ']'
18// character class (must be non-empty)
19// starting with `^` or `!` negates the class
20// '{' { term } [ ',' { term } ... ] '}'
21// alternatives
22// c matches character c (c != '*', '?', '\\', '[')
23// '\\' c matches character c
24//
25// character-range:
26// c matches character c (c != '\\', '-', ']')
27// '\\' c matches character c
28// lo '-' hi matches character c for lo <= c <= hi
29//
30// Match returns true if `name` matches the file name `pattern`. `name` and
31// `pattern` are split on forward slash (`/`) characters and may be relative or
32// absolute.
33//
34// Match requires pattern to match all of name, not just a substring.
35// The only possible returned error is ErrBadPattern, when pattern
36// is malformed.
37//
38// A doublestar (`**`) should appear surrounded by path separators such as
39// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar
40// option: a pattern such as `path/to/**.txt` would return the same results as
41// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
42//
43// Note: this is meant as a drop-in replacement for path.Match() which
44// always uses '/' as the path separator. If you want to support systems
45// which use a different path separator (such as Windows), what you want
46// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
47// pattern and name and then use this function.
48//
49// Note: users should _not_ count on the returned error,
50// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
51//
52func Match(pattern, name string) (bool, error) {
53 return matchWithSeparator(pattern, name, '/', true)
54}
55
56// MatchUnvalidated can provide a small performance improvement if you don't
57// care about whether or not the pattern is valid (perhaps because you already
58// ran `ValidatePattern`). Note that there's really only one case where this
59// performance improvement is realized: when pattern matching reaches the end
60// of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
61// "a")`.
62func MatchUnvalidated(pattern, name string) bool {
63 matched, _ := matchWithSeparator(pattern, name, '/', false)
64 return matched
65}
66
67// PathMatch returns true if `name` matches the file name `pattern`. The
68// difference between Match and PathMatch is that PathMatch will automatically
69// use your system's path separator to split `name` and `pattern`. On systems
70// where the path separator is `'\'`, escaping will be disabled.
71//
72// Note: this is meant as a drop-in replacement for filepath.Match(). It
73// assumes that both `pattern` and `name` are using the system's path
74// separator. If you can't be sure of that, use filepath.ToSlash() on both
75// `pattern` and `name`, and then use the Match() function instead.
76//
77func PathMatch(pattern, name string) (bool, error) {
78 return matchWithSeparator(pattern, name, filepath.Separator, true)
79}
80
81// PathMatchUnvalidated can provide a small performance improvement if you
82// don't care about whether or not the pattern is valid (perhaps because you
83// already ran `ValidatePattern`). Note that there's really only one case where
84// this performance improvement is realized: when pattern matching reaches the
85// end of `name` before reaching the end of `pattern`, such as `Match("a/b/c",
86// "a")`.
87func PathMatchUnvalidated(pattern, name string) bool {
88 matched, _ := matchWithSeparator(pattern, name, filepath.Separator, false)
89 return matched
90}
91
92func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
93 return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
94}
95
96func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
97 patLen := len(pattern)
98 nameLen := len(name)
99 startOfSegment := true
100MATCH:
101 for nameIdx < nameLen {
102 if patIdx < patLen {
103 switch pattern[patIdx] {
104 case '*':
105 if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
106 // doublestar - must begin with a path separator, otherwise we'll
107 // treat it like a single star like bash
108 patIdx++
109 if startOfSegment {
110 if patIdx >= patLen {
111 // pattern ends in `/**`: return true
112 return true, nil
113 }
114
115 // doublestar must also end with a path separator, otherwise we're
116 // just going to treat the doublestar as a single star like bash
117 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
118 if patRune == separator {
119 patIdx += patRuneLen
120
121 doublestarPatternBacktrack = patIdx
122 doublestarNameBacktrack = nameIdx
123 starPatternBacktrack = -1
124 starNameBacktrack = -1
125 continue
126 }
127 }
128 }
129 startOfSegment = false
130
131 starPatternBacktrack = patIdx
132 starNameBacktrack = nameIdx
133 continue
134
135 case '?':
136 startOfSegment = false
137 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
138 if nameRune == separator {
139 // `?` cannot match the separator
140 break
141 }
142
143 patIdx++
144 nameIdx += nameRuneLen
145 continue
146
147 case '[':
148 startOfSegment = false
149 if patIdx++; patIdx >= patLen {
150 // class didn't end
151 return false, ErrBadPattern
152 }
153 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
154
155 matched := false
156 negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
157 if negate {
158 patIdx++
159 }
160
161 if patIdx >= patLen || pattern[patIdx] == ']' {
162 // class didn't end or empty character class
163 return false, ErrBadPattern
164 }
165
166 last := utf8.MaxRune
167 for patIdx < patLen && pattern[patIdx] != ']' {
168 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
169 patIdx += patRuneLen
170
171 // match a range
172 if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
173 if pattern[patIdx] == '\\' {
174 // next character is escaped
175 patIdx++
176 }
177 patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
178 patIdx += patRuneLen
179
180 if last <= nameRune && nameRune <= patRune {
181 matched = true
182 break
183 }
184
185 // didn't match range - reset `last`
186 last = utf8.MaxRune
187 continue
188 }
189
190 // not a range - check if the next rune is escaped
191 if patRune == '\\' {
192 patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
193 patIdx += patRuneLen
194 }
195
196 // check if the rune matches
197 if patRune == nameRune {
198 matched = true
199 break
200 }
201
202 // no matches yet
203 last = patRune
204 }
205
206 if matched == negate {
207 // failed to match - if we reached the end of the pattern, that means
208 // we never found a closing `]`
209 if patIdx >= patLen {
210 return false, ErrBadPattern
211 }
212 break
213 }
214
215 closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
216 if closingIdx == -1 {
217 // no closing `]`
218 return false, ErrBadPattern
219 }
220
221 patIdx += closingIdx + 1
222 nameIdx += nameRuneLen
223 continue
224
225 case '{':
226 startOfSegment = false
227 beforeIdx := patIdx
228 patIdx++
229 closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
230 if closingIdx == -1 {
231 // no closing `}`
232 return false, ErrBadPattern
233 }
234 closingIdx += patIdx
235
236 for {
237 commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
238 if commaIdx == -1 {
239 break
240 }
241 commaIdx += patIdx
242
243 result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
244 if result || err != nil {
245 return result, err
246 }
247
248 patIdx = commaIdx + 1
249 }
250 return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
251
252 case '\\':
253 if separator != '\\' {
254 // next rune is "escaped" in the pattern - literal match
255 if patIdx++; patIdx >= patLen {
256 // pattern ended
257 return false, ErrBadPattern
258 }
259 }
260 fallthrough
261
262 default:
263 patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
264 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
265 if patRune != nameRune {
266 if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
267 // if this rune was meant to be escaped, we need to move patIdx
268 // back to the backslash before backtracking or validating below
269 patIdx--
270 }
271 break
272 }
273
274 patIdx += patRuneLen
275 nameIdx += nameRuneLen
276 startOfSegment = patRune == separator
277 continue
278 }
279 }
280
281 if starPatternBacktrack >= 0 {
282 // `*` backtrack, but only if the `name` rune isn't the separator
283 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
284 if nameRune != separator {
285 starNameBacktrack += nameRuneLen
286 patIdx = starPatternBacktrack
287 nameIdx = starNameBacktrack
288 startOfSegment = false
289 continue
290 }
291 }
292
293 if doublestarPatternBacktrack >= 0 {
294 // `**` backtrack, advance `name` past next separator
295 nameIdx = doublestarNameBacktrack
296 for nameIdx < nameLen {
297 nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
298 nameIdx += nameRuneLen
299 if nameRune == separator {
300 doublestarNameBacktrack = nameIdx
301 patIdx = doublestarPatternBacktrack
302 startOfSegment = true
303 continue MATCH
304 }
305 }
306 }
307
308 if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
309 return false, ErrBadPattern
310 }
311 return false, nil
312 }
313
314 if nameIdx < nameLen {
315 // we reached the end of `pattern` before the end of `name`
316 return false, nil
317 }
318
319 // we've reached the end of `name`; we've successfully matched if we've also
320 // reached the end of `pattern`, or if the rest of `pattern` can match a
321 // zero-length string
322 return isZeroLengthPattern(pattern[patIdx:], separator, validate)
323}
324
325func isZeroLengthPattern(pattern string, separator rune, validate bool) (ret bool, err error) {
326 // `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
327 // *should* match `path/to/a` because `a` might be a directory
328 if pattern == "" ||
329 pattern == "*" ||
330 pattern == "**" ||
331 pattern == string(separator)+"**" ||
332 pattern == "**"+string(separator) ||
333 pattern == string(separator)+"**"+string(separator) {
334 return true, nil
335 }
336
337 if pattern[0] == '{' {
338 closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
339 if closingIdx == -1 {
340 // no closing '}'
341 return false, ErrBadPattern
342 }
343 closingIdx += 1
344
345 patIdx := 1
346 for {
347 commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
348 if commaIdx == -1 {
349 break
350 }
351 commaIdx += patIdx
352
353 ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator, validate)
354 if ret || err != nil {
355 return
356 }
357
358 patIdx = commaIdx + 1
359 }
360 return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator, validate)
361 }
362
363 // no luck - validate the rest of the pattern
364 if validate && !doValidatePattern(pattern, separator) {
365 return false, ErrBadPattern
366 }
367 return false, nil
368}
369
370// Finds the index of the first unescaped byte `c`, or negative 1.
371func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
372 l := len(s)
373 for i := 0; i < l; i++ {
374 if allowEscaping && s[i] == '\\' {
375 // skip next byte
376 i++
377 } else if s[i] == c {
378 return i
379 }
380 }
381 return -1
382}
383
384// Assuming the byte before the beginning of `s` is an opening `{`, this
385// function will find the index of the matching `}`. That is, it'll skip over
386// any nested `{}` and account for escaping
387func indexMatchedClosingAlt(s string, allowEscaping bool) int {
388 alts := 1
389 l := len(s)
390 for i := 0; i < l; i++ {
391 if allowEscaping && s[i] == '\\' {
392 // skip next byte
393 i++
394 } else if s[i] == '{' {
395 alts++
396 } else if s[i] == '}' {
397 if alts--; alts == 0 {
398 return i
399 }
400 }
401 }
402 return -1
403}