1package doublestar
2
3import (
4 "errors"
5 "io/fs"
6 "path"
7)
8
9// Glob returns the names of all files matching pattern or nil if there is no
10// matching file. The syntax of pattern is the same as in Match(). The pattern
11// may describe hierarchical names such as usr/*/bin/ed.
12//
13// Glob ignores file system errors such as I/O errors reading directories by
14// default. The only possible returned error is ErrBadPattern, reporting that
15// the pattern is malformed.
16//
17// To enable aborting on I/O errors, the WithFailOnIOErrors option can be
18// passed.
19//
20// Note: this is meant as a drop-in replacement for io/fs.Glob(). Like
21// io/fs.Glob(), this function assumes that your pattern uses `/` as the path
22// separator even if that's not correct for your OS (like Windows). If you
23// aren't sure if that's the case, you can use filepath.ToSlash() on your
24// pattern before calling Glob().
25//
26// Like `io/fs.Glob()`, patterns containing `/./`, `/../`, or starting with `/`
27// will return no results and no errors. You can use SplitPattern to divide a
28// pattern into a base path (to initialize an `FS` object) and pattern.
29//
30// Note: users should _not_ count on the returned error,
31// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
32func Glob(fsys fs.FS, pattern string, opts ...GlobOption) ([]string, error) {
33 if !ValidatePattern(pattern) {
34 return nil, ErrBadPattern
35 }
36
37 g := newGlob(opts...)
38
39 if hasMidDoubleStar(pattern) {
40 // If the pattern has a `**` anywhere but the very end, GlobWalk is more
41 // performant because it can get away with less allocations. If the pattern
42 // ends in a `**`, both methods are pretty much the same, but Glob has a
43 // _very_ slight advantage because of lower function call overhead.
44 var matches []string
45 err := g.doGlobWalk(fsys, pattern, true, true, func(p string, d fs.DirEntry) error {
46 matches = append(matches, p)
47 return nil
48 })
49 return matches, err
50 }
51 return g.doGlob(fsys, pattern, nil, true, true)
52}
53
54// Does the actual globbin'
55// - firstSegment is true if we're in the first segment of the pattern, ie,
56// the right-most part where we can match files. If it's false, we're
57// somewhere in the middle (or at the beginning) and can only match
58// directories since there are path segments above us.
59// - beforeMeta is true if we're exploring segments before any meta
60// characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/`
61// bit does not contain any meta characters.
62func (g *glob) doGlob(fsys fs.FS, pattern string, m []string, firstSegment, beforeMeta bool) (matches []string, err error) {
63 matches = m
64 patternStart := indexMeta(pattern)
65 if patternStart == -1 {
66 // pattern doesn't contain any meta characters - does a file matching the
67 // pattern exist?
68 // The pattern may contain escaped wildcard characters for an exact path match.
69 path := unescapeMeta(pattern)
70 pathInfo, pathExists, pathErr := g.exists(fsys, path, beforeMeta)
71 if pathErr != nil {
72 return nil, pathErr
73 }
74
75 if pathExists && (!firstSegment || !g.filesOnly || !pathInfo.IsDir()) {
76 matches = append(matches, path)
77 }
78
79 return
80 }
81
82 dir := "."
83 splitIdx := lastIndexSlashOrAlt(pattern)
84 if splitIdx != -1 {
85 if pattern[splitIdx] == '}' {
86 openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx])
87 if openingIdx == -1 {
88 // if there's no matching opening index, technically Match() will treat
89 // an unmatched `}` as nothing special, so... we will, too!
90 splitIdx = lastIndexSlash(pattern[:splitIdx])
91 if splitIdx != -1 {
92 dir = pattern[:splitIdx]
93 pattern = pattern[splitIdx+1:]
94 }
95 } else {
96 // otherwise, we have to handle the alts:
97 return g.globAlts(fsys, pattern, openingIdx, splitIdx, matches, firstSegment, beforeMeta)
98 }
99 } else {
100 dir = pattern[:splitIdx]
101 pattern = pattern[splitIdx+1:]
102 }
103 }
104
105 // if `splitIdx` is less than `patternStart`, we know `dir` has no meta
106 // characters. They would be equal if they are both -1, which means `dir`
107 // will be ".", and we know that doesn't have meta characters either.
108 if splitIdx <= patternStart {
109 return g.globDir(fsys, unescapeMeta(dir), pattern, matches, firstSegment, beforeMeta)
110 }
111
112 var dirs []string
113 dirs, err = g.doGlob(fsys, dir, matches, false, beforeMeta)
114 if err != nil {
115 return
116 }
117 for _, d := range dirs {
118 matches, err = g.globDir(fsys, d, pattern, matches, firstSegment, false)
119 if err != nil {
120 return
121 }
122 }
123
124 return
125}
126
127// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the
128// indexes of `{` and `}`, respectively
129func (g *glob) globAlts(fsys fs.FS, pattern string, openingIdx, closingIdx int, m []string, firstSegment, beforeMeta bool) (matches []string, err error) {
130 matches = m
131
132 var dirs []string
133 startIdx := 0
134 afterIdx := closingIdx + 1
135 splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx])
136 if splitIdx == -1 || pattern[splitIdx] == '}' {
137 // no common prefix
138 dirs = []string{""}
139 } else {
140 // our alts have a common prefix that we can process first
141 dirs, err = g.doGlob(fsys, pattern[:splitIdx], matches, false, beforeMeta)
142 if err != nil {
143 return
144 }
145
146 startIdx = splitIdx + 1
147 }
148
149 for _, d := range dirs {
150 patIdx := openingIdx + 1
151 altResultsStartIdx := len(matches)
152 thisResultStartIdx := altResultsStartIdx
153 for patIdx < closingIdx {
154 nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true)
155 if nextIdx == -1 {
156 nextIdx = closingIdx
157 } else {
158 nextIdx += patIdx
159 }
160
161 alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx)
162 matches, err = g.doGlob(fsys, alt, matches, firstSegment, beforeMeta)
163 if err != nil {
164 return
165 }
166
167 matchesLen := len(matches)
168 if altResultsStartIdx != thisResultStartIdx && thisResultStartIdx != matchesLen {
169 // Alts can result in matches that aren't sorted, or, worse, duplicates
170 // (consider the trivial pattern `path/to/{a,*}`). Since doGlob returns
171 // sorted results, we can do a sort of in-place merge and remove
172 // duplicates. But, we only need to do this if this isn't the first alt
173 // (ie, `altResultsStartIdx != thisResultsStartIdx`) and if the latest
174 // alt actually added some matches (`thisResultStartIdx !=
175 // len(matches)`)
176 matches = sortAndRemoveDups(matches, altResultsStartIdx, thisResultStartIdx, matchesLen)
177
178 // length of matches may have changed
179 thisResultStartIdx = len(matches)
180 } else {
181 thisResultStartIdx = matchesLen
182 }
183
184 patIdx = nextIdx + 1
185 }
186 }
187
188 return
189}
190
191// find files/subdirectories in the given `dir` that match `pattern`
192func (g *glob) globDir(fsys fs.FS, dir, pattern string, matches []string, canMatchFiles, beforeMeta bool) (m []string, e error) {
193 m = matches
194
195 if pattern == "" {
196 if !canMatchFiles || !g.filesOnly {
197 // pattern can be an empty string if the original pattern ended in a
198 // slash, in which case, we should just return dir, but only if it
199 // actually exists and it's a directory (or a symlink to a directory)
200 _, isDir, err := g.isPathDir(fsys, dir, beforeMeta)
201 if err != nil {
202 return nil, err
203 }
204 if isDir {
205 m = append(m, dir)
206 }
207 }
208 return
209 }
210
211 if pattern == "**" {
212 return g.globDoubleStar(fsys, dir, m, canMatchFiles, beforeMeta)
213 }
214
215 dirs, err := fs.ReadDir(fsys, dir)
216 if err != nil {
217 if errors.Is(err, fs.ErrNotExist) {
218 e = g.handlePatternNotExist(beforeMeta)
219 } else {
220 e = g.forwardErrIfFailOnIOErrors(err)
221 }
222 return
223 }
224
225 var matched bool
226 for _, info := range dirs {
227 name := info.Name()
228 matched, e = matchWithSeparator(pattern, name, '/', false)
229 if e != nil {
230 return
231 }
232 if matched {
233 matched = canMatchFiles
234 if !matched || g.filesOnly {
235 matched, e = g.isDir(fsys, dir, name, info)
236 if e != nil {
237 return
238 }
239 if canMatchFiles {
240 // if we're here, it's because g.filesOnly
241 // is set and we don't want directories
242 matched = !matched
243 }
244 }
245 if matched {
246 m = append(m, path.Join(dir, name))
247 }
248 }
249 }
250
251 return
252}
253
254func (g *glob) globDoubleStar(fsys fs.FS, dir string, matches []string, canMatchFiles, beforeMeta bool) ([]string, error) {
255 dirs, err := fs.ReadDir(fsys, dir)
256 if err != nil {
257 if errors.Is(err, fs.ErrNotExist) {
258 return matches, g.handlePatternNotExist(beforeMeta)
259 } else {
260 return matches, g.forwardErrIfFailOnIOErrors(err)
261 }
262 }
263
264 if !g.filesOnly {
265 // `**` can match *this* dir, so add it
266 matches = append(matches, dir)
267 }
268
269 for _, info := range dirs {
270 name := info.Name()
271 isDir, err := g.isDir(fsys, dir, name, info)
272 if err != nil {
273 return nil, err
274 }
275 if isDir {
276 matches, err = g.globDoubleStar(fsys, path.Join(dir, name), matches, canMatchFiles, false)
277 if err != nil {
278 return nil, err
279 }
280 } else if canMatchFiles {
281 matches = append(matches, path.Join(dir, name))
282 }
283 }
284
285 return matches, nil
286}
287
288// Returns true if the pattern has a doublestar in the middle of the pattern.
289// In this case, GlobWalk is faster because it can get away with less
290// allocations. However, Glob has a _very_ slight edge if the pattern ends in
291// `**`.
292func hasMidDoubleStar(p string) bool {
293 // subtract 3: 2 because we want to return false if the pattern ends in `**`
294 // (Glob is _very_ slightly faster in that case), and the extra 1 because our
295 // loop checks p[i] and p[i+1].
296 l := len(p) - 3
297 for i := 0; i < l; i++ {
298 if p[i] == '\\' {
299 // escape next byte
300 i++
301 } else if p[i] == '*' && p[i+1] == '*' {
302 return true
303 }
304 }
305 return false
306}
307
308// Returns the index of the first unescaped meta character, or negative 1.
309func indexMeta(s string) int {
310 var c byte
311 l := len(s)
312 for i := 0; i < l; i++ {
313 c = s[i]
314 if c == '*' || c == '?' || c == '[' || c == '{' {
315 return i
316 } else if c == '\\' {
317 // skip next byte
318 i++
319 }
320 }
321 return -1
322}
323
324// Returns the index of the last unescaped slash or closing alt (`}`) in the
325// string, or negative 1.
326func lastIndexSlashOrAlt(s string) int {
327 for i := len(s) - 1; i >= 0; i-- {
328 if (s[i] == '/' || s[i] == '}') && (i == 0 || s[i-1] != '\\') {
329 return i
330 }
331 }
332 return -1
333}
334
335// Returns the index of the last unescaped slash in the string, or negative 1.
336func lastIndexSlash(s string) int {
337 for i := len(s) - 1; i >= 0; i-- {
338 if s[i] == '/' && (i == 0 || s[i-1] != '\\') {
339 return i
340 }
341 }
342 return -1
343}
344
345// Assuming the byte after the end of `s` is a closing `}`, this function will
346// find the index of the matching `{`. That is, it'll skip over any nested `{}`
347// and account for escaping.
348func indexMatchedOpeningAlt(s string) int {
349 alts := 1
350 for i := len(s) - 1; i >= 0; i-- {
351 if s[i] == '}' && (i == 0 || s[i-1] != '\\') {
352 alts++
353 } else if s[i] == '{' && (i == 0 || s[i-1] != '\\') {
354 if alts--; alts == 0 {
355 return i
356 }
357 }
358 }
359 return -1
360}
361
362// Returns true if the path exists
363func (g *glob) exists(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) {
364 // name might end in a slash, but Stat doesn't like that
365 namelen := len(name)
366 if namelen > 1 && name[namelen-1] == '/' {
367 name = name[:namelen-1]
368 }
369
370 info, err := fs.Stat(fsys, name)
371 if errors.Is(err, fs.ErrNotExist) {
372 return nil, false, g.handlePatternNotExist(beforeMeta)
373 }
374 return info, err == nil, g.forwardErrIfFailOnIOErrors(err)
375}
376
377// Returns true if the path exists and is a directory or a symlink to a
378// directory
379func (g *glob) isPathDir(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) {
380 info, err := fs.Stat(fsys, name)
381 if errors.Is(err, fs.ErrNotExist) {
382 return nil, false, g.handlePatternNotExist(beforeMeta)
383 }
384 return info, err == nil && info.IsDir(), g.forwardErrIfFailOnIOErrors(err)
385}
386
387// Returns whether or not the given DirEntry is a directory. If the DirEntry
388// represents a symbolic link, the link is followed by running fs.Stat() on
389// `path.Join(dir, name)` (if dir is "", name will be used without joining)
390func (g *glob) isDir(fsys fs.FS, dir, name string, info fs.DirEntry) (bool, error) {
391 if !g.noFollow && (info.Type()&fs.ModeSymlink) > 0 {
392 p := name
393 if dir != "" {
394 p = path.Join(dir, name)
395 }
396 finfo, err := fs.Stat(fsys, p)
397 if err != nil {
398 if errors.Is(err, fs.ErrNotExist) {
399 // this function is only ever called while expanding a glob, so it can
400 // never return ErrPatternNotExist
401 return false, nil
402 }
403 return false, g.forwardErrIfFailOnIOErrors(err)
404 }
405 return finfo.IsDir(), nil
406 }
407 return info.IsDir(), nil
408}
409
410// Builds a string from an alt
411func buildAlt(prefix, pattern string, startIdx, openingIdx, currentIdx, nextIdx, afterIdx int) string {
412 // pattern:
413 // ignored/start{alts,go,here}remaining - len = 36
414 // | | | | ^--- afterIdx = 27
415 // | | | \--------- nextIdx = 21
416 // | | \----------- currentIdx = 19
417 // | \----------------- openingIdx = 13
418 // \---------------------- startIdx = 8
419 //
420 // result:
421 // prefix/startgoremaining - len = 7 + 5 + 2 + 9 = 23
422 var buf []byte
423 patLen := len(pattern)
424 size := (openingIdx - startIdx) + (nextIdx - currentIdx) + (patLen - afterIdx)
425 if prefix != "" && prefix != "." {
426 buf = make([]byte, 0, size+len(prefix)+1)
427 buf = append(buf, prefix...)
428 buf = append(buf, '/')
429 } else {
430 buf = make([]byte, 0, size)
431 }
432 buf = append(buf, pattern[startIdx:openingIdx]...)
433 buf = append(buf, pattern[currentIdx:nextIdx]...)
434 if afterIdx < patLen {
435 buf = append(buf, pattern[afterIdx:]...)
436 }
437 return string(buf)
438}
439
440// Running alts can produce results that are not sorted, and, worse, can cause
441// duplicates (consider the trivial pattern `path/to/{a,*}`). Since we know
442// each run of doGlob is sorted, we can basically do the "merge" step of a
443// merge sort in-place.
444func sortAndRemoveDups(matches []string, idx1, idx2, l int) []string {
445 var tmp string
446 for ; idx1 < idx2; idx1++ {
447 if matches[idx1] < matches[idx2] {
448 // order is correct
449 continue
450 } else if matches[idx1] > matches[idx2] {
451 // need to swap and then re-sort matches above idx2
452 tmp = matches[idx1]
453 matches[idx1] = matches[idx2]
454
455 shft := idx2 + 1
456 for ; shft < l && matches[shft] < tmp; shft++ {
457 matches[shft-1] = matches[shft]
458 }
459 matches[shft-1] = tmp
460 } else {
461 // duplicate - shift matches above idx2 down one and decrement l
462 for shft := idx2 + 1; shft < l; shft++ {
463 matches[shft-1] = matches[shft]
464 }
465 if l--; idx2 == l {
466 // nothing left to do... matches[idx2:] must have been full of dups
467 break
468 }
469 }
470 }
471 return matches[:l]
472}