glob.go

  1package doublestar
  2
  3import (
  4	"errors"
  5	"io/fs"
  6	"path"
  7)
  8
  9// Glob returns the names of all files matching pattern or nil if there is no
 10// matching file. The syntax of pattern is the same as in Match(). The pattern
 11// may describe hierarchical names such as usr/*/bin/ed.
 12//
 13// Glob ignores file system errors such as I/O errors reading directories by
 14// default. The only possible returned error is ErrBadPattern, reporting that
 15// the pattern is malformed.
 16//
 17// To enable aborting on I/O errors, the WithFailOnIOErrors option can be
 18// passed.
 19//
 20// Note: this is meant as a drop-in replacement for io/fs.Glob(). Like
 21// io/fs.Glob(), this function assumes that your pattern uses `/` as the path
 22// separator even if that's not correct for your OS (like Windows). If you
 23// aren't sure if that's the case, you can use filepath.ToSlash() on your
 24// pattern before calling Glob().
 25//
 26// Like `io/fs.Glob()`, patterns containing `/./`, `/../`, or starting with `/`
 27// will return no results and no errors. You can use SplitPattern to divide a
 28// pattern into a base path (to initialize an `FS` object) and pattern.
 29//
 30// Note: users should _not_ count on the returned error,
 31// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
 32func Glob(fsys fs.FS, pattern string, opts ...GlobOption) ([]string, error) {
 33	if !ValidatePattern(pattern) {
 34		return nil, ErrBadPattern
 35	}
 36
 37	g := newGlob(opts...)
 38
 39	if hasMidDoubleStar(pattern) {
 40		// If the pattern has a `**` anywhere but the very end, GlobWalk is more
 41		// performant because it can get away with less allocations. If the pattern
 42		// ends in a `**`, both methods are pretty much the same, but Glob has a
 43		// _very_ slight advantage because of lower function call overhead.
 44		var matches []string
 45		err := g.doGlobWalk(fsys, pattern, true, true, func(p string, d fs.DirEntry) error {
 46			matches = append(matches, p)
 47			return nil
 48		})
 49		return matches, err
 50	}
 51	return g.doGlob(fsys, pattern, nil, true, true)
 52}
 53
 54// Does the actual globbin'
 55//   - firstSegment is true if we're in the first segment of the pattern, ie,
 56//     the right-most part where we can match files. If it's false, we're
 57//     somewhere in the middle (or at the beginning) and can only match
 58//     directories since there are path segments above us.
 59//   - beforeMeta is true if we're exploring segments before any meta
 60//     characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/`
 61//     bit does not contain any meta characters.
 62func (g *glob) doGlob(fsys fs.FS, pattern string, m []string, firstSegment, beforeMeta bool) (matches []string, err error) {
 63	matches = m
 64	patternStart := indexMeta(pattern)
 65	if patternStart == -1 {
 66		// pattern doesn't contain any meta characters - does a file matching the
 67		// pattern exist?
 68		// The pattern may contain escaped wildcard characters for an exact path match.
 69		path := unescapeMeta(pattern)
 70		pathInfo, pathExists, pathErr := g.exists(fsys, path, beforeMeta)
 71		if pathErr != nil {
 72			return nil, pathErr
 73		}
 74
 75		if pathExists && (!firstSegment || !g.filesOnly || !pathInfo.IsDir()) {
 76			matches = append(matches, path)
 77		}
 78
 79		return
 80	}
 81
 82	dir := "."
 83	splitIdx := lastIndexSlashOrAlt(pattern)
 84	if splitIdx != -1 {
 85		if pattern[splitIdx] == '}' {
 86			openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx])
 87			if openingIdx == -1 {
 88				// if there's no matching opening index, technically Match() will treat
 89				// an unmatched `}` as nothing special, so... we will, too!
 90				splitIdx = lastIndexSlash(pattern[:splitIdx])
 91				if splitIdx != -1 {
 92					dir = pattern[:splitIdx]
 93					pattern = pattern[splitIdx+1:]
 94				}
 95			} else {
 96				// otherwise, we have to handle the alts:
 97				return g.globAlts(fsys, pattern, openingIdx, splitIdx, matches, firstSegment, beforeMeta)
 98			}
 99		} else {
100			dir = pattern[:splitIdx]
101			pattern = pattern[splitIdx+1:]
102		}
103	}
104
105	// if `splitIdx` is less than `patternStart`, we know `dir` has no meta
106	// characters. They would be equal if they are both -1, which means `dir`
107	// will be ".", and we know that doesn't have meta characters either.
108	if splitIdx <= patternStart {
109		return g.globDir(fsys, unescapeMeta(dir), pattern, matches, firstSegment, beforeMeta)
110	}
111
112	var dirs []string
113	dirs, err = g.doGlob(fsys, dir, matches, false, beforeMeta)
114	if err != nil {
115		return
116	}
117	for _, d := range dirs {
118		matches, err = g.globDir(fsys, d, pattern, matches, firstSegment, false)
119		if err != nil {
120			return
121		}
122	}
123
124	return
125}
126
127// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the
128// indexes of `{` and `}`, respectively
129func (g *glob) globAlts(fsys fs.FS, pattern string, openingIdx, closingIdx int, m []string, firstSegment, beforeMeta bool) (matches []string, err error) {
130	matches = m
131
132	var dirs []string
133	startIdx := 0
134	afterIdx := closingIdx + 1
135	splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx])
136	if splitIdx == -1 || pattern[splitIdx] == '}' {
137		// no common prefix
138		dirs = []string{""}
139	} else {
140		// our alts have a common prefix that we can process first
141		dirs, err = g.doGlob(fsys, pattern[:splitIdx], matches, false, beforeMeta)
142		if err != nil {
143			return
144		}
145
146		startIdx = splitIdx + 1
147	}
148
149	for _, d := range dirs {
150		patIdx := openingIdx + 1
151		altResultsStartIdx := len(matches)
152		thisResultStartIdx := altResultsStartIdx
153		for patIdx < closingIdx {
154			nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true)
155			if nextIdx == -1 {
156				nextIdx = closingIdx
157			} else {
158				nextIdx += patIdx
159			}
160
161			alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx)
162			matches, err = g.doGlob(fsys, alt, matches, firstSegment, beforeMeta)
163			if err != nil {
164				return
165			}
166
167			matchesLen := len(matches)
168			if altResultsStartIdx != thisResultStartIdx && thisResultStartIdx != matchesLen {
169				// Alts can result in matches that aren't sorted, or, worse, duplicates
170				// (consider the trivial pattern `path/to/{a,*}`). Since doGlob returns
171				// sorted results, we can do a sort of in-place merge and remove
172				// duplicates. But, we only need to do this if this isn't the first alt
173				// (ie, `altResultsStartIdx != thisResultsStartIdx`) and if the latest
174				// alt actually added some matches (`thisResultStartIdx !=
175				// len(matches)`)
176				matches = sortAndRemoveDups(matches, altResultsStartIdx, thisResultStartIdx, matchesLen)
177
178				// length of matches may have changed
179				thisResultStartIdx = len(matches)
180			} else {
181				thisResultStartIdx = matchesLen
182			}
183
184			patIdx = nextIdx + 1
185		}
186	}
187
188	return
189}
190
191// find files/subdirectories in the given `dir` that match `pattern`
192func (g *glob) globDir(fsys fs.FS, dir, pattern string, matches []string, canMatchFiles, beforeMeta bool) (m []string, e error) {
193	m = matches
194
195	if pattern == "" {
196		if !canMatchFiles || !g.filesOnly {
197			// pattern can be an empty string if the original pattern ended in a
198			// slash, in which case, we should just return dir, but only if it
199			// actually exists and it's a directory (or a symlink to a directory)
200			_, isDir, err := g.isPathDir(fsys, dir, beforeMeta)
201			if err != nil {
202				return nil, err
203			}
204			if isDir {
205				m = append(m, dir)
206			}
207		}
208		return
209	}
210
211	if pattern == "**" {
212		return g.globDoubleStar(fsys, dir, m, canMatchFiles, beforeMeta)
213	}
214
215	dirs, err := fs.ReadDir(fsys, dir)
216	if err != nil {
217		if errors.Is(err, fs.ErrNotExist) {
218			e = g.handlePatternNotExist(beforeMeta)
219		} else {
220			e = g.forwardErrIfFailOnIOErrors(err)
221		}
222		return
223	}
224
225	var matched bool
226	for _, info := range dirs {
227		name := info.Name()
228		matched, e = matchWithSeparator(pattern, name, '/', false)
229		if e != nil {
230			return
231		}
232		if matched {
233			matched = canMatchFiles
234			if !matched || g.filesOnly {
235				matched, e = g.isDir(fsys, dir, name, info)
236				if e != nil {
237					return
238				}
239				if canMatchFiles {
240					// if we're here, it's because g.filesOnly
241					// is set and we don't want directories
242					matched = !matched
243				}
244			}
245			if matched {
246				m = append(m, path.Join(dir, name))
247			}
248		}
249	}
250
251	return
252}
253
254func (g *glob) globDoubleStar(fsys fs.FS, dir string, matches []string, canMatchFiles, beforeMeta bool) ([]string, error) {
255	dirs, err := fs.ReadDir(fsys, dir)
256	if err != nil {
257		if errors.Is(err, fs.ErrNotExist) {
258			return matches, g.handlePatternNotExist(beforeMeta)
259		} else {
260			return matches, g.forwardErrIfFailOnIOErrors(err)
261		}
262	}
263
264	if !g.filesOnly {
265		// `**` can match *this* dir, so add it
266		matches = append(matches, dir)
267	}
268
269	for _, info := range dirs {
270		name := info.Name()
271		isDir, err := g.isDir(fsys, dir, name, info)
272		if err != nil {
273			return nil, err
274		}
275		if isDir {
276			matches, err = g.globDoubleStar(fsys, path.Join(dir, name), matches, canMatchFiles, false)
277			if err != nil {
278				return nil, err
279			}
280		} else if canMatchFiles {
281			matches = append(matches, path.Join(dir, name))
282		}
283	}
284
285	return matches, nil
286}
287
288// Returns true if the pattern has a doublestar in the middle of the pattern.
289// In this case, GlobWalk is faster because it can get away with less
290// allocations. However, Glob has a _very_ slight edge if the pattern ends in
291// `**`.
292func hasMidDoubleStar(p string) bool {
293	// subtract 3: 2 because we want to return false if the pattern ends in `**`
294	// (Glob is _very_ slightly faster in that case), and the extra 1 because our
295	// loop checks p[i] and p[i+1].
296	l := len(p) - 3
297	for i := 0; i < l; i++ {
298		if p[i] == '\\' {
299			// escape next byte
300			i++
301		} else if p[i] == '*' && p[i+1] == '*' {
302			return true
303		}
304	}
305	return false
306}
307
308// Returns the index of the first unescaped meta character, or negative 1.
309func indexMeta(s string) int {
310	var c byte
311	l := len(s)
312	for i := 0; i < l; i++ {
313		c = s[i]
314		if c == '*' || c == '?' || c == '[' || c == '{' {
315			return i
316		} else if c == '\\' {
317			// skip next byte
318			i++
319		}
320	}
321	return -1
322}
323
324// Returns the index of the last unescaped slash or closing alt (`}`) in the
325// string, or negative 1.
326func lastIndexSlashOrAlt(s string) int {
327	for i := len(s) - 1; i >= 0; i-- {
328		if (s[i] == '/' || s[i] == '}') && (i == 0 || s[i-1] != '\\') {
329			return i
330		}
331	}
332	return -1
333}
334
335// Returns the index of the last unescaped slash in the string, or negative 1.
336func lastIndexSlash(s string) int {
337	for i := len(s) - 1; i >= 0; i-- {
338		if s[i] == '/' && (i == 0 || s[i-1] != '\\') {
339			return i
340		}
341	}
342	return -1
343}
344
345// Assuming the byte after the end of `s` is a closing `}`, this function will
346// find the index of the matching `{`. That is, it'll skip over any nested `{}`
347// and account for escaping.
348func indexMatchedOpeningAlt(s string) int {
349	alts := 1
350	for i := len(s) - 1; i >= 0; i-- {
351		if s[i] == '}' && (i == 0 || s[i-1] != '\\') {
352			alts++
353		} else if s[i] == '{' && (i == 0 || s[i-1] != '\\') {
354			if alts--; alts == 0 {
355				return i
356			}
357		}
358	}
359	return -1
360}
361
362// Returns true if the path exists
363func (g *glob) exists(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) {
364	// name might end in a slash, but Stat doesn't like that
365	namelen := len(name)
366	if namelen > 1 && name[namelen-1] == '/' {
367		name = name[:namelen-1]
368	}
369
370	info, err := fs.Stat(fsys, name)
371	if errors.Is(err, fs.ErrNotExist) {
372		return nil, false, g.handlePatternNotExist(beforeMeta)
373	}
374	return info, err == nil, g.forwardErrIfFailOnIOErrors(err)
375}
376
377// Returns true if the path exists and is a directory or a symlink to a
378// directory
379func (g *glob) isPathDir(fsys fs.FS, name string, beforeMeta bool) (fs.FileInfo, bool, error) {
380	info, err := fs.Stat(fsys, name)
381	if errors.Is(err, fs.ErrNotExist) {
382		return nil, false, g.handlePatternNotExist(beforeMeta)
383	}
384	return info, err == nil && info.IsDir(), g.forwardErrIfFailOnIOErrors(err)
385}
386
387// Returns whether or not the given DirEntry is a directory. If the DirEntry
388// represents a symbolic link, the link is followed by running fs.Stat() on
389// `path.Join(dir, name)` (if dir is "", name will be used without joining)
390func (g *glob) isDir(fsys fs.FS, dir, name string, info fs.DirEntry) (bool, error) {
391	if !g.noFollow && (info.Type()&fs.ModeSymlink) > 0 {
392		p := name
393		if dir != "" {
394			p = path.Join(dir, name)
395		}
396		finfo, err := fs.Stat(fsys, p)
397		if err != nil {
398			if errors.Is(err, fs.ErrNotExist) {
399				// this function is only ever called while expanding a glob, so it can
400				// never return ErrPatternNotExist
401				return false, nil
402			}
403			return false, g.forwardErrIfFailOnIOErrors(err)
404		}
405		return finfo.IsDir(), nil
406	}
407	return info.IsDir(), nil
408}
409
410// Builds a string from an alt
411func buildAlt(prefix, pattern string, startIdx, openingIdx, currentIdx, nextIdx, afterIdx int) string {
412	// pattern:
413	//   ignored/start{alts,go,here}remaining - len = 36
414	//           |    |     | |     ^--- afterIdx   = 27
415	//           |    |     | \--------- nextIdx    = 21
416	//           |    |     \----------- currentIdx = 19
417	//           |    \----------------- openingIdx = 13
418	//           \---------------------- startIdx   = 8
419	//
420	// result:
421	//   prefix/startgoremaining - len = 7 + 5 + 2 + 9 = 23
422	var buf []byte
423	patLen := len(pattern)
424	size := (openingIdx - startIdx) + (nextIdx - currentIdx) + (patLen - afterIdx)
425	if prefix != "" && prefix != "." {
426		buf = make([]byte, 0, size+len(prefix)+1)
427		buf = append(buf, prefix...)
428		buf = append(buf, '/')
429	} else {
430		buf = make([]byte, 0, size)
431	}
432	buf = append(buf, pattern[startIdx:openingIdx]...)
433	buf = append(buf, pattern[currentIdx:nextIdx]...)
434	if afterIdx < patLen {
435		buf = append(buf, pattern[afterIdx:]...)
436	}
437	return string(buf)
438}
439
440// Running alts can produce results that are not sorted, and, worse, can cause
441// duplicates (consider the trivial pattern `path/to/{a,*}`). Since we know
442// each run of doGlob is sorted, we can basically do the "merge" step of a
443// merge sort in-place.
444func sortAndRemoveDups(matches []string, idx1, idx2, l int) []string {
445	var tmp string
446	for ; idx1 < idx2; idx1++ {
447		if matches[idx1] < matches[idx2] {
448			// order is correct
449			continue
450		} else if matches[idx1] > matches[idx2] {
451			// need to swap and then re-sort matches above idx2
452			tmp = matches[idx1]
453			matches[idx1] = matches[idx2]
454
455			shft := idx2 + 1
456			for ; shft < l && matches[shft] < tmp; shft++ {
457				matches[shft-1] = matches[shft]
458			}
459			matches[shft-1] = tmp
460		} else {
461			// duplicate - shift matches above idx2 down one and decrement l
462			for shft := idx2 + 1; shft < l; shft++ {
463				matches[shft-1] = matches[shft]
464			}
465			if l--; idx2 == l {
466				// nothing left to do... matches[idx2:] must have been full of dups
467				break
468			}
469		}
470	}
471	return matches[:l]
472}