globwalk.go

  1package doublestar
  2
  3import (
  4	"errors"
  5	"io/fs"
  6	"path"
  7	"path/filepath"
  8	"strings"
  9)
 10
 11// If returned from GlobWalkFunc, will cause GlobWalk to skip the current
 12// directory. In other words, if the current path is a directory, GlobWalk will
 13// not recurse into it. Otherwise, GlobWalk will skip the rest of the current
 14// directory.
 15var SkipDir = fs.SkipDir
 16
 17// Callback function for GlobWalk(). If the function returns an error, GlobWalk
 18// will end immediately and return the same error.
 19type GlobWalkFunc func(path string, d fs.DirEntry) error
 20
 21// GlobWalk calls the callback function `fn` for every file matching pattern.
 22// The syntax of pattern is the same as in Match() and the behavior is the same
 23// as Glob(), with regard to limitations (such as patterns containing `/./`,
 24// `/../`, or starting with `/`). The pattern may describe hierarchical names
 25// such as usr/*/bin/ed.
 26//
 27// GlobWalk may have a small performance benefit over Glob if you do not need a
 28// slice of matches because it can avoid allocating memory for the matches.
 29// Additionally, GlobWalk gives you access to the `fs.DirEntry` objects for
 30// each match, and lets you quit early by returning a non-nil error from your
 31// callback function. Like `io/fs.WalkDir`, if your callback returns `SkipDir`,
 32// GlobWalk will skip the current directory. This means that if the current
 33// path _is_ a directory, GlobWalk will not recurse into it. If the current
 34// path is not a directory, the rest of the parent directory will be skipped.
 35//
 36// GlobWalk ignores file system errors such as I/O errors reading directories
 37// by default. GlobWalk may return ErrBadPattern, reporting that the pattern is
 38// malformed.
 39//
 40// To enable aborting on I/O errors, the WithFailOnIOErrors option can be
 41// passed.
 42//
 43// Additionally, if the callback function `fn` returns an error, GlobWalk will
 44// exit immediately and return that error.
 45//
 46// Like Glob(), this function assumes that your pattern uses `/` as the path
 47// separator even if that's not correct for your OS (like Windows). If you
 48// aren't sure if that's the case, you can use filepath.ToSlash() on your
 49// pattern before calling GlobWalk().
 50//
 51// Note: users should _not_ count on the returned error,
 52// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
 53//
 54func GlobWalk(fsys fs.FS, pattern string, fn GlobWalkFunc, opts ...GlobOption) error {
 55	if !ValidatePattern(pattern) {
 56		return ErrBadPattern
 57	}
 58
 59	g := newGlob(opts...)
 60	return g.doGlobWalk(fsys, pattern, true, true, fn)
 61}
 62
 63// Actually execute GlobWalk
 64//   - firstSegment is true if we're in the first segment of the pattern, ie,
 65//     the right-most part where we can match files. If it's false, we're
 66//     somewhere in the middle (or at the beginning) and can only match
 67//     directories since there are path segments above us.
 68//   - beforeMeta is true if we're exploring segments before any meta
 69//     characters, ie, in a pattern such as `path/to/file*.txt`, the `path/to/`
 70//     bit does not contain any meta characters.
 71func (g *glob) doGlobWalk(fsys fs.FS, pattern string, firstSegment, beforeMeta bool, fn GlobWalkFunc) error {
 72	patternStart := indexMeta(pattern)
 73	if patternStart == -1 {
 74		// pattern doesn't contain any meta characters - does a file matching the
 75		// pattern exist?
 76		// The pattern may contain escaped wildcard characters for an exact path match.
 77		path := unescapeMeta(pattern)
 78		info, pathExists, err := g.exists(fsys, path, beforeMeta)
 79		if pathExists && (!firstSegment || !g.filesOnly || !info.IsDir()) {
 80			err = fn(path, dirEntryFromFileInfo(info))
 81			if err == SkipDir {
 82				err = nil
 83			}
 84		}
 85		return err
 86	}
 87
 88	dir := "."
 89	splitIdx := lastIndexSlashOrAlt(pattern)
 90	if splitIdx != -1 {
 91		if pattern[splitIdx] == '}' {
 92			openingIdx := indexMatchedOpeningAlt(pattern[:splitIdx])
 93			if openingIdx == -1 {
 94				// if there's no matching opening index, technically Match() will treat
 95				// an unmatched `}` as nothing special, so... we will, too!
 96				splitIdx = lastIndexSlash(pattern[:splitIdx])
 97				if splitIdx != -1 {
 98					dir = pattern[:splitIdx]
 99					pattern = pattern[splitIdx+1:]
100				}
101			} else {
102				// otherwise, we have to handle the alts:
103				return g.globAltsWalk(fsys, pattern, openingIdx, splitIdx, firstSegment, beforeMeta, fn)
104			}
105		} else {
106			dir = pattern[:splitIdx]
107			pattern = pattern[splitIdx+1:]
108		}
109	}
110
111	// if `splitIdx` is less than `patternStart`, we know `dir` has no meta
112	// characters. They would be equal if they are both -1, which means `dir`
113	// will be ".", and we know that doesn't have meta characters either.
114	if splitIdx <= patternStart {
115		return g.globDirWalk(fsys, unescapeMeta(dir), pattern, firstSegment, beforeMeta, fn)
116	}
117
118	return g.doGlobWalk(fsys, dir, false, beforeMeta, func(p string, d fs.DirEntry) error {
119		if err := g.globDirWalk(fsys, p, pattern, firstSegment, false, fn); err != nil {
120			return err
121		}
122		return nil
123	})
124}
125
126// handle alts in the glob pattern - `openingIdx` and `closingIdx` are the
127// indexes of `{` and `}`, respectively
128func (g *glob) globAltsWalk(fsys fs.FS, pattern string, openingIdx, closingIdx int, firstSegment, beforeMeta bool, fn GlobWalkFunc) (err error) {
129	var matches []DirEntryWithFullPath
130	startIdx := 0
131	afterIdx := closingIdx + 1
132	splitIdx := lastIndexSlashOrAlt(pattern[:openingIdx])
133	if splitIdx == -1 || pattern[splitIdx] == '}' {
134		// no common prefix
135		matches, err = g.doGlobAltsWalk(fsys, "", pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, beforeMeta, matches)
136		if err != nil {
137			return
138		}
139	} else {
140		// our alts have a common prefix that we can process first
141		startIdx = splitIdx + 1
142		innerBeforeMeta := beforeMeta && !hasMetaExceptAlts(pattern[:splitIdx])
143		err = g.doGlobWalk(fsys, pattern[:splitIdx], false, beforeMeta, func(p string, d fs.DirEntry) (e error) {
144			matches, e = g.doGlobAltsWalk(fsys, p, pattern, startIdx, openingIdx, closingIdx, afterIdx, firstSegment, innerBeforeMeta, matches)
145			return e
146		})
147		if err != nil {
148			return
149		}
150	}
151
152	skip := ""
153	for _, m := range matches {
154		if skip != "" {
155			// Because matches are sorted, we know that descendants of the skipped
156			// item must come immediately after the skipped item. If we find an item
157			// that does not have a prefix matching the skipped item, we know we're
158			// done skipping. I'm using strings.HasPrefix here because
159			// filepath.HasPrefix has been marked deprecated (and just calls
160			// strings.HasPrefix anyway). The reason it's deprecated is because it
161			// doesn't handle case-insensitive paths, nor does it guarantee that the
162			// prefix is actually a parent directory. Neither is an issue here: the
163			// paths come from the system so their cases will match, and we guarantee
164			// a parent directory by appending a slash to the prefix.
165			//
166			// NOTE: m.Path will always use slashes as path separators.
167			if strings.HasPrefix(m.Path, skip) {
168				continue
169			}
170			skip = ""
171		}
172		if err = fn(m.Path, m.Entry); err != nil {
173			if err == SkipDir {
174				isDir, err := g.isDir(fsys, "", m.Path, m.Entry)
175				if err != nil {
176					return err
177				}
178				if isDir {
179					// append a slash to guarantee `skip` will be treated as a parent dir
180					skip = m.Path + "/"
181				} else {
182					// Dir() calls Clean() which calls FromSlash(), so we need to convert
183					// back to slashes
184					skip = filepath.ToSlash(filepath.Dir(m.Path)) + "/"
185				}
186				err = nil
187				continue
188			}
189			return
190		}
191	}
192
193	return
194}
195
196// runs actual matching for alts
197func (g *glob) doGlobAltsWalk(fsys fs.FS, d, pattern string, startIdx, openingIdx, closingIdx, afterIdx int, firstSegment, beforeMeta bool, m []DirEntryWithFullPath) (matches []DirEntryWithFullPath, err error) {
198	matches = m
199	matchesLen := len(m)
200	patIdx := openingIdx + 1
201	for patIdx < closingIdx {
202		nextIdx := indexNextAlt(pattern[patIdx:closingIdx], true)
203		if nextIdx == -1 {
204			nextIdx = closingIdx
205		} else {
206			nextIdx += patIdx
207		}
208
209		alt := buildAlt(d, pattern, startIdx, openingIdx, patIdx, nextIdx, afterIdx)
210		err = g.doGlobWalk(fsys, alt, firstSegment, beforeMeta, func(p string, d fs.DirEntry) error {
211			// insertion sort, ignoring dups
212			insertIdx := matchesLen
213			for insertIdx > 0 && matches[insertIdx-1].Path > p {
214				insertIdx--
215			}
216			if insertIdx > 0 && matches[insertIdx-1].Path == p {
217				// dup
218				return nil
219			}
220
221			// append to grow the slice, then insert
222			entry := DirEntryWithFullPath{d, p}
223			matches = append(matches, entry)
224			for i := matchesLen; i > insertIdx; i-- {
225				matches[i] = matches[i-1]
226			}
227			matches[insertIdx] = entry
228			matchesLen++
229
230			return nil
231		})
232		if err != nil {
233			return
234		}
235
236		patIdx = nextIdx + 1
237	}
238
239	return
240}
241
242func (g *glob) globDirWalk(fsys fs.FS, dir, pattern string, canMatchFiles, beforeMeta bool, fn GlobWalkFunc) (e error) {
243	if pattern == "" {
244		if !canMatchFiles || !g.filesOnly {
245			// pattern can be an empty string if the original pattern ended in a
246			// slash, in which case, we should just return dir, but only if it
247			// actually exists and it's a directory (or a symlink to a directory)
248			info, isDir, err := g.isPathDir(fsys, dir, beforeMeta)
249			if err != nil {
250				return err
251			}
252			if isDir {
253				e = fn(dir, dirEntryFromFileInfo(info))
254				if e == SkipDir {
255					e = nil
256				}
257			}
258		}
259		return
260	}
261
262	if pattern == "**" {
263		// `**` can match *this* dir
264		info, dirExists, err := g.exists(fsys, dir, beforeMeta)
265		if err != nil {
266			return err
267		}
268		if !dirExists || !info.IsDir() {
269			return nil
270		}
271		if !canMatchFiles || !g.filesOnly {
272			if e = fn(dir, dirEntryFromFileInfo(info)); e != nil {
273				if e == SkipDir {
274					e = nil
275				}
276				return
277			}
278		}
279		return g.globDoubleStarWalk(fsys, dir, canMatchFiles, fn)
280	}
281
282	dirs, err := fs.ReadDir(fsys, dir)
283	if err != nil {
284		if errors.Is(err, fs.ErrNotExist) {
285			return g.handlePatternNotExist(beforeMeta)
286		}
287		return g.forwardErrIfFailOnIOErrors(err)
288	}
289
290	var matched bool
291	for _, info := range dirs {
292		name := info.Name()
293		matched, e = matchWithSeparator(pattern, name, '/', false)
294		if e != nil {
295			return
296		}
297		if matched {
298			matched = canMatchFiles
299			if !matched || g.filesOnly {
300				matched, e = g.isDir(fsys, dir, name, info)
301				if e != nil {
302					return e
303				}
304				if canMatchFiles {
305					// if we're here, it's because g.filesOnly
306					// is set and we don't want directories
307					matched = !matched
308				}
309			}
310			if matched {
311				if e = fn(path.Join(dir, name), info); e != nil {
312					if e == SkipDir {
313						e = nil
314					}
315					return
316				}
317			}
318		}
319	}
320
321	return
322}
323
324// recursively walk files/directories in a directory
325func (g *glob) globDoubleStarWalk(fsys fs.FS, dir string, canMatchFiles bool, fn GlobWalkFunc) (e error) {
326	dirs, err := fs.ReadDir(fsys, dir)
327	if err != nil {
328		if errors.Is(err, fs.ErrNotExist) {
329			// This function is only ever called after we know the top-most directory
330			// exists, so, if we ever get here, we know we'll never return
331			// ErrPatternNotExist.
332			return nil
333		}
334		return g.forwardErrIfFailOnIOErrors(err)
335	}
336
337	for _, info := range dirs {
338		name := info.Name()
339		isDir, err := g.isDir(fsys, dir, name, info)
340		if err != nil {
341			return err
342		}
343
344		if isDir {
345			p := path.Join(dir, name)
346			if !canMatchFiles || !g.filesOnly {
347				// `**` can match *this* dir, so add it
348				if e = fn(p, info); e != nil {
349					if e == SkipDir {
350						e = nil
351						continue
352					}
353					return
354				}
355			}
356			if e = g.globDoubleStarWalk(fsys, p, canMatchFiles, fn); e != nil {
357				return
358			}
359		} else if canMatchFiles {
360			if e = fn(path.Join(dir, name), info); e != nil {
361				if e == SkipDir {
362					e = nil
363				}
364				return
365			}
366		}
367	}
368
369	return
370}
371
372type DirEntryFromFileInfo struct {
373	fi fs.FileInfo
374}
375
376func (d *DirEntryFromFileInfo) Name() string {
377	return d.fi.Name()
378}
379
380func (d *DirEntryFromFileInfo) IsDir() bool {
381	return d.fi.IsDir()
382}
383
384func (d *DirEntryFromFileInfo) Type() fs.FileMode {
385	return d.fi.Mode().Type()
386}
387
388func (d *DirEntryFromFileInfo) Info() (fs.FileInfo, error) {
389	return d.fi, nil
390}
391
392func dirEntryFromFileInfo(fi fs.FileInfo) fs.DirEntry {
393	return &DirEntryFromFileInfo{fi}
394}
395
396type DirEntryWithFullPath struct {
397	Entry fs.DirEntry
398	Path  string
399}
400
401func hasMetaExceptAlts(s string) bool {
402	var c byte
403	l := len(s)
404	for i := 0; i < l; i++ {
405		c = s[i]
406		if c == '*' || c == '?' || c == '[' {
407			return true
408		} else if c == '\\' {
409			// skip next byte
410			i++
411		}
412	}
413	return false
414}