1// Copyright 2016 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package fastwalk provides a faster version of filepath.Walk for file system
6// scanning tools.
7package fastwalk
8
9import (
10 "errors"
11 "os"
12 "path/filepath"
13 "runtime"
14 "sync"
15)
16
17// TraverseLink is used as a return value from WalkFuncs to indicate that the
18// symlink named in the call may be traversed.
19var TraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")
20
21// SkipFiles is a used as a return value from WalkFuncs to indicate that the
22// callback should not be called for any other files in the current directory.
23// Child directories will still be traversed.
24var SkipFiles = errors.New("fastwalk: skip remaining files in directory")
25
26// Walk is a faster implementation of filepath.Walk.
27//
28// filepath.Walk's design necessarily calls os.Lstat on each file,
29// even if the caller needs less info.
30// Many tools need only the type of each file.
31// On some platforms, this information is provided directly by the readdir
32// system call, avoiding the need to stat each file individually.
33// fastwalk_unix.go contains a fork of the syscall routines.
34//
35// See golang.org/issue/16399
36//
37// Walk walks the file tree rooted at root, calling walkFn for
38// each file or directory in the tree, including root.
39//
40// If fastWalk returns filepath.SkipDir, the directory is skipped.
41//
42// Unlike filepath.Walk:
43// * file stat calls must be done by the user.
44// The only provided metadata is the file type, which does not include
45// any permission bits.
46// * multiple goroutines stat the filesystem concurrently. The provided
47// walkFn must be safe for concurrent use.
48// * fastWalk can follow symlinks if walkFn returns the TraverseLink
49// sentinel error. It is the walkFn's responsibility to prevent
50// fastWalk from going into symlink cycles.
51func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
52 // TODO(bradfitz): make numWorkers configurable? We used a
53 // minimum of 4 to give the kernel more info about multiple
54 // things we want, in hopes its I/O scheduling can take
55 // advantage of that. Hopefully most are in cache. Maybe 4 is
56 // even too low of a minimum. Profile more.
57 numWorkers := 4
58 if n := runtime.NumCPU(); n > numWorkers {
59 numWorkers = n
60 }
61
62 // Make sure to wait for all workers to finish, otherwise
63 // walkFn could still be called after returning. This Wait call
64 // runs after close(e.donec) below.
65 var wg sync.WaitGroup
66 defer wg.Wait()
67
68 w := &walker{
69 fn: walkFn,
70 enqueuec: make(chan walkItem, numWorkers), // buffered for performance
71 workc: make(chan walkItem, numWorkers), // buffered for performance
72 donec: make(chan struct{}),
73
74 // buffered for correctness & not leaking goroutines:
75 resc: make(chan error, numWorkers),
76 }
77 defer close(w.donec)
78
79 for i := 0; i < numWorkers; i++ {
80 wg.Add(1)
81 go w.doWork(&wg)
82 }
83 todo := []walkItem{{dir: root}}
84 out := 0
85 for {
86 workc := w.workc
87 var workItem walkItem
88 if len(todo) == 0 {
89 workc = nil
90 } else {
91 workItem = todo[len(todo)-1]
92 }
93 select {
94 case workc <- workItem:
95 todo = todo[:len(todo)-1]
96 out++
97 case it := <-w.enqueuec:
98 todo = append(todo, it)
99 case err := <-w.resc:
100 out--
101 if err != nil {
102 return err
103 }
104 if out == 0 && len(todo) == 0 {
105 // It's safe to quit here, as long as the buffered
106 // enqueue channel isn't also readable, which might
107 // happen if the worker sends both another unit of
108 // work and its result before the other select was
109 // scheduled and both w.resc and w.enqueuec were
110 // readable.
111 select {
112 case it := <-w.enqueuec:
113 todo = append(todo, it)
114 default:
115 return nil
116 }
117 }
118 }
119 }
120}
121
122// doWork reads directories as instructed (via workc) and runs the
123// user's callback function.
124func (w *walker) doWork(wg *sync.WaitGroup) {
125 defer wg.Done()
126 for {
127 select {
128 case <-w.donec:
129 return
130 case it := <-w.workc:
131 select {
132 case <-w.donec:
133 return
134 case w.resc <- w.walk(it.dir, !it.callbackDone):
135 }
136 }
137 }
138}
139
140type walker struct {
141 fn func(path string, typ os.FileMode) error
142
143 donec chan struct{} // closed on fastWalk's return
144 workc chan walkItem // to workers
145 enqueuec chan walkItem // from workers
146 resc chan error // from workers
147}
148
149type walkItem struct {
150 dir string
151 callbackDone bool // callback already called; don't do it again
152}
153
154func (w *walker) enqueue(it walkItem) {
155 select {
156 case w.enqueuec <- it:
157 case <-w.donec:
158 }
159}
160
161func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
162 joined := dirName + string(os.PathSeparator) + baseName
163 if typ == os.ModeDir {
164 w.enqueue(walkItem{dir: joined})
165 return nil
166 }
167
168 err := w.fn(joined, typ)
169 if typ == os.ModeSymlink {
170 if err == TraverseLink {
171 // Set callbackDone so we don't call it twice for both the
172 // symlink-as-symlink and the symlink-as-directory later:
173 w.enqueue(walkItem{dir: joined, callbackDone: true})
174 return nil
175 }
176 if err == filepath.SkipDir {
177 // Permit SkipDir on symlinks too.
178 return nil
179 }
180 }
181 return err
182}
183
184func (w *walker) walk(root string, runUserCallback bool) error {
185 if runUserCallback {
186 err := w.fn(root, os.ModeDir)
187 if err == filepath.SkipDir {
188 return nil
189 }
190 if err != nil {
191 return err
192 }
193 }
194
195 return readDir(root, w.onDirEnt)
196}