1package fsext
2
3import (
4 "cmp"
5 "errors"
6 "log/slog"
7 "os"
8 "path/filepath"
9 "slices"
10 "strings"
11 "sync"
12
13 "github.com/charlievieth/fastwalk"
14 "github.com/charmbracelet/crush/internal/csync"
15 "github.com/charmbracelet/crush/internal/home"
16 gitconfig "github.com/go-git/go-git/v5/config"
17 "github.com/go-git/go-git/v5/plumbing/format/gitignore"
18)
19
// fastIgnoreDirs holds directory base names that are skipped unconditionally.
// A single map lookup on the base name lets callers reject these directories
// before any gitignore pattern matching is attempted.
var fastIgnoreDirs = map[string]bool{
	// Version-control metadata.
	".git": true,
	".svn": true,
	".hg":  true,
	".bzr": true,

	// Editor / IDE settings.
	".vscode": true,
	".idea":   true,

	// Dependency trees, tool caches and scratch space.
	"node_modules":  true,
	"__pycache__":   true,
	".pytest_cache": true,
	".cache":        true,
	".tmp":          true,

	// Operating-system bookkeeping directories.
	".Trash":          true,
	".Spotlight-V100": true,
	".fseventsd":      true,

	// Application and per-user data directories.
	".crush":   true,
	"OrbStack": true,
	".local":   true,
	".share":   true,
}
42
43// commonIgnorePatterns contains commonly ignored files and directories.
44// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
45// This list contains wildcard patterns and file-specific patterns.
46var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
47 patterns := []string{
48 // IDE and editor files (wildcards)
49 "*.swp",
50 "*.swo",
51 "*~",
52 ".DS_Store",
53 "Thumbs.db",
54
55 // Build artifacts (non-fastIgnoreDirs)
56 "target",
57 "build",
58 "dist",
59 "out",
60 "bin",
61 "obj",
62 "*.o",
63 "*.so",
64 "*.dylib",
65 "*.dll",
66 "*.exe",
67
68 // Logs and temporary files (wildcards)
69 "*.log",
70 "*.tmp",
71 "*.temp",
72
73 // Language-specific (wildcards and non-fastIgnoreDirs)
74 "*.pyc",
75 "*.pyo",
76 "vendor",
77 "Cargo.lock",
78 "package-lock.json",
79 "yarn.lock",
80 "pnpm-lock.yaml",
81 }
82 return parsePatterns(patterns, nil)
83})
84
85// gitGlobalIgnorePatterns returns patterns from git's global excludes file
86// (core.excludesFile), following git's config resolution order.
87var gitGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
88 cfg, err := gitconfig.LoadConfig(gitconfig.GlobalScope)
89 if err != nil {
90 slog.Debug("Failed to load global git config", "error", err)
91 return nil
92 }
93
94 excludesFilePath := cmp.Or(
95 cfg.Raw.Section("core").Options.Get("excludesfile"),
96 filepath.Join(home.Config(), "git", "ignore"),
97 )
98 excludesFilePath = home.Long(excludesFilePath)
99
100 bts, err := os.ReadFile(excludesFilePath)
101 if err != nil {
102 if !os.IsNotExist(err) {
103 slog.Debug("Failed to read git global excludes file", "path", excludesFilePath, "error", err)
104 }
105 return nil
106 }
107
108 return parsePatterns(strings.Split(string(bts), "\n"), nil)
109})
110
111// crushGlobalIgnorePatterns returns patterns from the user's
112// ~/.config/crush/ignore file.
113var crushGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
114 name := filepath.Join(home.Config(), "crush", "ignore")
115 bts, err := os.ReadFile(name)
116 if err != nil {
117 if !os.IsNotExist(err) {
118 slog.Debug("Failed to read crush global ignore file", "path", name, "error", err)
119 }
120 return nil
121 }
122 lines := strings.Split(string(bts), "\n")
123 return parsePatterns(lines, nil)
124})
125
126// parsePatterns parses gitignore pattern strings into Pattern objects.
127// domain is the path components where the patterns are defined (nil for global).
128func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
129 var patterns []gitignore.Pattern
130 for _, line := range lines {
131 line = strings.TrimSpace(line)
132 if line == "" || strings.HasPrefix(line, "#") {
133 continue
134 }
135 patterns = append(patterns, gitignore.ParsePattern(line, domain))
136 }
137 return patterns
138}
139
140type directoryLister struct {
141 // dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
142 // This avoids re-reading files when building combined matchers.
143 dirPatterns *csync.Map[string, []gitignore.Pattern]
144 // combinedMatchers caches a combined matcher for each directory that includes
145 // all ancestor patterns. This allows O(1) matching per file.
146 combinedMatchers *csync.Map[string, gitignore.Matcher]
147 rootPath string
148}
149
150func NewDirectoryLister(rootPath string) *directoryLister {
151 return &directoryLister{
152 rootPath: rootPath,
153 dirPatterns: csync.NewMap[string, []gitignore.Pattern](),
154 combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
155 }
156}
157
// pathToComponents breaks path into the slash-separated components used for
// gitignore matching. OS-specific separators are converted to "/" first.
// It returns nil for "" and ".". The path is not cleaned, so a leading,
// trailing or doubled separator yields empty components.
func pathToComponents(path string) []string {
	switch slashed := filepath.ToSlash(path); slashed {
	case "", ".":
		return nil
	default:
		return strings.Split(slashed, "/")
	}
}
166
167// getDirPatterns returns the parsed patterns for a specific directory's
168// .gitignore and .crushignore files. Results are cached.
169func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
170 return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
171 var allPatterns []gitignore.Pattern
172
173 relPath, _ := filepath.Rel(dl.rootPath, dir)
174 var domain []string
175 if relPath != "" && relPath != "." {
176 domain = pathToComponents(relPath)
177 }
178
179 for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
180 ignPath := filepath.Join(dir, ignoreFile)
181 if content, err := os.ReadFile(ignPath); err == nil {
182 lines := strings.Split(string(content), "\n")
183 allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
184 }
185 }
186 return allPatterns
187 })
188}
189
190// getCombinedMatcher returns a matcher that combines all gitignore patterns
191// from the root to the given directory, plus common patterns and home patterns.
192// Results are cached per directory, and we reuse parent directory matchers.
193func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
194 return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
195 var allPatterns []gitignore.Pattern
196
197 // Add common patterns first (lowest priority).
198 allPatterns = append(allPatterns, commonIgnorePatterns()...)
199
200 // Add global ignore patterns (git core.excludesFile + crush global ignore).
201 allPatterns = append(allPatterns, gitGlobalIgnorePatterns()...)
202 allPatterns = append(allPatterns, crushGlobalIgnorePatterns()...)
203
204 // Collect patterns from root to this directory.
205 relDir, _ := filepath.Rel(dl.rootPath, dir)
206 var pathParts []string
207 if relDir != "" && relDir != "." {
208 pathParts = pathToComponents(relDir)
209 }
210
211 // Add patterns from each directory from root to current.
212 currentPath := dl.rootPath
213 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
214
215 for _, part := range pathParts {
216 currentPath = filepath.Join(currentPath, part)
217 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
218 }
219
220 return gitignore.NewMatcher(allPatterns)
221 })
222}
223
224// shouldIgnore checks if a path should be ignored based on gitignore rules.
225// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
226func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
227 base := filepath.Base(path)
228
229 // Fast path: O(1) lookup for commonly ignored directories.
230 if isDir && fastIgnoreDirs[base] {
231 return true
232 }
233
234 // Check explicit ignore patterns.
235 if len(ignorePatterns) > 0 {
236 for _, pattern := range ignorePatterns {
237 if matched, err := filepath.Match(pattern, base); err == nil && matched {
238 return true
239 }
240 }
241 }
242
243 // Don't apply gitignore rules to the root directory itself.
244 if path == dl.rootPath {
245 return false
246 }
247
248 relPath, err := filepath.Rel(dl.rootPath, path)
249 if err != nil {
250 relPath = path
251 }
252
253 pathComponents := pathToComponents(relPath)
254 if len(pathComponents) == 0 {
255 return false
256 }
257
258 // Get the combined matcher for the parent directory.
259 parentDir := filepath.Dir(path)
260 matcher := dl.getCombinedMatcher(parentDir)
261
262 if matcher.Match(pathComponents, isDir) {
263 slog.Debug("Ignoring path", "path", relPath)
264 return true
265 }
266
267 return false
268}
269
270// ListDirectory lists files and directories in the specified path.
271func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
272 found := csync.NewSlice[string]()
273 dl := NewDirectoryLister(initialPath)
274
275 slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
276
277 conf := fastwalk.Config{
278 Follow: true,
279 ToSlash: fastwalk.DefaultToSlash(),
280 Sort: fastwalk.SortDirsFirst,
281 MaxDepth: depth,
282 }
283
284 err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
285 if err != nil {
286 return nil // Skip files we don't have permission to access
287 }
288
289 isDir := d.IsDir()
290 if dl.shouldIgnore(path, ignorePatterns, isDir) {
291 if isDir {
292 return filepath.SkipDir
293 }
294 return nil
295 }
296
297 if path != initialPath {
298 if isDir {
299 path = path + string(filepath.Separator)
300 }
301 found.Append(path)
302 }
303
304 if limit > 0 && found.Len() >= limit {
305 return filepath.SkipAll
306 }
307
308 return nil
309 })
310 if err != nil && !errors.Is(err, filepath.SkipAll) {
311 return nil, false, err
312 }
313
314 matches, truncated := truncate(slices.Collect(found.Seq()), limit)
315 return matches, truncated || errors.Is(err, filepath.SkipAll), nil
316}