1package fsext
2
3import (
4 "cmp"
5 "errors"
6 "log/slog"
7 "os"
8 "path/filepath"
9 "slices"
10 "strings"
11 "sync"
12
13 "github.com/charlievieth/fastwalk"
14 "github.com/charmbracelet/crush/internal/csync"
15 "github.com/charmbracelet/crush/internal/home"
16 gitconfig "github.com/go-git/go-git/v5/config"
17 "github.com/go-git/go-git/v5/plumbing/format/gitignore"
18)
19
// fastIgnoreDirs lists directory base names that are skipped unconditionally.
// shouldIgnore consults it with a single map lookup before any gitignore
// pattern matching is attempted.
var fastIgnoreDirs = map[string]bool{
	// Version control metadata.
	".bzr": true,
	".git": true,
	".hg":  true,
	".svn": true,

	// Editor and IDE settings.
	".idea":   true,
	".vscode": true,

	// Dependency trees, caches, and scratch directories.
	".cache":        true,
	".pytest_cache": true,
	".tmp":          true,
	"__pycache__":   true,
	"node_modules":  true,

	// Platform and application data directories.
	".Spotlight-V100": true,
	".Trash":          true,
	".crush":          true,
	".fseventsd":      true,
	".local":          true,
	".share":          true,
	"OrbStack":        true,
}
42
43// commonIgnorePatterns contains commonly ignored files and directories.
44// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
45// This list contains wildcard patterns and file-specific patterns.
46var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
47 patterns := []string{
48 // IDE and editor files (wildcards)
49 "*.swp",
50 "*.swo",
51 "*~",
52 ".DS_Store",
53 "Thumbs.db",
54
55 // Build artifacts (non-fastIgnoreDirs)
56 "target",
57 "build",
58 "dist",
59 "out",
60 "bin",
61 "obj",
62 "*.o",
63 "*.so",
64 "*.dylib",
65 "*.dll",
66 "*.exe",
67
68 // Logs and temporary files (wildcards)
69 "*.log",
70 "*.tmp",
71 "*.temp",
72
73 // Language-specific (wildcards and non-fastIgnoreDirs)
74 "*.pyc",
75 "*.pyo",
76 "vendor",
77 "Cargo.lock",
78 "package-lock.json",
79 "yarn.lock",
80 "pnpm-lock.yaml",
81 }
82 return parsePatterns(patterns, nil)
83})
84
85// gitGlobalIgnorePatterns returns patterns from git's global excludes file
86// (core.excludesFile), following git's config resolution order.
87var gitGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
88 cfg, err := gitconfig.LoadConfig(gitconfig.GlobalScope)
89 if err != nil {
90 slog.Debug("Failed to load global git config", "error", err)
91 return nil
92 }
93
94 configPath := cmp.Or(
95 os.Getenv("XDG_CONFIG_HOME"),
96 filepath.Join(home.Dir(), ".config"),
97 )
98 excludesFilePath := cmp.Or(
99 cfg.Raw.Section("core").Options.Get("excludesfile"),
100 filepath.Join(configPath, "git", "ignore"),
101 )
102 excludesFilePath = home.Long(excludesFilePath)
103
104 bts, err := os.ReadFile(excludesFilePath)
105 if err != nil {
106 if !os.IsNotExist(err) {
107 slog.Debug("Failed to read git global excludes file", "path", excludesFilePath, "error", err)
108 }
109 return nil
110 }
111
112 return parsePatterns(strings.Split(string(bts), "\n"), nil)
113})
114
115// crushGlobalIgnorePatterns returns patterns from the user's
116// ~/.config/crush/ignore file.
117var crushGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
118 configPath := cmp.Or(
119 os.Getenv("XDG_CONFIG_HOME"),
120 filepath.Join(home.Dir(), ".config"),
121 )
122 name := filepath.Join(configPath, "crush", "ignore")
123 bts, err := os.ReadFile(name)
124 if err != nil {
125 if !os.IsNotExist(err) {
126 slog.Debug("Failed to read crush global ignore file", "path", name, "error", err)
127 }
128 return nil
129 }
130 lines := strings.Split(string(bts), "\n")
131 return parsePatterns(lines, nil)
132})
133
134// parsePatterns parses gitignore pattern strings into Pattern objects.
135// domain is the path components where the patterns are defined (nil for global).
136func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
137 var patterns []gitignore.Pattern
138 for _, line := range lines {
139 line = strings.TrimSpace(line)
140 if line == "" || strings.HasPrefix(line, "#") {
141 continue
142 }
143 patterns = append(patterns, gitignore.ParsePattern(line, domain))
144 }
145 return patterns
146}
147
148type directoryLister struct {
149 // dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
150 // This avoids re-reading files when building combined matchers.
151 dirPatterns *csync.Map[string, []gitignore.Pattern]
152 // combinedMatchers caches a combined matcher for each directory that includes
153 // all ancestor patterns. This allows O(1) matching per file.
154 combinedMatchers *csync.Map[string, gitignore.Matcher]
155 rootPath string
156}
157
158func NewDirectoryLister(rootPath string) *directoryLister {
159 return &directoryLister{
160 rootPath: rootPath,
161 dirPatterns: csync.NewMap[string, []gitignore.Pattern](),
162 combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
163 }
164}
165
// pathToComponents breaks a path into its slash-separated segments, the form
// expected by gitignore matching. OS-specific separators are normalised to
// "/" first. An empty path or "." has no segments and yields nil.
func pathToComponents(path string) []string {
	switch normalized := filepath.ToSlash(path); normalized {
	case "", ".":
		return nil
	default:
		return strings.Split(normalized, "/")
	}
}
174
175// getDirPatterns returns the parsed patterns for a specific directory's
176// .gitignore and .crushignore files. Results are cached.
177func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
178 return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
179 var allPatterns []gitignore.Pattern
180
181 relPath, _ := filepath.Rel(dl.rootPath, dir)
182 var domain []string
183 if relPath != "" && relPath != "." {
184 domain = pathToComponents(relPath)
185 }
186
187 for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
188 ignPath := filepath.Join(dir, ignoreFile)
189 if content, err := os.ReadFile(ignPath); err == nil {
190 lines := strings.Split(string(content), "\n")
191 allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
192 }
193 }
194 return allPatterns
195 })
196}
197
198// getCombinedMatcher returns a matcher that combines all gitignore patterns
199// from the root to the given directory, plus common patterns and home patterns.
200// Results are cached per directory, and we reuse parent directory matchers.
201func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
202 return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
203 var allPatterns []gitignore.Pattern
204
205 // Add common patterns first (lowest priority).
206 allPatterns = append(allPatterns, commonIgnorePatterns()...)
207
208 // Add global ignore patterns (git core.excludesFile + crush global ignore).
209 allPatterns = append(allPatterns, gitGlobalIgnorePatterns()...)
210 allPatterns = append(allPatterns, crushGlobalIgnorePatterns()...)
211
212 // Collect patterns from root to this directory.
213 relDir, _ := filepath.Rel(dl.rootPath, dir)
214 var pathParts []string
215 if relDir != "" && relDir != "." {
216 pathParts = pathToComponents(relDir)
217 }
218
219 // Add patterns from each directory from root to current.
220 currentPath := dl.rootPath
221 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
222
223 for _, part := range pathParts {
224 currentPath = filepath.Join(currentPath, part)
225 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
226 }
227
228 return gitignore.NewMatcher(allPatterns)
229 })
230}
231
232// shouldIgnore checks if a path should be ignored based on gitignore rules.
233// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
234func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
235 base := filepath.Base(path)
236
237 // Fast path: O(1) lookup for commonly ignored directories.
238 if isDir && fastIgnoreDirs[base] {
239 return true
240 }
241
242 // Check explicit ignore patterns.
243 if len(ignorePatterns) > 0 {
244 for _, pattern := range ignorePatterns {
245 if matched, err := filepath.Match(pattern, base); err == nil && matched {
246 return true
247 }
248 }
249 }
250
251 // Don't apply gitignore rules to the root directory itself.
252 if path == dl.rootPath {
253 return false
254 }
255
256 relPath, err := filepath.Rel(dl.rootPath, path)
257 if err != nil {
258 relPath = path
259 }
260
261 pathComponents := pathToComponents(relPath)
262 if len(pathComponents) == 0 {
263 return false
264 }
265
266 // Get the combined matcher for the parent directory.
267 parentDir := filepath.Dir(path)
268 matcher := dl.getCombinedMatcher(parentDir)
269
270 if matcher.Match(pathComponents, isDir) {
271 slog.Debug("Ignoring path", "path", relPath)
272 return true
273 }
274
275 return false
276}
277
278// ListDirectory lists files and directories in the specified path.
279func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
280 found := csync.NewSlice[string]()
281 dl := NewDirectoryLister(initialPath)
282
283 slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
284
285 conf := fastwalk.Config{
286 Follow: true,
287 ToSlash: fastwalk.DefaultToSlash(),
288 Sort: fastwalk.SortDirsFirst,
289 MaxDepth: depth,
290 }
291
292 err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
293 if err != nil {
294 return nil // Skip files we don't have permission to access
295 }
296
297 isDir := d.IsDir()
298 if dl.shouldIgnore(path, ignorePatterns, isDir) {
299 if isDir {
300 return filepath.SkipDir
301 }
302 return nil
303 }
304
305 if path != initialPath {
306 if isDir {
307 path = path + string(filepath.Separator)
308 }
309 found.Append(path)
310 }
311
312 if limit > 0 && found.Len() >= limit {
313 return filepath.SkipAll
314 }
315
316 return nil
317 })
318 if err != nil && !errors.Is(err, filepath.SkipAll) {
319 return nil, false, err
320 }
321
322 matches, truncated := truncate(slices.Collect(found.Seq()), limit)
323 return matches, truncated || errors.Is(err, filepath.SkipAll), nil
324}