1package fsext
2
3import (
4 "errors"
5 "log/slog"
6 "os"
7 "path/filepath"
8 "slices"
9 "strings"
10 "sync"
11
12 "github.com/charlievieth/fastwalk"
13 "github.com/charmbracelet/crush/internal/csync"
14 "github.com/charmbracelet/crush/internal/home"
15 "github.com/go-git/go-git/v5/plumbing/format/gitignore"
16)
17
// fastIgnoreDirs holds directory base names that are skipped unconditionally.
// shouldIgnore consults it with a single map lookup before any gitignore
// pattern matching is attempted.
var fastIgnoreDirs = map[string]bool{
	// Version control metadata.
	".bzr": true,
	".git": true,
	".hg":  true,
	".svn": true,

	// Editor and IDE state.
	".idea":   true,
	".vscode": true,

	// Dependency and interpreter caches.
	"__pycache__":   true,
	"node_modules":  true,
	".pytest_cache": true,
	".cache":        true,
	".tmp":          true,

	// Operating-system and desktop bookkeeping.
	".Spotlight-V100": true,
	".Trash":          true,
	".fseventsd":      true,
	".local":          true,
	".share":          true,

	// Tool-specific directories.
	".crush":   true,
	"OrbStack": true,
}
40
41// commonIgnorePatterns contains commonly ignored files and directories.
42// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
43// This list contains wildcard patterns and file-specific patterns.
44var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
45 patterns := []string{
46 // IDE and editor files (wildcards)
47 "*.swp",
48 "*.swo",
49 "*~",
50 ".DS_Store",
51 "Thumbs.db",
52
53 // Build artifacts (non-fastIgnoreDirs)
54 "target",
55 "build",
56 "dist",
57 "out",
58 "bin",
59 "obj",
60 "*.o",
61 "*.so",
62 "*.dylib",
63 "*.dll",
64 "*.exe",
65
66 // Logs and temporary files (wildcards)
67 "*.log",
68 "*.tmp",
69 "*.temp",
70
71 // Language-specific (wildcards and non-fastIgnoreDirs)
72 "*.pyc",
73 "*.pyo",
74 "vendor",
75 "Cargo.lock",
76 "package-lock.json",
77 "yarn.lock",
78 "pnpm-lock.yaml",
79 }
80 return parsePatterns(patterns, nil)
81})
82
83var homeIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
84 homeDir := home.Dir()
85 var lines []string
86 for _, name := range []string{
87 filepath.Join(homeDir, ".gitignore"),
88 filepath.Join(homeDir, ".config", "git", "ignore"),
89 filepath.Join(homeDir, ".config", "crush", "ignore"),
90 } {
91 if bts, err := os.ReadFile(name); err == nil {
92 lines = append(lines, strings.Split(string(bts), "\n")...)
93 }
94 }
95 return parsePatterns(lines, nil)
96})
97
98// parsePatterns parses gitignore pattern strings into Pattern objects.
99// domain is the path components where the patterns are defined (nil for global).
100func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
101 var patterns []gitignore.Pattern
102 for _, line := range lines {
103 line = strings.TrimSpace(line)
104 if line == "" || strings.HasPrefix(line, "#") {
105 continue
106 }
107 patterns = append(patterns, gitignore.ParsePattern(line, domain))
108 }
109 return patterns
110}
111
112type directoryLister struct {
113 // dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
114 // This avoids re-reading files when building combined matchers.
115 dirPatterns *csync.Map[string, []gitignore.Pattern]
116 // combinedMatchers caches a combined matcher for each directory that includes
117 // all ancestor patterns. This allows O(1) matching per file.
118 combinedMatchers *csync.Map[string, gitignore.Matcher]
119 rootPath string
120}
121
122func NewDirectoryLister(rootPath string) *directoryLister {
123 return &directoryLister{
124 rootPath: rootPath,
125 dirPatterns: csync.NewMap[string, []gitignore.Pattern](),
126 combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
127 }
128}
129
// pathToComponents breaks a path into its slash-separated components for
// gitignore matching. The path is first converted to forward slashes; an
// empty path or "." yields nil. No other normalization is performed.
func pathToComponents(path string) []string {
	switch slashed := filepath.ToSlash(path); slashed {
	case "", ".":
		return nil
	default:
		return strings.Split(slashed, "/")
	}
}
138
139// getDirPatterns returns the parsed patterns for a specific directory's
140// .gitignore and .crushignore files. Results are cached.
141func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
142 return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
143 var allPatterns []gitignore.Pattern
144
145 relPath, _ := filepath.Rel(dl.rootPath, dir)
146 var domain []string
147 if relPath != "" && relPath != "." {
148 domain = pathToComponents(relPath)
149 }
150
151 for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
152 ignPath := filepath.Join(dir, ignoreFile)
153 if content, err := os.ReadFile(ignPath); err == nil {
154 lines := strings.Split(string(content), "\n")
155 allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
156 }
157 }
158 return allPatterns
159 })
160}
161
162// getCombinedMatcher returns a matcher that combines all gitignore patterns
163// from the root to the given directory, plus common patterns and home patterns.
164// Results are cached per directory, and we reuse parent directory matchers.
165func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
166 return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
167 var allPatterns []gitignore.Pattern
168
169 // Add common patterns first (lowest priority).
170 allPatterns = append(allPatterns, commonIgnorePatterns()...)
171
172 // Add home ignore patterns.
173 allPatterns = append(allPatterns, homeIgnorePatterns()...)
174
175 // Collect patterns from root to this directory.
176 relDir, _ := filepath.Rel(dl.rootPath, dir)
177 var pathParts []string
178 if relDir != "" && relDir != "." {
179 pathParts = pathToComponents(relDir)
180 }
181
182 // Add patterns from each directory from root to current.
183 currentPath := dl.rootPath
184 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
185
186 for _, part := range pathParts {
187 currentPath = filepath.Join(currentPath, part)
188 allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
189 }
190
191 return gitignore.NewMatcher(allPatterns)
192 })
193}
194
195// shouldIgnore checks if a path should be ignored based on gitignore rules.
196// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
197func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
198 base := filepath.Base(path)
199
200 // Fast path: O(1) lookup for commonly ignored directories.
201 if isDir && fastIgnoreDirs[base] {
202 return true
203 }
204
205 // Check explicit ignore patterns.
206 if len(ignorePatterns) > 0 {
207 for _, pattern := range ignorePatterns {
208 if matched, err := filepath.Match(pattern, base); err == nil && matched {
209 return true
210 }
211 }
212 }
213
214 // Don't apply gitignore rules to the root directory itself.
215 if path == dl.rootPath {
216 return false
217 }
218
219 relPath, err := filepath.Rel(dl.rootPath, path)
220 if err != nil {
221 relPath = path
222 }
223
224 pathComponents := pathToComponents(relPath)
225 if len(pathComponents) == 0 {
226 return false
227 }
228
229 // Get the combined matcher for the parent directory.
230 parentDir := filepath.Dir(path)
231 matcher := dl.getCombinedMatcher(parentDir)
232
233 if matcher.Match(pathComponents, isDir) {
234 slog.Debug("Ignoring path", "path", relPath)
235 return true
236 }
237
238 return false
239}
240
241// ListDirectory lists files and directories in the specified path.
242func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
243 found := csync.NewSlice[string]()
244 dl := NewDirectoryLister(initialPath)
245
246 slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
247
248 conf := fastwalk.Config{
249 Follow: true,
250 ToSlash: fastwalk.DefaultToSlash(),
251 Sort: fastwalk.SortDirsFirst,
252 MaxDepth: depth,
253 }
254
255 err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
256 if err != nil {
257 return nil // Skip files we don't have permission to access
258 }
259
260 isDir := d.IsDir()
261 if dl.shouldIgnore(path, ignorePatterns, isDir) {
262 if isDir {
263 return filepath.SkipDir
264 }
265 return nil
266 }
267
268 if path != initialPath {
269 if isDir {
270 path = path + string(filepath.Separator)
271 }
272 found.Append(path)
273 }
274
275 if limit > 0 && found.Len() >= limit {
276 return filepath.SkipAll
277 }
278
279 return nil
280 })
281 if err != nil && !errors.Is(err, filepath.SkipAll) {
282 return nil, false, err
283 }
284
285 matches, truncated := truncate(slices.Collect(found.Seq()), limit)
286 return matches, truncated || errors.Is(err, filepath.SkipAll), nil
287}