ls.go

  1package fsext
  2
  3import (
  4	"cmp"
  5	"errors"
  6	"log/slog"
  7	"os"
  8	"path/filepath"
  9	"slices"
 10	"strings"
 11	"sync"
 12
 13	"github.com/charlievieth/fastwalk"
 14	"github.com/charmbracelet/crush/internal/csync"
 15	"github.com/charmbracelet/crush/internal/home"
 16	gitconfig "github.com/go-git/go-git/v5/config"
 17	"github.com/go-git/go-git/v5/plumbing/format/gitignore"
 18)
 19
 20// fastIgnoreDirs is a set of directory names that are always ignored.
 21// This provides O(1) lookup for common cases to avoid expensive pattern matching.
 22var fastIgnoreDirs = map[string]bool{
 23	".git":            true,
 24	".svn":            true,
 25	".hg":             true,
 26	".bzr":            true,
 27	".vscode":         true,
 28	".idea":           true,
 29	"node_modules":    true,
 30	"__pycache__":     true,
 31	".pytest_cache":   true,
 32	".cache":          true,
 33	".tmp":            true,
 34	".Trash":          true,
 35	".Spotlight-V100": true,
 36	".fseventsd":      true,
 37	".crush":          true,
 38	"OrbStack":        true,
 39	".local":          true,
 40	".share":          true,
 41}
 42
 43// commonIgnorePatterns contains commonly ignored files and directories.
 44// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
 45// This list contains wildcard patterns and file-specific patterns.
 46var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 47	patterns := []string{
 48		// IDE and editor files (wildcards)
 49		"*.swp",
 50		"*.swo",
 51		"*~",
 52		".DS_Store",
 53		"Thumbs.db",
 54
 55		// Build artifacts (non-fastIgnoreDirs)
 56		"target",
 57		"build",
 58		"dist",
 59		"out",
 60		"bin",
 61		"obj",
 62		"*.o",
 63		"*.so",
 64		"*.dylib",
 65		"*.dll",
 66		"*.exe",
 67
 68		// Logs and temporary files (wildcards)
 69		"*.log",
 70		"*.tmp",
 71		"*.temp",
 72
 73		// Language-specific (wildcards and non-fastIgnoreDirs)
 74		"*.pyc",
 75		"*.pyo",
 76		"vendor",
 77		"Cargo.lock",
 78		"package-lock.json",
 79		"yarn.lock",
 80		"pnpm-lock.yaml",
 81	}
 82	return parsePatterns(patterns, nil)
 83})
 84
 85// gitGlobalIgnorePatterns returns patterns from git's global excludes file
 86// (core.excludesFile), following git's config resolution order.
 87var gitGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 88	cfg, err := gitconfig.LoadConfig(gitconfig.GlobalScope)
 89	if err != nil {
 90		slog.Debug("Failed to load global git config", "error", err)
 91		return nil
 92	}
 93
 94	excludesFilePath := cmp.Or(
 95		cfg.Raw.Section("core").Options.Get("excludesfile"),
 96		filepath.Join(home.Config(), "git", "ignore"),
 97	)
 98	excludesFilePath = home.Long(excludesFilePath)
 99
100	bts, err := os.ReadFile(excludesFilePath)
101	if err != nil {
102		if !os.IsNotExist(err) {
103			slog.Debug("Failed to read git global excludes file", "path", excludesFilePath, "error", err)
104		}
105		return nil
106	}
107
108	return parsePatterns(strings.Split(string(bts), "\n"), nil)
109})
110
111// crushGlobalIgnorePatterns returns patterns from the user's
112// ~/.config/crush/ignore file.
113var crushGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
114	name := filepath.Join(home.Config(), "crush", "ignore")
115	bts, err := os.ReadFile(name)
116	if err != nil {
117		if !os.IsNotExist(err) {
118			slog.Debug("Failed to read crush global ignore file", "path", name, "error", err)
119		}
120		return nil
121	}
122	lines := strings.Split(string(bts), "\n")
123	return parsePatterns(lines, nil)
124})
125
126// parsePatterns parses gitignore pattern strings into Pattern objects.
127// domain is the path components where the patterns are defined (nil for global).
128func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
129	var patterns []gitignore.Pattern
130	for _, line := range lines {
131		line = strings.TrimSpace(line)
132		if line == "" || strings.HasPrefix(line, "#") {
133			continue
134		}
135		patterns = append(patterns, gitignore.ParsePattern(line, domain))
136	}
137	return patterns
138}
139
140type directoryLister struct {
141	// dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
142	// This avoids re-reading files when building combined matchers.
143	dirPatterns *csync.Map[string, []gitignore.Pattern]
144	// combinedMatchers caches a combined matcher for each directory that includes
145	// all ancestor patterns. This allows O(1) matching per file.
146	combinedMatchers *csync.Map[string, gitignore.Matcher]
147	rootPath         string
148}
149
150func NewDirectoryLister(rootPath string) *directoryLister {
151	return &directoryLister{
152		rootPath:         rootPath,
153		dirPatterns:      csync.NewMap[string, []gitignore.Pattern](),
154		combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
155	}
156}
157
// pathToComponents breaks path into the slash-separated components expected
// by gitignore matching. OS-specific separators are normalized to "/" first.
// The empty path and "." denote the root itself and yield nil.
func pathToComponents(path string) []string {
	switch slashed := filepath.ToSlash(path); slashed {
	case "", ".":
		return nil
	default:
		return strings.Split(slashed, "/")
	}
}
166
167// getDirPatterns returns the parsed patterns for a specific directory's
168// .gitignore and .crushignore files. Results are cached.
169func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
170	return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
171		var allPatterns []gitignore.Pattern
172
173		relPath, _ := filepath.Rel(dl.rootPath, dir)
174		var domain []string
175		if relPath != "" && relPath != "." {
176			domain = pathToComponents(relPath)
177		}
178
179		for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
180			ignPath := filepath.Join(dir, ignoreFile)
181			if content, err := os.ReadFile(ignPath); err == nil {
182				lines := strings.Split(string(content), "\n")
183				allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
184			}
185		}
186		return allPatterns
187	})
188}
189
190// getCombinedMatcher returns a matcher that combines all gitignore patterns
191// from the root to the given directory, plus common patterns and home patterns.
192// Results are cached per directory, and we reuse parent directory matchers.
193func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
194	return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
195		var allPatterns []gitignore.Pattern
196
197		// Add common patterns first (lowest priority).
198		allPatterns = append(allPatterns, commonIgnorePatterns()...)
199
200		// Add global ignore patterns (git core.excludesFile + crush global ignore).
201		allPatterns = append(allPatterns, gitGlobalIgnorePatterns()...)
202		allPatterns = append(allPatterns, crushGlobalIgnorePatterns()...)
203
204		// Collect patterns from root to this directory.
205		relDir, _ := filepath.Rel(dl.rootPath, dir)
206		var pathParts []string
207		if relDir != "" && relDir != "." {
208			pathParts = pathToComponents(relDir)
209		}
210
211		// Add patterns from each directory from root to current.
212		currentPath := dl.rootPath
213		allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
214
215		for _, part := range pathParts {
216			currentPath = filepath.Join(currentPath, part)
217			allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
218		}
219
220		return gitignore.NewMatcher(allPatterns)
221	})
222}
223
224// shouldIgnore checks if a path should be ignored based on gitignore rules.
225// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
226func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
227	base := filepath.Base(path)
228
229	// Fast path: O(1) lookup for commonly ignored directories.
230	if isDir && fastIgnoreDirs[base] {
231		return true
232	}
233
234	// Check explicit ignore patterns.
235	if len(ignorePatterns) > 0 {
236		for _, pattern := range ignorePatterns {
237			if matched, err := filepath.Match(pattern, base); err == nil && matched {
238				return true
239			}
240		}
241	}
242
243	// Don't apply gitignore rules to the root directory itself.
244	if path == dl.rootPath {
245		return false
246	}
247
248	relPath, err := filepath.Rel(dl.rootPath, path)
249	if err != nil {
250		relPath = path
251	}
252
253	pathComponents := pathToComponents(relPath)
254	if len(pathComponents) == 0 {
255		return false
256	}
257
258	// Get the combined matcher for the parent directory.
259	parentDir := filepath.Dir(path)
260	matcher := dl.getCombinedMatcher(parentDir)
261
262	if matcher.Match(pathComponents, isDir) {
263		slog.Debug("Ignoring path", "path", relPath)
264		return true
265	}
266
267	return false
268}
269
270// ListDirectory lists files and directories in the specified path.
271func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
272	found := csync.NewSlice[string]()
273	dl := NewDirectoryLister(initialPath)
274
275	slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
276
277	conf := fastwalk.Config{
278		Follow:   true,
279		ToSlash:  fastwalk.DefaultToSlash(),
280		Sort:     fastwalk.SortDirsFirst,
281		MaxDepth: depth,
282	}
283
284	err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
285		if err != nil {
286			return nil // Skip files we don't have permission to access
287		}
288
289		isDir := d.IsDir()
290		if dl.shouldIgnore(path, ignorePatterns, isDir) {
291			if isDir {
292				return filepath.SkipDir
293			}
294			return nil
295		}
296
297		if path != initialPath {
298			if isDir {
299				path = path + string(filepath.Separator)
300			}
301			found.Append(path)
302		}
303
304		if limit > 0 && found.Len() >= limit {
305			return filepath.SkipAll
306		}
307
308		return nil
309	})
310	if err != nil && !errors.Is(err, filepath.SkipAll) {
311		return nil, false, err
312	}
313
314	matches, truncated := truncate(slices.Collect(found.Seq()), limit)
315	return matches, truncated || errors.Is(err, filepath.SkipAll), nil
316}