ls.go

  1package fsext
  2
  3import (
  4	"cmp"
  5	"errors"
  6	"log/slog"
  7	"os"
  8	"path/filepath"
  9	"slices"
 10	"strings"
 11	"sync"
 12
 13	"github.com/charlievieth/fastwalk"
 14	"github.com/charmbracelet/crush/internal/csync"
 15	"github.com/charmbracelet/crush/internal/home"
 16	gitconfig "github.com/go-git/go-git/v5/config"
 17	"github.com/go-git/go-git/v5/plumbing/format/gitignore"
 18)
 19
 20// fastIgnoreDirs is a set of directory names that are always ignored.
 21// This provides O(1) lookup for common cases to avoid expensive pattern matching.
 22var fastIgnoreDirs = map[string]bool{
 23	".git":            true,
 24	".svn":            true,
 25	".hg":             true,
 26	".bzr":            true,
 27	".vscode":         true,
 28	".idea":           true,
 29	"node_modules":    true,
 30	"__pycache__":     true,
 31	".pytest_cache":   true,
 32	".cache":          true,
 33	".tmp":            true,
 34	".Trash":          true,
 35	".Spotlight-V100": true,
 36	".fseventsd":      true,
 37	".crush":          true,
 38	"OrbStack":        true,
 39	".local":          true,
 40	".share":          true,
 41}
 42
 43// commonIgnorePatterns contains commonly ignored files and directories.
 44// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
 45// This list contains wildcard patterns and file-specific patterns.
 46var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 47	patterns := []string{
 48		// IDE and editor files (wildcards)
 49		"*.swp",
 50		"*.swo",
 51		"*~",
 52		".DS_Store",
 53		"Thumbs.db",
 54
 55		// Build artifacts (non-fastIgnoreDirs)
 56		"target",
 57		"build",
 58		"dist",
 59		"out",
 60		"bin",
 61		"obj",
 62		"*.o",
 63		"*.so",
 64		"*.dylib",
 65		"*.dll",
 66		"*.exe",
 67
 68		// Logs and temporary files (wildcards)
 69		"*.log",
 70		"*.tmp",
 71		"*.temp",
 72
 73		// Language-specific (wildcards and non-fastIgnoreDirs)
 74		"*.pyc",
 75		"*.pyo",
 76		"vendor",
 77		"Cargo.lock",
 78		"package-lock.json",
 79		"yarn.lock",
 80		"pnpm-lock.yaml",
 81	}
 82	return parsePatterns(patterns, nil)
 83})
 84
 85// gitGlobalIgnorePatterns returns patterns from git's global excludes file
 86// (core.excludesFile), following git's config resolution order.
 87var gitGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 88	cfg, err := gitconfig.LoadConfig(gitconfig.GlobalScope)
 89	if err != nil {
 90		slog.Debug("Failed to load global git config", "error", err)
 91		return nil
 92	}
 93
 94	configPath := cmp.Or(
 95		os.Getenv("XDG_CONFIG_HOME"),
 96		filepath.Join(home.Dir(), ".config"),
 97	)
 98	excludesFilePath := cmp.Or(
 99		cfg.Raw.Section("core").Options.Get("excludesfile"),
100		filepath.Join(configPath, "git", "ignore"),
101	)
102	excludesFilePath = home.Long(excludesFilePath)
103
104	bts, err := os.ReadFile(excludesFilePath)
105	if err != nil {
106		if !os.IsNotExist(err) {
107			slog.Debug("Failed to read git global excludes file", "path", excludesFilePath, "error", err)
108		}
109		return nil
110	}
111
112	return parsePatterns(strings.Split(string(bts), "\n"), nil)
113})
114
115// crushGlobalIgnorePatterns returns patterns from the user's
116// ~/.config/crush/ignore file.
117var crushGlobalIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
118	configPath := cmp.Or(
119		os.Getenv("XDG_CONFIG_HOME"),
120		filepath.Join(home.Dir(), ".config"),
121	)
122	name := filepath.Join(configPath, "crush", "ignore")
123	bts, err := os.ReadFile(name)
124	if err != nil {
125		if !os.IsNotExist(err) {
126			slog.Debug("Failed to read crush global ignore file", "path", name, "error", err)
127		}
128		return nil
129	}
130	lines := strings.Split(string(bts), "\n")
131	return parsePatterns(lines, nil)
132})
133
134// parsePatterns parses gitignore pattern strings into Pattern objects.
135// domain is the path components where the patterns are defined (nil for global).
136func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
137	var patterns []gitignore.Pattern
138	for _, line := range lines {
139		line = strings.TrimSpace(line)
140		if line == "" || strings.HasPrefix(line, "#") {
141			continue
142		}
143		patterns = append(patterns, gitignore.ParsePattern(line, domain))
144	}
145	return patterns
146}
147
// directoryLister decides which paths below rootPath are ignored. It caches
// the ignore patterns read from each directory and the matchers built from
// them. rootPath is the directory that pattern domains and relative paths are
// computed against.
type directoryLister struct {
	// dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
	// This avoids re-reading files when building combined matchers.
	dirPatterns *csync.Map[string, []gitignore.Pattern]
	// combinedMatchers caches a combined matcher for each directory that includes
	// all ancestor patterns, so checking a path needs only one cached matcher.
	combinedMatchers *csync.Map[string, gitignore.Matcher]
	rootPath         string
}
157
158func NewDirectoryLister(rootPath string) *directoryLister {
159	return &directoryLister{
160		rootPath:         rootPath,
161		dirPatterns:      csync.NewMap[string, []gitignore.Pattern](),
162		combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
163	}
164}
165
// pathToComponents breaks a path into the slash-separated components used for
// gitignore matching. The path is first converted to forward slashes; an
// empty path or "." yields nil.
func pathToComponents(path string) []string {
	switch slashed := filepath.ToSlash(path); slashed {
	case "", ".":
		return nil
	default:
		return strings.Split(slashed, "/")
	}
}
174
175// getDirPatterns returns the parsed patterns for a specific directory's
176// .gitignore and .crushignore files. Results are cached.
177func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
178	return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
179		var allPatterns []gitignore.Pattern
180
181		relPath, _ := filepath.Rel(dl.rootPath, dir)
182		var domain []string
183		if relPath != "" && relPath != "." {
184			domain = pathToComponents(relPath)
185		}
186
187		for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
188			ignPath := filepath.Join(dir, ignoreFile)
189			if content, err := os.ReadFile(ignPath); err == nil {
190				lines := strings.Split(string(content), "\n")
191				allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
192			}
193		}
194		return allPatterns
195	})
196}
197
198// getCombinedMatcher returns a matcher that combines all gitignore patterns
199// from the root to the given directory, plus common patterns and home patterns.
200// Results are cached per directory, and we reuse parent directory matchers.
201func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
202	return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
203		var allPatterns []gitignore.Pattern
204
205		// Add common patterns first (lowest priority).
206		allPatterns = append(allPatterns, commonIgnorePatterns()...)
207
208		// Add global ignore patterns (git core.excludesFile + crush global ignore).
209		allPatterns = append(allPatterns, gitGlobalIgnorePatterns()...)
210		allPatterns = append(allPatterns, crushGlobalIgnorePatterns()...)
211
212		// Collect patterns from root to this directory.
213		relDir, _ := filepath.Rel(dl.rootPath, dir)
214		var pathParts []string
215		if relDir != "" && relDir != "." {
216			pathParts = pathToComponents(relDir)
217		}
218
219		// Add patterns from each directory from root to current.
220		currentPath := dl.rootPath
221		allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
222
223		for _, part := range pathParts {
224			currentPath = filepath.Join(currentPath, part)
225			allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
226		}
227
228		return gitignore.NewMatcher(allPatterns)
229	})
230}
231
232// shouldIgnore checks if a path should be ignored based on gitignore rules.
233// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
234func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
235	base := filepath.Base(path)
236
237	// Fast path: O(1) lookup for commonly ignored directories.
238	if isDir && fastIgnoreDirs[base] {
239		return true
240	}
241
242	// Check explicit ignore patterns.
243	if len(ignorePatterns) > 0 {
244		for _, pattern := range ignorePatterns {
245			if matched, err := filepath.Match(pattern, base); err == nil && matched {
246				return true
247			}
248		}
249	}
250
251	// Don't apply gitignore rules to the root directory itself.
252	if path == dl.rootPath {
253		return false
254	}
255
256	relPath, err := filepath.Rel(dl.rootPath, path)
257	if err != nil {
258		relPath = path
259	}
260
261	pathComponents := pathToComponents(relPath)
262	if len(pathComponents) == 0 {
263		return false
264	}
265
266	// Get the combined matcher for the parent directory.
267	parentDir := filepath.Dir(path)
268	matcher := dl.getCombinedMatcher(parentDir)
269
270	if matcher.Match(pathComponents, isDir) {
271		slog.Debug("Ignoring path", "path", relPath)
272		return true
273	}
274
275	return false
276}
277
278// ListDirectory lists files and directories in the specified path.
279func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
280	found := csync.NewSlice[string]()
281	dl := NewDirectoryLister(initialPath)
282
283	slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
284
285	conf := fastwalk.Config{
286		Follow:   true,
287		ToSlash:  fastwalk.DefaultToSlash(),
288		Sort:     fastwalk.SortDirsFirst,
289		MaxDepth: depth,
290	}
291
292	err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
293		if err != nil {
294			return nil // Skip files we don't have permission to access
295		}
296
297		isDir := d.IsDir()
298		if dl.shouldIgnore(path, ignorePatterns, isDir) {
299			if isDir {
300				return filepath.SkipDir
301			}
302			return nil
303		}
304
305		if path != initialPath {
306			if isDir {
307				path = path + string(filepath.Separator)
308			}
309			found.Append(path)
310		}
311
312		if limit > 0 && found.Len() >= limit {
313			return filepath.SkipAll
314		}
315
316		return nil
317	})
318	if err != nil && !errors.Is(err, filepath.SkipAll) {
319		return nil, false, err
320	}
321
322	matches, truncated := truncate(slices.Collect(found.Seq()), limit)
323	return matches, truncated || errors.Is(err, filepath.SkipAll), nil
324}