ls.go

  1package fsext
  2
  3import (
  4	"errors"
  5	"log/slog"
  6	"os"
  7	"path/filepath"
  8	"slices"
  9	"strings"
 10	"sync"
 11
 12	"github.com/charlievieth/fastwalk"
 13	"github.com/charmbracelet/crush/internal/csync"
 14	"github.com/charmbracelet/crush/internal/home"
 15	"github.com/go-git/go-git/v5/plumbing/format/gitignore"
 16)
 17
 18// fastIgnoreDirs is a set of directory names that are always ignored.
 19// This provides O(1) lookup for common cases to avoid expensive pattern matching.
 20var fastIgnoreDirs = map[string]bool{
 21	".git":            true,
 22	".svn":            true,
 23	".hg":             true,
 24	".bzr":            true,
 25	".vscode":         true,
 26	".idea":           true,
 27	"node_modules":    true,
 28	"__pycache__":     true,
 29	".pytest_cache":   true,
 30	".cache":          true,
 31	".tmp":            true,
 32	".Trash":          true,
 33	".Spotlight-V100": true,
 34	".fseventsd":      true,
 35	".crush":          true,
 36	"OrbStack":        true,
 37	".local":          true,
 38	".share":          true,
 39}
 40
 41// commonIgnorePatterns contains commonly ignored files and directories.
 42// Note: Exact directory names that are in fastIgnoreDirs are handled there for O(1) lookup.
 43// This list contains wildcard patterns and file-specific patterns.
 44var commonIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 45	patterns := []string{
 46		// IDE and editor files (wildcards)
 47		"*.swp",
 48		"*.swo",
 49		"*~",
 50		".DS_Store",
 51		"Thumbs.db",
 52
 53		// Build artifacts (non-fastIgnoreDirs)
 54		"target",
 55		"build",
 56		"dist",
 57		"out",
 58		"bin",
 59		"obj",
 60		"*.o",
 61		"*.so",
 62		"*.dylib",
 63		"*.dll",
 64		"*.exe",
 65
 66		// Logs and temporary files (wildcards)
 67		"*.log",
 68		"*.tmp",
 69		"*.temp",
 70
 71		// Language-specific (wildcards and non-fastIgnoreDirs)
 72		"*.pyc",
 73		"*.pyo",
 74		"vendor",
 75		"Cargo.lock",
 76		"package-lock.json",
 77		"yarn.lock",
 78		"pnpm-lock.yaml",
 79	}
 80	return parsePatterns(patterns, nil)
 81})
 82
 83var homeIgnorePatterns = sync.OnceValue(func() []gitignore.Pattern {
 84	homeDir := home.Dir()
 85	var lines []string
 86	for _, name := range []string{
 87		filepath.Join(homeDir, ".gitignore"),
 88		filepath.Join(homeDir, ".config", "git", "ignore"),
 89		filepath.Join(homeDir, ".config", "crush", "ignore"),
 90	} {
 91		if bts, err := os.ReadFile(name); err == nil {
 92			lines = append(lines, strings.Split(string(bts), "\n")...)
 93		}
 94	}
 95	return parsePatterns(lines, nil)
 96})
 97
 98// parsePatterns parses gitignore pattern strings into Pattern objects.
 99// domain is the path components where the patterns are defined (nil for global).
100func parsePatterns(lines []string, domain []string) []gitignore.Pattern {
101	var patterns []gitignore.Pattern
102	for _, line := range lines {
103		line = strings.TrimSpace(line)
104		if line == "" || strings.HasPrefix(line, "#") {
105			continue
106		}
107		patterns = append(patterns, gitignore.ParsePattern(line, domain))
108	}
109	return patterns
110}
111
112type directoryLister struct {
113	// dirPatterns caches parsed patterns from .gitignore/.crushignore for each directory.
114	// This avoids re-reading files when building combined matchers.
115	dirPatterns *csync.Map[string, []gitignore.Pattern]
116	// combinedMatchers caches a combined matcher for each directory that includes
117	// all ancestor patterns. This allows O(1) matching per file.
118	combinedMatchers *csync.Map[string, gitignore.Matcher]
119	rootPath         string
120}
121
122func NewDirectoryLister(rootPath string) *directoryLister {
123	return &directoryLister{
124		rootPath:         rootPath,
125		dirPatterns:      csync.NewMap[string, []gitignore.Pattern](),
126		combinedMatchers: csync.NewMap[string, gitignore.Matcher](),
127	}
128}
129
// pathToComponents breaks a path into the slash-separated segments expected
// by gitignore matching. OS-specific separators are normalised to "/" first.
// An empty path or "." (the directory itself) yields nil.
func pathToComponents(path string) []string {
	switch slashed := filepath.ToSlash(path); slashed {
	case "", ".":
		return nil
	default:
		return strings.Split(slashed, "/")
	}
}
138
139// getDirPatterns returns the parsed patterns for a specific directory's
140// .gitignore and .crushignore files. Results are cached.
141func (dl *directoryLister) getDirPatterns(dir string) []gitignore.Pattern {
142	return dl.dirPatterns.GetOrSet(dir, func() []gitignore.Pattern {
143		var allPatterns []gitignore.Pattern
144
145		relPath, _ := filepath.Rel(dl.rootPath, dir)
146		var domain []string
147		if relPath != "" && relPath != "." {
148			domain = pathToComponents(relPath)
149		}
150
151		for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
152			ignPath := filepath.Join(dir, ignoreFile)
153			if content, err := os.ReadFile(ignPath); err == nil {
154				lines := strings.Split(string(content), "\n")
155				allPatterns = append(allPatterns, parsePatterns(lines, domain)...)
156			}
157		}
158		return allPatterns
159	})
160}
161
162// getCombinedMatcher returns a matcher that combines all gitignore patterns
163// from the root to the given directory, plus common patterns and home patterns.
164// Results are cached per directory, and we reuse parent directory matchers.
165func (dl *directoryLister) getCombinedMatcher(dir string) gitignore.Matcher {
166	return dl.combinedMatchers.GetOrSet(dir, func() gitignore.Matcher {
167		var allPatterns []gitignore.Pattern
168
169		// Add common patterns first (lowest priority).
170		allPatterns = append(allPatterns, commonIgnorePatterns()...)
171
172		// Add home ignore patterns.
173		allPatterns = append(allPatterns, homeIgnorePatterns()...)
174
175		// Collect patterns from root to this directory.
176		relDir, _ := filepath.Rel(dl.rootPath, dir)
177		var pathParts []string
178		if relDir != "" && relDir != "." {
179			pathParts = pathToComponents(relDir)
180		}
181
182		// Add patterns from each directory from root to current.
183		currentPath := dl.rootPath
184		allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
185
186		for _, part := range pathParts {
187			currentPath = filepath.Join(currentPath, part)
188			allPatterns = append(allPatterns, dl.getDirPatterns(currentPath)...)
189		}
190
191		return gitignore.NewMatcher(allPatterns)
192	})
193}
194
195// shouldIgnore checks if a path should be ignored based on gitignore rules.
196// This uses a combined matcher that includes all ancestor patterns for O(1) matching.
197func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string, isDir bool) bool {
198	base := filepath.Base(path)
199
200	// Fast path: O(1) lookup for commonly ignored directories.
201	if isDir && fastIgnoreDirs[base] {
202		return true
203	}
204
205	// Check explicit ignore patterns.
206	if len(ignorePatterns) > 0 {
207		for _, pattern := range ignorePatterns {
208			if matched, err := filepath.Match(pattern, base); err == nil && matched {
209				return true
210			}
211		}
212	}
213
214	// Don't apply gitignore rules to the root directory itself.
215	if path == dl.rootPath {
216		return false
217	}
218
219	relPath, err := filepath.Rel(dl.rootPath, path)
220	if err != nil {
221		relPath = path
222	}
223
224	pathComponents := pathToComponents(relPath)
225	if len(pathComponents) == 0 {
226		return false
227	}
228
229	// Get the combined matcher for the parent directory.
230	parentDir := filepath.Dir(path)
231	matcher := dl.getCombinedMatcher(parentDir)
232
233	if matcher.Match(pathComponents, isDir) {
234		slog.Debug("Ignoring path", "path", relPath)
235		return true
236	}
237
238	return false
239}
240
241// ListDirectory lists files and directories in the specified path.
242func ListDirectory(initialPath string, ignorePatterns []string, depth, limit int) ([]string, bool, error) {
243	found := csync.NewSlice[string]()
244	dl := NewDirectoryLister(initialPath)
245
246	slog.Debug("Listing directory", "path", initialPath, "depth", depth, "limit", limit, "ignorePatterns", ignorePatterns)
247
248	conf := fastwalk.Config{
249		Follow:   true,
250		ToSlash:  fastwalk.DefaultToSlash(),
251		Sort:     fastwalk.SortDirsFirst,
252		MaxDepth: depth,
253	}
254
255	err := fastwalk.Walk(&conf, initialPath, func(path string, d os.DirEntry, err error) error {
256		if err != nil {
257			return nil // Skip files we don't have permission to access
258		}
259
260		isDir := d.IsDir()
261		if dl.shouldIgnore(path, ignorePatterns, isDir) {
262			if isDir {
263				return filepath.SkipDir
264			}
265			return nil
266		}
267
268		if path != initialPath {
269			if isDir {
270				path = path + string(filepath.Separator)
271			}
272			found.Append(path)
273		}
274
275		if limit > 0 && found.Len() >= limit {
276			return filepath.SkipAll
277		}
278
279		return nil
280	})
281	if err != nil && !errors.Is(err, filepath.SkipAll) {
282		return nil, false, err
283	}
284
285	matches, truncated := truncate(slices.Collect(found.Seq()), limit)
286	return matches, truncated || errors.Is(err, filepath.SkipAll), nil
287}