refactor(fsext): improve hierarchical ignore handling and consolidate file exclusion logic (#999)

Carlos Alexandro Becker and Crush created

* refactor(fsext): improve hierarchical ignore handling and consolidate file exclusion logic

- Refactor FastGlobWalker to use directoryLister for consistent ignore handling
- Add ShouldExcludeFile function for unified file exclusion checking
- Add WalkDirectories function for directory traversal with ignore support
- Improve directory pattern matching by checking both with and without trailing slash
- Add comprehensive tests for hierarchical ignore behavior and common patterns
- Remove direct dependency on go-gitignore in favor of existing directoryLister implementation

💖 Generated with Crush
Co-Authored-By: Crush <crush@charm.land>

* fix: improvements

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* chore: t.Context()

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* fix: tests

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

---------

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>
Co-authored-by: Crush <crush@charm.land>

Change summary

internal/fsext/fileutil.go           | 93 ++++++++++++++---------------
internal/fsext/ignore_test.go        | 96 ++++++++++++++++++++++++++++++
internal/fsext/ls.go                 | 17 ++++
internal/llm/provider/openai_test.go |  2 
internal/llm/tools/grep.go           | 26 +++++--
internal/llm/tools/grep_test.go      | 61 ++++++++++--------
internal/shell/command_block_test.go |  3 
internal/shell/shell_test.go         |  4 
8 files changed, 213 insertions(+), 89 deletions(-)

Detailed changes

internal/fsext/fileutil.go 🔗

@@ -11,8 +11,6 @@ import (
 	"github.com/bmatcuk/doublestar/v4"
 	"github.com/charlievieth/fastwalk"
 	"github.com/charmbracelet/crush/internal/home"
-
-	ignore "github.com/sabhiram/go-gitignore"
 )
 
 type FileInfo struct {
@@ -58,60 +56,22 @@ func SkipHidden(path string) bool {
 }
 
 // FastGlobWalker provides gitignore-aware file walking with fastwalk
+// It uses hierarchical ignore checking like git does, checking .gitignore/.crushignore
+// files in each directory from the root to the target path.
 type FastGlobWalker struct {
-	gitignore   *ignore.GitIgnore
-	crushignore *ignore.GitIgnore
-	rootPath    string
+	directoryLister *directoryLister
 }
 
 func NewFastGlobWalker(searchPath string) *FastGlobWalker {
-	walker := &FastGlobWalker{
-		rootPath: searchPath,
-	}
-
-	// Load gitignore if it exists
-	gitignorePath := filepath.Join(searchPath, ".gitignore")
-	if _, err := os.Stat(gitignorePath); err == nil {
-		if gi, err := ignore.CompileIgnoreFile(gitignorePath); err == nil {
-			walker.gitignore = gi
-		}
+	return &FastGlobWalker{
+		directoryLister: NewDirectoryLister(searchPath),
 	}
-
-	// Load crushignore if it exists
-	crushignorePath := filepath.Join(searchPath, ".crushignore")
-	if _, err := os.Stat(crushignorePath); err == nil {
-		if ci, err := ignore.CompileIgnoreFile(crushignorePath); err == nil {
-			walker.crushignore = ci
-		}
-	}
-
-	return walker
 }
 
-// ShouldSkip checks if a path should be skipped based on gitignore, crushignore, and hidden file rules
+// ShouldSkip checks if a path should be skipped based on hierarchical gitignore,
+// crushignore, and hidden file rules
 func (w *FastGlobWalker) ShouldSkip(path string) bool {
-	if SkipHidden(path) {
-		return true
-	}
-
-	relPath, err := filepath.Rel(w.rootPath, path)
-	if err != nil {
-		return false
-	}
-
-	if w.gitignore != nil {
-		if w.gitignore.MatchesPath(relPath) {
-			return true
-		}
-	}
-
-	if w.crushignore != nil {
-		if w.crushignore.MatchesPath(relPath) {
-			return true
-		}
-	}
-
-	return false
+	return w.directoryLister.shouldIgnore(path, nil)
 }
 
 func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool, error) {
@@ -182,6 +142,43 @@ func GlobWithDoubleStar(pattern, searchPath string, limit int) ([]string, bool,
 	return results, truncated, nil
 }
 
+// ShouldExcludeFile checks if a file should be excluded from processing
+// based on common patterns and ignore rules
+func ShouldExcludeFile(rootPath, filePath string) bool {
+	return NewDirectoryLister(rootPath).
+		shouldIgnore(filePath, nil)
+}
+
+// WalkDirectories walks a directory tree and calls the provided function for each directory,
+// respecting hierarchical .gitignore/.crushignore files like git does.
+func WalkDirectories(rootPath string, fn func(path string, d os.DirEntry, err error) error) error {
+	dl := NewDirectoryLister(rootPath)
+
+	conf := fastwalk.Config{
+		Follow:  true,
+		ToSlash: fastwalk.DefaultToSlash(),
+		Sort:    fastwalk.SortDirsFirst,
+	}
+
+	return fastwalk.Walk(&conf, rootPath, func(path string, d os.DirEntry, err error) error {
+		if err != nil {
+			return fn(path, d, err)
+		}
+
+		// Only process directories
+		if !d.IsDir() {
+			return nil
+		}
+
+		// Check if directory should be ignored
+		if dl.shouldIgnore(path, nil) {
+			return filepath.SkipDir
+		}
+
+		return fn(path, d, err)
+	})
+}
+
 func PrettyPath(path string) string {
 	return home.Short(path)
 }

internal/fsext/ignore_test.go 🔗

@@ -2,6 +2,7 @@ package fsext
 
 import (
 	"os"
+	"path/filepath"
 	"testing"
 
 	"github.com/stretchr/testify/require"
@@ -30,3 +31,98 @@ func TestCrushIgnore(t *testing.T) {
 	require.False(t, dl.shouldIgnore("test1.txt", nil), ".txt files should not be ignored")
 	require.True(t, dl.shouldIgnore("test3.tmp", nil), ".tmp files should be ignored by common patterns")
 }
+
+func TestShouldExcludeFile(t *testing.T) {
+	t.Parallel()
+
+	// Create a temporary directory structure for testing
+	tempDir := t.TempDir()
+
+	// Create directories that should be ignored
+	nodeModules := filepath.Join(tempDir, "node_modules")
+	target := filepath.Join(tempDir, "target")
+	customIgnored := filepath.Join(tempDir, "custom_ignored")
+	normalDir := filepath.Join(tempDir, "src")
+
+	for _, dir := range []string{nodeModules, target, customIgnored, normalDir} {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			t.Fatalf("Failed to create directory %s: %v", dir, err)
+		}
+	}
+
+	// Create .gitignore file
+	gitignoreContent := "node_modules/\ntarget/\n"
+	if err := os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644); err != nil {
+		t.Fatalf("Failed to create .gitignore: %v", err)
+	}
+
+	// Create .crushignore file
+	crushignoreContent := "custom_ignored/\n"
+	if err := os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644); err != nil {
+		t.Fatalf("Failed to create .crushignore: %v", err)
+	}
+
+	// Test that ignored directories are properly ignored
+	require.True(t, ShouldExcludeFile(tempDir, nodeModules), "Expected node_modules to be ignored by .gitignore")
+	require.True(t, ShouldExcludeFile(tempDir, target), "Expected target to be ignored by .gitignore")
+	require.True(t, ShouldExcludeFile(tempDir, customIgnored), "Expected custom_ignored to be ignored by .crushignore")
+
+	// Test that normal directories are not ignored
+	require.False(t, ShouldExcludeFile(tempDir, normalDir), "Expected src directory to not be ignored")
+
+	// Test that the workspace root itself is not ignored
+	require.False(t, ShouldExcludeFile(tempDir, tempDir), "Expected workspace root to not be ignored")
+}
+
+func TestShouldExcludeFileHierarchical(t *testing.T) {
+	t.Parallel()
+
+	// Create a nested directory structure for testing hierarchical ignore
+	tempDir := t.TempDir()
+
+	// Create nested directories
+	subDir := filepath.Join(tempDir, "subdir")
+	nestedNormal := filepath.Join(subDir, "normal_nested")
+
+	for _, dir := range []string{subDir, nestedNormal} {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			t.Fatalf("Failed to create directory %s: %v", dir, err)
+		}
+	}
+
+	// Create .crushignore in subdir that ignores normal_nested
+	subCrushignore := "normal_nested/\n"
+	if err := os.WriteFile(filepath.Join(subDir, ".crushignore"), []byte(subCrushignore), 0o644); err != nil {
+		t.Fatalf("Failed to create subdir .crushignore: %v", err)
+	}
+
+	// Test hierarchical ignore behavior - this should work because the .crushignore is in the parent directory
+	require.True(t, ShouldExcludeFile(tempDir, nestedNormal), "Expected normal_nested to be ignored by subdir .crushignore")
+	require.False(t, ShouldExcludeFile(tempDir, subDir), "Expected subdir itself to not be ignored")
+}
+
+func TestShouldExcludeFileCommonPatterns(t *testing.T) {
+	t.Parallel()
+
+	tempDir := t.TempDir()
+
+	// Create directories that should be ignored by common patterns
+	commonIgnored := []string{
+		filepath.Join(tempDir, ".git"),
+		filepath.Join(tempDir, "node_modules"),
+		filepath.Join(tempDir, "__pycache__"),
+		filepath.Join(tempDir, "target"),
+		filepath.Join(tempDir, ".vscode"),
+	}
+
+	for _, dir := range commonIgnored {
+		if err := os.MkdirAll(dir, 0o755); err != nil {
+			t.Fatalf("Failed to create directory %s: %v", dir, err)
+		}
+	}
+
+	// Test that common patterns are ignored even without explicit ignore files
+	for _, dir := range commonIgnored {
+		require.True(t, ShouldExcludeFile(tempDir, dir), "Expected %s to be ignored by common patterns", filepath.Base(dir))
+	}
+}

internal/fsext/ls.go 🔗

@@ -141,8 +141,16 @@ func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string) bo
 		return true
 	}
 
-	if dl.getIgnore(filepath.Dir(path)).MatchesPath(relPath) {
-		slog.Debug("ignoring dir pattern", "path", relPath, "dir", filepath.Dir(path))
+	parentDir := filepath.Dir(path)
+	ignoreParser := dl.getIgnore(parentDir)
+	if ignoreParser.MatchesPath(relPath) {
+		slog.Debug("ignoring dir pattern", "path", relPath, "dir", parentDir)
+		return true
+	}
+
+	// For directories, also check with trailing slash (gitignore convention)
+	if ignoreParser.MatchesPath(relPath + "/") {
+		slog.Debug("ignoring dir pattern with slash", "path", relPath+"/", "dir", parentDir)
 		return true
 	}
 
@@ -160,11 +168,14 @@ func (dl *directoryLister) shouldIgnore(path string, ignorePatterns []string) bo
 
 func (dl *directoryLister) checkParentIgnores(path string) bool {
 	parent := filepath.Dir(filepath.Dir(path))
-	for parent != dl.rootPath && parent != "." && path != "." {
+	for parent != "." && path != "." {
 		if dl.getIgnore(parent).MatchesPath(path) {
 			slog.Debug("ingoring parent dir pattern", "path", path, "dir", parent)
 			return true
 		}
+		if parent == dl.rootPath {
+			break
+		}
 		parent = filepath.Dir(parent)
 	}
 	return false

internal/llm/provider/openai_test.go 🔗

@@ -75,7 +75,7 @@ func TestOpenAIClientStreamChoices(t *testing.T) {
 		},
 	}
 
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
 	defer cancel()
 
 	eventsChan := client.stream(ctx, messages, nil)

internal/llm/tools/grep.go 🔗

@@ -279,11 +279,13 @@ func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]gr
 		return nil, fmt.Errorf("ripgrep not found in $PATH")
 	}
 
-	cmd.Args = append(
-		cmd.Args,
-		"--ignore-file", filepath.Join(path, ".gitignore"),
-		"--ignore-file", filepath.Join(path, ".crushignore"),
-	)
+	// Only add ignore files if they exist
+	for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
+		ignorePath := filepath.Join(path, ignoreFile)
+		if _, err := os.Stat(ignorePath); err == nil {
+			cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
+		}
+	}
 
 	output, err := cmd.Output()
 	if err != nil {
@@ -357,14 +359,24 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
 		}
 
 		if info.IsDir() {
-			return nil // Skip directories
+			// Check if directory should be skipped
+			if walker.ShouldSkip(path) {
+				return filepath.SkipDir
+			}
+			return nil // Continue into directory
 		}
 
-		// Use walker's shouldSkip method instead of just SkipHidden
+		// Use walker's shouldSkip method for files
 		if walker.ShouldSkip(path) {
 			return nil
 		}
 
+		// Skip hidden files (starting with a dot) to match ripgrep's default behavior
+		base := filepath.Base(path)
+		if base != "." && strings.HasPrefix(base, ".") {
+			return nil
+		}
+
 		if includePattern != nil && !includePattern.MatchString(path) {
 			return nil
 		}

internal/llm/tools/grep_test.go 🔗

@@ -1,8 +1,6 @@
 package tools
 
 import (
-	"context"
-	"encoding/json"
 	"os"
 	"path/filepath"
 	"regexp"
@@ -59,6 +57,7 @@ func TestGlobToRegexCaching(t *testing.T) {
 }
 
 func TestGrepWithIgnoreFiles(t *testing.T) {
+	t.Parallel()
 	tempDir := t.TempDir()
 
 	// Create test files
@@ -84,32 +83,42 @@ func TestGrepWithIgnoreFiles(t *testing.T) {
 	crushignoreContent := "node_modules/\n"
 	require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
 
-	// Create grep tool
-	grepTool := NewGrepTool(tempDir)
+	// Test both implementations
+	for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
+		"regex": searchFilesWithRegex,
+		"rg": func(pattern, path, include string) ([]grepMatch, error) {
+			return searchWithRipgrep(t.Context(), pattern, path, include)
+		},
+	} {
+		t.Run(name, func(t *testing.T) {
+			t.Parallel()
+
+			if name == "rg" && getRg() == "" {
+				t.Skip("rg is not in $PATH")
+			}
+
+			matches, err := fn("hello world", tempDir, "")
+			require.NoError(t, err)
 
-	// Create grep parameters
-	params := GrepParams{
-		Pattern: "hello world",
-		Path:    tempDir,
+			// Convert matches to a set of file paths for easier testing
+			foundFiles := make(map[string]bool)
+			for _, match := range matches {
+				foundFiles[filepath.Base(match.path)] = true
+			}
+
+			// Should find file1.txt and file2.txt
+			require.True(t, foundFiles["file1.txt"], "Should find file1.txt")
+			require.True(t, foundFiles["file2.txt"], "Should find file2.txt")
+
+			// Should NOT find ignored files
+			require.False(t, foundFiles["file3.txt"], "Should not find file3.txt (ignored by .gitignore)")
+			require.False(t, foundFiles["lib.js"], "Should not find lib.js (ignored by .crushignore)")
+			require.False(t, foundFiles["secret.key"], "Should not find secret.key (ignored by .gitignore)")
+
+			// Should find exactly 2 matches
+			require.Equal(t, 2, len(matches), "Should find exactly 2 matches")
+		})
 	}
-	paramsJSON, err := json.Marshal(params)
-	require.NoError(t, err)
-
-	// Run grep
-	call := ToolCall{Input: string(paramsJSON)}
-	response, err := grepTool.Run(context.Background(), call)
-	require.NoError(t, err)
-
-	// Check results - should only find file1.txt and file2.txt
-	// ignored/file3.txt should be ignored by .gitignore
-	// node_modules/lib.js should be ignored by .crushignore
-	// secret.key should be ignored by .gitignore
-	result := response.Content
-	require.Contains(t, result, "file1.txt")
-	require.Contains(t, result, "file2.txt")
-	require.NotContains(t, result, "file3.txt")
-	require.NotContains(t, result, "lib.js")
-	require.NotContains(t, result, "secret.key")
 }
 
 func TestSearchImplementations(t *testing.T) {

internal/shell/command_block_test.go 🔗

@@ -1,7 +1,6 @@
 package shell
 
 import (
-	"context"
 	"strings"
 	"testing"
 
@@ -92,7 +91,7 @@ func TestCommandBlocking(t *testing.T) {
 				BlockFuncs: tt.blockFuncs,
 			})
 
-			_, _, err := shell.Exec(context.Background(), tt.command)
+			_, _, err := shell.Exec(t.Context(), tt.command)
 
 			if tt.shouldBlock {
 				if err == nil {

internal/shell/shell_test.go 🔗

@@ -16,7 +16,7 @@ func BenchmarkShellQuickCommands(b *testing.B) {
 	b.ReportAllocs()
 
 	for b.Loop() {
-		_, _, err := shell.Exec(context.Background(), "echo test")
+		_, _, err := shell.Exec(b.Context(), "echo test")
 		exitCode := ExitCode(err)
 		if err != nil || exitCode != 0 {
 			b.Fatalf("Command failed: %v, exit code: %d", err, exitCode)
@@ -100,7 +100,7 @@ func TestRunContinuity(t *testing.T) {
 
 func TestCrossPlatformExecution(t *testing.T) {
 	shell := NewShell(&Options{WorkingDir: "."})
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second)
 	defer cancel()
 
 	// Test a simple command that should work on all platforms