grep_test.go

  1package tools
  2
  3import (
  4	"context"
  5	"fmt"
  6	"os"
  7	"os/exec"
  8	"path/filepath"
  9	"regexp"
 10	"testing"
 11	"time"
 12
 13	"github.com/stretchr/testify/require"
 14)
 15
 16func TestRegexCache(t *testing.T) {
 17	cache := newRegexCache()
 18
 19	// Test basic caching
 20	pattern := "test.*pattern"
 21	regex1, err := cache.get(pattern)
 22	if err != nil {
 23		t.Fatalf("Failed to compile regex: %v", err)
 24	}
 25
 26	regex2, err := cache.get(pattern)
 27	if err != nil {
 28		t.Fatalf("Failed to get cached regex: %v", err)
 29	}
 30
 31	// Should be the same instance (cached)
 32	if regex1 != regex2 {
 33		t.Error("Expected cached regex to be the same instance")
 34	}
 35
 36	// Test that it actually works
 37	if !regex1.MatchString("test123pattern") {
 38		t.Error("Regex should match test string")
 39	}
 40}
 41
 42func TestGlobToRegexCaching(t *testing.T) {
 43	// Test that globToRegex uses pre-compiled regex
 44	pattern1 := globToRegex("*.{js,ts}")
 45
 46	// Should not panic and should work correctly
 47	regex1, err := regexp.Compile(pattern1)
 48	if err != nil {
 49		t.Fatalf("Failed to compile glob regex: %v", err)
 50	}
 51
 52	if !regex1.MatchString("test.js") {
 53		t.Error("Glob regex should match .js files")
 54	}
 55	if !regex1.MatchString("test.ts") {
 56		t.Error("Glob regex should match .ts files")
 57	}
 58	if regex1.MatchString("test.go") {
 59		t.Error("Glob regex should not match .go files")
 60	}
 61}
 62
 63func TestGrepWithIgnoreFiles(t *testing.T) {
 64	t.Parallel()
 65	tempDir := t.TempDir()
 66
 67	// Create test files
 68	testFiles := map[string]string{
 69		"file1.txt":           "hello world",
 70		"file2.txt":           "hello world",
 71		"ignored/file3.txt":   "hello world",
 72		"node_modules/lib.js": "hello world",
 73		"secret.key":          "hello world",
 74	}
 75
 76	for path, content := range testFiles {
 77		fullPath := filepath.Join(tempDir, path)
 78		require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
 79		require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
 80	}
 81
 82	// Create .gitignore file
 83	gitignoreContent := "ignored/\n*.key\n"
 84	require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte(gitignoreContent), 0o644))
 85
 86	// Create .crushignore file
 87	crushignoreContent := "node_modules/\n"
 88	require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte(crushignoreContent), 0o644))
 89
 90	// Test both implementations
 91	for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
 92		"regex": searchFilesWithRegex,
 93		"rg": func(pattern, path, include string) ([]grepMatch, error) {
 94			return searchWithRipgrep(t.Context(), getRgSearchCmd, pattern, path, include)
 95		},
 96	} {
 97		t.Run(name, func(t *testing.T) {
 98			t.Parallel()
 99
100			if name == "rg" && getRg() == "" {
101				t.Skip("rg is not in $PATH")
102			}
103
104			matches, err := fn("hello world", tempDir, "")
105			require.NoError(t, err)
106
107			// Convert matches to a set of file paths for easier testing
108			foundFiles := make(map[string]bool)
109			for _, match := range matches {
110				foundFiles[filepath.Base(match.path)] = true
111			}
112
113			// Should find file1.txt and file2.txt
114			require.True(t, foundFiles["file1.txt"], "Should find file1.txt")
115			require.True(t, foundFiles["file2.txt"], "Should find file2.txt")
116
117			// Should NOT find ignored files
118			require.False(t, foundFiles["file3.txt"], "Should not find file3.txt (ignored by .gitignore)")
119			require.False(t, foundFiles["lib.js"], "Should not find lib.js (ignored by .crushignore)")
120			require.False(t, foundFiles["secret.key"], "Should not find secret.key (ignored by .gitignore)")
121
122			// Should find exactly 2 matches
123			require.Equal(t, 2, len(matches), "Should find exactly 2 matches")
124		})
125	}
126}
127
128func TestSearchImplementations(t *testing.T) {
129	t.Parallel()
130	tempDir := t.TempDir()
131
132	for path, content := range map[string]string{
133		"file1.go":         "package main\nfunc main() {\n\tfmt.Println(\"hello world\")\n}",
134		"file2.js":         "console.log('hello world');",
135		"file3.txt":        "hello world from text file",
136		"binary.exe":       "\x00\x01\x02\x03",
137		"empty.txt":        "",
138		"subdir/nested.go": "package nested\n// hello world comment",
139		".hidden.txt":      "hello world in hidden file",
140		"file4.txt":        "hello world from a banana",
141		"file5.txt":        "hello world from a grape",
142	} {
143		fullPath := filepath.Join(tempDir, path)
144		require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755))
145		require.NoError(t, os.WriteFile(fullPath, []byte(content), 0o644))
146	}
147
148	require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gitignore"), []byte("file4.txt\n"), 0o644))
149	require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".crushignore"), []byte("file5.txt\n"), 0o644))
150
151	for name, fn := range map[string]func(pattern, path, include string) ([]grepMatch, error){
152		"regex": searchFilesWithRegex,
153		"rg": func(pattern, path, include string) ([]grepMatch, error) {
154			return searchWithRipgrep(t.Context(), getRgSearchCmd, pattern, path, include)
155		},
156	} {
157		t.Run(name, func(t *testing.T) {
158			t.Parallel()
159
160			if name == "rg" && getRg() == "" {
161				t.Skip("rg is not in $PATH")
162			}
163
164			matches, err := fn("hello world", tempDir, "")
165			require.NoError(t, err)
166
167			require.Equal(t, len(matches), 4)
168			for _, match := range matches {
169				require.NotEmpty(t, match.path)
170				require.NotZero(t, match.lineNum)
171				require.NotEmpty(t, match.lineText)
172				require.NotZero(t, match.modTime)
173				require.NotContains(t, match.path, ".hidden.txt")
174				require.NotContains(t, match.path, "file4.txt")
175				require.NotContains(t, match.path, "file5.txt")
176				require.NotContains(t, match.path, "binary.exe")
177			}
178		})
179	}
180}
181
// mockRgExecCmd is a test double for a ripgrep command. It remembers every
// argument handed to AddArgs and lets a test decide, through err, whether
// Output fails.
type mockRgExecCmd struct {
	// args accumulates the arguments passed to AddArgs, in call order.
	args []string
	// err, when non-nil, is returned verbatim by Output.
	err error
}

// AddArgs appends args to the recorded argument list.
func (m *mockRgExecCmd) AddArgs(args ...string) {
	recorded := m.args
	recorded = append(recorded, args...)
	m.args = recorded
}

// Output returns the configured error with nil output when one is set;
// otherwise it returns an empty, non-nil byte slice and no error.
func (m *mockRgExecCmd) Output() ([]byte, error) {
	if m.err == nil {
		return []byte{}, nil
	}
	return nil, m.err
}
197
198func TestSearchWithRipGrepButItFailsToRunHandleError(t *testing.T) {
199	tests := []struct {
200		name          string
201		err           error
202		expectMatches bool
203		expectError   bool
204	}{
205		{
206			name: "exit code 1 returns no matches and no error",
207			err: func() error {
208				ctx, cancel := context.WithTimeout(t.Context(), 1*time.Second)
209				defer cancel()
210				cmd := exec.CommandContext(ctx, "sh", "-c", "exit 1")
211				err := cmd.Run()
212				require.Error(t, err)
213				exitErr, ok := err.(*exec.ExitError)
214				require.True(t, ok)
215				require.Equal(t, 1, exitErr.ExitCode())
216				return exitErr
217			}(),
218			expectMatches: false,
219			expectError:   false,
220		},
221		{
222			name:          "non-exit error returns error",
223			err:           os.ErrPermission,
224			expectMatches: false,
225			expectError:   true,
226		},
227	}
228
229	for _, tt := range tests {
230		t.Run(tt.name, func(t *testing.T) {
231			mockRgCmd := mockRgExecCmd{
232				err: tt.err,
233			}
234
235			matches, err := searchWithRipgrep(t.Context(), func(ctx context.Context, pattern, path, include string) execCmd {
236				return &mockRgCmd
237			}, "", "", "")
238
239			if tt.expectMatches {
240				require.NotEmpty(t, matches)
241			} else {
242				require.Empty(t, matches)
243			}
244
245			if tt.expectError {
246				require.Error(t, err)
247			} else {
248				require.NoError(t, err)
249			}
250		})
251	}
252}
253
254func TestSearchFilesWithLimit(t *testing.T) {
255	t.Parallel()
256
257	tests := []struct {
258		name              string
259		limit             int
260		numMatches        int
261		expectedMatches   int
262		expectedTruncated bool
263	}{
264		{
265			name:              "limit of 100 truncates 150 results",
266			limit:             100,
267			numMatches:        150,
268			expectedMatches:   100,
269			expectedTruncated: true,
270		},
271		{
272			name:              "limit of 200 does not truncate 150 results",
273			limit:             200,
274			numMatches:        150,
275			expectedMatches:   150,
276			expectedTruncated: false,
277		},
278		{
279			name:              "limit of 150 exactly matches all files",
280			limit:             150,
281			numMatches:        150,
282			expectedMatches:   150,
283			expectedTruncated: false,
284		},
285	}
286
287	for _, tt := range tests {
288		t.Run(tt.name, func(t *testing.T) {
289			t.Parallel()
290
291			// Create mock ripgrep search that returns fake matches.
292			mockRipgrepSearch := func(ctx context.Context, rgSearchCmd resolveRgSearchCmd, pattern, path, include string) ([]grepMatch, error) {
293				matches := make([]grepMatch, tt.numMatches)
294				for i := 0; i < tt.numMatches; i++ {
295					matches[i] = grepMatch{
296						path:     fmt.Sprintf("/fake/path/file%03d.txt", i),
297						modTime:  time.Now().Add(-time.Duration(i) * time.Minute),
298						lineNum:  1,
299						lineText: "test pattern",
300					}
301				}
302				return matches, nil
303			}
304
305			matches, truncated, err := searchFiles(t.Context(), mockRipgrepSearch, "test pattern", "/fake/path", "", tt.limit)
306			require.NoError(t, err)
307			require.Equal(t, tt.expectedMatches, len(matches))
308			require.Equal(t, tt.expectedTruncated, truncated)
309		})
310	}
311}
312
313// Benchmark to show performance improvement
314func BenchmarkRegexCacheVsCompile(b *testing.B) {
315	cache := newRegexCache()
316	pattern := "test.*pattern.*[0-9]+"
317
318	b.Run("WithCache", func(b *testing.B) {
319		for b.Loop() {
320			_, err := cache.get(pattern)
321			if err != nil {
322				b.Fatal(err)
323			}
324		}
325	})
326
327	b.Run("WithoutCache", func(b *testing.B) {
328		for b.Loop() {
329			_, err := regexp.Compile(pattern)
330			if err != nil {
331				b.Fatal(err)
332			}
333		}
334	})
335}