grep.go

  1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"fmt"
  7	"io"
  8	"os"
  9	"os/exec"
 10	"path/filepath"
 11	"regexp"
 12	"sort"
 13	"strconv"
 14	"strings"
 15	"sync"
 16	"time"
 17
 18	"github.com/charmbracelet/crush/internal/ai"
 19	"github.com/charmbracelet/crush/internal/fsext"
 20)
 21
// regexCache memoizes compiled regular expressions so that a pattern is
// compiled at most once per cache instance. The map is keyed by the pattern's
// source text; mu guards every access to it (see get), which makes a single
// cache safe to share between goroutines.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex
}
 27
 28// newRegexCache creates a new regex cache
 29func newRegexCache() *regexCache {
 30	return &regexCache{
 31		cache: make(map[string]*regexp.Regexp),
 32	}
 33}
 34
 35// get retrieves a compiled regex from cache or compiles and caches it
 36func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 37	// Try to get from cache first (read lock)
 38	rc.mu.RLock()
 39	if regex, exists := rc.cache[pattern]; exists {
 40		rc.mu.RUnlock()
 41		return regex, nil
 42	}
 43	rc.mu.RUnlock()
 44
 45	// Compile the regex (write lock)
 46	rc.mu.Lock()
 47	defer rc.mu.Unlock()
 48
 49	// Double-check in case another goroutine compiled it while we waited
 50	if regex, exists := rc.cache[pattern]; exists {
 51		return regex, nil
 52	}
 53
 54	// Compile and cache the regex
 55	regex, err := regexp.Compile(pattern)
 56	if err != nil {
 57		return nil, err
 58	}
 59
 60	rc.cache[pattern] = regex
 61	return regex, nil
 62}
 63
// Package-level regex state shared by every grep invocation:
//   - searchRegexCache holds compiled content-search patterns
//     (used by searchFilesWithRegex);
//   - globRegexCache holds compiled regexes derived from include globs;
//   - globBraceRegex matches one "{...}" group inside a glob.
var (
	searchRegexCache = newRegexCache()
	globRegexCache   = newRegexCache()
	// Compiled once at package init so globToRegex does not recompile it on
	// every conversion. The capture group is the text between the braces.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
 71
// GrepParams is the argument payload of the grep tool. Pattern is required
// (the handler rejects an empty value); Path falls back to the tool's working
// directory when empty; Include optionally restricts which files are
// searched; LiteralText makes the handler escape Pattern before searching.
// The struct tags carry the JSON field names and the per-field descriptions.
type GrepParams struct {
	Pattern     string `json:"pattern" description:"The regex pattern to search for in file contents"`
	Path        string `json:"path" description:"The directory to search in. Defaults to the current working directory."`
	Include     string `json:"include" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
	LiteralText bool   `json:"literal_text" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
}
 78
// grepMatch is one search hit: the file it was found in, that file's
// modification time (used to order results newest first), and the number and
// text of the matching line. The tool's output formatter prints the line only
// when lineNum is greater than zero.
type grepMatch struct {
	path     string
	modTime  time.Time
	lineNum  int
	lineText string
}
 85
// GrepResponseMetadata is attached to every successful grep response.
// NumberOfMatches is the number of matches included in the response text and
// Truncated reports whether further matches were dropped because the result
// limit was exceeded.
type GrepResponseMetadata struct {
	NumberOfMatches int  `json:"number_of_matches"`
	Truncated       bool `json:"truncated"`
}
 90
const (
	// GrepToolName is the name under which NewGrepTool registers the tool.
	GrepToolName = "grep"
)
 94
 95func NewGrepTool(workingDir string) ai.AgentTool {
 96	return ai.NewTypedToolFunc(
 97		GrepToolName,
 98		`Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).
 99
100WHEN TO USE THIS TOOL:
101- Use when you need to find files containing specific text or patterns
102- Great for searching code bases for function names, variable declarations, or error messages
103- Useful for finding all files that use a particular API or pattern
104
105HOW TO USE:
106- Provide a regex pattern to search for within file contents
107- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
108- Optionally specify a starting directory (defaults to current working directory)
109- Optionally provide an include pattern to filter which files to search
110- Results are sorted with most recently modified files first
111
112REGEX PATTERN SYNTAX (when literal_text=false):
113- Supports standard regular expression syntax
114- 'function' searches for the literal text "function"
115- 'log\..*Error' finds text starting with "log." and ending with "Error"
116- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript
117
118COMMON INCLUDE PATTERN EXAMPLES:
119- '*.js' - Only search JavaScript files
120- '*.{ts,tsx}' - Only search TypeScript files
121- '*.go' - Only search Go files
122
123LIMITATIONS:
124- Results are limited to 100 files (newest first)
125- Performance depends on the number of files being searched
126- Very large binary files may be skipped
127- Hidden files (starting with '.') are skipped
128
129IGNORE FILE SUPPORT:
130- Respects .gitignore patterns to skip ignored files and directories
131- Respects .crushignore patterns for additional ignore rules
132- Both ignore files are automatically detected in the search root directory
133
134CROSS-PLATFORM NOTES:
135- Uses ripgrep (rg) command if available for better performance
136- Falls back to built-in Go implementation if ripgrep is not available
137- File paths are normalized automatically for cross-platform compatibility
138
139TIPS:
140- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
141- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
142- Always check if results are truncated and refine your search pattern if needed
143- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`,
144		func(ctx context.Context, params GrepParams, call ai.ToolCall) (ai.ToolResponse, error) {
145			if params.Pattern == "" {
146				return ai.NewTextErrorResponse("pattern is required"), nil
147			}
148
149			// If literal_text is true, escape the pattern
150			searchPattern := params.Pattern
151			if params.LiteralText {
152				searchPattern = escapeRegexPattern(params.Pattern)
153			}
154
155			searchPath := params.Path
156			if searchPath == "" {
157				searchPath = workingDir
158			}
159
160			matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
161			if err != nil {
162				return ai.ToolResponse{}, fmt.Errorf("error searching files: %w", err)
163			}
164
165			var output strings.Builder
166			if len(matches) == 0 {
167				output.WriteString("No files found")
168			} else {
169				fmt.Fprintf(&output, "Found %d matches\n", len(matches))
170
171				currentFile := ""
172				for _, match := range matches {
173					if currentFile != match.path {
174						if currentFile != "" {
175							output.WriteString("\n")
176						}
177						currentFile = match.path
178						fmt.Fprintf(&output, "%s:\n", match.path)
179					}
180					if match.lineNum > 0 {
181						fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
182					} else {
183						fmt.Fprintf(&output, "  %s\n", match.path)
184					}
185				}
186
187				if truncated {
188					output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
189				}
190			}
191
192			return ai.WithResponseMetadata(
193				ai.NewTextResponse(output.String()),
194				GrepResponseMetadata{
195					NumberOfMatches: len(matches),
196					Truncated:       truncated,
197				},
198			), nil
199		})
200}
201
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) [ ] { } ^ $ |) preceded by a backslash, so the
// result matches the original text literally.
//
// It delegates to regexp.QuoteMeta, which escapes exactly that character set
// in a single pass; the previous implementation rescanned the whole string
// once per metacharacter.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
213
214func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
215	matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
216	if err != nil {
217		matches, err = searchFilesWithRegex(pattern, rootPath, include)
218		if err != nil {
219			return nil, false, err
220		}
221	}
222
223	sort.Slice(matches, func(i, j int) bool {
224		return matches[i].modTime.After(matches[j].modTime)
225	})
226
227	truncated := len(matches) > limit
228	if truncated {
229		matches = matches[:limit]
230	}
231
232	return matches, truncated, nil
233}
234
235func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
236	cmd := getRgSearchCmd(ctx, pattern, path, include)
237	if cmd == nil {
238		return nil, fmt.Errorf("ripgrep not found in $PATH")
239	}
240
241	cmd.Args = append(
242		cmd.Args,
243		"--ignore-file", filepath.Join(path, ".gitignore"),
244		"--ignore-file", filepath.Join(path, ".crushignore"),
245	)
246
247	output, err := cmd.Output()
248	if err != nil {
249		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
250			return []grepMatch{}, nil
251		}
252		return nil, err
253	}
254
255	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
256	matches := make([]grepMatch, 0, len(lines))
257
258	for _, line := range lines {
259		if line == "" {
260			continue
261		}
262
263		// Parse ripgrep output format: file:line:content
264		parts := strings.SplitN(line, ":", 3)
265		if len(parts) < 3 {
266			continue
267		}
268
269		filePath := parts[0]
270		lineNum, err := strconv.Atoi(parts[1])
271		if err != nil {
272			continue
273		}
274		lineText := parts[2]
275
276		fileInfo, err := os.Stat(filePath)
277		if err != nil {
278			continue // Skip files we can't access
279		}
280
281		matches = append(matches, grepMatch{
282			path:     filePath,
283			modTime:  fileInfo.ModTime(),
284			lineNum:  lineNum,
285			lineText: lineText,
286		})
287	}
288
289	return matches, nil
290}
291
292func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
293	matches := []grepMatch{}
294
295	// Use cached regex compilation
296	regex, err := searchRegexCache.get(pattern)
297	if err != nil {
298		return nil, fmt.Errorf("invalid regex pattern: %w", err)
299	}
300
301	var includePattern *regexp.Regexp
302	if include != "" {
303		regexPattern := globToRegex(include)
304		includePattern, err = globRegexCache.get(regexPattern)
305		if err != nil {
306			return nil, fmt.Errorf("invalid include pattern: %w", err)
307		}
308	}
309
310	// Create walker with gitignore and crushignore support
311	walker := fsext.NewFastGlobWalker(rootPath)
312
313	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
314		if err != nil {
315			return nil // Skip errors
316		}
317
318		if info.IsDir() {
319			return nil // Skip directories
320		}
321
322		// Use walker's shouldSkip method instead of just SkipHidden
323		if walker.ShouldSkip(path) {
324			return nil
325		}
326
327		if includePattern != nil && !includePattern.MatchString(path) {
328			return nil
329		}
330
331		match, lineNum, lineText, err := fileContainsPattern(path, regex)
332		if err != nil {
333			return nil // Skip files we can't read
334		}
335
336		if match {
337			matches = append(matches, grepMatch{
338				path:     path,
339				modTime:  info.ModTime(),
340				lineNum:  lineNum,
341				lineText: lineText,
342			})
343
344			if len(matches) >= 200 {
345				return filepath.SkipAll
346			}
347		}
348
349		return nil
350	})
351	if err != nil {
352		return nil, err
353	}
354
355	return matches, nil
356}
357
358func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
359	// Quick binary file detection
360	if isBinaryFile(filePath) {
361		return false, 0, "", nil
362	}
363
364	file, err := os.Open(filePath)
365	if err != nil {
366		return false, 0, "", err
367	}
368	defer file.Close()
369
370	scanner := bufio.NewScanner(file)
371	lineNum := 0
372	for scanner.Scan() {
373		lineNum++
374		line := scanner.Text()
375		if pattern.MatchString(line) {
376			return true, lineNum, line, nil
377		}
378	}
379
380	return false, 0, "", scanner.Err()
381}
382
// binaryExts is the set of lower-case file extensions (including the leading
// dot) that isBinaryFile treats as binary without opening the file.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}
391
392// isBinaryFile performs a quick check to determine if a file is binary
393func isBinaryFile(filePath string) bool {
394	// Check file extension first (fastest)
395	ext := strings.ToLower(filepath.Ext(filePath))
396	if _, isBinary := binaryExts[ext]; isBinary {
397		return true
398	}
399
400	// Quick content check for files without clear extensions
401	file, err := os.Open(filePath)
402	if err != nil {
403		return false // If we can't open it, let the caller handle the error
404	}
405	defer file.Close()
406
407	// Read first 512 bytes to check for null bytes
408	buffer := make([]byte, 512)
409	n, err := file.Read(buffer)
410	if err != nil && err != io.EOF {
411		return false
412	}
413
414	// Check for null bytes (common in binary files)
415	for i := range n {
416		if buffer[i] == 0 {
417			return true
418		}
419	}
420
421	return false
422}
423
424func globToRegex(glob string) string {
425	regexPattern := strings.ReplaceAll(glob, ".", "\\.")
426	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
427	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
428
429	// Use pre-compiled regex instead of compiling each time
430	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
431		inner := match[1 : len(match)-1]
432		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
433	})
434
435	return regexPattern
436}