grep.go

  1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"os"
 10	"path/filepath"
 11	"regexp"
 12	"sort"
 13	"strings"
 14	"sync"
 15	"time"
 16
 17	"github.com/charmbracelet/crush/internal/fsext"
 18)
 19
 20// regexCache provides thread-safe caching of compiled regex patterns
 21type regexCache struct {
 22	cache map[string]*regexp.Regexp
 23	mu    sync.RWMutex
 24}
 25
 26// newRegexCache creates a new regex cache
 27func newRegexCache() *regexCache {
 28	return &regexCache{
 29		cache: make(map[string]*regexp.Regexp),
 30	}
 31}
 32
 33// get retrieves a compiled regex from cache or compiles and caches it
 34func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 35	// Try to get from cache first (read lock)
 36	rc.mu.RLock()
 37	if regex, exists := rc.cache[pattern]; exists {
 38		rc.mu.RUnlock()
 39		return regex, nil
 40	}
 41	rc.mu.RUnlock()
 42
 43	// Compile the regex (write lock)
 44	rc.mu.Lock()
 45	defer rc.mu.Unlock()
 46
 47	// Double-check in case another goroutine compiled it while we waited
 48	if regex, exists := rc.cache[pattern]; exists {
 49		return regex, nil
 50	}
 51
 52	// Compile and cache the regex
 53	regex, err := regexp.Compile(pattern)
 54	if err != nil {
 55		return nil, err
 56	}
 57
 58	rc.cache[pattern] = regex
 59	return regex, nil
 60}
 61
 62// Global regex cache instances
 63var (
 64	searchRegexCache = newRegexCache()
 65	globRegexCache   = newRegexCache()
 66	// Pre-compiled regex for glob conversion (used frequently)
 67	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
 68)
 69
 70type GrepParams struct {
 71	Pattern     string `json:"pattern"`
 72	Path        string `json:"path"`
 73	Include     string `json:"include"`
 74	LiteralText bool   `json:"literal_text"`
 75}
 76
 77type grepMatch struct {
 78	path     string
 79	modTime  time.Time
 80	lineNum  int
 81	lineText string
 82}
 83
 84type GrepResponseMetadata struct {
 85	NumberOfMatches int  `json:"number_of_matches"`
 86	Truncated       bool `json:"truncated"`
 87}
 88
 89type grepTool struct {
 90	workingDir string
 91}
 92
 93const (
 94	GrepToolName    = "grep"
 95	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).
 96
 97WHEN TO USE THIS TOOL:
 98- Use when you need to find files containing specific text or patterns
 99- Great for searching code bases for function names, variable declarations, or error messages
100- Useful for finding all files that use a particular API or pattern
101
102HOW TO USE:
103- Provide a regex pattern to search for within file contents
104- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
105- Optionally specify a starting directory (defaults to current working directory)
106- Optionally provide an include pattern to filter which files to search
107- Results are sorted with most recently modified files first
108
109REGEX PATTERN SYNTAX (when literal_text=false):
110- Supports standard regular expression syntax
111- 'function' searches for the literal text "function"
112- 'log\..*Error' finds text starting with "log." and ending with "Error"
113- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript
114
115COMMON INCLUDE PATTERN EXAMPLES:
116- '*.js' - Only search JavaScript files
117- '*.{ts,tsx}' - Only search TypeScript files
118- '*.go' - Only search Go files
119
120LIMITATIONS:
121- Results are limited to 100 files (newest first)
122- Performance depends on the number of files being searched
123- Very large binary files may be skipped
124- Hidden files (starting with '.') are skipped
125
126CROSS-PLATFORM NOTES:
127- Uses built-in Go implementation that is based on regexp
128- File paths are normalized automatically for cross-platform compatibility
129
130TIPS:
131- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
132- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
133- Always check if results are truncated and refine your search pattern if needed
134- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
135)
136
137func NewGrepTool(workingDir string) BaseTool {
138	return &grepTool{
139		workingDir: workingDir,
140	}
141}
142
143func (g *grepTool) Name() string {
144	return GrepToolName
145}
146
147func (g *grepTool) Info() ToolInfo {
148	return ToolInfo{
149		Name:        GrepToolName,
150		Description: grepDescription,
151		Parameters: map[string]any{
152			"pattern": map[string]any{
153				"type":        "string",
154				"description": "The regex pattern to search for in file contents",
155			},
156			"path": map[string]any{
157				"type":        "string",
158				"description": "The directory to search in. Defaults to the current working directory.",
159			},
160			"include": map[string]any{
161				"type":        "string",
162				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
163			},
164			"literal_text": map[string]any{
165				"type":        "boolean",
166				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
167			},
168		},
169		Required: []string{"pattern"},
170	}
171}
172
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) [ ] { } ^ $ |) backslash-escaped, so the
// result matches the original text literally when compiled as a regex.
//
// It delegates to regexp.QuoteMeta, which escapes exactly that character set
// in a single pass.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
184
185func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
186	var params GrepParams
187	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
188		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
189	}
190
191	if params.Pattern == "" {
192		return NewTextErrorResponse("pattern is required"), nil
193	}
194
195	// If literal_text is true, escape the pattern
196	searchPattern := params.Pattern
197	if params.LiteralText {
198		searchPattern = escapeRegexPattern(params.Pattern)
199	}
200
201	searchPath := params.Path
202	if searchPath == "" {
203		searchPath = g.workingDir
204	}
205
206	matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
207	if err != nil {
208		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
209	}
210
211	var output strings.Builder
212	if len(matches) == 0 {
213		output.WriteString("No files found")
214	} else {
215		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
216
217		currentFile := ""
218		for _, match := range matches {
219			if currentFile != match.path {
220				if currentFile != "" {
221					output.WriteString("\n")
222				}
223				currentFile = match.path
224				fmt.Fprintf(&output, "%s:\n", match.path)
225			}
226			if match.lineNum > 0 {
227				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
228			} else {
229				fmt.Fprintf(&output, "  %s\n", match.path)
230			}
231		}
232
233		if truncated {
234			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
235		}
236	}
237
238	return WithResponseMetadata(
239		NewTextResponse(output.String()),
240		GrepResponseMetadata{
241			NumberOfMatches: len(matches),
242			Truncated:       truncated,
243		},
244	), nil
245}
246
247func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
248	matches, err := searchFilesWithRegex(pattern, rootPath, include)
249	if err != nil {
250		return nil, false, err
251	}
252
253	sort.Slice(matches, func(i, j int) bool {
254		return matches[i].modTime.After(matches[j].modTime)
255	})
256
257	truncated := len(matches) > limit
258	if truncated {
259		matches = matches[:limit]
260	}
261
262	return matches, truncated, nil
263}
264
265func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
266	matches := []grepMatch{}
267
268	// Use cached regex compilation
269	regex, err := searchRegexCache.get(pattern)
270	if err != nil {
271		return nil, fmt.Errorf("invalid regex pattern: %w", err)
272	}
273
274	var includePattern *regexp.Regexp
275	if include != "" {
276		regexPattern := globToRegex(include)
277		includePattern, err = globRegexCache.get(regexPattern)
278		if err != nil {
279			return nil, fmt.Errorf("invalid include pattern: %w", err)
280		}
281	}
282
283	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
284		if err != nil {
285			return nil // Skip errors
286		}
287
288		if info.IsDir() {
289			return nil // Skip directories
290		}
291
292		if fsext.SkipHidden(path) {
293			return nil
294		}
295
296		if includePattern != nil && !includePattern.MatchString(path) {
297			return nil
298		}
299
300		match, lineNum, lineText, err := fileContainsPattern(path, regex)
301		if err != nil {
302			return nil // Skip files we can't read
303		}
304
305		if match {
306			matches = append(matches, grepMatch{
307				path:     path,
308				modTime:  info.ModTime(),
309				lineNum:  lineNum,
310				lineText: lineText,
311			})
312
313			if len(matches) >= 200 {
314				return filepath.SkipAll
315			}
316		}
317
318		return nil
319	})
320	if err != nil {
321		return nil, err
322	}
323
324	return matches, nil
325}
326
327func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
328	// Quick binary file detection
329	if isBinaryFile(filePath) {
330		return false, 0, "", nil
331	}
332
333	file, err := os.Open(filePath)
334	if err != nil {
335		return false, 0, "", err
336	}
337	defer file.Close()
338
339	scanner := bufio.NewScanner(file)
340	lineNum := 0
341	for scanner.Scan() {
342		lineNum++
343		line := scanner.Text()
344		if pattern.MatchString(line) {
345			return true, lineNum, line, nil
346		}
347	}
348
349	return false, 0, "", scanner.Err()
350}
351
// binaryExts is the set of lower-case file extensions that isBinaryFile
// treats as binary without inspecting the file's content.
var binaryExts = map[string]struct{}{
	// executables, libraries, object code
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {}, ".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	// archives
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	// images
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	// documents
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	// audio and video
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile reports whether filePath looks like a binary file.
//
// A file is considered binary when its extension (compared case-insensitively)
// is listed in binaryExts, or when a NUL byte appears in the bytes returned by
// a single read of up to 512 bytes from the start of the file. A file that
// cannot be opened or read is reported as not binary.
func isBinaryFile(filePath string) bool {
	if _, known := binaryExts[strings.ToLower(filepath.Ext(filePath))]; known {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		// Not our job to report this; the caller's own open will fail too.
		return false
	}
	defer f.Close()

	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	// NUL bytes essentially never occur in text files.
	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
392
// globToRegex converts a file glob into the source of a regular expression
// intended to be matched against a file path.
//
// Translation rules:
//   - "*" becomes ".*" and "?" becomes "."
//   - a non-empty brace group "{a,b}" becomes the alternation "(a|b)"; the
//     alternatives are translated with the same rules, except that a "{"
//     inside a group is literal
//   - every other character matches itself, with regex metacharacters escaped
//
// The result is anchored with "$" so the glob must match through the end of
// the path: "*.js" matches "app.js" but not "package.json". It is not anchored
// at the start, so a glob may match any suffix of the path.
func globToRegex(glob string) string {
	var b strings.Builder
	b.Grow(len(glob) + 8)

	var convert func(s string, braces bool)
	convert = func(s string, braces bool) {
		for i := 0; i < len(s); i++ {
			switch s[i] {
			case '*':
				b.WriteString(".*")
			case '?':
				b.WriteByte('.')
			case '{':
				end := -1
				if braces {
					end = strings.IndexByte(s[i+1:], '}')
				}
				if end <= 0 {
					// Unterminated or empty group: treat the brace literally.
					b.WriteString(`\{`)
					continue
				}
				b.WriteByte('(')
				for k, alt := range strings.Split(s[i+1:i+1+end], ",") {
					if k > 0 {
						b.WriteByte('|')
					}
					convert(alt, false)
				}
				b.WriteByte(')')
				i += end + 1 // continue after the closing brace
			default:
				// Slice rather than convert the byte so multi-byte UTF-8
				// sequences pass through unchanged.
				b.WriteString(regexp.QuoteMeta(s[i : i+1]))
			}
		}
	}

	convert(glob, true)
	b.WriteByte('$')
	return b.String()
}