grep.go

  1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"os"
 10	"path/filepath"
 11	"regexp"
 12	"sort"
 13	"strings"
 14	"sync"
 15	"time"
 16
 17	"github.com/charmbracelet/crush/internal/fsext"
 18)
 19
 20// regexCache provides thread-safe caching of compiled regex patterns
 21type regexCache struct {
 22	cache map[string]*regexp.Regexp
 23	mu    sync.RWMutex
 24}
 25
 26// newRegexCache creates a new regex cache
 27func newRegexCache() *regexCache {
 28	return &regexCache{
 29		cache: make(map[string]*regexp.Regexp),
 30	}
 31}
 32
 33// get retrieves a compiled regex from cache or compiles and caches it
 34func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 35	// Try to get from cache first (read lock)
 36	rc.mu.RLock()
 37	if regex, exists := rc.cache[pattern]; exists {
 38		rc.mu.RUnlock()
 39		return regex, nil
 40	}
 41	rc.mu.RUnlock()
 42
 43	// Compile the regex (write lock)
 44	rc.mu.Lock()
 45	defer rc.mu.Unlock()
 46
 47	// Double-check in case another goroutine compiled it while we waited
 48	if regex, exists := rc.cache[pattern]; exists {
 49		return regex, nil
 50	}
 51
 52	// Compile and cache the regex
 53	regex, err := regexp.Compile(pattern)
 54	if err != nil {
 55		return nil, err
 56	}
 57
 58	rc.cache[pattern] = regex
 59	return regex, nil
 60}
 61
 62// Global regex cache instances
 63var (
 64	searchRegexCache = newRegexCache()
 65	globRegexCache   = newRegexCache()
 66	// Pre-compiled regex for glob conversion (used frequently)
 67	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
 68)
 69
// GrepParams is the JSON input accepted by the grep tool.
type GrepParams struct {
	// Pattern is the expression to search for; Run rejects an empty value.
	Pattern     string `json:"pattern"`
	// Path is the directory to search; Run falls back to the tool's working
	// directory when it is empty.
	Path        string `json:"path"`
	// Include is an optional glob used to filter which files are searched.
	Include     string `json:"include"`
	// LiteralText makes Run escape Pattern so it is matched verbatim.
	LiteralText bool   `json:"literal_text"`
}
 76
// grepMatch records the first matching line found in one file.
type grepMatch struct {
	path     string    // path of the file as reported by the directory walk
	modTime  time.Time // file modification time, used to sort newest first
	lineNum  int       // 1-based number of the matching line
	lineText string    // text of the matching line
}
 83
// GrepResponseMetadata is the structured metadata attached to a grep response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response.
	NumberOfMatches int  `json:"number_of_matches"`
	// Truncated reports whether more matches were found than were returned.
	Truncated       bool `json:"truncated"`
}
 88
// grepTool implements the grep tool.
type grepTool struct {
	// workingDir is the directory searched when a call supplies no path.
	workingDir string
}
 92
const (
	// GrepToolName is the name under which the grep tool is registered.
	GrepToolName    = "grep"
	// grepDescription is the usage text returned by Info as the tool's
	// description.
	// NOTE(review): the text advertises ripgrep, but searchFiles as visible in
	// this file only calls the built-in walker — confirm whether it is stale.
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
137
138func NewGrepTool(workingDir string) BaseTool {
139	return &grepTool{
140		workingDir: workingDir,
141	}
142}
143
// Name returns the tool's registered name, GrepToolName.
func (g *grepTool) Name() string {
	return GrepToolName
}
147
148func (g *grepTool) Info() ToolInfo {
149	return ToolInfo{
150		Name:        GrepToolName,
151		Description: grepDescription,
152		Parameters: map[string]any{
153			"pattern": map[string]any{
154				"type":        "string",
155				"description": "The regex pattern to search for in file contents",
156			},
157			"path": map[string]any{
158				"type":        "string",
159				"description": "The directory to search in. Defaults to the current working directory.",
160			},
161			"include": map[string]any{
162				"type":        "string",
163				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
164			},
165			"literal_text": map[string]any{
166				"type":        "boolean",
167				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
168			},
169		},
170		Required: []string{"pattern"},
171	}
172}
173
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) backslash-escaped, so the result
// matches the original text literally when compiled as a regex.
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly this character set in a single pass.
	return regexp.QuoteMeta(pattern)
}
185
186func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
187	var params GrepParams
188	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
189		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
190	}
191
192	if params.Pattern == "" {
193		return NewTextErrorResponse("pattern is required"), nil
194	}
195
196	// If literal_text is true, escape the pattern
197	searchPattern := params.Pattern
198	if params.LiteralText {
199		searchPattern = escapeRegexPattern(params.Pattern)
200	}
201
202	searchPath := params.Path
203	if searchPath == "" {
204		searchPath = g.workingDir
205	}
206
207	matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
208	if err != nil {
209		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
210	}
211
212	var output strings.Builder
213	if len(matches) == 0 {
214		output.WriteString("No files found")
215	} else {
216		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
217
218		currentFile := ""
219		for _, match := range matches {
220			if currentFile != match.path {
221				if currentFile != "" {
222					output.WriteString("\n")
223				}
224				currentFile = match.path
225				fmt.Fprintf(&output, "%s:\n", match.path)
226			}
227			if match.lineNum > 0 {
228				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
229			} else {
230				fmt.Fprintf(&output, "  %s\n", match.path)
231			}
232		}
233
234		if truncated {
235			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
236		}
237	}
238
239	return WithResponseMetadata(
240		NewTextResponse(output.String()),
241		GrepResponseMetadata{
242			NumberOfMatches: len(matches),
243			Truncated:       truncated,
244		},
245	), nil
246}
247
248func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
249	matches, err := searchFilesWithRegex(pattern, rootPath, include)
250	if err != nil {
251		return nil, false, err
252	}
253
254	sort.Slice(matches, func(i, j int) bool {
255		return matches[i].modTime.After(matches[j].modTime)
256	})
257
258	truncated := len(matches) > limit
259	if truncated {
260		matches = matches[:limit]
261	}
262
263	return matches, truncated, nil
264}
265
266func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
267	matches := []grepMatch{}
268
269	// Use cached regex compilation
270	regex, err := searchRegexCache.get(pattern)
271	if err != nil {
272		return nil, fmt.Errorf("invalid regex pattern: %w", err)
273	}
274
275	var includePattern *regexp.Regexp
276	if include != "" {
277		regexPattern := globToRegex(include)
278		includePattern, err = globRegexCache.get(regexPattern)
279		if err != nil {
280			return nil, fmt.Errorf("invalid include pattern: %w", err)
281		}
282	}
283
284	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
285		if err != nil {
286			return nil // Skip errors
287		}
288
289		if info.IsDir() {
290			return nil // Skip directories
291		}
292
293		if fsext.SkipHidden(path) {
294			return nil
295		}
296
297		if includePattern != nil && !includePattern.MatchString(path) {
298			return nil
299		}
300
301		match, lineNum, lineText, err := fileContainsPattern(path, regex)
302		if err != nil {
303			return nil // Skip files we can't read
304		}
305
306		if match {
307			matches = append(matches, grepMatch{
308				path:     path,
309				modTime:  info.ModTime(),
310				lineNum:  lineNum,
311				lineText: lineText,
312			})
313
314			if len(matches) >= 200 {
315				return filepath.SkipAll
316			}
317		}
318
319		return nil
320	})
321	if err != nil {
322		return nil, err
323	}
324
325	return matches, nil
326}
327
328func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
329	// Quick binary file detection
330	if isBinaryFile(filePath) {
331		return false, 0, "", nil
332	}
333
334	file, err := os.Open(filePath)
335	if err != nil {
336		return false, 0, "", err
337	}
338	defer file.Close()
339
340	scanner := bufio.NewScanner(file)
341	lineNum := 0
342	for scanner.Scan() {
343		lineNum++
344		line := scanner.Text()
345		if pattern.MatchString(line) {
346			return true, lineNum, line, nil
347		}
348	}
349
350	return false, 0, "", scanner.Err()
351}
352
// binaryExts is the set of lower-case file extensions that isBinaryFile
// treats as binary without looking at the file contents.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile reports whether filePath looks like a binary file. A known
// binary extension (compared case-insensitively) decides immediately;
// otherwise the first 512 bytes are read and any NUL byte marks the file as
// binary. A file that cannot be opened or read is reported as not binary, so
// the caller hits the I/O error itself.
func isBinaryFile(filePath string) bool {
	suffix := strings.ToLower(filepath.Ext(filePath))
	if _, known := binaryExts[suffix]; known {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false
	}
	defer f.Close()

	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	// Only the bytes actually read are inspected.
	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
393
// globToRegex translates a file glob into regular-expression source.
//
// Translation rules:
//   - "."       becomes an escaped literal dot
//   - "*"       becomes ".*"
//   - "?"       becomes "."
//   - "{a,b,c}" becomes "(a|b|c)" (only for a non-empty group with a closing brace)
//   - any other byte is copied unchanged
//
// The result is anchored: it must match from the start of the path or from
// just after a path separator, through to the end of the path. Without the
// anchors an unanchored search lets "*.js" match "package.json". A glob
// without a separator therefore matches file names at any depth.
//
// An empty glob yields an empty pattern, which matches every path.
func globToRegex(glob string) string {
	if glob == "" {
		return ""
	}

	var b strings.Builder
	b.Grow(len(glob) + 24)

	// Start at the beginning of the path or right after a separator
	// (forward slash or backslash).
	b.WriteString(`(?:^|[/\\])(?:`)

	// Byte-wise iteration is safe: every byte given special treatment is
	// ASCII and all other bytes are copied through untouched.
	inAlternation := false
	for i := 0; i < len(glob); i++ {
		c := glob[i]
		switch {
		case c == '.':
			b.WriteString(`\.`)
		case c == '*':
			b.WriteString(".*")
		case c == '?':
			b.WriteByte('.')
		case c == '{' && !inAlternation && strings.IndexByte(glob[i+1:], '}') > 0:
			// Open a group only when a closing brace follows with at least
			// one byte in between; otherwise the brace stays literal.
			inAlternation = true
			b.WriteByte('(')
		case c == '}' && inAlternation:
			inAlternation = false
			b.WriteByte(')')
		case c == ',' && inAlternation:
			b.WriteByte('|')
		default:
			b.WriteByte(c)
		}
	}

	b.WriteString(`)$`)
	return b.String()
}