grep.go

  1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"os"
 10	"os/exec"
 11	"path/filepath"
 12	"regexp"
 13	"sort"
 14	"strconv"
 15	"strings"
 16	"sync"
 17	"time"
 18
 19	"github.com/charmbracelet/crush/internal/config"
 20	"github.com/charmbracelet/crush/internal/fsext"
 21)
 22
 23// regexCache provides thread-safe caching of compiled regex patterns
 24type regexCache struct {
 25	cache map[string]*regexp.Regexp
 26	mu    sync.RWMutex
 27}
 28
 29// newRegexCache creates a new regex cache
 30func newRegexCache() *regexCache {
 31	return &regexCache{
 32		cache: make(map[string]*regexp.Regexp),
 33	}
 34}
 35
 36// get retrieves a compiled regex from cache or compiles and caches it
 37func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 38	// Try to get from cache first (read lock)
 39	rc.mu.RLock()
 40	if regex, exists := rc.cache[pattern]; exists {
 41		rc.mu.RUnlock()
 42		return regex, nil
 43	}
 44	rc.mu.RUnlock()
 45
 46	// Compile the regex (write lock)
 47	rc.mu.Lock()
 48	defer rc.mu.Unlock()
 49
 50	// Double-check in case another goroutine compiled it while we waited
 51	if regex, exists := rc.cache[pattern]; exists {
 52		return regex, nil
 53	}
 54
 55	// Compile and cache the regex
 56	regex, err := regexp.Compile(pattern)
 57	if err != nil {
 58		return nil, err
 59	}
 60
 61	rc.cache[pattern] = regex
 62	return regex, nil
 63}
 64
 65// Global regex cache instances
 66var (
 67	searchRegexCache = newRegexCache()
 68	globRegexCache   = newRegexCache()
 69	// Pre-compiled regex for glob conversion (used frequently)
 70	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
 71)
 72
 73type GrepParams struct {
 74	Pattern     string `json:"pattern"`
 75	Path        string `json:"path"`
 76	Include     string `json:"include"`
 77	LiteralText bool   `json:"literal_text"`
 78}
 79
 80type grepMatch struct {
 81	path     string
 82	modTime  time.Time
 83	lineNum  int
 84	lineText string
 85}
 86
 87type GrepResponseMetadata struct {
 88	NumberOfMatches int  `json:"number_of_matches"`
 89	Truncated       bool `json:"truncated"`
 90}
 91
 92type grepTool struct{}
 93
 94const (
 95	GrepToolName    = "grep"
 96	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).
 97
 98WHEN TO USE THIS TOOL:
 99- Use when you need to find files containing specific text or patterns
100- Great for searching code bases for function names, variable declarations, or error messages
101- Useful for finding all files that use a particular API or pattern
102
103HOW TO USE:
104- Provide a regex pattern to search for within file contents
105- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
106- Optionally specify a starting directory (defaults to current working directory)
107- Optionally provide an include pattern to filter which files to search
108- Results are sorted with most recently modified files first
109
110REGEX PATTERN SYNTAX (when literal_text=false):
111- Supports standard regular expression syntax
112- 'function' searches for the literal text "function"
113- 'log\..*Error' finds text starting with "log." and ending with "Error"
114- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript
115
116COMMON INCLUDE PATTERN EXAMPLES:
117- '*.js' - Only search JavaScript files
118- '*.{ts,tsx}' - Only search TypeScript files
119- '*.go' - Only search Go files
120
121LIMITATIONS:
122- Results are limited to 100 files (newest first)
123- Performance depends on the number of files being searched
124- Very large binary files may be skipped
125- Hidden files (starting with '.') are skipped
126
127CROSS-PLATFORM NOTES:
128- Uses ripgrep (rg) command if available for better performance
129- Falls back to built-in Go implementation if ripgrep is not available
130- File paths are normalized automatically for cross-platform compatibility
131
132TIPS:
133- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
134- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
135- Always check if results are truncated and refine your search pattern if needed
136- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
137)
138
139func NewGrepTool() BaseTool {
140	return &grepTool{}
141}
142
143func (g *grepTool) Name() string {
144	return GrepToolName
145}
146
147func (g *grepTool) Info() ToolInfo {
148	return ToolInfo{
149		Name:        GrepToolName,
150		Description: grepDescription,
151		Parameters: map[string]any{
152			"pattern": map[string]any{
153				"type":        "string",
154				"description": "The regex pattern to search for in file contents",
155			},
156			"path": map[string]any{
157				"type":        "string",
158				"description": "The directory to search in. Defaults to the current working directory.",
159			},
160			"include": map[string]any{
161				"type":        "string",
162				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
163			},
164			"literal_text": map[string]any{
165				"type":        "boolean",
166				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
167			},
168		},
169		Required: []string{"pattern"},
170	}
171}
172
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) backslash-escaped, so the result
// matches the original text literally when compiled as a regex.
//
// regexp.QuoteMeta escapes exactly this character set in a single pass, which
// replaces the previous chain of one strings.ReplaceAll call per character.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
184
185func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
186	var params GrepParams
187	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
188		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
189	}
190
191	if params.Pattern == "" {
192		return NewTextErrorResponse("pattern is required"), nil
193	}
194
195	// If literal_text is true, escape the pattern
196	searchPattern := params.Pattern
197	if params.LiteralText {
198		searchPattern = escapeRegexPattern(params.Pattern)
199	}
200
201	searchPath := params.Path
202	if searchPath == "" {
203		searchPath = config.Get().WorkingDir()
204	}
205
206	matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
207	if err != nil {
208		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
209	}
210
211	var output strings.Builder
212	if len(matches) == 0 {
213		output.WriteString("No files found")
214	} else {
215		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
216
217		currentFile := ""
218		for _, match := range matches {
219			if currentFile != match.path {
220				if currentFile != "" {
221					output.WriteString("\n")
222				}
223				currentFile = match.path
224				fmt.Fprintf(&output, "%s:\n", match.path)
225			}
226			if match.lineNum > 0 {
227				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
228			} else {
229				fmt.Fprintf(&output, "  %s\n", match.path)
230			}
231		}
232
233		if truncated {
234			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
235		}
236	}
237
238	return WithResponseMetadata(
239		NewTextResponse(output.String()),
240		GrepResponseMetadata{
241			NumberOfMatches: len(matches),
242			Truncated:       truncated,
243		},
244	), nil
245}
246
247func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
248	matches, err := searchWithRipgrep(pattern, rootPath, include)
249	if err != nil {
250		matches, err = searchFilesWithRegex(pattern, rootPath, include)
251		if err != nil {
252			return nil, false, err
253		}
254	}
255
256	sort.Slice(matches, func(i, j int) bool {
257		return matches[i].modTime.After(matches[j].modTime)
258	})
259
260	truncated := len(matches) > limit
261	if truncated {
262		matches = matches[:limit]
263	}
264
265	return matches, truncated, nil
266}
267
268func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) {
269	cmd := fsext.GetRgSearchCmd(pattern, path, include)
270	if cmd == nil {
271		return nil, fmt.Errorf("ripgrep not found in $PATH")
272	}
273
274	output, err := cmd.Output()
275	if err != nil {
276		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
277			return []grepMatch{}, nil
278		}
279		return nil, err
280	}
281
282	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
283	matches := make([]grepMatch, 0, len(lines))
284
285	for _, line := range lines {
286		if line == "" {
287			continue
288		}
289
290		// Parse ripgrep output format: file:line:content
291		parts := strings.SplitN(line, ":", 3)
292		if len(parts) < 3 {
293			continue
294		}
295
296		filePath := parts[0]
297		lineNum, err := strconv.Atoi(parts[1])
298		if err != nil {
299			continue
300		}
301		lineText := parts[2]
302
303		fileInfo, err := os.Stat(filePath)
304		if err != nil {
305			continue // Skip files we can't access
306		}
307
308		matches = append(matches, grepMatch{
309			path:     filePath,
310			modTime:  fileInfo.ModTime(),
311			lineNum:  lineNum,
312			lineText: lineText,
313		})
314	}
315
316	return matches, nil
317}
318
319func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
320	matches := []grepMatch{}
321
322	// Use cached regex compilation
323	regex, err := searchRegexCache.get(pattern)
324	if err != nil {
325		return nil, fmt.Errorf("invalid regex pattern: %w", err)
326	}
327
328	var includePattern *regexp.Regexp
329	if include != "" {
330		regexPattern := globToRegex(include)
331		includePattern, err = globRegexCache.get(regexPattern)
332		if err != nil {
333			return nil, fmt.Errorf("invalid include pattern: %w", err)
334		}
335	}
336
337	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
338		if err != nil {
339			return nil // Skip errors
340		}
341
342		if info.IsDir() {
343			return nil // Skip directories
344		}
345
346		if fsext.SkipHidden(path) {
347			return nil
348		}
349
350		if includePattern != nil && !includePattern.MatchString(path) {
351			return nil
352		}
353
354		match, lineNum, lineText, err := fileContainsPattern(path, regex)
355		if err != nil {
356			return nil // Skip files we can't read
357		}
358
359		if match {
360			matches = append(matches, grepMatch{
361				path:     path,
362				modTime:  info.ModTime(),
363				lineNum:  lineNum,
364				lineText: lineText,
365			})
366
367			if len(matches) >= 200 {
368				return filepath.SkipAll
369			}
370		}
371
372		return nil
373	})
374	if err != nil {
375		return nil, err
376	}
377
378	return matches, nil
379}
380
381func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
382	// Quick binary file detection
383	if isBinaryFile(filePath) {
384		return false, 0, "", nil
385	}
386
387	file, err := os.Open(filePath)
388	if err != nil {
389		return false, 0, "", err
390	}
391	defer file.Close()
392
393	scanner := bufio.NewScanner(file)
394	lineNum := 0
395	for scanner.Scan() {
396		lineNum++
397		line := scanner.Text()
398		if pattern.MatchString(line) {
399			return true, lineNum, line, nil
400		}
401	}
402
403	return false, 0, "", scanner.Err()
404}
405
// binaryExts is the set of lower-case file extensions (including the leading
// dot) that are treated as binary without inspecting file content.
var binaryExts = map[string]struct{}{
	// Executables and shared libraries.
	".exe":   {},
	".dll":   {},
	".so":    {},
	".dylib": {},

	// Raw binaries, object files and static archives.
	".bin": {},
	".obj": {},
	".o":   {},
	".a":   {},

	// Archives and compressed files.
	".zip": {},
	".tar": {},
	".gz":  {},
	".bz2": {},

	// Images.
	".jpg":  {},
	".jpeg": {},
	".png":  {},
	".gif":  {},

	// Documents.
	".pdf":  {},
	".doc":  {},
	".docx": {},
	".xls":  {},

	// Audio and video.
	".mp3": {},
	".mp4": {},
	".avi": {},
	".mov": {},
}
414
415// isBinaryFile performs a quick check to determine if a file is binary
416func isBinaryFile(filePath string) bool {
417	// Check file extension first (fastest)
418	ext := strings.ToLower(filepath.Ext(filePath))
419	if _, isBinary := binaryExts[ext]; isBinary {
420		return true
421	}
422
423	// Quick content check for files without clear extensions
424	file, err := os.Open(filePath)
425	if err != nil {
426		return false // If we can't open it, let the caller handle the error
427	}
428	defer file.Close()
429
430	// Read first 512 bytes to check for null bytes
431	buffer := make([]byte, 512)
432	n, err := file.Read(buffer)
433	if err != nil && err != io.EOF {
434		return false
435	}
436
437	// Check for null bytes (common in binary files)
438	for i := range n {
439		if buffer[i] == 0 {
440			return true
441		}
442	}
443
444	return false
445}
446
// globToRegex translates a file glob into regular-expression source text.
// The result is not anchored; callers decide how to apply it.
//
// Translation rules:
//   - "*" becomes ".*" and "?" becomes "."
//   - a non-empty, properly closed "{a,b}" group becomes "(a|b)"; groups may
//     nest
//   - a character class "[...]" is kept as a class, with a leading "!"
//     rewritten to "^"
//   - every other byte is matched literally, with regex metacharacters
//     escaped
//
// Unmatched or empty braces and an unterminated "[" are treated as literals.
// Escaping the remaining metacharacters keeps globs such as "c++/*.h" or
// "a(1).txt" from producing an invalid or unintended expression.
func globToRegex(glob string) string {
	grouped := pairGlobBraces(glob)

	var sb strings.Builder
	sb.Grow(len(glob) + 8)

	depth := 0 // number of brace groups currently open
	for i := 0; i < len(glob); i++ {
		switch c := glob[i]; {
		case c == '*':
			sb.WriteString(".*")
		case c == '?':
			sb.WriteByte('.')
		case c == '[':
			end := globClassEnd(glob, i)
			if end < 0 {
				sb.WriteString(`\[`)
				continue
			}
			writeGlobClass(&sb, glob[i+1:end])
			i = end
		case c == '{' && grouped[i]:
			depth++
			sb.WriteByte('(')
		case c == '}' && grouped[i]:
			depth--
			sb.WriteByte(')')
		case c == ',' && depth > 0:
			sb.WriteByte('|')
		default:
			// Working byte-wise keeps multi-byte UTF-8 sequences intact:
			// QuoteMeta only alters ASCII metacharacters.
			sb.WriteString(regexp.QuoteMeta(glob[i : i+1]))
		}
	}

	return sb.String()
}

// pairGlobBraces marks, by byte index, the "{" and "}" characters of glob
// that form a well-formed, non-empty group. Braces inside a character class
// are ignored.
func pairGlobBraces(glob string) []bool {
	grouped := make([]bool, len(glob))
	var open []int // indices of "{" still waiting for their "}"

	for i := 0; i < len(glob); i++ {
		switch glob[i] {
		case '[':
			if end := globClassEnd(glob, i); end >= 0 {
				i = end
			}
		case '{':
			open = append(open, i)
		case '}':
			if len(open) == 0 {
				continue
			}
			start := open[len(open)-1]
			open = open[:len(open)-1]
			if i > start+1 {
				grouped[start], grouped[i] = true, true
			}
		}
	}

	return grouped
}

// globClassEnd returns the index of the "]" closing the character class that
// opens at glob[start], or -1 if the class is unterminated. The class must
// hold at least one member after an optional leading "!" or "^"; a "]" in
// that first member position is itself a member.
func globClassEnd(glob string, start int) int {
	first := start + 1
	if first < len(glob) && (glob[first] == '!' || glob[first] == '^') {
		first++
	}
	if first >= len(glob) {
		return -1
	}

	rel := strings.IndexByte(glob[first+1:], ']')
	if rel < 0 {
		return -1
	}
	return first + 1 + rel
}

// writeGlobClass writes the regex character class for body, the non-empty
// text between a glob's "[" and "]". A leading "!" or "^" negates the class;
// "\" and "[" are escaped so they are taken as plain members.
func writeGlobClass(sb *strings.Builder, body string) {
	sb.WriteByte('[')
	if body[0] == '!' || body[0] == '^' {
		sb.WriteByte('^')
		body = body[1:]
	}
	for i := 0; i < len(body); i++ {
		if body[i] == '\\' || body[i] == '[' {
			sb.WriteByte('\\')
		}
		sb.WriteByte(body[i])
	}
	sb.WriteByte(']')
}