1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"os"
 10	"os/exec"
 11	"path/filepath"
 12	"regexp"
 13	"sort"
 14	"strconv"
 15	"strings"
 16	"sync"
 17	"time"
 18
 19	"github.com/charmbracelet/crush/internal/fsext"
 20)
 21
 22// regexCache provides thread-safe caching of compiled regex patterns
 23type regexCache struct {
 24	cache map[string]*regexp.Regexp
 25	mu    sync.RWMutex
 26}
 27
 28// newRegexCache creates a new regex cache
 29func newRegexCache() *regexCache {
 30	return ®exCache{
 31		cache: make(map[string]*regexp.Regexp),
 32	}
 33}
 34
 35// get retrieves a compiled regex from cache or compiles and caches it
 36func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 37	// Try to get from cache first (read lock)
 38	rc.mu.RLock()
 39	if regex, exists := rc.cache[pattern]; exists {
 40		rc.mu.RUnlock()
 41		return regex, nil
 42	}
 43	rc.mu.RUnlock()
 44
 45	// Compile the regex (write lock)
 46	rc.mu.Lock()
 47	defer rc.mu.Unlock()
 48
 49	// Double-check in case another goroutine compiled it while we waited
 50	if regex, exists := rc.cache[pattern]; exists {
 51		return regex, nil
 52	}
 53
 54	// Compile and cache the regex
 55	regex, err := regexp.Compile(pattern)
 56	if err != nil {
 57		return nil, err
 58	}
 59
 60	rc.cache[pattern] = regex
 61	return regex, nil
 62}
 63
// Package-level regex state shared by every grep invocation.
var (
	// Compiled-pattern caches: searchRegexCache is consulted for content
	// search patterns, globRegexCache for the regexes derived from include
	// globs.
	searchRegexCache = newRegexCache()
	globRegexCache   = newRegexCache()
	// Pre-compiled regex for glob conversion (used frequently).
	// It matches one brace group such as "{ts,tsx}" and captures the text
	// between the braces.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
 71
// GrepParams is the JSON input accepted by the grep tool.
//
// Pattern is the text or regular expression to search for; Run rejects an
// empty value. Path is the directory to search and falls back to the tool's
// working directory when empty. Include is an optional file glob (for example
// "*.go") restricting which files are searched. LiteralText, when true, makes
// Run escape Pattern so it is matched as plain text.
type GrepParams struct {
	Pattern     string `json:"pattern"`
	Path        string `json:"path"`
	Include     string `json:"include"`
	LiteralText bool   `json:"literal_text"`
}
 78
// grepMatch is a single search hit.
//
// path is the file containing the hit and modTime is that file's modification
// time, which searchFiles uses for ordering. lineNum and lineText describe the
// matching line; Run prints them only when lineNum is greater than zero.
type grepMatch struct {
	path     string
	modTime  time.Time
	lineNum  int
	lineText string
}
 85
// GrepResponseMetadata is the structured metadata attached to a grep
// response: the number of matches that were returned and whether the result
// list was cut short by the result limit.
type GrepResponseMetadata struct {
	NumberOfMatches int  `json:"number_of_matches"`
	Truncated       bool `json:"truncated"`
}
 90
// grepTool implements the grep tool. workingDir is the directory searched
// when a call does not supply a path.
type grepTool struct {
	workingDir string
}
 94
// GrepToolName is the name under which the grep tool is registered, and
// grepDescription is the usage text returned as the tool's description by
// Info. The description is runtime text: edit it only when the tool's
// behaviour changes.
const (
	GrepToolName    = "grep"
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

IGNORE FILE SUPPORT:
- Respects .gitignore patterns to skip ignored files and directories
- Respects .crushignore patterns for additional ignore rules
- Both ignore files are automatically detected in the search root directory

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
144
145func NewGrepTool(workingDir string) BaseTool {
146	return &grepTool{
147		workingDir: workingDir,
148	}
149}
150
151func (g *grepTool) Name() string {
152	return GrepToolName
153}
154
155func (g *grepTool) Info() ToolInfo {
156	return ToolInfo{
157		Name:        GrepToolName,
158		Description: grepDescription,
159		Parameters: map[string]any{
160			"pattern": map[string]any{
161				"type":        "string",
162				"description": "The regex pattern to search for in file contents",
163			},
164			"path": map[string]any{
165				"type":        "string",
166				"description": "The directory to search in. Defaults to the current working directory.",
167			},
168			"include": map[string]any{
169				"type":        "string",
170				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
171			},
172			"literal_text": map[string]any{
173				"type":        "boolean",
174				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
175			},
176		},
177		Required: []string{"pattern"},
178	}
179}
180
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter backslash-escaped, so that the result matches pattern
// literally when compiled as a regex.
//
// It delegates to regexp.QuoteMeta, which escapes the characters
// \ . + * ? ( ) | [ ] { } ^ $ in a single pass.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
192
193func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
194	var params GrepParams
195	if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
196		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
197	}
198
199	if params.Pattern == "" {
200		return NewTextErrorResponse("pattern is required"), nil
201	}
202
203	// If literal_text is true, escape the pattern
204	searchPattern := params.Pattern
205	if params.LiteralText {
206		searchPattern = escapeRegexPattern(params.Pattern)
207	}
208
209	searchPath := params.Path
210	if searchPath == "" {
211		searchPath = g.workingDir
212	}
213
214	matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
215	if err != nil {
216		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
217	}
218
219	var output strings.Builder
220	if len(matches) == 0 {
221		output.WriteString("No files found")
222	} else {
223		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
224
225		currentFile := ""
226		for _, match := range matches {
227			if currentFile != match.path {
228				if currentFile != "" {
229					output.WriteString("\n")
230				}
231				currentFile = match.path
232				fmt.Fprintf(&output, "%s:\n", match.path)
233			}
234			if match.lineNum > 0 {
235				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
236			} else {
237				fmt.Fprintf(&output, "  %s\n", match.path)
238			}
239		}
240
241		if truncated {
242			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
243		}
244	}
245
246	return WithResponseMetadata(
247		NewTextResponse(output.String()),
248		GrepResponseMetadata{
249			NumberOfMatches: len(matches),
250			Truncated:       truncated,
251		},
252	), nil
253}
254
255func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
256	matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
257	if err != nil {
258		matches, err = searchFilesWithRegex(pattern, rootPath, include)
259		if err != nil {
260			return nil, false, err
261		}
262	}
263
264	sort.Slice(matches, func(i, j int) bool {
265		return matches[i].modTime.After(matches[j].modTime)
266	})
267
268	truncated := len(matches) > limit
269	if truncated {
270		matches = matches[:limit]
271	}
272
273	return matches, truncated, nil
274}
275
276func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
277	cmd := getRgSearchCmd(ctx, pattern, path, include)
278	if cmd == nil {
279		return nil, fmt.Errorf("ripgrep not found in $PATH")
280	}
281
282	// Only add ignore files if they exist
283	for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
284		ignorePath := filepath.Join(path, ignoreFile)
285		if _, err := os.Stat(ignorePath); err == nil {
286			cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
287		}
288	}
289
290	output, err := cmd.Output()
291	if err != nil {
292		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
293			return []grepMatch{}, nil
294		}
295		return nil, err
296	}
297
298	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
299	matches := make([]grepMatch, 0, len(lines))
300
301	for _, line := range lines {
302		if line == "" {
303			continue
304		}
305
306		// Parse ripgrep output format: file:line:content
307		parts := strings.SplitN(line, ":", 3)
308		if len(parts) < 3 {
309			continue
310		}
311
312		filePath := parts[0]
313		lineNum, err := strconv.Atoi(parts[1])
314		if err != nil {
315			continue
316		}
317		lineText := parts[2]
318
319		fileInfo, err := os.Stat(filePath)
320		if err != nil {
321			continue // Skip files we can't access
322		}
323
324		matches = append(matches, grepMatch{
325			path:     filePath,
326			modTime:  fileInfo.ModTime(),
327			lineNum:  lineNum,
328			lineText: lineText,
329		})
330	}
331
332	return matches, nil
333}
334
335func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
336	matches := []grepMatch{}
337
338	// Use cached regex compilation
339	regex, err := searchRegexCache.get(pattern)
340	if err != nil {
341		return nil, fmt.Errorf("invalid regex pattern: %w", err)
342	}
343
344	var includePattern *regexp.Regexp
345	if include != "" {
346		regexPattern := globToRegex(include)
347		includePattern, err = globRegexCache.get(regexPattern)
348		if err != nil {
349			return nil, fmt.Errorf("invalid include pattern: %w", err)
350		}
351	}
352
353	// Create walker with gitignore and crushignore support
354	walker := fsext.NewFastGlobWalker(rootPath)
355
356	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
357		if err != nil {
358			return nil // Skip errors
359		}
360
361		if info.IsDir() {
362			// Check if directory should be skipped
363			if walker.ShouldSkip(path) {
364				return filepath.SkipDir
365			}
366			return nil // Continue into directory
367		}
368
369		// Use walker's shouldSkip method for files
370		if walker.ShouldSkip(path) {
371			return nil
372		}
373
374		// Skip hidden files (starting with a dot) to match ripgrep's default behavior
375		base := filepath.Base(path)
376		if base != "." && strings.HasPrefix(base, ".") {
377			return nil
378		}
379
380		if includePattern != nil && !includePattern.MatchString(path) {
381			return nil
382		}
383
384		match, lineNum, lineText, err := fileContainsPattern(path, regex)
385		if err != nil {
386			return nil // Skip files we can't read
387		}
388
389		if match {
390			matches = append(matches, grepMatch{
391				path:     path,
392				modTime:  info.ModTime(),
393				lineNum:  lineNum,
394				lineText: lineText,
395			})
396
397			if len(matches) >= 200 {
398				return filepath.SkipAll
399			}
400		}
401
402		return nil
403	})
404	if err != nil {
405		return nil, err
406	}
407
408	return matches, nil
409}
410
// fileContainsPattern reports whether any line of the file at filePath
// matches pattern. On a hit it returns true together with the 1-based number
// and the text (without its line terminator) of the first matching line.
//
// Files that isBinaryFile classifies as binary are reported as "no match"
// without being scanned. An error is returned when the file cannot be opened
// or read, or when a single line exceeds the 10 MiB line limit.
func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
	const (
		// initialLineBuf is the scanner's starting buffer size.
		initialLineBuf = 4096
		// maxLineBytes bounds the memory spent on one line. bufio.Scanner's
		// default limit of 64 KiB makes every file containing one longer line
		// (minified bundles, generated data) fail with bufio.ErrTooLong, so
		// nothing after that line would ever be searched.
		maxLineBytes = 10 * 1024 * 1024
	)

	// Quick binary file detection
	if isBinaryFile(filePath) {
		return false, 0, "", nil
	}

	file, err := os.Open(filePath)
	if err != nil {
		return false, 0, "", err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialLineBuf), maxLineBytes)

	lineNum := 0
	for scanner.Scan() {
		lineNum++
		line := scanner.Text()
		if pattern.MatchString(line) {
			return true, lineNum, line, nil
		}
	}

	return false, 0, "", scanner.Err()
}

// binaryExts lists lower-case file extensions that are treated as binary
// without looking at the file's content.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile performs a quick check to determine if a file is binary.
//
// A file is considered binary when its extension (compared case-insensitively)
// is listed in binaryExts, or when a NUL byte occurs within its first 512
// bytes. A file that cannot be opened or read is reported as not binary, so
// the caller hits and handles the underlying error itself.
func isBinaryFile(filePath string) bool {
	// Check file extension first (fastest)
	ext := strings.ToLower(filepath.Ext(filePath))
	if _, isBinary := binaryExts[ext]; isBinary {
		return true
	}

	// Quick content check for files without clear extensions
	file, err := os.Open(filePath)
	if err != nil {
		return false // If we can't open it, let the caller handle the error
	}
	defer file.Close()

	// Read first 512 bytes to check for null bytes
	buffer := make([]byte, 512)
	n, err := file.Read(buffer)
	if err != nil && err != io.EOF {
		return false
	}

	// Check for null bytes (common in binary files)
	for _, b := range buffer[:n] {
		if b == 0 {
			return true
		}
	}

	return false
}
476
477func globToRegex(glob string) string {
478	regexPattern := strings.ReplaceAll(glob, ".", "\\.")
479	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
480	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
481
482	// Use pre-compiled regex instead of compiling each time
483	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
484		inner := match[1 : len(match)-1]
485		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
486	})
487
488	return regexPattern
489}