1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"os"
 10	"os/exec"
 11	"path/filepath"
 12	"regexp"
 13	"sort"
 14	"strconv"
 15	"strings"
 16	"sync"
 17	"time"
 18
 19	"github.com/charmbracelet/crush/internal/fsext"
 20)
 21
 22// regexCache provides thread-safe caching of compiled regex patterns
 23type regexCache struct {
 24	cache map[string]*regexp.Regexp
 25	mu    sync.RWMutex
 26}
 27
 28// newRegexCache creates a new regex cache
 29func newRegexCache() *regexCache {
 30	return ®exCache{
 31		cache: make(map[string]*regexp.Regexp),
 32	}
 33}
 34
 35// get retrieves a compiled regex from cache or compiles and caches it
 36func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 37	// Try to get from cache first (read lock)
 38	rc.mu.RLock()
 39	if regex, exists := rc.cache[pattern]; exists {
 40		rc.mu.RUnlock()
 41		return regex, nil
 42	}
 43	rc.mu.RUnlock()
 44
 45	// Compile the regex (write lock)
 46	rc.mu.Lock()
 47	defer rc.mu.Unlock()
 48
 49	// Double-check in case another goroutine compiled it while we waited
 50	if regex, exists := rc.cache[pattern]; exists {
 51		return regex, nil
 52	}
 53
 54	// Compile and cache the regex
 55	regex, err := regexp.Compile(pattern)
 56	if err != nil {
 57		return nil, err
 58	}
 59
 60	rc.cache[pattern] = regex
 61	return regex, nil
 62}
 63
// Package-level regex caches shared by every search performed in this file.
var (
	searchRegexCache = newRegexCache()
	globRegexCache   = newRegexCache()
	// globBraceRegex matches a non-empty brace group such as "{ts,tsx}" and
	// captures the text between the braces. It is compiled once at package
	// initialization.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
 71
// GrepParams is the JSON input accepted by the grep tool.
//
// Pattern is the expression to search for and must be non-empty. Path is the
// directory to search; when empty the tool's working directory is used.
// Include is an optional file-name glob restricting which files are searched.
// LiteralText requests that Pattern be escaped so it is matched as plain text
// rather than interpreted as a regular expression.
type GrepParams struct {
	Pattern     string `json:"pattern"`
	Path        string `json:"path"`
	Include     string `json:"include"`
	LiteralText bool   `json:"literal_text"`
}
 78
// grepMatch is one search hit.
//
// path is the file the hit was found in and modTime is that file's
// modification time, which is used to order results newest first. lineNum and
// lineText identify the matching line; the built-in scanner counts lines from
// 1. A lineNum of 0 makes Run print the path instead of a line entry.
type grepMatch struct {
	path     string
	modTime  time.Time
	lineNum  int
	lineText string
}
 85
// GrepResponseMetadata is attached to every successful grep response.
//
// NumberOfMatches is the number of matches included in the response (after
// any truncation). Truncated is true when more matches were found than the
// result limit allows.
type GrepResponseMetadata struct {
	NumberOfMatches int  `json:"number_of_matches"`
	Truncated       bool `json:"truncated"`
}
 90
// grepTool implements the grep tool. workingDir is the directory searched
// when a call does not specify a path.
type grepTool struct {
	workingDir string
}
 94
// GrepToolName is the name under which the grep tool is registered, and
// grepDescription is the usage text returned as the tool's description by
// Info.
const (
	GrepToolName    = "grep"
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
139
140func NewGrepTool(workingDir string) BaseTool {
141	return &grepTool{
142		workingDir: workingDir,
143	}
144}
145
// Name returns the tool's registered name, GrepToolName.
func (g *grepTool) Name() string {
	return GrepToolName
}
149
150func (g *grepTool) Info() ToolInfo {
151	return ToolInfo{
152		Name:        GrepToolName,
153		Description: grepDescription,
154		Parameters: map[string]any{
155			"pattern": map[string]any{
156				"type":        "string",
157				"description": "The regex pattern to search for in file contents",
158			},
159			"path": map[string]any{
160				"type":        "string",
161				"description": "The directory to search in. Defaults to the current working directory.",
162			},
163			"include": map[string]any{
164				"type":        "string",
165				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
166			},
167			"literal_text": map[string]any{
168				"type":        "boolean",
169				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
170			},
171		},
172		Required: []string{"pattern"},
173	}
174}
175
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) [ ] { } ^ $ |) preceded by a backslash, so the
// result matches the original text literally.
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly this character set in a single pass.
	return regexp.QuoteMeta(pattern)
}
187
188func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
189	var params GrepParams
190	if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
191		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
192	}
193
194	if params.Pattern == "" {
195		return NewTextErrorResponse("pattern is required"), nil
196	}
197
198	// If literal_text is true, escape the pattern
199	searchPattern := params.Pattern
200	if params.LiteralText {
201		searchPattern = escapeRegexPattern(params.Pattern)
202	}
203
204	searchPath := params.Path
205	if searchPath == "" {
206		searchPath = g.workingDir
207	}
208
209	matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
210	if err != nil {
211		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
212	}
213
214	var output strings.Builder
215	if len(matches) == 0 {
216		output.WriteString("No files found")
217	} else {
218		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
219
220		currentFile := ""
221		for _, match := range matches {
222			if currentFile != match.path {
223				if currentFile != "" {
224					output.WriteString("\n")
225				}
226				currentFile = match.path
227				fmt.Fprintf(&output, "%s:\n", match.path)
228			}
229			if match.lineNum > 0 {
230				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
231			} else {
232				fmt.Fprintf(&output, "  %s\n", match.path)
233			}
234		}
235
236		if truncated {
237			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
238		}
239	}
240
241	return WithResponseMetadata(
242		NewTextResponse(output.String()),
243		GrepResponseMetadata{
244			NumberOfMatches: len(matches),
245			Truncated:       truncated,
246		},
247	), nil
248}
249
250func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
251	matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
252	if err != nil {
253		matches, err = searchFilesWithRegex(pattern, rootPath, include)
254		if err != nil {
255			return nil, false, err
256		}
257	}
258
259	sort.Slice(matches, func(i, j int) bool {
260		return matches[i].modTime.After(matches[j].modTime)
261	})
262
263	truncated := len(matches) > limit
264	if truncated {
265		matches = matches[:limit]
266	}
267
268	return matches, truncated, nil
269}
270
271func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
272	cmd := fsext.GetRgSearchCmd(ctx, pattern, path, include)
273	if cmd == nil {
274		return nil, fmt.Errorf("ripgrep not found in $PATH")
275	}
276
277	output, err := cmd.Output()
278	if err != nil {
279		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
280			return []grepMatch{}, nil
281		}
282		return nil, err
283	}
284
285	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
286	matches := make([]grepMatch, 0, len(lines))
287
288	for _, line := range lines {
289		if line == "" {
290			continue
291		}
292
293		// Parse ripgrep output format: file:line:content
294		parts := strings.SplitN(line, ":", 3)
295		if len(parts) < 3 {
296			continue
297		}
298
299		filePath := parts[0]
300		lineNum, err := strconv.Atoi(parts[1])
301		if err != nil {
302			continue
303		}
304		lineText := parts[2]
305
306		fileInfo, err := os.Stat(filePath)
307		if err != nil {
308			continue // Skip files we can't access
309		}
310
311		matches = append(matches, grepMatch{
312			path:     filePath,
313			modTime:  fileInfo.ModTime(),
314			lineNum:  lineNum,
315			lineText: lineText,
316		})
317	}
318
319	return matches, nil
320}
321
322func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
323	matches := []grepMatch{}
324
325	// Use cached regex compilation
326	regex, err := searchRegexCache.get(pattern)
327	if err != nil {
328		return nil, fmt.Errorf("invalid regex pattern: %w", err)
329	}
330
331	var includePattern *regexp.Regexp
332	if include != "" {
333		regexPattern := globToRegex(include)
334		includePattern, err = globRegexCache.get(regexPattern)
335		if err != nil {
336			return nil, fmt.Errorf("invalid include pattern: %w", err)
337		}
338	}
339
340	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
341		if err != nil {
342			return nil // Skip errors
343		}
344
345		if info.IsDir() {
346			return nil // Skip directories
347		}
348
349		if fsext.SkipHidden(path) {
350			return nil
351		}
352
353		if includePattern != nil && !includePattern.MatchString(path) {
354			return nil
355		}
356
357		match, lineNum, lineText, err := fileContainsPattern(path, regex)
358		if err != nil {
359			return nil // Skip files we can't read
360		}
361
362		if match {
363			matches = append(matches, grepMatch{
364				path:     path,
365				modTime:  info.ModTime(),
366				lineNum:  lineNum,
367				lineText: lineText,
368			})
369
370			if len(matches) >= 200 {
371				return filepath.SkipAll
372			}
373		}
374
375		return nil
376	})
377	if err != nil {
378		return nil, err
379	}
380
381	return matches, nil
382}
383
384func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
385	// Quick binary file detection
386	if isBinaryFile(filePath) {
387		return false, 0, "", nil
388	}
389
390	file, err := os.Open(filePath)
391	if err != nil {
392		return false, 0, "", err
393	}
394	defer file.Close()
395
396	scanner := bufio.NewScanner(file)
397	lineNum := 0
398	for scanner.Scan() {
399		lineNum++
400		line := scanner.Text()
401		if pattern.MatchString(line) {
402			return true, lineNum, line, nil
403		}
404	}
405
406	return false, 0, "", scanner.Err()
407}
408
// binaryExts is the set of lower-case file extensions that isBinaryFile
// classifies as binary without inspecting the file's contents.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile reports whether filePath looks like a binary file. A file is
// considered binary when its extension (compared case-insensitively) is listed
// in binaryExts, or when a NUL byte occurs within its first 512 bytes. Files
// that cannot be opened or read are reported as not binary.
func isBinaryFile(filePath string) bool {
	// The extension lookup needs no I/O, so it goes first.
	if _, known := binaryExts[strings.ToLower(filepath.Ext(filePath))]; known {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false // Leave the open error for the caller to run into
	}
	defer f.Close()

	// Sniff the head of the file for NUL bytes.
	head := make([]byte, 512)
	n, err := f.Read(head)
	if err != nil && err != io.EOF {
		return false
	}

	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}

	return false
}
449
// globToRegex converts a file glob into the source of a regular expression
// meant to be matched against a file path.
//
// Supported glob syntax: "*" matches any run of characters, "?" matches a
// single character, and a non-empty brace group such as "{ts,tsx}" matches any
// one of its comma-separated alternatives. Every other character is matched
// literally, including characters that are special in regular expressions.
//
// The expression is anchored: it must start at the beginning of the path or
// directly after a path separator, and it must extend to the end of the path.
// "*.js" therefore matches "src/app.js" but not "src/app.json". A glob ending
// in "/" names a directory and is left open at the end so that it matches the
// files below that directory.
func globToRegex(glob string) string {
	var b strings.Builder
	b.Grow(len(glob) + 16)

	b.WriteString(`(?:^|[/\\])`)

	inBraces := false
	for i, r := range glob {
		switch {
		case r == '*':
			b.WriteString(".*")
		case r == '?':
			b.WriteByte('.')
		case r == '{' && !inBraces && strings.IndexByte(glob[i+1:], '}') > 0:
			// Only a brace with a later, non-adjacent "}" opens a group;
			// "{}" and an unclosed "{" are literals.
			inBraces = true
			b.WriteString("(?:")
		case r == ',' && inBraces:
			b.WriteByte('|')
		case r == '}' && inBraces:
			inBraces = false
			b.WriteByte(')')
		default:
			b.WriteString(regexp.QuoteMeta(string(r)))
		}
	}

	if !strings.HasSuffix(glob, "/") {
		b.WriteByte('$')
	}

	return b.String()
}