1package tools
2
3import (
4 "bufio"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "os"
10 "os/exec"
11 "path/filepath"
12 "regexp"
13 "sort"
14 "strconv"
15 "strings"
16 "sync"
17 "time"
18
19 "github.com/charmbracelet/crush/internal/config"
20 "github.com/charmbracelet/crush/internal/fsext"
21)
22
// regexCache is a concurrency-safe memo of compiled regular expressions,
// keyed by the pattern's source text. Entries are never evicted.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex
}

// newRegexCache returns an empty regexCache that is ready for use.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it on first
// use. A pattern that fails to compile yields the compile error and is not
// stored, so a later call with the same pattern compiles it again.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: a shared lock is enough when the pattern is already cached.
	rc.mu.RLock()
	regex, exists := rc.cache[pattern]
	rc.mu.RUnlock()
	if exists {
		return regex, nil
	}

	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Look again under the write lock: another goroutine may have stored the
	// pattern between releasing the read lock and acquiring this one.
	if regex, exists := rc.cache[pattern]; exists {
		return regex, nil
	}

	regex, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.cache[pattern] = regex
	return regex, nil
}
64
// Package-level regex state shared by every grep invocation.
var (
	// searchRegexCache holds compiled content-search patterns.
	searchRegexCache = newRegexCache()
	// globRegexCache holds compiled include filters (globs already converted
	// to regex source).
	globRegexCache = newRegexCache()
	// globBraceRegex matches one non-empty, non-nested "{...}" group;
	// it is compiled once at package initialisation.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
72
// GrepParams is the JSON-decoded input of a grep tool call.
type GrepParams struct {
	// Pattern is the text or regular expression to search for; Run rejects an empty value.
	Pattern string `json:"pattern"`
	// Path is the directory to search; Run substitutes the configured working directory when empty.
	Path string `json:"path"`
	// Include is an optional glob (e.g. "*.go") restricting which files are searched.
	Include string `json:"include"`
	// LiteralText makes Run escape regex metacharacters in Pattern before searching.
	LiteralText bool `json:"literal_text"`
}
79
// grepMatch describes one matching line found by a search backend.
type grepMatch struct {
	path     string    // file the match was found in
	modTime  time.Time // modification time of that file; used as the sort key
	lineNum  int       // line number of the match; Run prints only the path when this is not positive
	lineText string    // content of the matching line
}
86
// GrepResponseMetadata is the structured metadata attached to a grep response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response (after truncation).
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether more matches were found than the response limit allows.
	Truncated bool `json:"truncated"`
}
91
// grepTool is the grep tool implementation; it carries no state.
type grepTool struct{}
93
const (
	// GrepToolName is the name under which the grep tool identifies itself.
	GrepToolName = "grep"
	// grepDescription is the usage text returned as the tool's description in Info.
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
138
// NewGrepTool returns a new grep tool as a BaseTool.
func NewGrepTool() BaseTool {
	return &grepTool{}
}
142
// Name returns the tool's name, GrepToolName.
func (g *grepTool) Name() string {
	return GrepToolName
}
146
147func (g *grepTool) Info() ToolInfo {
148 return ToolInfo{
149 Name: GrepToolName,
150 Description: grepDescription,
151 Parameters: map[string]any{
152 "pattern": map[string]any{
153 "type": "string",
154 "description": "The regex pattern to search for in file contents",
155 },
156 "path": map[string]any{
157 "type": "string",
158 "description": "The directory to search in. Defaults to the current working directory.",
159 },
160 "include": map[string]any{
161 "type": "string",
162 "description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
163 },
164 "literal_text": map[string]any{
165 "type": "boolean",
166 "description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
167 },
168 },
169 Required: []string{"pattern"},
170 }
171}
172
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) [ ] { } ^ $ |) backslash-escaped, so the result
// matches the original text literally.
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly this metacharacter set in a single pass.
	return regexp.QuoteMeta(pattern)
}
184
185func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
186 var params GrepParams
187 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
188 return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
189 }
190
191 if params.Pattern == "" {
192 return NewTextErrorResponse("pattern is required"), nil
193 }
194
195 // If literal_text is true, escape the pattern
196 searchPattern := params.Pattern
197 if params.LiteralText {
198 searchPattern = escapeRegexPattern(params.Pattern)
199 }
200
201 searchPath := params.Path
202 if searchPath == "" {
203 searchPath = config.Get().WorkingDir()
204 }
205
206 matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
207 if err != nil {
208 return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
209 }
210
211 var output strings.Builder
212 if len(matches) == 0 {
213 output.WriteString("No files found")
214 } else {
215 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
216
217 currentFile := ""
218 for _, match := range matches {
219 if currentFile != match.path {
220 if currentFile != "" {
221 output.WriteString("\n")
222 }
223 currentFile = match.path
224 fmt.Fprintf(&output, "%s:\n", match.path)
225 }
226 if match.lineNum > 0 {
227 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
228 } else {
229 fmt.Fprintf(&output, " %s\n", match.path)
230 }
231 }
232
233 if truncated {
234 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
235 }
236 }
237
238 return WithResponseMetadata(
239 NewTextResponse(output.String()),
240 GrepResponseMetadata{
241 NumberOfMatches: len(matches),
242 Truncated: truncated,
243 },
244 ), nil
245}
246
247func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
248 matches, err := searchWithRipgrep(pattern, rootPath, include)
249 if err != nil {
250 matches, err = searchFilesWithRegex(pattern, rootPath, include)
251 if err != nil {
252 return nil, false, err
253 }
254 }
255
256 sort.Slice(matches, func(i, j int) bool {
257 return matches[i].modTime.After(matches[j].modTime)
258 })
259
260 truncated := len(matches) > limit
261 if truncated {
262 matches = matches[:limit]
263 }
264
265 return matches, truncated, nil
266}
267
268func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) {
269 cmd := fsext.GetRgSearchCmd(pattern, path, include)
270 if cmd == nil {
271 return nil, fmt.Errorf("ripgrep not found in $PATH")
272 }
273
274 output, err := cmd.Output()
275 if err != nil {
276 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
277 return []grepMatch{}, nil
278 }
279 return nil, err
280 }
281
282 lines := strings.Split(strings.TrimSpace(string(output)), "\n")
283 matches := make([]grepMatch, 0, len(lines))
284
285 for _, line := range lines {
286 if line == "" {
287 continue
288 }
289
290 // Parse ripgrep output format: file:line:content
291 parts := strings.SplitN(line, ":", 3)
292 if len(parts) < 3 {
293 continue
294 }
295
296 filePath := parts[0]
297 lineNum, err := strconv.Atoi(parts[1])
298 if err != nil {
299 continue
300 }
301 lineText := parts[2]
302
303 fileInfo, err := os.Stat(filePath)
304 if err != nil {
305 continue // Skip files we can't access
306 }
307
308 matches = append(matches, grepMatch{
309 path: filePath,
310 modTime: fileInfo.ModTime(),
311 lineNum: lineNum,
312 lineText: lineText,
313 })
314 }
315
316 return matches, nil
317}
318
319func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
320 matches := []grepMatch{}
321
322 // Use cached regex compilation
323 regex, err := searchRegexCache.get(pattern)
324 if err != nil {
325 return nil, fmt.Errorf("invalid regex pattern: %w", err)
326 }
327
328 var includePattern *regexp.Regexp
329 if include != "" {
330 regexPattern := globToRegex(include)
331 includePattern, err = globRegexCache.get(regexPattern)
332 if err != nil {
333 return nil, fmt.Errorf("invalid include pattern: %w", err)
334 }
335 }
336
337 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
338 if err != nil {
339 return nil // Skip errors
340 }
341
342 if info.IsDir() {
343 return nil // Skip directories
344 }
345
346 if fsext.SkipHidden(path) {
347 return nil
348 }
349
350 if includePattern != nil && !includePattern.MatchString(path) {
351 return nil
352 }
353
354 match, lineNum, lineText, err := fileContainsPattern(path, regex)
355 if err != nil {
356 return nil // Skip files we can't read
357 }
358
359 if match {
360 matches = append(matches, grepMatch{
361 path: path,
362 modTime: info.ModTime(),
363 lineNum: lineNum,
364 lineText: lineText,
365 })
366
367 if len(matches) >= 200 {
368 return filepath.SkipAll
369 }
370 }
371
372 return nil
373 })
374 if err != nil {
375 return nil, err
376 }
377
378 return matches, nil
379}
380
381func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
382 // Quick binary file detection
383 if isBinaryFile(filePath) {
384 return false, 0, "", nil
385 }
386
387 file, err := os.Open(filePath)
388 if err != nil {
389 return false, 0, "", err
390 }
391 defer file.Close()
392
393 scanner := bufio.NewScanner(file)
394 lineNum := 0
395 for scanner.Scan() {
396 lineNum++
397 line := scanner.Text()
398 if pattern.MatchString(line) {
399 return true, lineNum, line, nil
400 }
401 }
402
403 return false, 0, "", scanner.Err()
404}
405
// binaryExts is the set of lowercase file extensions that isBinaryFile treats
// as binary without looking at the file's content.
var binaryExts = map[string]struct{}{
	// executables, libraries and object code
	".exe":   {},
	".dll":   {},
	".so":    {},
	".dylib": {},
	".bin":   {},
	".obj":   {},
	".o":     {},
	".a":     {},
	// archives
	".zip": {},
	".tar": {},
	".gz":  {},
	".bz2": {},
	// images
	".jpg":  {},
	".jpeg": {},
	".png":  {},
	".gif":  {},
	// documents
	".pdf":  {},
	".doc":  {},
	".docx": {},
	".xls":  {},
	// audio and video
	".mp3": {},
	".mp4": {},
	".avi": {},
	".mov": {},
}

// isBinaryFile makes a cheap guess at whether filePath is a binary file. It
// answers true when the extension (compared case-insensitively) is listed in
// binaryExts, or when a NUL byte occurs in the first 512 bytes read from the
// file. A file that cannot be opened or read is reported as not binary.
func isBinaryFile(filePath string) bool {
	if _, listed := binaryExts[strings.ToLower(filepath.Ext(filePath))]; listed {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false // leave reporting the open failure to the caller
	}
	defer f.Close()

	// Sniff only the head of the file.
	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
446
// globToRegex converts a file glob into regular-expression source text that
// can be matched against a file path.
//
// Supported glob syntax:
//   - "*" matches any run of characters (including path separators)
//   - "?" matches any single character
//   - "{a,b}" matches either alternative (a single, non-empty, non-nested group)
//
// Every other character, including regex metacharacters such as "." or "+",
// matches itself. The result is anchored: it must match from the start of the
// path or from just after a path separator, and run to the end of the path.
// Without those anchors "*.js" would also accept "data.json" or "app.js.bak".
func globToRegex(glob string) string {
	// Regex metacharacters that must be escaped when they occur literally.
	// "*" and "?" are absent because they are translated, not escaped.
	const regexMeta = `\.+()|[]{}^$`

	var sb strings.Builder
	sb.Grow(2*len(glob) + 16)
	sb.WriteString(`(?:^|[/\\])`)

	// Iterating bytes is safe for UTF-8 input: every character inspected
	// below is ASCII, and all other bytes are copied through unchanged.
	inAlt := false
	for i := 0; i < len(glob); i++ {
		switch c := glob[i]; {
		case c == '*':
			sb.WriteString(".*")
		case c == '?':
			sb.WriteByte('.')
		case c == '{' && !inAlt && strings.IndexByte(glob[i+1:], '}') > 0:
			// Open an alternation only when a closing brace follows at
			// least one character; otherwise the brace is literal.
			inAlt = true
			sb.WriteString("(?:")
		case c == ',' && inAlt:
			sb.WriteByte('|')
		case c == '}' && inAlt:
			inAlt = false
			sb.WriteByte(')')
		case strings.IndexByte(regexMeta, c) >= 0:
			sb.WriteByte('\\')
			sb.WriteByte(c)
		default:
			sb.WriteByte(c)
		}
	}

	sb.WriteByte('$')
	return sb.String()
}