1package tools
2
3import (
4 "bufio"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "os"
10 "os/exec"
11 "path/filepath"
12 "regexp"
13 "sort"
14 "strconv"
15 "strings"
16 "sync"
17 "time"
18
19 "github.com/charmbracelet/crush/internal/fsext"
20)
21
// regexCache is a concurrency-safe memo of compiled regular expressions,
// keyed by the pattern source text. Entries are never evicted.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex
}

// newRegexCache returns an empty regexCache that is ready for use.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it on first
// use. A pattern that fails to compile yields the compile error and is not
// stored, so a later call with the same pattern compiles it again.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: shared lock only.
	rc.mu.RLock()
	cached, ok := rc.cache[pattern]
	rc.mu.RUnlock()
	if ok {
		return cached, nil
	}

	// Compile without holding any lock so a slow compilation does not stall
	// readers of unrelated patterns. Two goroutines may compile the same
	// pattern concurrently; the duplicate is discarded below.
	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Another goroutine may have stored the pattern while we were compiling;
	// return its instance so every caller shares one *regexp.Regexp.
	if existing, ok := rc.cache[pattern]; ok {
		return existing, nil
	}
	rc.cache[pattern] = compiled
	return compiled, nil
}
63
64// Global regex cache instances
65var (
66 searchRegexCache = newRegexCache()
67 globRegexCache = newRegexCache()
68 // Pre-compiled regex for glob conversion (used frequently)
69 globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
70)
71
// GrepParams is the JSON input accepted by the grep tool.
type GrepParams struct {
	// Pattern is the expression to search for. Run rejects an empty value.
	Pattern string `json:"pattern"`
	// Path is the directory to search. When empty, Run falls back to the
	// tool's working directory.
	Path string `json:"path"`
	// Include is an optional glob restricting which files are searched.
	Include string `json:"include"`
	// LiteralText makes Run escape regex metacharacters in Pattern so it is
	// matched verbatim.
	LiteralText bool `json:"literal_text"`
}
78
79type grepMatch struct {
80 path string
81 modTime time.Time
82 lineNum int
83 lineText string
84}
85
// GrepResponseMetadata is the structured metadata attached to a grep
// response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response.
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether matches beyond the result limit were dropped.
	Truncated bool `json:"truncated"`
}
90
// grepTool implements the grep tool.
type grepTool struct {
	// workingDir is the directory searched when a call supplies no path.
	workingDir string
}
94
const (
	// GrepToolName is the name under which the grep tool identifies itself.
	GrepToolName = "grep"

	// grepDescription is the usage text returned by the tool's Info method.
	// It is a raw string literal, so every character (including line breaks)
	// is emitted verbatim.
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

IGNORE FILE SUPPORT:
- Respects .gitignore patterns to skip ignored files and directories
- Respects .crushignore patterns for additional ignore rules
- Both ignore files are automatically detected in the search root directory

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
144
145func NewGrepTool(workingDir string) BaseTool {
146 return &grepTool{
147 workingDir: workingDir,
148 }
149}
150
151func (g *grepTool) Name() string {
152 return GrepToolName
153}
154
155func (g *grepTool) Info() ToolInfo {
156 return ToolInfo{
157 Name: GrepToolName,
158 Description: grepDescription,
159 Parameters: map[string]any{
160 "pattern": map[string]any{
161 "type": "string",
162 "description": "The regex pattern to search for in file contents",
163 },
164 "path": map[string]any{
165 "type": "string",
166 "description": "The directory to search in. Defaults to the current working directory.",
167 },
168 "include": map[string]any{
169 "type": "string",
170 "description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
171 },
172 "literal_text": map[string]any{
173 "type": "boolean",
174 "description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
175 },
176 },
177 Required: []string{"pattern"},
178 }
179}
180
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) preceded by a backslash, so the
// result matches the original text literally.
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly this character set in a single pass.
	return regexp.QuoteMeta(pattern)
}
192
193func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
194 var params GrepParams
195 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
196 return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
197 }
198
199 if params.Pattern == "" {
200 return NewTextErrorResponse("pattern is required"), nil
201 }
202
203 // If literal_text is true, escape the pattern
204 searchPattern := params.Pattern
205 if params.LiteralText {
206 searchPattern = escapeRegexPattern(params.Pattern)
207 }
208
209 searchPath := params.Path
210 if searchPath == "" {
211 searchPath = g.workingDir
212 }
213
214 matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
215 if err != nil {
216 return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
217 }
218
219 var output strings.Builder
220 if len(matches) == 0 {
221 output.WriteString("No files found")
222 } else {
223 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
224
225 currentFile := ""
226 for _, match := range matches {
227 if currentFile != match.path {
228 if currentFile != "" {
229 output.WriteString("\n")
230 }
231 currentFile = match.path
232 fmt.Fprintf(&output, "%s:\n", match.path)
233 }
234 if match.lineNum > 0 {
235 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
236 } else {
237 fmt.Fprintf(&output, " %s\n", match.path)
238 }
239 }
240
241 if truncated {
242 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
243 }
244 }
245
246 return WithResponseMetadata(
247 NewTextResponse(output.String()),
248 GrepResponseMetadata{
249 NumberOfMatches: len(matches),
250 Truncated: truncated,
251 },
252 ), nil
253}
254
255func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
256 matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
257 if err != nil {
258 matches, err = searchFilesWithRegex(pattern, rootPath, include)
259 if err != nil {
260 return nil, false, err
261 }
262 }
263
264 sort.Slice(matches, func(i, j int) bool {
265 return matches[i].modTime.After(matches[j].modTime)
266 })
267
268 truncated := len(matches) > limit
269 if truncated {
270 matches = matches[:limit]
271 }
272
273 return matches, truncated, nil
274}
275
276func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
277 cmd := getRgSearchCmd(ctx, pattern, path, include)
278 if cmd == nil {
279 return nil, fmt.Errorf("ripgrep not found in $PATH")
280 }
281
282 // Only add ignore files if they exist
283 for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
284 ignorePath := filepath.Join(path, ignoreFile)
285 if _, err := os.Stat(ignorePath); err == nil {
286 cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
287 }
288 }
289
290 output, err := cmd.Output()
291 if err != nil {
292 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
293 return []grepMatch{}, nil
294 }
295 return nil, err
296 }
297
298 lines := strings.Split(strings.TrimSpace(string(output)), "\n")
299 matches := make([]grepMatch, 0, len(lines))
300
301 for _, line := range lines {
302 if line == "" {
303 continue
304 }
305
306 // Parse ripgrep output format: file:line:content
307 parts := strings.SplitN(line, ":", 3)
308 if len(parts) < 3 {
309 continue
310 }
311
312 filePath := parts[0]
313 lineNum, err := strconv.Atoi(parts[1])
314 if err != nil {
315 continue
316 }
317 lineText := parts[2]
318
319 fileInfo, err := os.Stat(filePath)
320 if err != nil {
321 continue // Skip files we can't access
322 }
323
324 matches = append(matches, grepMatch{
325 path: filePath,
326 modTime: fileInfo.ModTime(),
327 lineNum: lineNum,
328 lineText: lineText,
329 })
330 }
331
332 return matches, nil
333}
334
335func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
336 matches := []grepMatch{}
337
338 // Use cached regex compilation
339 regex, err := searchRegexCache.get(pattern)
340 if err != nil {
341 return nil, fmt.Errorf("invalid regex pattern: %w", err)
342 }
343
344 var includePattern *regexp.Regexp
345 if include != "" {
346 regexPattern := globToRegex(include)
347 includePattern, err = globRegexCache.get(regexPattern)
348 if err != nil {
349 return nil, fmt.Errorf("invalid include pattern: %w", err)
350 }
351 }
352
353 // Create walker with gitignore and crushignore support
354 walker := fsext.NewFastGlobWalker(rootPath)
355
356 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
357 if err != nil {
358 return nil // Skip errors
359 }
360
361 if info.IsDir() {
362 // Check if directory should be skipped
363 if walker.ShouldSkip(path) {
364 return filepath.SkipDir
365 }
366 return nil // Continue into directory
367 }
368
369 // Use walker's shouldSkip method for files
370 if walker.ShouldSkip(path) {
371 return nil
372 }
373
374 // Skip hidden files (starting with a dot) to match ripgrep's default behavior
375 base := filepath.Base(path)
376 if base != "." && strings.HasPrefix(base, ".") {
377 return nil
378 }
379
380 if includePattern != nil && !includePattern.MatchString(path) {
381 return nil
382 }
383
384 match, lineNum, lineText, err := fileContainsPattern(path, regex)
385 if err != nil {
386 return nil // Skip files we can't read
387 }
388
389 if match {
390 matches = append(matches, grepMatch{
391 path: path,
392 modTime: info.ModTime(),
393 lineNum: lineNum,
394 lineText: lineText,
395 })
396
397 if len(matches) >= 200 {
398 return filepath.SkipAll
399 }
400 }
401
402 return nil
403 })
404 if err != nil {
405 return nil, err
406 }
407
408 return matches, nil
409}
410
411func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
412 // Quick binary file detection
413 if isBinaryFile(filePath) {
414 return false, 0, "", nil
415 }
416
417 file, err := os.Open(filePath)
418 if err != nil {
419 return false, 0, "", err
420 }
421 defer file.Close()
422
423 scanner := bufio.NewScanner(file)
424 lineNum := 0
425 for scanner.Scan() {
426 lineNum++
427 line := scanner.Text()
428 if pattern.MatchString(line) {
429 return true, lineNum, line, nil
430 }
431 }
432
433 return false, 0, "", scanner.Err()
434}
435
// binaryExts is the set of lower-case file extensions that isBinaryFile
// treats as binary without looking at the file's content.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile is a cheap heuristic for spotting binary files. It returns true
// when the file's extension (compared case-insensitively) is listed in
// binaryExts, or when a NUL byte occurs within the first 512 bytes read from
// the file. A file that cannot be opened or read is reported as not binary.
func isBinaryFile(filePath string) bool {
	// Extension lookup needs no I/O, so try it first.
	if _, listed := binaryExts[strings.ToLower(filepath.Ext(filePath))]; listed {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false // Leave the open failure for the caller to hit and handle
	}
	defer f.Close()

	// Sniff the head of the file for NUL bytes.
	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
476
// globToRegex translates an include glob into the source of a regular
// expression intended to be matched against a file path.
//
// Translation rules:
//   - "*" becomes ".*" and "?" becomes "."
//   - "{a,b}" becomes the alternation "(?:a|b)"; braces without a closing
//     "}" or with empty content are matched literally
//   - "[...]" is kept as a character class, with a leading "!" turned into
//     "^"; an unterminated or empty class is matched literally
//   - "/" matches either "/" or "\" so globs work with both path styles
//   - every other regex metacharacter is escaped and matched literally
//
// The expression is anchored: it must match from the start of the path or
// from just after a path separator, through to the end of the path. Thus
// "*.js" does not match "data.json" and "main.go" does not match "domain.go".
func globToRegex(glob string) string {
	// Metacharacters that reach the default branch and must be escaped.
	const regexMeta = `\.+()|[]{}^$`

	var b strings.Builder
	b.Grow(len(glob) + 16)
	b.WriteString(`(?:^|[/\\])`)

	inBraces := false
	// Byte-wise iteration is safe: every character handled specially is
	// ASCII, and all other bytes (including UTF-8 sequences) are copied as is.
	for i := 0; i < len(glob); i++ {
		c := glob[i]
		switch {
		case c == '*':
			b.WriteString(".*")
		case c == '?':
			b.WriteByte('.')
		case c == '/':
			b.WriteString(`[/\\]`)
		case c == '{' && !inBraces && strings.IndexByte(glob[i+1:], '}') > 0:
			b.WriteString("(?:")
			inBraces = true
		case c == ',' && inBraces:
			b.WriteByte('|')
		case c == '}' && inBraces:
			b.WriteByte(')')
			inBraces = false
		case c == '[':
			end := strings.IndexByte(glob[i+1:], ']')
			class := ""
			if end > 0 {
				class = glob[i+1 : i+1+end]
			}
			negated := strings.HasPrefix(class, "!")
			if negated {
				class = class[1:]
			}
			if class == "" {
				// Not a usable character class: match "[" literally.
				b.WriteString(`\[`)
				continue
			}
			b.WriteByte('[')
			if negated {
				b.WriteByte('^')
			}
			b.WriteString(class)
			b.WriteByte(']')
			i += end + 1 // skip the class body and its closing bracket
		default:
			if strings.IndexByte(regexMeta, c) >= 0 {
				b.WriteByte('\\')
			}
			b.WriteByte(c)
		}
	}

	b.WriteByte('$')
	return b.String()
}