1package tools
2
3import (
4 "bufio"
5 "context"
6 "fmt"
7 "io"
8 "os"
9 "os/exec"
10 "path/filepath"
11 "regexp"
12 "sort"
13 "strconv"
14 "strings"
15 "sync"
16 "time"
17
18 "github.com/charmbracelet/crush/internal/ai"
19 "github.com/charmbracelet/crush/internal/fsext"
20)
21
// regexCache memoizes compiled regular expressions keyed by their source
// pattern so that repeated searches do not recompile them. All access to the
// map goes through mu, which makes the cache usable from multiple goroutines.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex // guards cache
}

// newRegexCache returns an empty regexCache whose map is ready for use.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it the first
// time the pattern is seen.
//
// A pattern that fails to compile is not stored; the compile error is returned
// together with a nil *regexp.Regexp.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: look the pattern up under the shared read lock.
	rc.mu.RLock()
	re, ok := rc.cache[pattern]
	rc.mu.RUnlock()
	if ok {
		return re, nil
	}

	// Slow path: take the write lock for the compile-and-store step.
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Look again: another goroutine may have stored the pattern between the
	// read unlock above and acquiring the write lock.
	if re, ok = rc.cache[pattern]; ok {
		return re, nil
	}

	re, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.cache[pattern] = re
	return re, nil
}
63
64// Global regex cache instances
65var (
66 searchRegexCache = newRegexCache()
67 globRegexCache = newRegexCache()
68 // Pre-compiled regex for glob conversion (used frequently)
69 globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
70)
71
// GrepParams is the input accepted by the grep tool. The struct tags carry the
// JSON field names and the per-field descriptions.
type GrepParams struct {
	// Pattern is the text or regular expression to look for. NewGrepTool
	// rejects an empty pattern.
	Pattern string `json:"pattern" description:"The regex pattern to search for in file contents"`
	// Path is the directory to search; when empty, the tool's working
	// directory is used instead.
	Path string `json:"path" description:"The directory to search in. Defaults to the current working directory."`
	// Include is an optional glob that limits which files are searched.
	Include string `json:"include" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
	// LiteralText makes Pattern match as plain text: regex metacharacters are
	// escaped before searching.
	LiteralText bool `json:"literal_text" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
}
78
// grepMatch is a single search hit: one matching line in one file.
type grepMatch struct {
	// path is the file the match was found in.
	path string
	// modTime is that file's modification time; searchFiles orders results by
	// it, newest first.
	modTime time.Time
	// lineNum is the line number of the match. NewGrepTool prints a
	// "Line N" entry only when it is greater than zero.
	lineNum int
	// lineText is the text of the matching line.
	lineText string
}
85
// GrepResponseMetadata is the structured metadata attached to a grep tool
// response alongside its text output.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response,
	// counted after any truncation.
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether matches beyond the result limit were dropped.
	Truncated bool `json:"truncated"`
}
90
// GrepToolName is the name under which the grep tool is registered.
const GrepToolName = "grep"
94
95func NewGrepTool(workingDir string) ai.AgentTool {
96 return ai.NewTypedToolFunc(
97 GrepToolName,
98 `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).
99
100WHEN TO USE THIS TOOL:
101- Use when you need to find files containing specific text or patterns
102- Great for searching code bases for function names, variable declarations, or error messages
103- Useful for finding all files that use a particular API or pattern
104
105HOW TO USE:
106- Provide a regex pattern to search for within file contents
107- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
108- Optionally specify a starting directory (defaults to current working directory)
109- Optionally provide an include pattern to filter which files to search
110- Results are sorted with most recently modified files first
111
112REGEX PATTERN SYNTAX (when literal_text=false):
113- Supports standard regular expression syntax
114- 'function' searches for the literal text "function"
115- 'log\..*Error' finds text starting with "log." and ending with "Error"
116- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript
117
118COMMON INCLUDE PATTERN EXAMPLES:
119- '*.js' - Only search JavaScript files
120- '*.{ts,tsx}' - Only search TypeScript files
121- '*.go' - Only search Go files
122
123LIMITATIONS:
124- Results are limited to 100 files (newest first)
125- Performance depends on the number of files being searched
126- Very large binary files may be skipped
127- Hidden files (starting with '.') are skipped
128
129IGNORE FILE SUPPORT:
130- Respects .gitignore patterns to skip ignored files and directories
131- Respects .crushignore patterns for additional ignore rules
132- Both ignore files are automatically detected in the search root directory
133
134CROSS-PLATFORM NOTES:
135- Uses ripgrep (rg) command if available for better performance
136- Falls back to built-in Go implementation if ripgrep is not available
137- File paths are normalized automatically for cross-platform compatibility
138
139TIPS:
140- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
141- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
142- Always check if results are truncated and refine your search pattern if needed
143- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`,
144 func(ctx context.Context, params GrepParams, call ai.ToolCall) (ai.ToolResponse, error) {
145 if params.Pattern == "" {
146 return ai.NewTextErrorResponse("pattern is required"), nil
147 }
148
149 // If literal_text is true, escape the pattern
150 searchPattern := params.Pattern
151 if params.LiteralText {
152 searchPattern = escapeRegexPattern(params.Pattern)
153 }
154
155 searchPath := params.Path
156 if searchPath == "" {
157 searchPath = workingDir
158 }
159
160 matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
161 if err != nil {
162 return ai.ToolResponse{}, fmt.Errorf("error searching files: %w", err)
163 }
164
165 var output strings.Builder
166 if len(matches) == 0 {
167 output.WriteString("No files found")
168 } else {
169 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
170
171 currentFile := ""
172 for _, match := range matches {
173 if currentFile != match.path {
174 if currentFile != "" {
175 output.WriteString("\n")
176 }
177 currentFile = match.path
178 fmt.Fprintf(&output, "%s:\n", match.path)
179 }
180 if match.lineNum > 0 {
181 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
182 } else {
183 fmt.Fprintf(&output, " %s\n", match.path)
184 }
185 }
186
187 if truncated {
188 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
189 }
190 }
191
192 return ai.WithResponseMetadata(
193 ai.NewTextResponse(output.String()),
194 GrepResponseMetadata{
195 NumberOfMatches: len(matches),
196 Truncated: truncated,
197 },
198 ), nil
199 })
200}
201
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) preceded by a backslash, so the
// result matches pattern as literal text.
//
// It delegates to regexp.QuoteMeta, which escapes exactly that character set
// in a single pass.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
213
214func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
215 matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
216 if err != nil {
217 matches, err = searchFilesWithRegex(pattern, rootPath, include)
218 if err != nil {
219 return nil, false, err
220 }
221 }
222
223 sort.Slice(matches, func(i, j int) bool {
224 return matches[i].modTime.After(matches[j].modTime)
225 })
226
227 truncated := len(matches) > limit
228 if truncated {
229 matches = matches[:limit]
230 }
231
232 return matches, truncated, nil
233}
234
235func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
236 cmd := getRgSearchCmd(ctx, pattern, path, include)
237 if cmd == nil {
238 return nil, fmt.Errorf("ripgrep not found in $PATH")
239 }
240
241 cmd.Args = append(
242 cmd.Args,
243 "--ignore-file", filepath.Join(path, ".gitignore"),
244 "--ignore-file", filepath.Join(path, ".crushignore"),
245 )
246
247 output, err := cmd.Output()
248 if err != nil {
249 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
250 return []grepMatch{}, nil
251 }
252 return nil, err
253 }
254
255 lines := strings.Split(strings.TrimSpace(string(output)), "\n")
256 matches := make([]grepMatch, 0, len(lines))
257
258 for _, line := range lines {
259 if line == "" {
260 continue
261 }
262
263 // Parse ripgrep output format: file:line:content
264 parts := strings.SplitN(line, ":", 3)
265 if len(parts) < 3 {
266 continue
267 }
268
269 filePath := parts[0]
270 lineNum, err := strconv.Atoi(parts[1])
271 if err != nil {
272 continue
273 }
274 lineText := parts[2]
275
276 fileInfo, err := os.Stat(filePath)
277 if err != nil {
278 continue // Skip files we can't access
279 }
280
281 matches = append(matches, grepMatch{
282 path: filePath,
283 modTime: fileInfo.ModTime(),
284 lineNum: lineNum,
285 lineText: lineText,
286 })
287 }
288
289 return matches, nil
290}
291
292func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
293 matches := []grepMatch{}
294
295 // Use cached regex compilation
296 regex, err := searchRegexCache.get(pattern)
297 if err != nil {
298 return nil, fmt.Errorf("invalid regex pattern: %w", err)
299 }
300
301 var includePattern *regexp.Regexp
302 if include != "" {
303 regexPattern := globToRegex(include)
304 includePattern, err = globRegexCache.get(regexPattern)
305 if err != nil {
306 return nil, fmt.Errorf("invalid include pattern: %w", err)
307 }
308 }
309
310 // Create walker with gitignore and crushignore support
311 walker := fsext.NewFastGlobWalker(rootPath)
312
313 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
314 if err != nil {
315 return nil // Skip errors
316 }
317
318 if info.IsDir() {
319 return nil // Skip directories
320 }
321
322 // Use walker's shouldSkip method instead of just SkipHidden
323 if walker.ShouldSkip(path) {
324 return nil
325 }
326
327 if includePattern != nil && !includePattern.MatchString(path) {
328 return nil
329 }
330
331 match, lineNum, lineText, err := fileContainsPattern(path, regex)
332 if err != nil {
333 return nil // Skip files we can't read
334 }
335
336 if match {
337 matches = append(matches, grepMatch{
338 path: path,
339 modTime: info.ModTime(),
340 lineNum: lineNum,
341 lineText: lineText,
342 })
343
344 if len(matches) >= 200 {
345 return filepath.SkipAll
346 }
347 }
348
349 return nil
350 })
351 if err != nil {
352 return nil, err
353 }
354
355 return matches, nil
356}
357
// fileContainsPattern reports whether any line of the file at filePath matches
// pattern. On a match it returns true together with the 1-based line number
// and the text of the first matching line.
//
// Files that isBinaryFile classifies as binary are reported as non-matching
// without being scanned. An error is returned when the file cannot be opened
// or when scanning fails, including when a line exceeds maxLineBytes.
func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
	const (
		// initialLineBuf is the scanner's starting buffer size.
		initialLineBuf = 4096
		// maxLineBytes is the longest line the scanner accepts. The bufio
		// default (64 KiB) makes a single long line, as found in minified or
		// generated files, abort the scan so the rest of the file is never
		// searched.
		maxLineBytes = 1 << 20
	)

	// Quick binary file detection
	if isBinaryFile(filePath) {
		return false, 0, "", nil
	}

	file, err := os.Open(filePath)
	if err != nil {
		return false, 0, "", err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	scanner.Buffer(make([]byte, 0, initialLineBuf), maxLineBytes)

	lineNum := 0
	for scanner.Scan() {
		lineNum++
		// Match on the raw bytes; a string is only built for the line that is
		// actually returned.
		if pattern.Match(scanner.Bytes()) {
			return true, lineNum, scanner.Text(), nil
		}
	}

	return false, 0, "", scanner.Err()
}

// binaryExts lists the lower-case file extensions that isBinaryFile treats as
// binary without looking at the file's content.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile performs a quick check to determine if a file is binary.
//
// A file is considered binary when its extension (compared case-insensitively)
// is listed in binaryExts, or when a NUL byte occurs within its first 512
// bytes. Files that cannot be opened or read are reported as not binary so the
// caller hits, and handles, the underlying error itself.
func isBinaryFile(filePath string) bool {
	// Check file extension first (fastest)
	ext := strings.ToLower(filepath.Ext(filePath))
	if _, known := binaryExts[ext]; known {
		return true
	}

	// Quick content check for files without clear extensions
	file, err := os.Open(filePath)
	if err != nil {
		return false
	}
	defer file.Close()

	head := make([]byte, 512)
	n, err := file.Read(head)
	if err != nil && err != io.EOF {
		return false
	}

	// A NUL byte is common in binary files and rare in text.
	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}

	return false
}
423
424func globToRegex(glob string) string {
425 regexPattern := strings.ReplaceAll(glob, ".", "\\.")
426 regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
427 regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
428
429 // Use pre-compiled regex instead of compiling each time
430 regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
431 inner := match[1 : len(match)-1]
432 return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
433 })
434
435 return regexPattern
436}