1package tools
2
3import (
4 "bufio"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "os"
10 "path/filepath"
11 "regexp"
12 "sort"
13 "strings"
14 "sync"
15 "time"
16
17 "github.com/charmbracelet/crush/internal/fsext"
18)
19
// regexCache is a concurrency-safe cache of compiled regular expressions,
// keyed by the pattern's source text. Entries are never evicted.
type regexCache struct {
	cache map[string]*regexp.Regexp // compiled expressions by pattern text
	mu    sync.RWMutex              // guards cache
}

// newRegexCache returns an empty regexCache that is ready for use.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it on first
// use. A pattern that fails to compile is not stored; the error from
// regexp.Compile is returned unchanged and the returned regexp is nil.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: an existing entry only needs the read lock.
	rc.mu.RLock()
	regex, ok := rc.cache[pattern]
	rc.mu.RUnlock()
	if ok {
		return regex, nil
	}

	// Slow path: take the write lock to compile and store the pattern.
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Look again under the write lock: another goroutine may have stored the
	// pattern between the read lock being released and this lock being taken.
	if regex, ok = rc.cache[pattern]; ok {
		return regex, nil
	}

	regex, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.cache[pattern] = regex
	return regex, nil
}
61
62// Global regex cache instances
63var (
64 searchRegexCache = newRegexCache()
65 globRegexCache = newRegexCache()
66 // Pre-compiled regex for glob conversion (used frequently)
67 globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
68)
69
// GrepParams is the JSON input accepted by the grep tool.
type GrepParams struct {
	// Pattern is the regular expression (or literal text, see LiteralText)
	// to look for in file contents. It must not be empty.
	Pattern string `json:"pattern"`
	// Path is the directory to search. When empty, the tool's working
	// directory is used.
	Path string `json:"path"`
	// Include is an optional glob that restricts which files are searched.
	Include string `json:"include"`
	// LiteralText makes Pattern match as plain text rather than as a regex.
	LiteralText bool `json:"literal_text"`
}
76
77type grepMatch struct {
78 path string
79 modTime time.Time
80 lineNum int
81 lineText string
82}
83
// GrepResponseMetadata is the structured metadata attached to a grep response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response.
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether matches were dropped to respect the result limit.
	Truncated bool `json:"truncated"`
}
88
// grepTool implements the grep tool.
type grepTool struct {
	// workingDir is the directory searched when a call supplies no path.
	workingDir string
}
92
// GrepToolName is the name the grep tool reports from Name and Info.
const GrepToolName = "grep"

// grepDescription is the usage text the grep tool reports from Info.
const grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

CROSS-PLATFORM NOTES:
- Uses built-in Go implementation that is based on regexp
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
136
137func NewGrepTool(workingDir string) BaseTool {
138 return &grepTool{
139 workingDir: workingDir,
140 }
141}
142
143func (g *grepTool) Name() string {
144 return GrepToolName
145}
146
147func (g *grepTool) Info() ToolInfo {
148 return ToolInfo{
149 Name: GrepToolName,
150 Description: grepDescription,
151 Parameters: map[string]any{
152 "pattern": map[string]any{
153 "type": "string",
154 "description": "The regex pattern to search for in file contents",
155 },
156 "path": map[string]any{
157 "type": "string",
158 "description": "The directory to search in. Defaults to the current working directory.",
159 },
160 "include": map[string]any{
161 "type": "string",
162 "description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
163 },
164 "literal_text": map[string]any{
165 "type": "boolean",
166 "description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
167 },
168 },
169 Required: []string{"pattern"},
170 }
171}
172
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) backslash-escaped, so that the
// result matches pattern as literal text. An empty pattern yields "".
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly this character set in a single pass.
	return regexp.QuoteMeta(pattern)
}
184
185func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
186 var params GrepParams
187 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
188 return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
189 }
190
191 if params.Pattern == "" {
192 return NewTextErrorResponse("pattern is required"), nil
193 }
194
195 // If literal_text is true, escape the pattern
196 searchPattern := params.Pattern
197 if params.LiteralText {
198 searchPattern = escapeRegexPattern(params.Pattern)
199 }
200
201 searchPath := params.Path
202 if searchPath == "" {
203 searchPath = g.workingDir
204 }
205
206 matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
207 if err != nil {
208 return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
209 }
210
211 var output strings.Builder
212 if len(matches) == 0 {
213 output.WriteString("No files found")
214 } else {
215 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
216
217 currentFile := ""
218 for _, match := range matches {
219 if currentFile != match.path {
220 if currentFile != "" {
221 output.WriteString("\n")
222 }
223 currentFile = match.path
224 fmt.Fprintf(&output, "%s:\n", match.path)
225 }
226 if match.lineNum > 0 {
227 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
228 } else {
229 fmt.Fprintf(&output, " %s\n", match.path)
230 }
231 }
232
233 if truncated {
234 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
235 }
236 }
237
238 return WithResponseMetadata(
239 NewTextResponse(output.String()),
240 GrepResponseMetadata{
241 NumberOfMatches: len(matches),
242 Truncated: truncated,
243 },
244 ), nil
245}
246
247func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
248 matches, err := searchFilesWithRegex(pattern, rootPath, include)
249 if err != nil {
250 return nil, false, err
251 }
252
253 sort.Slice(matches, func(i, j int) bool {
254 return matches[i].modTime.After(matches[j].modTime)
255 })
256
257 truncated := len(matches) > limit
258 if truncated {
259 matches = matches[:limit]
260 }
261
262 return matches, truncated, nil
263}
264
265func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
266 matches := []grepMatch{}
267
268 // Use cached regex compilation
269 regex, err := searchRegexCache.get(pattern)
270 if err != nil {
271 return nil, fmt.Errorf("invalid regex pattern: %w", err)
272 }
273
274 var includePattern *regexp.Regexp
275 if include != "" {
276 regexPattern := globToRegex(include)
277 includePattern, err = globRegexCache.get(regexPattern)
278 if err != nil {
279 return nil, fmt.Errorf("invalid include pattern: %w", err)
280 }
281 }
282
283 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
284 if err != nil {
285 return nil // Skip errors
286 }
287
288 if info.IsDir() {
289 return nil // Skip directories
290 }
291
292 if fsext.SkipHidden(path) {
293 return nil
294 }
295
296 if includePattern != nil && !includePattern.MatchString(path) {
297 return nil
298 }
299
300 match, lineNum, lineText, err := fileContainsPattern(path, regex)
301 if err != nil {
302 return nil // Skip files we can't read
303 }
304
305 if match {
306 matches = append(matches, grepMatch{
307 path: path,
308 modTime: info.ModTime(),
309 lineNum: lineNum,
310 lineText: lineText,
311 })
312
313 if len(matches) >= 200 {
314 return filepath.SkipAll
315 }
316 }
317
318 return nil
319 })
320 if err != nil {
321 return nil, err
322 }
323
324 return matches, nil
325}
326
327func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
328 // Quick binary file detection
329 if isBinaryFile(filePath) {
330 return false, 0, "", nil
331 }
332
333 file, err := os.Open(filePath)
334 if err != nil {
335 return false, 0, "", err
336 }
337 defer file.Close()
338
339 scanner := bufio.NewScanner(file)
340 lineNum := 0
341 for scanner.Scan() {
342 lineNum++
343 line := scanner.Text()
344 if pattern.MatchString(line) {
345 return true, lineNum, line, nil
346 }
347 }
348
349 return false, 0, "", scanner.Err()
350}
351
// binaryExts is the set of lower-case file extensions that isBinaryFile
// treats as binary without looking at the file's contents.
var binaryExts = map[string]struct{}{
	".exe": {}, ".dll": {}, ".so": {}, ".dylib": {},
	".bin": {}, ".obj": {}, ".o": {}, ".a": {},
	".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {},
	".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {},
	".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {},
	".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {},
}

// isBinaryFile reports whether filePath looks like a binary file. A file is
// considered binary when its extension (compared case-insensitively) is in
// binaryExts, or when a NUL byte occurs in the data returned by a single read
// of up to 512 bytes from its start. Files that cannot be opened or read are
// reported as not binary, leaving the caller to surface the underlying error.
func isBinaryFile(filePath string) bool {
	// Cheapest test first: the extension needs no I/O.
	if _, known := binaryExts[strings.ToLower(filepath.Ext(filePath))]; known {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false
	}
	defer f.Close()

	head := make([]byte, 512)
	n, readErr := f.Read(head)
	if readErr != nil && readErr != io.EOF {
		return false
	}

	// A NUL byte is taken as the sign of binary content.
	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
392
393func globToRegex(glob string) string {
394 regexPattern := strings.ReplaceAll(glob, ".", "\\.")
395 regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
396 regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
397
398 // Use pre-compiled regex instead of compiling each time
399 regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
400 inner := match[1 : len(match)-1]
401 return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
402 })
403
404 return regexPattern
405}