1package tools
2
3import (
4 "bufio"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "os"
10 "path/filepath"
11 "regexp"
12 "sort"
13 "strings"
14 "sync"
15 "time"
16
17 "github.com/charmbracelet/crush/internal/fsext"
18)
19
// regexCache memoizes compiled regular expressions keyed by their source
// pattern. Access to the map is guarded by mu, so a single cache may be
// shared between goroutines.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex
}

// newRegexCache returns an empty, ready-to-use regexCache.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it on the
// first request.
//
// A compile failure is returned to the caller and nothing is stored, so an
// invalid pattern is re-compiled (and fails again) on every call.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: look the pattern up under the shared read lock.
	rc.mu.RLock()
	cached, ok := rc.cache[pattern]
	rc.mu.RUnlock()
	if ok {
		return cached, nil
	}

	// Slow path: take the write lock for compilation and insertion.
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// Another goroutine may have stored the pattern between the read unlock
	// above and the write lock here, so look again before compiling.
	if cached, ok := rc.cache[pattern]; ok {
		return cached, nil
	}

	compiled, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.cache[pattern] = compiled
	return compiled, nil
}
61
62// Global regex cache instances
63var (
64 searchRegexCache = newRegexCache()
65 globRegexCache = newRegexCache()
66 // Pre-compiled regex for glob conversion (used frequently)
67 globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
68)
69
// GrepParams is the JSON input accepted by the grep tool.
type GrepParams struct {
	// Pattern is the text or regular expression to search for. Run rejects
	// an empty pattern.
	Pattern string `json:"pattern"`
	// Path is the directory to search. When empty, Run falls back to the
	// tool's working directory.
	Path string `json:"path"`
	// Include is an optional glob-style filter (e.g. "*.go") restricting
	// which files are searched.
	Include string `json:"include"`
	// LiteralText makes Run escape regex metacharacters in Pattern so it is
	// matched verbatim.
	LiteralText bool `json:"literal_text"`
}
76
// grepMatch records one file in which the search pattern was found.
type grepMatch struct {
	// path is the file path as reported by the directory walk.
	path string
	// modTime is the file's modification time; results are sorted on it.
	modTime time.Time
	// lineNum is the 1-based number of the first matching line. Run treats a
	// value of 0 as "no line information".
	lineNum int
	// lineText is the content of that first matching line.
	lineText string
}
83
// GrepResponseMetadata is attached to the grep tool's response by Run.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response
	// (that is, counted after truncation).
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether more matches were found than were returned.
	Truncated bool `json:"truncated"`
}
88
// grepTool implements the grep tool.
type grepTool struct {
	// workingDir is the directory searched when a call does not supply a path.
	workingDir string
}
92
const (
	// GrepToolName is the name the grep tool reports from Name and Info.
	GrepToolName = "grep"
	// grepDescription is the usage text returned in the tool's Info.
	//
	// NOTE(review): the text mentions ripgrep, but no rg invocation is visible
	// in this file; the search here is the built-in Go walk. Confirm whether
	// that section still applies.
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

CROSS-PLATFORM NOTES:
- Uses ripgrep (rg) command if available for better performance
- Falls back to built-in Go implementation if ripgrep is not available
- File paths are normalized automatically for cross-platform compatibility

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
137
138func NewGrepTool(workingDir string) BaseTool {
139 return &grepTool{
140 workingDir: workingDir,
141 }
142}
143
144func (g *grepTool) Name() string {
145 return GrepToolName
146}
147
148func (g *grepTool) Info() ToolInfo {
149 return ToolInfo{
150 Name: GrepToolName,
151 Description: grepDescription,
152 Parameters: map[string]any{
153 "pattern": map[string]any{
154 "type": "string",
155 "description": "The regex pattern to search for in file contents",
156 },
157 "path": map[string]any{
158 "type": "string",
159 "description": "The directory to search in. Defaults to the current working directory.",
160 },
161 "include": map[string]any{
162 "type": "string",
163 "description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
164 },
165 "literal_text": map[string]any{
166 "type": "boolean",
167 "description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
168 },
169 },
170 Required: []string{"pattern"},
171 }
172}
173
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) [ ] { } ^ $ |) backslash-escaped, so the
// result matches the input text literally when compiled as a regex.
//
// It delegates to regexp.QuoteMeta, which escapes exactly that character set
// in a single pass.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
185
186func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
187 var params GrepParams
188 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
189 return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
190 }
191
192 if params.Pattern == "" {
193 return NewTextErrorResponse("pattern is required"), nil
194 }
195
196 // If literal_text is true, escape the pattern
197 searchPattern := params.Pattern
198 if params.LiteralText {
199 searchPattern = escapeRegexPattern(params.Pattern)
200 }
201
202 searchPath := params.Path
203 if searchPath == "" {
204 searchPath = g.workingDir
205 }
206
207 matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
208 if err != nil {
209 return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
210 }
211
212 var output strings.Builder
213 if len(matches) == 0 {
214 output.WriteString("No files found")
215 } else {
216 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
217
218 currentFile := ""
219 for _, match := range matches {
220 if currentFile != match.path {
221 if currentFile != "" {
222 output.WriteString("\n")
223 }
224 currentFile = match.path
225 fmt.Fprintf(&output, "%s:\n", match.path)
226 }
227 if match.lineNum > 0 {
228 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
229 } else {
230 fmt.Fprintf(&output, " %s\n", match.path)
231 }
232 }
233
234 if truncated {
235 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
236 }
237 }
238
239 return WithResponseMetadata(
240 NewTextResponse(output.String()),
241 GrepResponseMetadata{
242 NumberOfMatches: len(matches),
243 Truncated: truncated,
244 },
245 ), nil
246}
247
248func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
249 matches, err := searchFilesWithRegex(pattern, rootPath, include)
250 if err != nil {
251 return nil, false, err
252 }
253
254 sort.Slice(matches, func(i, j int) bool {
255 return matches[i].modTime.After(matches[j].modTime)
256 })
257
258 truncated := len(matches) > limit
259 if truncated {
260 matches = matches[:limit]
261 }
262
263 return matches, truncated, nil
264}
265
266func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
267 matches := []grepMatch{}
268
269 // Use cached regex compilation
270 regex, err := searchRegexCache.get(pattern)
271 if err != nil {
272 return nil, fmt.Errorf("invalid regex pattern: %w", err)
273 }
274
275 var includePattern *regexp.Regexp
276 if include != "" {
277 regexPattern := globToRegex(include)
278 includePattern, err = globRegexCache.get(regexPattern)
279 if err != nil {
280 return nil, fmt.Errorf("invalid include pattern: %w", err)
281 }
282 }
283
284 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
285 if err != nil {
286 return nil // Skip errors
287 }
288
289 if info.IsDir() {
290 return nil // Skip directories
291 }
292
293 if fsext.SkipHidden(path) {
294 return nil
295 }
296
297 if includePattern != nil && !includePattern.MatchString(path) {
298 return nil
299 }
300
301 match, lineNum, lineText, err := fileContainsPattern(path, regex)
302 if err != nil {
303 return nil // Skip files we can't read
304 }
305
306 if match {
307 matches = append(matches, grepMatch{
308 path: path,
309 modTime: info.ModTime(),
310 lineNum: lineNum,
311 lineText: lineText,
312 })
313
314 if len(matches) >= 200 {
315 return filepath.SkipAll
316 }
317 }
318
319 return nil
320 })
321 if err != nil {
322 return nil, err
323 }
324
325 return matches, nil
326}
327
328func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
329 // Quick binary file detection
330 if isBinaryFile(filePath) {
331 return false, 0, "", nil
332 }
333
334 file, err := os.Open(filePath)
335 if err != nil {
336 return false, 0, "", err
337 }
338 defer file.Close()
339
340 scanner := bufio.NewScanner(file)
341 lineNum := 0
342 for scanner.Scan() {
343 lineNum++
344 line := scanner.Text()
345 if pattern.MatchString(line) {
346 return true, lineNum, line, nil
347 }
348 }
349
350 return false, 0, "", scanner.Err()
351}
352
// binaryExts is the set of lower-case file extensions that isBinaryFile
// treats as binary without reading the file.
var binaryExts = map[string]struct{}{
	// Executables and shared libraries.
	".exe":   {},
	".dll":   {},
	".so":    {},
	".dylib": {},
	// Raw binaries, object files and static archives.
	".bin": {},
	".obj": {},
	".o":   {},
	".a":   {},
	// Archives and compressed files.
	".zip": {},
	".tar": {},
	".gz":  {},
	".bz2": {},
	// Images.
	".jpg":  {},
	".jpeg": {},
	".png":  {},
	".gif":  {},
	// Documents.
	".pdf":  {},
	".doc":  {},
	".docx": {},
	".xls":  {},
	// Audio and video.
	".mp3": {},
	".mp4": {},
	".avi": {},
	".mov": {},
}

// isBinaryFile reports whether filePath looks like a binary file.
//
// A file is considered binary when its extension (compared case-
// insensitively) is listed in binaryExts, or when a NUL byte appears in the
// bytes returned by a single read of up to 512 bytes from its start. If the
// file cannot be opened or read, it returns false so that the caller's own
// open attempt reports the problem.
func isBinaryFile(filePath string) bool {
	// Cheapest check first: the extension table needs no I/O.
	if _, known := binaryExts[strings.ToLower(filepath.Ext(filePath))]; known {
		return true
	}

	f, err := os.Open(filePath)
	if err != nil {
		return false
	}
	defer f.Close()

	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	// A NUL byte in the sampled prefix marks the file as binary.
	for _, b := range head[:n] {
		if b == 0 {
			return true
		}
	}
	return false
}
393
394func globToRegex(glob string) string {
395 regexPattern := strings.ReplaceAll(glob, ".", "\\.")
396 regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
397 regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
398
399 // Use pre-compiled regex instead of compiling each time
400 regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
401 inner := match[1 : len(match)-1]
402 return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
403 })
404
405 return regexPattern
406}