1package tools
2
3import (
4 "bufio"
5 "context"
6 _ "embed"
7 "fmt"
8 "io"
9 "os"
10 "os/exec"
11 "path/filepath"
12 "regexp"
13 "sort"
14 "strconv"
15 "strings"
16 "sync"
17 "time"
18
19 "github.com/charmbracelet/crush/internal/fsext"
20 "github.com/charmbracelet/fantasy/ai"
21)
22
// regexCache memoizes compiled regular expressions, keyed by their pattern
// string. The map is guarded by mu so one cache can be shared by several
// goroutines. Entries are never evicted.
type regexCache struct {
	cache map[string]*regexp.Regexp
	mu    sync.RWMutex
}

// newRegexCache returns an empty regexCache whose map is already allocated.
func newRegexCache() *regexCache {
	return &regexCache{
		cache: make(map[string]*regexp.Regexp),
	}
}

// get returns the compiled form of pattern, compiling and storing it the first
// time the pattern is seen.
//
// A pattern that fails to compile yields a nil regex and the error from
// regexp.Compile; nothing is stored for it, so the next call compiles again.
func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
	// Fast path: a cache hit only needs the shared read lock.
	rc.mu.RLock()
	cached, hit := rc.cache[pattern]
	rc.mu.RUnlock()
	if hit {
		return cached, nil
	}

	// Slow path: hold the write lock for the compile and the insert.
	rc.mu.Lock()
	defer rc.mu.Unlock()

	// The read lock was released above, so another goroutine may have stored
	// this pattern in the meantime; look again before compiling.
	if cached, hit := rc.cache[pattern]; hit {
		return cached, nil
	}

	regex, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	rc.cache[pattern] = regex
	return regex, nil
}
64
// Package-level regex state shared by the grep helpers in this file.
var (
	// searchRegexCache holds compiled content-search patterns; it is consulted
	// by searchFilesWithRegex.
	searchRegexCache = newRegexCache()
	// globRegexCache holds compiled regexes that were derived from include
	// globs via globToRegex.
	globRegexCache = newRegexCache()
	// globBraceRegex matches a brace group such as "{ts,tsx}" (an opening
	// brace, one or more non-"}" characters, a closing brace). It is compiled
	// once here because globToRegex applies it on every call.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
72
// GrepParams is the input accepted by the grep tool handler built in
// NewGrepTool. The handler rejects an empty Pattern, substitutes the tool's
// working directory for an empty Path, forwards Include to the search as a
// file-name glob, and escapes Pattern first when LiteralText is set.
type GrepParams struct {
	Pattern     string `json:"pattern" description:"The regex pattern to search for in file contents"`
	Path        string `json:"path" description:"The directory to search in. Defaults to the current working directory."`
	Include     string `json:"include" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
	LiteralText bool   `json:"literal_text" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
}
79
// grepMatch is one search hit: a matching line inside a file.
type grepMatch struct {
	path     string    // file the match was found in
	modTime  time.Time // modification time of that file; used to sort results newest-first
	lineNum  int       // line number of the match; the formatter only prints "Line N" when it is > 0
	lineText string    // full text of the matching line
}
86
// GrepResponseMetadata is attached to every successful grep tool response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response
	// (i.e. counted after any truncation).
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated reports whether more matches were found than were returned.
	Truncated bool `json:"truncated"`
}
91
// GrepToolName is the tool name passed to ai.NewAgentTool when the grep tool
// is constructed.
const GrepToolName = "grep"
93
// grepDescription holds the contents of grep.md, embedded at build time. It
// is passed (as a string) as the tool description in NewGrepTool.
//
//go:embed grep.md
var grepDescription []byte
96
97func NewGrepTool(workingDir string) ai.AgentTool {
98 return ai.NewAgentTool(
99 GrepToolName,
100 string(grepDescription),
101 func(ctx context.Context, params GrepParams, call ai.ToolCall) (ai.ToolResponse, error) {
102 if params.Pattern == "" {
103 return ai.NewTextErrorResponse("pattern is required"), nil
104 }
105
106 // If literal_text is true, escape the pattern
107 searchPattern := params.Pattern
108 if params.LiteralText {
109 searchPattern = escapeRegexPattern(params.Pattern)
110 }
111
112 searchPath := params.Path
113 if searchPath == "" {
114 searchPath = workingDir
115 }
116
117 matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
118 if err != nil {
119 return ai.ToolResponse{}, fmt.Errorf("error searching files: %w", err)
120 }
121
122 var output strings.Builder
123 if len(matches) == 0 {
124 output.WriteString("No files found")
125 } else {
126 fmt.Fprintf(&output, "Found %d matches\n", len(matches))
127
128 currentFile := ""
129 for _, match := range matches {
130 if currentFile != match.path {
131 if currentFile != "" {
132 output.WriteString("\n")
133 }
134 currentFile = match.path
135 fmt.Fprintf(&output, "%s:\n", match.path)
136 }
137 if match.lineNum > 0 {
138 fmt.Fprintf(&output, " Line %d: %s\n", match.lineNum, match.lineText)
139 } else {
140 fmt.Fprintf(&output, " %s\n", match.path)
141 }
142 }
143
144 if truncated {
145 output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
146 }
147 }
148
149 return ai.WithResponseMetadata(
150 ai.NewTextResponse(output.String()),
151 GrepResponseMetadata{
152 NumberOfMatches: len(matches),
153 Truncated: truncated,
154 },
155 ), nil
156 })
157}
158
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) preceded by a backslash, so the
// result matches pattern as literal text when compiled as a regex.
//
// It delegates to regexp.QuoteMeta, which escapes exactly that character set
// in a single pass.
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
170
171func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
172 matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
173 if err != nil {
174 matches, err = searchFilesWithRegex(pattern, rootPath, include)
175 if err != nil {
176 return nil, false, err
177 }
178 }
179
180 sort.Slice(matches, func(i, j int) bool {
181 return matches[i].modTime.After(matches[j].modTime)
182 })
183
184 truncated := len(matches) > limit
185 if truncated {
186 matches = matches[:limit]
187 }
188
189 return matches, truncated, nil
190}
191
192func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
193 cmd := getRgSearchCmd(ctx, pattern, path, include)
194 if cmd == nil {
195 return nil, fmt.Errorf("ripgrep not found in $PATH")
196 }
197
198 // Only add ignore files if they exist
199 for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
200 ignorePath := filepath.Join(path, ignoreFile)
201 if _, err := os.Stat(ignorePath); err == nil {
202 cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
203 }
204 }
205
206 output, err := cmd.Output()
207 if err != nil {
208 if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
209 return []grepMatch{}, nil
210 }
211 return nil, err
212 }
213
214 lines := strings.Split(strings.TrimSpace(string(output)), "\n")
215 matches := make([]grepMatch, 0, len(lines))
216
217 for _, line := range lines {
218 if line == "" {
219 continue
220 }
221
222 // Parse ripgrep output using null separation
223 filePath, lineNumStr, lineText, ok := parseRipgrepLine(line)
224 if !ok {
225 continue
226 }
227
228 lineNum, err := strconv.Atoi(lineNumStr)
229 if err != nil {
230 continue
231 }
232
233 fileInfo, err := os.Stat(filePath)
234 if err != nil {
235 continue // Skip files we can't access
236 }
237
238 matches = append(matches, grepMatch{
239 path: filePath,
240 modTime: fileInfo.ModTime(),
241 lineNum: lineNum,
242 lineText: lineText,
243 })
244 }
245
246 return matches, nil
247}
248
// parseRipgrepLine splits one line of ripgrep output of the form
// "<path>\x00<line number>:<text>" into its three parts.
//
// The path is delimited by a NUL byte rather than a colon, so paths that
// themselves contain colons (for example Windows drive letters) are handled.
// lineNum is returned as the original decimal string. ok is false, and all
// strings are empty, when the NUL or the colon is missing or when the line
// number is not an integer.
func parseRipgrepLine(line string) (filePath, lineNum, lineText string, ok bool) {
	name, rest, found := strings.Cut(line, "\x00")
	if !found {
		return "", "", "", false
	}

	// rest is "linenum:content"; only the first colon is a separator, any
	// further colons belong to the matched text.
	number, text, found := strings.Cut(rest, ":")
	if !found {
		return "", "", "", false
	}

	if _, convErr := strconv.Atoi(number); convErr != nil {
		return "", "", "", false
	}

	return name, number, text, true
}
275
276func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
277 matches := []grepMatch{}
278
279 // Use cached regex compilation
280 regex, err := searchRegexCache.get(pattern)
281 if err != nil {
282 return nil, fmt.Errorf("invalid regex pattern: %w", err)
283 }
284
285 var includePattern *regexp.Regexp
286 if include != "" {
287 regexPattern := globToRegex(include)
288 includePattern, err = globRegexCache.get(regexPattern)
289 if err != nil {
290 return nil, fmt.Errorf("invalid include pattern: %w", err)
291 }
292 }
293
294 // Create walker with gitignore and crushignore support
295 walker := fsext.NewFastGlobWalker(rootPath)
296
297 err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
298 if err != nil {
299 return nil // Skip errors
300 }
301
302 if info.IsDir() {
303 // Check if directory should be skipped
304 if walker.ShouldSkip(path) {
305 return filepath.SkipDir
306 }
307 return nil // Continue into directory
308 }
309
310 // Use walker's shouldSkip method for files
311 if walker.ShouldSkip(path) {
312 return nil
313 }
314
315 // Skip hidden files (starting with a dot) to match ripgrep's default behavior
316 base := filepath.Base(path)
317 if base != "." && strings.HasPrefix(base, ".") {
318 return nil
319 }
320
321 if includePattern != nil && !includePattern.MatchString(path) {
322 return nil
323 }
324
325 match, lineNum, lineText, err := fileContainsPattern(path, regex)
326 if err != nil {
327 return nil // Skip files we can't read
328 }
329
330 if match {
331 matches = append(matches, grepMatch{
332 path: path,
333 modTime: info.ModTime(),
334 lineNum: lineNum,
335 lineText: lineText,
336 })
337
338 if len(matches) >= 200 {
339 return filepath.SkipAll
340 }
341 }
342
343 return nil
344 })
345 if err != nil {
346 return nil, err
347 }
348
349 return matches, nil
350}
351
352func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
353 // Quick binary file detection
354 if isBinaryFile(filePath) {
355 return false, 0, "", nil
356 }
357
358 file, err := os.Open(filePath)
359 if err != nil {
360 return false, 0, "", err
361 }
362 defer file.Close()
363
364 scanner := bufio.NewScanner(file)
365 lineNum := 0
366 for scanner.Scan() {
367 lineNum++
368 line := scanner.Text()
369 if pattern.MatchString(line) {
370 return true, lineNum, line, nil
371 }
372 }
373
374 return false, 0, "", scanner.Err()
375}
376
// binaryExts is the set of lower-case file extensions (including the leading
// dot) that isBinaryFile treats as binary without inspecting file contents.
var binaryExts = map[string]struct{}{
	// Executables, libraries and object code.
	".exe":   {},
	".dll":   {},
	".so":    {},
	".dylib": {},
	".bin":   {},
	".obj":   {},
	".o":     {},
	".a":     {},

	// Archives and compressed files.
	".zip": {},
	".tar": {},
	".gz":  {},
	".bz2": {},

	// Images.
	".jpg":  {},
	".jpeg": {},
	".png":  {},
	".gif":  {},

	// Documents.
	".pdf":  {},
	".doc":  {},
	".docx": {},
	".xls":  {},

	// Audio and video.
	".mp3": {},
	".mp4": {},
	".avi": {},
	".mov": {},
}
385
386// isBinaryFile performs a quick check to determine if a file is binary
387func isBinaryFile(filePath string) bool {
388 // Check file extension first (fastest)
389 ext := strings.ToLower(filepath.Ext(filePath))
390 if _, isBinary := binaryExts[ext]; isBinary {
391 return true
392 }
393
394 // Quick content check for files without clear extensions
395 file, err := os.Open(filePath)
396 if err != nil {
397 return false // If we can't open it, let the caller handle the error
398 }
399 defer file.Close()
400
401 // Read first 512 bytes to check for null bytes
402 buffer := make([]byte, 512)
403 n, err := file.Read(buffer)
404 if err != nil && err != io.EOF {
405 return false
406 }
407
408 // Check for null bytes (common in binary files)
409 for i := range n {
410 if buffer[i] == 0 {
411 return true
412 }
413 }
414
415 return false
416}
417
418func globToRegex(glob string) string {
419 regexPattern := strings.ReplaceAll(glob, ".", "\\.")
420 regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
421 regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
422
423 // Use pre-compiled regex instead of compiling each time
424 regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
425 inner := match[1 : len(match)-1]
426 return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
427 })
428
429 return regexPattern
430}