grep.go

  1package tools
  2
import (
	"bufio"
	"bytes"
	"context"
	_ "embed"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"

	"charm.land/fantasy"
	"github.com/charmbracelet/crush/internal/fsext"
)
 24
 25// regexCache provides thread-safe caching of compiled regex patterns
 26type regexCache struct {
 27	cache map[string]*regexp.Regexp
 28	mu    sync.RWMutex
 29}
 30
 31// newRegexCache creates a new regex cache
 32func newRegexCache() *regexCache {
 33	return &regexCache{
 34		cache: make(map[string]*regexp.Regexp),
 35	}
 36}
 37
 38// get retrieves a compiled regex from cache or compiles and caches it
 39func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 40	// Try to get from cache first (read lock)
 41	rc.mu.RLock()
 42	if regex, exists := rc.cache[pattern]; exists {
 43		rc.mu.RUnlock()
 44		return regex, nil
 45	}
 46	rc.mu.RUnlock()
 47
 48	// Compile the regex (write lock)
 49	rc.mu.Lock()
 50	defer rc.mu.Unlock()
 51
 52	// Double-check in case another goroutine compiled it while we waited
 53	if regex, exists := rc.cache[pattern]; exists {
 54		return regex, nil
 55	}
 56
 57	// Compile and cache the regex
 58	regex, err := regexp.Compile(pattern)
 59	if err != nil {
 60		return nil, err
 61	}
 62
 63	rc.cache[pattern] = regex
 64	return regex, nil
 65}
 66
 67// ResetCache clears compiled regex caches to prevent unbounded growth across sessions.
 68func ResetCache() {
 69	searchRegexCache.mu.Lock()
 70	clear(searchRegexCache.cache)
 71	searchRegexCache.mu.Unlock()
 72
 73	globRegexCache.mu.Lock()
 74	clear(globRegexCache.cache)
 75	globRegexCache.mu.Unlock()
 76}
 77
// Global regex cache instances
var (
	// searchRegexCache caches compiled content-search patterns.
	searchRegexCache = newRegexCache()
	// globRegexCache caches regexes derived from include globs.
	globRegexCache   = newRegexCache()
	// Pre-compiled regex for glob conversion (used frequently); matches one
	// "{a,b,...}" alternation group.
	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
)
 85
// GrepParams are the arguments for the grep tool, decoded from the model's
// JSON tool call.
type GrepParams struct {
	Pattern     string `json:"pattern" description:"The regex pattern to search for in file contents"`
	Path        string `json:"path,omitempty" description:"The directory to search in. Defaults to the current working directory."`
	Include     string `json:"include,omitempty" description:"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")"`
	LiteralText bool   `json:"literal_text,omitempty" description:"If true, the pattern will be treated as literal text with special regex characters escaped. Default is false."`
}
 92
// grepMatch is a single search hit, used internally for sorting and output.
type grepMatch struct {
	path     string    // file containing the match
	modTime  time.Time // file modification time, used to sort newest-first
	lineNum  int       // 1-based line number; 0 when unknown
	charNum  int       // 1-based column of the match within the line; 0 when unknown
	lineText string    // text of the matching line
}
100
// GrepResponseMetadata is attached to the tool response so callers can see
// how many matches were found and whether the result list was truncated.
type GrepResponseMetadata struct {
	NumberOfMatches int  `json:"number_of_matches"`
	Truncated       bool `json:"truncated"`
}
105
const (
	// GrepToolName is the registered name of the grep tool.
	GrepToolName = "grep"
	// maxGrepContentWidth caps the number of characters of a matched line
	// included in the output before it is truncated with "...".
	maxGrepContentWidth = 500
)
110
// grepDescription is the long-form tool description, embedded from grep.md
// at build time.
//
//go:embed grep.md
var grepDescription []byte
113
// escapeRegexPattern escapes special regex characters so they're treated as
// literal characters, e.g. "a.b*" becomes `a\.b\*`. Used when the caller
// requests a literal-text search.
func escapeRegexPattern(pattern string) string {
	// regexp.QuoteMeta escapes exactly the metacharacter set the previous
	// hand-rolled replacement loop handled (\ . + * ? ( ) | [ ] { } ^ $),
	// in a single pass.
	return regexp.QuoteMeta(pattern)
}
125
126func NewGrepTool(workingDir string) fantasy.AgentTool {
127	return fantasy.NewAgentTool(
128		GrepToolName,
129		string(grepDescription),
130		func(ctx context.Context, params GrepParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
131			if params.Pattern == "" {
132				return fantasy.NewTextErrorResponse("pattern is required"), nil
133			}
134
135			// If literal_text is true, escape the pattern
136			searchPattern := params.Pattern
137			if params.LiteralText {
138				searchPattern = escapeRegexPattern(params.Pattern)
139			}
140
141			searchPath := params.Path
142			if searchPath == "" {
143				searchPath = workingDir
144			}
145
146			matches, truncated, err := searchFiles(ctx, searchPattern, searchPath, params.Include, 100)
147			if err != nil {
148				return fantasy.NewTextErrorResponse(fmt.Sprintf("error searching files: %v", err)), nil
149			}
150
151			var output strings.Builder
152			if len(matches) == 0 {
153				output.WriteString("No files found")
154			} else {
155				fmt.Fprintf(&output, "Found %d matches\n", len(matches))
156
157				currentFile := ""
158				for _, match := range matches {
159					if currentFile != match.path {
160						if currentFile != "" {
161							output.WriteString("\n")
162						}
163						currentFile = match.path
164						fmt.Fprintf(&output, "%s:\n", filepath.ToSlash(match.path))
165					}
166					if match.lineNum > 0 {
167						lineText := match.lineText
168						if len(lineText) > maxGrepContentWidth {
169							lineText = lineText[:maxGrepContentWidth] + "..."
170						}
171						if match.charNum > 0 {
172							fmt.Fprintf(&output, "  Line %d, Char %d: %s\n", match.lineNum, match.charNum, lineText)
173						} else {
174							fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, lineText)
175						}
176					} else {
177						fmt.Fprintf(&output, "  %s\n", match.path)
178					}
179				}
180
181				if truncated {
182					output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
183				}
184			}
185
186			return fantasy.WithResponseMetadata(
187				fantasy.NewTextResponse(output.String()),
188				GrepResponseMetadata{
189					NumberOfMatches: len(matches),
190					Truncated:       truncated,
191				},
192			), nil
193		})
194}
195
196func searchFiles(ctx context.Context, pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
197	matches, err := searchWithRipgrep(ctx, pattern, rootPath, include)
198	if err != nil {
199		matches, err = searchFilesWithRegex(pattern, rootPath, include)
200		if err != nil {
201			return nil, false, err
202		}
203	}
204
205	sort.Slice(matches, func(i, j int) bool {
206		return matches[i].modTime.After(matches[j].modTime)
207	})
208
209	truncated := len(matches) > limit
210	if truncated {
211		matches = matches[:limit]
212	}
213
214	return matches, truncated, nil
215}
216
217func searchWithRipgrep(ctx context.Context, pattern, path, include string) ([]grepMatch, error) {
218	cmd := getRgSearchCmd(ctx, pattern, path, include)
219	if cmd == nil {
220		return nil, fmt.Errorf("ripgrep not found in $PATH")
221	}
222
223	// Only add ignore files if they exist
224	for _, ignoreFile := range []string{".gitignore", ".crushignore"} {
225		ignorePath := filepath.Join(path, ignoreFile)
226		if _, err := os.Stat(ignorePath); err == nil {
227			cmd.Args = append(cmd.Args, "--ignore-file", ignorePath)
228		}
229	}
230
231	output, err := cmd.Output()
232	if err != nil {
233		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
234			return []grepMatch{}, nil
235		}
236		return nil, err
237	}
238
239	var matches []grepMatch
240	for line := range bytes.SplitSeq(bytes.TrimSpace(output), []byte{'\n'}) {
241		if len(line) == 0 {
242			continue
243		}
244		var match ripgrepMatch
245		if err := json.Unmarshal(line, &match); err != nil {
246			continue
247		}
248		if match.Type != "match" {
249			continue
250		}
251		for _, m := range match.Data.Submatches {
252			fi, err := os.Stat(match.Data.Path.Text)
253			if err != nil {
254				continue // Skip files we can't access
255			}
256			matches = append(matches, grepMatch{
257				path:     match.Data.Path.Text,
258				modTime:  fi.ModTime(),
259				lineNum:  match.Data.LineNumber,
260				charNum:  m.Start + 1, // ensure 1-based
261				lineText: strings.TrimSpace(match.Data.Lines.Text),
262			})
263			// only get the first match of each line
264			break
265		}
266	}
267	return matches, nil
268}
269
// ripgrepMatch mirrors the subset of ripgrep's --json "match" event that this
// tool consumes; fields not listed here are ignored by json.Unmarshal.
type ripgrepMatch struct {
	Type string `json:"type"`
	Data struct {
		Path struct {
			Text string `json:"text"`
		} `json:"path"`
		Lines struct {
			Text string `json:"text"`
		} `json:"lines"`
		LineNumber int `json:"line_number"`
		Submatches []struct {
			// Start is the byte offset of the submatch within the line.
			Start int `json:"start"`
		} `json:"submatches"`
	} `json:"data"`
}
285
286func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
287	matches := []grepMatch{}
288
289	// Use cached regex compilation
290	regex, err := searchRegexCache.get(pattern)
291	if err != nil {
292		return nil, fmt.Errorf("invalid regex pattern: %w", err)
293	}
294
295	var includePattern *regexp.Regexp
296	if include != "" {
297		regexPattern := globToRegex(include)
298		includePattern, err = globRegexCache.get(regexPattern)
299		if err != nil {
300			return nil, fmt.Errorf("invalid include pattern: %w", err)
301		}
302	}
303
304	// Create walker with gitignore and crushignore support
305	walker := fsext.NewFastGlobWalker(rootPath)
306
307	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
308		if err != nil {
309			return nil // Skip errors
310		}
311
312		if info.IsDir() {
313			// Check if directory should be skipped
314			if walker.ShouldSkip(path) {
315				return filepath.SkipDir
316			}
317			return nil // Continue into directory
318		}
319
320		// Use walker's shouldSkip method for files
321		if walker.ShouldSkip(path) {
322			return nil
323		}
324
325		// Skip hidden files (starting with a dot) to match ripgrep's default behavior
326		base := filepath.Base(path)
327		if base != "." && strings.HasPrefix(base, ".") {
328			return nil
329		}
330
331		if includePattern != nil && !includePattern.MatchString(path) {
332			return nil
333		}
334
335		match, lineNum, charNum, lineText, err := fileContainsPattern(path, regex)
336		if err != nil {
337			return nil // Skip files we can't read
338		}
339
340		if match {
341			matches = append(matches, grepMatch{
342				path:     path,
343				modTime:  info.ModTime(),
344				lineNum:  lineNum,
345				charNum:  charNum,
346				lineText: lineText,
347			})
348
349			if len(matches) >= 200 {
350				return filepath.SkipAll
351			}
352		}
353
354		return nil
355	})
356	if err != nil {
357		return nil, err
358	}
359
360	return matches, nil
361}
362
363func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, int, string, error) {
364	// Only search text files.
365	if !isTextFile(filePath) {
366		return false, 0, 0, "", nil
367	}
368
369	file, err := os.Open(filePath)
370	if err != nil {
371		return false, 0, 0, "", err
372	}
373	defer file.Close()
374
375	scanner := bufio.NewScanner(file)
376	lineNum := 0
377	for scanner.Scan() {
378		lineNum++
379		line := scanner.Text()
380		if loc := pattern.FindStringIndex(line); loc != nil {
381			charNum := loc[0] + 1
382			return true, lineNum, charNum, line, nil
383		}
384	}
385
386	return false, 0, 0, "", scanner.Err()
387}
388
// isTextFile reports whether filePath looks like a text file, judged by
// sniffing the MIME type of its first 512 bytes. Unreadable files are
// treated as non-text.
func isTextFile(filePath string) bool {
	f, err := os.Open(filePath)
	if err != nil {
		return false
	}
	defer f.Close()

	// http.DetectContentType considers at most the first 512 bytes.
	var head [512]byte
	n, err := f.Read(head[:])
	if err != nil && err != io.EOF {
		return false
	}

	contentType := http.DetectContentType(head[:n])
	switch {
	case strings.HasPrefix(contentType, "text/"):
		return true
	case contentType == "application/json",
		contentType == "application/xml",
		contentType == "application/javascript",
		contentType == "application/x-sh":
		return true
	default:
		return false
	}
}
414
415func globToRegex(glob string) string {
416	regexPattern := strings.ReplaceAll(glob, ".", "\\.")
417	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
418	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
419
420	// Use pre-compiled regex instead of compiling each time
421	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
422		inner := match[1 : len(match)-1]
423		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
424	})
425
426	return regexPattern
427}