grep.go

  1package tools
  2
  3import (
  4	"bufio"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"os"
  9	"os/exec"
 10	"path/filepath"
 11	"regexp"
 12	"sort"
 13	"strconv"
 14	"strings"
 15	"sync"
 16	"time"
 17
 18	"github.com/charmbracelet/crush/internal/config"
 19	"github.com/charmbracelet/crush/internal/fileutil"
 20)
 21
 22// regexCache provides thread-safe caching of compiled regex patterns
 23type regexCache struct {
 24	cache map[string]*regexp.Regexp
 25	mu    sync.RWMutex
 26}
 27
 28// newRegexCache creates a new regex cache
 29func newRegexCache() *regexCache {
 30	return &regexCache{
 31		cache: make(map[string]*regexp.Regexp),
 32	}
 33}
 34
 35// get retrieves a compiled regex from cache or compiles and caches it
 36func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
 37	// Try to get from cache first (read lock)
 38	rc.mu.RLock()
 39	if regex, exists := rc.cache[pattern]; exists {
 40		rc.mu.RUnlock()
 41		return regex, nil
 42	}
 43	rc.mu.RUnlock()
 44
 45	// Compile the regex (write lock)
 46	rc.mu.Lock()
 47	defer rc.mu.Unlock()
 48
 49	// Double-check in case another goroutine compiled it while we waited
 50	if regex, exists := rc.cache[pattern]; exists {
 51		return regex, nil
 52	}
 53
 54	// Compile and cache the regex
 55	regex, err := regexp.Compile(pattern)
 56	if err != nil {
 57		return nil, err
 58	}
 59
 60	rc.cache[pattern] = regex
 61	return regex, nil
 62}
 63
 64// Global regex cache instances
 65var (
 66	searchRegexCache = newRegexCache()
 67	globRegexCache   = newRegexCache()
 68	// Pre-compiled regex for glob conversion (used frequently)
 69	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
 70)
 71
// GrepParams is the JSON payload accepted by the grep tool; Run decodes the
// tool call input into it.
type GrepParams struct {
	// Pattern is the text or regular expression to search for. Run rejects
	// an empty pattern.
	Pattern     string `json:"pattern"`
	// Path is the directory to search. When empty, Run falls back to the
	// configured working directory.
	Path        string `json:"path"`
	// Include is an optional glob (e.g. "*.go") restricting which files are
	// searched.
	Include     string `json:"include"`
	// LiteralText makes Run escape regex metacharacters in Pattern so it is
	// matched verbatim.
	LiteralText bool   `json:"literal_text"`
}
 78
// grepMatch is one search hit as produced by the ripgrep and pure-Go search
// paths.
type grepMatch struct {
	// path is the file the match was found in.
	path     string
	// modTime is the file's modification time; searchFiles sorts on it.
	modTime  time.Time
	// lineNum is the line number of the match. Run prints a "Line N:" entry
	// only when it is greater than zero.
	lineNum  int
	// lineText is the content of the matching line.
	lineText string
}
 85
// GrepResponseMetadata is the structured metadata Run attaches to its
// response.
type GrepResponseMetadata struct {
	// NumberOfMatches is the number of matches included in the response
	// (counted after truncation).
	NumberOfMatches int  `json:"number_of_matches"`
	// Truncated reports whether matches beyond the result limit were dropped.
	Truncated       bool `json:"truncated"`
}
 90
// grepTool is the grep tool implementation returned by NewGrepTool. It has
// no fields.
type grepTool struct{}
 92
const (
	// GrepToolName is the name reported by the tool's Info method.
	GrepToolName    = "grep"
	// grepDescription is the usage text reported by the tool's Info method.
	// It is runtime text, not documentation: edit it only deliberately.
	grepDescription = `Fast content search tool that finds files containing specific text or patterns, returning matching file paths sorted by modification time (newest first).

WHEN TO USE THIS TOOL:
- Use when you need to find files containing specific text or patterns
- Great for searching code bases for function names, variable declarations, or error messages
- Useful for finding all files that use a particular API or pattern

HOW TO USE:
- Provide a regex pattern to search for within file contents
- Set literal_text=true if you want to search for the exact text with special characters (recommended for non-regex users)
- Optionally specify a starting directory (defaults to current working directory)
- Optionally provide an include pattern to filter which files to search
- Results are sorted with most recently modified files first

REGEX PATTERN SYNTAX (when literal_text=false):
- Supports standard regular expression syntax
- 'function' searches for the literal text "function"
- 'log\..*Error' finds text starting with "log." and ending with "Error"
- 'import\s+.*\s+from' finds import statements in JavaScript/TypeScript

COMMON INCLUDE PATTERN EXAMPLES:
- '*.js' - Only search JavaScript files
- '*.{ts,tsx}' - Only search TypeScript files
- '*.go' - Only search Go files

LIMITATIONS:
- Results are limited to 100 files (newest first)
- Performance depends on the number of files being searched
- Very large binary files may be skipped
- Hidden files (starting with '.') are skipped

TIPS:
- For faster, more targeted searches, first use Glob to find relevant files, then use Grep
- When doing iterative exploration that may require multiple rounds of searching, consider using the Agent tool instead
- Always check if results are truncated and refine your search pattern if needed
- Use literal_text=true when searching for exact text containing special characters like dots, parentheses, etc.`
)
132
133func NewGrepTool() BaseTool {
134	return &grepTool{}
135}
136
137func (g *grepTool) Info() ToolInfo {
138	return ToolInfo{
139		Name:        GrepToolName,
140		Description: grepDescription,
141		Parameters: map[string]any{
142			"pattern": map[string]any{
143				"type":        "string",
144				"description": "The regex pattern to search for in file contents",
145			},
146			"path": map[string]any{
147				"type":        "string",
148				"description": "The directory to search in. Defaults to the current working directory.",
149			},
150			"include": map[string]any{
151				"type":        "string",
152				"description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")",
153			},
154			"literal_text": map[string]any{
155				"type":        "boolean",
156				"description": "If true, the pattern will be treated as literal text with special regex characters escaped. Default is false.",
157			},
158		},
159		Required: []string{"pattern"},
160	}
161}
162
// escapeRegexPattern returns pattern with every regular-expression
// metacharacter (\ . + * ? ( ) | [ ] { } ^ $) preceded by a backslash, so the
// result matches pattern as literal text.
//
// It delegates to regexp.QuoteMeta, which escapes that same character set in
// a single pass and does not depend on the order in which characters are
// handled (a hand-rolled replace loop must treat the backslash first or it
// double-escapes its own output).
func escapeRegexPattern(pattern string) string {
	return regexp.QuoteMeta(pattern)
}
174
175func (g *grepTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
176	var params GrepParams
177	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
178		return NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
179	}
180
181	if params.Pattern == "" {
182		return NewTextErrorResponse("pattern is required"), nil
183	}
184
185	// If literal_text is true, escape the pattern
186	searchPattern := params.Pattern
187	if params.LiteralText {
188		searchPattern = escapeRegexPattern(params.Pattern)
189	}
190
191	searchPath := params.Path
192	if searchPath == "" {
193		searchPath = config.WorkingDirectory()
194	}
195
196	matches, truncated, err := searchFiles(searchPattern, searchPath, params.Include, 100)
197	if err != nil {
198		return ToolResponse{}, fmt.Errorf("error searching files: %w", err)
199	}
200
201	var output strings.Builder
202	if len(matches) == 0 {
203		output.WriteString("No files found")
204	} else {
205		fmt.Fprintf(&output, "Found %d matches\n", len(matches))
206
207		currentFile := ""
208		for _, match := range matches {
209			if currentFile != match.path {
210				if currentFile != "" {
211					output.WriteString("\n")
212				}
213				currentFile = match.path
214				fmt.Fprintf(&output, "%s:\n", match.path)
215			}
216			if match.lineNum > 0 {
217				fmt.Fprintf(&output, "  Line %d: %s\n", match.lineNum, match.lineText)
218			} else {
219				fmt.Fprintf(&output, "  %s\n", match.path)
220			}
221		}
222
223		if truncated {
224			output.WriteString("\n(Results are truncated. Consider using a more specific path or pattern.)")
225		}
226	}
227
228	return WithResponseMetadata(
229		NewTextResponse(output.String()),
230		GrepResponseMetadata{
231			NumberOfMatches: len(matches),
232			Truncated:       truncated,
233		},
234	), nil
235}
236
237func searchFiles(pattern, rootPath, include string, limit int) ([]grepMatch, bool, error) {
238	matches, err := searchWithRipgrep(pattern, rootPath, include)
239	if err != nil {
240		matches, err = searchFilesWithRegex(pattern, rootPath, include)
241		if err != nil {
242			return nil, false, err
243		}
244	}
245
246	sort.Slice(matches, func(i, j int) bool {
247		return matches[i].modTime.After(matches[j].modTime)
248	})
249
250	truncated := len(matches) > limit
251	if truncated {
252		matches = matches[:limit]
253	}
254
255	return matches, truncated, nil
256}
257
258func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) {
259	_, err := exec.LookPath("rg")
260	if err != nil {
261		return nil, fmt.Errorf("ripgrep not found: %w", err)
262	}
263
264	// Use -n to show line numbers and include the matched line
265	args := []string{"-n", pattern}
266	if include != "" {
267		args = append(args, "--glob", include)
268	}
269	args = append(args, path)
270
271	cmd := exec.Command("rg", args...)
272	output, err := cmd.Output()
273	if err != nil {
274		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
275			return []grepMatch{}, nil
276		}
277		return nil, err
278	}
279
280	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
281	matches := make([]grepMatch, 0, len(lines))
282
283	for _, line := range lines {
284		if line == "" {
285			continue
286		}
287
288		// Parse ripgrep output format: file:line:content
289		parts := strings.SplitN(line, ":", 3)
290		if len(parts) < 3 {
291			continue
292		}
293
294		filePath := parts[0]
295		lineNum, err := strconv.Atoi(parts[1])
296		if err != nil {
297			continue
298		}
299		lineText := parts[2]
300
301		fileInfo, err := os.Stat(filePath)
302		if err != nil {
303			continue // Skip files we can't access
304		}
305
306		matches = append(matches, grepMatch{
307			path:     filePath,
308			modTime:  fileInfo.ModTime(),
309			lineNum:  lineNum,
310			lineText: lineText,
311		})
312	}
313
314	return matches, nil
315}
316
317func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
318	matches := []grepMatch{}
319
320	// Use cached regex compilation
321	regex, err := searchRegexCache.get(pattern)
322	if err != nil {
323		return nil, fmt.Errorf("invalid regex pattern: %w", err)
324	}
325
326	var includePattern *regexp.Regexp
327	if include != "" {
328		regexPattern := globToRegex(include)
329		includePattern, err = globRegexCache.get(regexPattern)
330		if err != nil {
331			return nil, fmt.Errorf("invalid include pattern: %w", err)
332		}
333	}
334
335	err = filepath.Walk(rootPath, func(path string, info os.FileInfo, err error) error {
336		if err != nil {
337			return nil // Skip errors
338		}
339
340		if info.IsDir() {
341			return nil // Skip directories
342		}
343
344		if fileutil.SkipHidden(path) {
345			return nil
346		}
347
348		if includePattern != nil && !includePattern.MatchString(path) {
349			return nil
350		}
351
352		match, lineNum, lineText, err := fileContainsPattern(path, regex)
353		if err != nil {
354			return nil // Skip files we can't read
355		}
356
357		if match {
358			matches = append(matches, grepMatch{
359				path:     path,
360				modTime:  info.ModTime(),
361				lineNum:  lineNum,
362				lineText: lineText,
363			})
364
365			if len(matches) >= 200 {
366				return filepath.SkipAll
367			}
368		}
369
370		return nil
371	})
372	if err != nil {
373		return nil, err
374	}
375
376	return matches, nil
377}
378
379func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) {
380	file, err := os.Open(filePath)
381	if err != nil {
382		return false, 0, "", err
383	}
384	defer file.Close()
385
386	scanner := bufio.NewScanner(file)
387	lineNum := 0
388	for scanner.Scan() {
389		lineNum++
390		line := scanner.Text()
391		if pattern.MatchString(line) {
392			return true, lineNum, line, nil
393		}
394	}
395
396	return false, 0, "", scanner.Err()
397}
398
399func globToRegex(glob string) string {
400	regexPattern := strings.ReplaceAll(glob, ".", "\\.")
401	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
402	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
403
404	// Use pre-compiled regex instead of compiling each time
405	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
406		inner := match[1 : len(match)-1]
407		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
408	})
409
410	return regexPattern
411}