fix: regex pattern caching to eliminate repeated compilation overhead

Raphael Amorim created

Change summary

internal/diff/diff.go      | 12 +++++--
internal/llm/tools/grep.go | 60 +++++++++++++++++++++++++++++++++++++--
2 files changed, 64 insertions(+), 8 deletions(-)

Detailed changes

internal/diff/diff.go 🔗

@@ -16,6 +16,12 @@ import (
 	"github.com/sergi/go-diff/diffmatchpatch"
 )
 
+// Pre-compiled regex patterns for better performance
+var (
+	hunkHeaderRegex = regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`)
+	ansiRegex       = regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`)
+)
+
 // -------------------------------------------------------------------------
 // Core Types
 // -------------------------------------------------------------------------
@@ -129,7 +135,6 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) {
 	var result DiffResult
 	var currentHunk *Hunk
 
-	hunkHeaderRe := regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`)
 	lines := strings.Split(diff, "\n")
 
 	var oldLine, newLine int
@@ -150,7 +155,7 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) {
 		}
 
 		// Parse hunk headers
-		if matches := hunkHeaderRe.FindStringSubmatch(line); matches != nil {
+		if matches := hunkHeaderRegex.FindStringSubmatch(line); matches != nil {
 			if currentHunk != nil {
 				result.Hunks = append(result.Hunks, *currentHunk)
 			}
@@ -343,8 +348,7 @@ func createStyles(t *styles.Theme) (removedLineStyle, addedLineStyle, contextLin
 
 // applyHighlighting applies intra-line highlighting to a piece of text
 func applyHighlighting(content string, segments []Segment, segmentType LineType, highlightBg color.Color) string {
-	// Find all ANSI sequences in the content
-	ansiRegex := regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`)
+	// Find all ANSI sequences in the content using pre-compiled regex
 	ansiMatches := ansiRegex.FindAllStringIndex(content, -1)
 
 	// Build a mapping of visible character positions to their actual indices

internal/llm/tools/grep.go 🔗

@@ -12,12 +12,63 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/config"
 	"github.com/charmbracelet/crush/internal/fileutil"
 )
 
+// regexCache provides thread-safe caching of compiled regex patterns
+type regexCache struct {
+	cache map[string]*regexp.Regexp
+	mu    sync.RWMutex
+}
+
+// newRegexCache creates a new regex cache
+func newRegexCache() *regexCache {
+	return &regexCache{
+		cache: make(map[string]*regexp.Regexp),
+	}
+}
+
+// get retrieves a compiled regex from cache or compiles and caches it
+func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
+	// Try to get from cache first (read lock)
+	rc.mu.RLock()
+	if regex, exists := rc.cache[pattern]; exists {
+		rc.mu.RUnlock()
+		return regex, nil
+	}
+	rc.mu.RUnlock()
+
+	// Compile the regex (write lock)
+	rc.mu.Lock()
+	defer rc.mu.Unlock()
+
+	// Double-check in case another goroutine compiled it while we waited
+	if regex, exists := rc.cache[pattern]; exists {
+		return regex, nil
+	}
+
+	// Compile and cache the regex
+	regex, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+
+	rc.cache[pattern] = regex
+	return regex, nil
+}
+
+// Global regex cache instances
+var (
+	searchRegexCache = newRegexCache()
+	globRegexCache   = newRegexCache()
+	// Pre-compiled regex for glob conversion (used frequently)
+	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
+)
+
 type GrepParams struct {
 	Pattern     string `json:"pattern"`
 	Path        string `json:"path"`
@@ -266,7 +317,8 @@ func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) {
 func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
 	matches := []grepMatch{}
 
-	regex, err := regexp.Compile(pattern)
+	// Use cached regex compilation
+	regex, err := searchRegexCache.get(pattern)
 	if err != nil {
 		return nil, fmt.Errorf("invalid regex pattern: %w", err)
 	}
@@ -274,7 +326,7 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
 	var includePattern *regexp.Regexp
 	if include != "" {
 		regexPattern := globToRegex(include)
-		includePattern, err = regexp.Compile(regexPattern)
+		includePattern, err = globRegexCache.get(regexPattern)
 		if err != nil {
 			return nil, fmt.Errorf("invalid include pattern: %w", err)
 		}
@@ -349,8 +401,8 @@ func globToRegex(glob string) string {
 	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
 	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
 
-	re := regexp.MustCompile(`\{([^}]+)\}`)
-	regexPattern = re.ReplaceAllStringFunc(regexPattern, func(match string) string {
+	// Use pre-compiled regex instead of compiling each time
+	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
 		inner := match[1 : len(match)-1]
 		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
 	})