Merge pull request #22 from charmbracelet/perf-regex-improvements

Kujtim Hoxha created

Perf regex improvements

Change summary

internal/diff/diff.go           | 12 +++-
internal/llm/tools/grep.go      | 60 +++++++++++++++++++++++++-
internal/llm/tools/grep_test.go | 77 +++++++++++++++++++++++++++++++++++
3 files changed, 141 insertions(+), 8 deletions(-)

Detailed changes

internal/diff/diff.go 🔗

@@ -16,6 +16,12 @@ import (
 	"github.com/sergi/go-diff/diffmatchpatch"
 )
 
+// Pre-compiled regex patterns for better performance
+var (
+	hunkHeaderRegex = regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`)
+	ansiRegex       = regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`)
+)
+
 // -------------------------------------------------------------------------
 // Core Types
 // -------------------------------------------------------------------------
@@ -129,7 +135,6 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) {
 	var result DiffResult
 	var currentHunk *Hunk
 
-	hunkHeaderRe := regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`)
 	lines := strings.Split(diff, "\n")
 
 	var oldLine, newLine int
@@ -150,7 +155,7 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) {
 		}
 
 		// Parse hunk headers
-		if matches := hunkHeaderRe.FindStringSubmatch(line); matches != nil {
+		if matches := hunkHeaderRegex.FindStringSubmatch(line); matches != nil {
 			if currentHunk != nil {
 				result.Hunks = append(result.Hunks, *currentHunk)
 			}
@@ -343,8 +348,7 @@ func createStyles(t *styles.Theme) (removedLineStyle, addedLineStyle, contextLin
 
 // applyHighlighting applies intra-line highlighting to a piece of text
 func applyHighlighting(content string, segments []Segment, segmentType LineType, highlightBg color.Color) string {
-	// Find all ANSI sequences in the content
-	ansiRegex := regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`)
+	// Find all ANSI sequences in the content using pre-compiled regex
 	ansiMatches := ansiRegex.FindAllStringIndex(content, -1)
 
 	// Build a mapping of visible character positions to their actual indices

internal/llm/tools/grep.go 🔗

@@ -12,12 +12,63 @@ import (
 	"sort"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/config"
 	"github.com/charmbracelet/crush/internal/fileutil"
 )
 
+// regexCache provides thread-safe caching of compiled regex patterns
+type regexCache struct {
+	cache map[string]*regexp.Regexp
+	mu    sync.RWMutex
+}
+
+// newRegexCache creates a new regex cache
+func newRegexCache() *regexCache {
+	return &regexCache{
+		cache: make(map[string]*regexp.Regexp),
+	}
+}
+
+// get retrieves a compiled regex from cache or compiles and caches it
+func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) {
+	// Try to get from cache first (read lock)
+	rc.mu.RLock()
+	if regex, exists := rc.cache[pattern]; exists {
+		rc.mu.RUnlock()
+		return regex, nil
+	}
+	rc.mu.RUnlock()
+
+	// Compile the regex (write lock)
+	rc.mu.Lock()
+	defer rc.mu.Unlock()
+
+	// Double-check in case another goroutine compiled it while we waited
+	if regex, exists := rc.cache[pattern]; exists {
+		return regex, nil
+	}
+
+	// Compile and cache the regex
+	regex, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+
+	rc.cache[pattern] = regex
+	return regex, nil
+}
+
+// Global regex cache instances
+var (
+	searchRegexCache = newRegexCache()
+	globRegexCache   = newRegexCache()
+	// Pre-compiled regex for glob conversion (used frequently)
+	globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`)
+)
+
 type GrepParams struct {
 	Pattern     string `json:"pattern"`
 	Path        string `json:"path"`
@@ -266,7 +317,8 @@ func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) {
 func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) {
 	matches := []grepMatch{}
 
-	regex, err := regexp.Compile(pattern)
+	// Use cached regex compilation
+	regex, err := searchRegexCache.get(pattern)
 	if err != nil {
 		return nil, fmt.Errorf("invalid regex pattern: %w", err)
 	}
@@ -274,7 +326,7 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error
 	var includePattern *regexp.Regexp
 	if include != "" {
 		regexPattern := globToRegex(include)
-		includePattern, err = regexp.Compile(regexPattern)
+		includePattern, err = globRegexCache.get(regexPattern)
 		if err != nil {
 			return nil, fmt.Errorf("invalid include pattern: %w", err)
 		}
@@ -349,8 +401,8 @@ func globToRegex(glob string) string {
 	regexPattern = strings.ReplaceAll(regexPattern, "*", ".*")
 	regexPattern = strings.ReplaceAll(regexPattern, "?", ".")
 
-	re := regexp.MustCompile(`\{([^}]+)\}`)
-	regexPattern = re.ReplaceAllStringFunc(regexPattern, func(match string) string {
+	// Use pre-compiled regex instead of compiling each time
+	regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string {
 		inner := match[1 : len(match)-1]
 		return "(" + strings.ReplaceAll(inner, ",", "|") + ")"
 	})

internal/llm/tools/grep_test.go 🔗

@@ -0,0 +1,77 @@
+package tools
+
+import (
+	"regexp"
+	"testing"
+)
+
+func TestRegexCache(t *testing.T) {
+	cache := newRegexCache()
+	
+	// Test basic caching
+	pattern := "test.*pattern"
+	regex1, err := cache.get(pattern)
+	if err != nil {
+		t.Fatalf("Failed to compile regex: %v", err)
+	}
+	
+	regex2, err := cache.get(pattern)
+	if err != nil {
+		t.Fatalf("Failed to get cached regex: %v", err)
+	}
+	
+	// Should be the same instance (cached)
+	if regex1 != regex2 {
+		t.Error("Expected cached regex to be the same instance")
+	}
+	
+	// Test that it actually works
+	if !regex1.MatchString("test123pattern") {
+		t.Error("Regex should match test string")
+	}
+}
+
+func TestGlobToRegexCaching(t *testing.T) {
+	// Test that globToRegex uses pre-compiled regex
+	pattern1 := globToRegex("*.{js,ts}")
+	
+	// Should not panic and should work correctly
+	regex1, err := regexp.Compile(pattern1)
+	if err != nil {
+		t.Fatalf("Failed to compile glob regex: %v", err)
+	}
+	
+	if !regex1.MatchString("test.js") {
+		t.Error("Glob regex should match .js files")
+	}
+	if !regex1.MatchString("test.ts") {
+		t.Error("Glob regex should match .ts files")
+	}
+	if regex1.MatchString("test.go") {
+		t.Error("Glob regex should not match .go files")
+	}
+}
+
+// Benchmark to show performance improvement
+func BenchmarkRegexCacheVsCompile(b *testing.B) {
+	cache := newRegexCache()
+	pattern := "test.*pattern.*[0-9]+"
+	
+	b.Run("WithCache", func(b *testing.B) {
+		for b.Loop() {
+			_, err := cache.get(pattern)
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+	
+	b.Run("WithoutCache", func(b *testing.B) {
+		for b.Loop() {
+			_, err := regexp.Compile(pattern)
+			if err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}