From 4f81ca400469229cce7db94ae9c92f84be77b1b7 Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Tue, 10 Jun 2025 12:39:07 +0200 Subject: [PATCH 1/2] fix: regex pattern caching to eliminate repeated compilation overhead --- internal/diff/diff.go | 12 +++++--- internal/llm/tools/grep.go | 60 +++++++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/internal/diff/diff.go b/internal/diff/diff.go index 89087ecc7594c6feab222c5b7ea288db1ac112e4..755c5e9891d2de693dd51a1ddf866f5911935d15 100644 --- a/internal/diff/diff.go +++ b/internal/diff/diff.go @@ -16,6 +16,12 @@ import ( "github.com/sergi/go-diff/diffmatchpatch" ) +// Pre-compiled regex patterns for better performance +var ( + hunkHeaderRegex = regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`) + ansiRegex = regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`) +) + // ------------------------------------------------------------------------- // Core Types // ------------------------------------------------------------------------- @@ -129,7 +135,6 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) { var result DiffResult var currentHunk *Hunk - hunkHeaderRe := regexp.MustCompile(`^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@`) lines := strings.Split(diff, "\n") var oldLine, newLine int @@ -150,7 +155,7 @@ func ParseUnifiedDiff(diff string) (DiffResult, error) { } // Parse hunk headers - if matches := hunkHeaderRe.FindStringSubmatch(line); matches != nil { + if matches := hunkHeaderRegex.FindStringSubmatch(line); matches != nil { if currentHunk != nil { result.Hunks = append(result.Hunks, *currentHunk) } @@ -343,8 +348,7 @@ func createStyles(t *styles.Theme) (removedLineStyle, addedLineStyle, contextLin // applyHighlighting applies intra-line highlighting to a piece of text func applyHighlighting(content string, segments []Segment, segmentType LineType, highlightBg color.Color) string { - // Find all ANSI sequences in the content - ansiRegex := regexp.MustCompile(`\x1b(?:[@-Z\\-_]|\[[0-9?]*(?:;[0-9?]*)*[@-~])`) + // Find all ANSI sequences in the content using pre-compiled regex ansiMatches := ansiRegex.FindAllStringIndex(content, -1) // Build a mapping of visible character positions to their actual indices diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index f13bb194483af51c56e3fc7e20bed1bcc2f35957..e043a71e0bafc77050bf444a02f50c2462018e8d 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -12,12 +12,63 @@ import ( "sort" "strconv" "strings" + "sync" "time" "github.com/charmbracelet/crush/internal/config" "github.com/charmbracelet/crush/internal/fileutil" ) +// regexCache provides thread-safe caching of compiled regex patterns +type regexCache struct { + cache map[string]*regexp.Regexp + mu sync.RWMutex +} + +// newRegexCache creates a new regex cache +func newRegexCache() *regexCache { + return ®exCache{ + cache: make(map[string]*regexp.Regexp), + } +} + +// get retrieves a compiled regex from cache or compiles and caches it +func (rc *regexCache) get(pattern string) (*regexp.Regexp, error) { + // Try to get from cache first (read lock) + rc.mu.RLock() + if regex, exists := rc.cache[pattern]; exists { + rc.mu.RUnlock() + return regex, nil + } + rc.mu.RUnlock() + + // Compile the regex (write lock) + rc.mu.Lock() + defer rc.mu.Unlock() + + // Double-check in case another goroutine compiled it while we waited + if regex, exists := rc.cache[pattern]; exists { + return regex, nil + } + + // Compile and cache the regex + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + rc.cache[pattern] = regex + return regex, nil +} + +// Global regex cache instances +var ( + searchRegexCache = newRegexCache() + globRegexCache = newRegexCache() + // Pre-compiled regex for glob conversion (used frequently) + globBraceRegex = regexp.MustCompile(`\{([^}]+)\}`) +) + type GrepParams struct { Pattern string `json:"pattern"` Path string `json:"path"` @@ -266,7 +317,8 @@ func searchWithRipgrep(pattern, path, include string) ([]grepMatch, error) { func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error) { matches := []grepMatch{} - regex, err := regexp.Compile(pattern) + // Use cached regex compilation + regex, err := searchRegexCache.get(pattern) if err != nil { return nil, fmt.Errorf("invalid regex pattern: %w", err) } @@ -274,7 +326,7 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error var includePattern *regexp.Regexp if include != "" { regexPattern := globToRegex(include) - includePattern, err = regexp.Compile(regexPattern) + includePattern, err = globRegexCache.get(regexPattern) if err != nil { return nil, fmt.Errorf("invalid include pattern: %w", err) } @@ -349,8 +401,8 @@ func globToRegex(glob string) string { regexPattern = strings.ReplaceAll(regexPattern, "*", ".*") regexPattern = strings.ReplaceAll(regexPattern, "?", ".") - re := regexp.MustCompile(`\{([^}]+)\}`) - regexPattern = re.ReplaceAllStringFunc(regexPattern, func(match string) string { + // Use pre-compiled regex instead of compiling each time + regexPattern = globBraceRegex.ReplaceAllStringFunc(regexPattern, func(match string) string { inner := match[1 : len(match)-1] return "(" + strings.ReplaceAll(inner, ",", "|") + ")" }) From 0dd52aba50ef88362076cd52469cf1c7f1a83b6f Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Tue, 10 Jun 2025 12:46:51 +0200 Subject: [PATCH 2/2] ci: add a bench test for grep --- internal/llm/tools/grep_test.go | 77 +++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 internal/llm/tools/grep_test.go diff --git a/internal/llm/tools/grep_test.go b/internal/llm/tools/grep_test.go new file mode 100644 index 0000000000000000000000000000000000000000..de4e6afb225075df34e14d54fb1e5db76955625b --- /dev/null +++ b/internal/llm/tools/grep_test.go @@ -0,0 +1,77 @@ +package tools + +import ( + "regexp" + "testing" +) + +func TestRegexCache(t *testing.T) { + cache := newRegexCache() + + // Test basic caching + pattern := "test.*pattern" + regex1, err := cache.get(pattern) + if err != nil { + t.Fatalf("Failed to compile regex: %v", err) + } + + regex2, err := cache.get(pattern) + if err != nil { + t.Fatalf("Failed to get cached regex: %v", err) + } + + // Should be the same instance (cached) + if regex1 != regex2 { + t.Error("Expected cached regex to be the same instance") + } + + // Test that it actually works + if !regex1.MatchString("test123pattern") { + t.Error("Regex should match test string") + } +} + +func TestGlobToRegexCaching(t *testing.T) { + // Test that globToRegex uses pre-compiled regex + pattern1 := globToRegex("*.{js,ts}") + + // Should not panic and should work correctly + regex1, err := regexp.Compile(pattern1) + if err != nil { + t.Fatalf("Failed to compile glob regex: %v", err) + } + + if !regex1.MatchString("test.js") { + t.Error("Glob regex should match .js files") + } + if !regex1.MatchString("test.ts") { + t.Error("Glob regex should match .ts files") + } + if regex1.MatchString("test.go") { + t.Error("Glob regex should not match .go files") + } +} + +// Benchmark to show performance improvement +func BenchmarkRegexCacheVsCompile(b *testing.B) { + cache := newRegexCache() + pattern := "test.*pattern.*[0-9]+" + + b.Run("WithCache", func(b *testing.B) { + for b.Loop() { + _, err := cache.get(pattern) + if err != nil { + b.Fatal(err) + } + } + }) + + b.Run("WithoutCache", func(b *testing.B) { + for b.Loop() { + _, err := regexp.Compile(pattern) + if err != nil { + b.Fatal(err) + } + } + }) +} \ No newline at end of file