From b66be3b7240412e3c72adfcd922644d92d079bd6 Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Mon, 16 Jun 2025 15:40:35 +0200 Subject: [PATCH 1/4] perf: optimize HTTP client pooling and binary file detection - Add HTTP client pooling with connection reuse for fetch and sourcegraph tools - Implement timeout-specific client caching to avoid creating new clients - Add binary file detection in grep tool to skip searching non-text files - Configure transport settings for better connection management --- internal/llm/tools/fetch.go | 64 +++++++++++++++++++++++++------ internal/llm/tools/grep.go | 46 ++++++++++++++++++++++ internal/llm/tools/sourcegraph.go | 62 ++++++++++++++++++++++++------ 3 files changed, 149 insertions(+), 23 deletions(-) diff --git a/internal/llm/tools/fetch.go b/internal/llm/tools/fetch.go index 780f22a43bae7c9ec1e077c2d5878d3aeb0284ec..2662210d128f5e86e2dcbba0d262722850de4b38 100644 --- a/internal/llm/tools/fetch.go +++ b/internal/llm/tools/fetch.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "strings" + "sync" "time" md "github.com/JohannesKaufmann/html-to-markdown" @@ -28,8 +29,10 @@ type FetchPermissionsParams struct { } type fetchTool struct { - client *http.Client - permissions permission.Service + client *http.Client + clientPool map[int]*http.Client + clientPoolMu sync.RWMutex + permissions permission.Service } const ( @@ -69,11 +72,57 @@ func NewFetchTool(permissions permission.Service) BaseTool { return &fetchTool{ client: &http.Client{ Timeout: 30 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, }, + clientPool: make(map[int]*http.Client), permissions: permissions, } } +// getClientForTimeout returns a cached client for the given timeout or the default client +func (t *fetchTool) getClientForTimeout(timeout int) *http.Client { + if timeout <= 0 { + return t.client + } + + maxTimeout := 120 // 2 minutes + if timeout > maxTimeout { + timeout = maxTimeout + } + + // Check if we have a cached client for this timeout + t.clientPoolMu.RLock() + if client, exists := t.clientPool[timeout]; exists { + t.clientPoolMu.RUnlock() + return client + } + t.clientPoolMu.RUnlock() + + // Create and cache a new client + t.clientPoolMu.Lock() + defer t.clientPoolMu.Unlock() + + // Double-check in case another goroutine created it + if client, exists := t.clientPool[timeout]; exists { + return client + } + + client := &http.Client{ + Timeout: time.Duration(timeout) * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, + } + t.clientPool[timeout] = client + return client +} + func (t *fetchTool) Info() ToolInfo { return ToolInfo{ Name: FetchToolName, @@ -136,16 +185,7 @@ func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error return ToolResponse{}, permission.ErrorPermissionDenied } - client := t.client - if params.Timeout > 0 { - maxTimeout := 120 // 2 minutes - if params.Timeout > maxTimeout { - params.Timeout = maxTimeout - } - client = &http.Client{ - Timeout: time.Duration(params.Timeout) * time.Second, - } - } + client := t.getClientForTimeout(params.Timeout) req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil) if err != nil { diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index e6992a4f8c23dc440898298d8f0e5d880b2fdc53..b4eae224d617fdf6ec358d52f5c1011de3df3cab 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -377,6 +378,11 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error } func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) { + // Quick binary file detection + if isBinaryFile(filePath) { + return false, 0, "", nil + } + file, err := os.Open(filePath) if err != nil { return false, 0, "", err @@ -396,6 +402,46 @@ func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, st return false, 0, "", scanner.Err() } +// isBinaryFile performs a quick check to determine if a file is binary +func isBinaryFile(filePath string) bool { + // Check file extension first (fastest) + ext := strings.ToLower(filepath.Ext(filePath)) + binaryExts := map[string]bool{ + ".exe": true, ".dll": true, ".so": true, ".dylib": true, + ".bin": true, ".obj": true, ".o": true, ".a": true, + ".zip": true, ".tar": true, ".gz": true, ".bz2": true, + ".jpg": true, ".jpeg": true, ".png": true, ".gif": true, + ".pdf": true, ".doc": true, ".docx": true, ".xls": true, + ".mp3": true, ".mp4": true, ".avi": true, ".mov": true, + } + if binaryExts[ext] { + return true + } + + // Quick content check for files without clear extensions + file, err := os.Open(filePath) + if err != nil { + return false // If we can't open it, let the caller handle the error + } + defer file.Close() + + // Read first 512 bytes to check for null bytes + buffer := make([]byte, 512) + n, err := file.Read(buffer) + if err != nil && err != io.EOF { + return false + } + + // Check for null bytes (common in binary files) + for i := 0; i < n; i++ { + if buffer[i] == 0 { + return true + } + } + + return false +} + func globToRegex(glob string) string { regexPattern := strings.ReplaceAll(glob, ".", "\\.") regexPattern = strings.ReplaceAll(regexPattern, "*", ".*") diff --git a/internal/llm/tools/sourcegraph.go b/internal/llm/tools/sourcegraph.go index f62e6a961bed962088e0e40670a4276f16174187..1496baf6ae148368570057567bd4ad396ce40d3c 100644 --- a/internal/llm/tools/sourcegraph.go +++ b/internal/llm/tools/sourcegraph.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "strings" + "sync" "time" ) @@ -24,7 +25,9 @@ type SourcegraphResponseMetadata struct { } type sourcegraphTool struct { - client *http.Client + client *http.Client + clientPool map[int]*http.Client + clientPoolMu sync.RWMutex } const ( @@ -129,10 +132,56 @@ func NewSourcegraphTool() BaseTool { return &sourcegraphTool{ client: &http.Client{ Timeout: 30 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, }, + clientPool: make(map[int]*http.Client), } } +// getClientForTimeout returns a cached client for the given timeout or the default client +func (t *sourcegraphTool) getClientForTimeout(timeout int) *http.Client { + if timeout <= 0 { + return t.client + } + + maxTimeout := 120 // 2 minutes + if timeout > maxTimeout { + timeout = maxTimeout + } + + // Check if we have a cached client for this timeout + t.clientPoolMu.RLock() + if client, exists := t.clientPool[timeout]; exists { + t.clientPoolMu.RUnlock() + return client + } + t.clientPoolMu.RUnlock() + + // Create and cache a new client + t.clientPoolMu.Lock() + defer t.clientPoolMu.Unlock() + + // Double-check in case another goroutine created it + if client, exists := t.clientPool[timeout]; exists { + return client + } + + client := &http.Client{ + Timeout: time.Duration(timeout) * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, + } + t.clientPool[timeout] = client + return client +} + func (t *sourcegraphTool) Info() ToolInfo { return ToolInfo{ Name: SourcegraphToolName, @@ -178,16 +227,7 @@ func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, if params.ContextWindow <= 0 { params.ContextWindow = 10 // Default context window } - client := t.client - if params.Timeout > 0 { - maxTimeout := 120 // 2 minutes - if params.Timeout > maxTimeout { - params.Timeout = maxTimeout - } - client = &http.Client{ - Timeout: time.Duration(params.Timeout) * time.Second, - } - } + client := t.getClientForTimeout(params.Timeout) type graphqlRequest struct { Query string `json:"query"` From 6261a5742d565bb954fae5fc38cced8254acd06d Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Mon, 16 Jun 2025 15:46:25 +0200 Subject: [PATCH 2/4] fix: format internal/llm/tools/sourcegraph.go --- internal/llm/tools/sourcegraph.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/llm/tools/sourcegraph.go b/internal/llm/tools/sourcegraph.go index 1496baf6ae148368570057567bd4ad396ce40d3c..0fb517512db18e4a817bf99e95971920018052d2 100644 --- a/internal/llm/tools/sourcegraph.go +++ b/internal/llm/tools/sourcegraph.go @@ -25,9 +25,9 @@ type SourcegraphResponseMetadata struct { } type sourcegraphTool struct { - client *http.Client - clientPool map[int]*http.Client - clientPoolMu sync.RWMutex + client *http.Client + clientPool map[int]*http.Client + clientPoolMu sync.RWMutex } const ( From 5056d443ec70303de4b1ee5d92c9eb02ceb158ee Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Mon, 16 Jun 2025 16:53:41 +0200 Subject: [PATCH 3/4] fix: remove pool logic --- internal/llm/tools/fetch.go | 64 ++++++++----------------------- internal/llm/tools/grep.go | 19 ++++----- internal/llm/tools/sourcegraph.go | 63 ++++++++---------------------- 3 files changed, 40 insertions(+), 106 deletions(-) diff --git a/internal/llm/tools/fetch.go b/internal/llm/tools/fetch.go index 2662210d128f5e86e2dcbba0d262722850de4b38..7acf23bae61df88792dd805317bdf8a67095dd0d 100644 --- a/internal/llm/tools/fetch.go +++ b/internal/llm/tools/fetch.go @@ -7,7 +7,6 @@ import ( "io" "net/http" "strings" - "sync" "time" md "github.com/JohannesKaufmann/html-to-markdown" @@ -29,10 +28,8 @@ type FetchPermissionsParams struct { } type fetchTool struct { - client *http.Client - clientPool map[int]*http.Client - clientPoolMu sync.RWMutex - permissions permission.Service + client *http.Client + permissions permission.Service } const ( @@ -78,51 +75,10 @@ func NewFetchTool(permissions permission.Service) BaseTool { IdleConnTimeout: 90 * time.Second, }, }, - clientPool: make(map[int]*http.Client), permissions: permissions, } } -// getClientForTimeout returns a cached client for the given timeout or the default client -func (t *fetchTool) getClientForTimeout(timeout int) *http.Client { - if timeout <= 0 { - return t.client - } - - maxTimeout := 120 // 2 minutes - if timeout > maxTimeout { - timeout = maxTimeout - } - - // Check if we have a cached client for this timeout - t.clientPoolMu.RLock() - if client, exists := t.clientPool[timeout]; exists { - t.clientPoolMu.RUnlock() - return client - } - t.clientPoolMu.RUnlock() - - // Create and cache a new client - t.clientPoolMu.Lock() - defer t.clientPoolMu.Unlock() - - // Double-check in case another goroutine created it - if client, exists := t.clientPool[timeout]; exists { - return client - } - - client := &http.Client{ - Timeout: time.Duration(timeout) * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 100, - MaxIdleConnsPerHost: 10, - IdleConnTimeout: 90 * time.Second, - }, - } - t.clientPool[timeout] = client - return client -} - func (t *fetchTool) Info() ToolInfo { return ToolInfo{ Name: FetchToolName, @@ -185,16 +141,26 @@ func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error return ToolResponse{}, permission.ErrorPermissionDenied } - client := t.getClientForTimeout(params.Timeout) + // Handle timeout with context + requestCtx := ctx + if params.Timeout > 0 { + maxTimeout := 120 // 2 minutes + if params.Timeout > maxTimeout { + params.Timeout = maxTimeout + } + var cancel context.CancelFunc + requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second) + defer cancel() + } - req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil) + req, err := http.NewRequestWithContext(requestCtx, "GET", params.URL, nil) if err != nil { return ToolResponse{}, fmt.Errorf("failed to create request: %w", err) } req.Header.Set("User-Agent", "crush/1.0") - resp, err := client.Do(req) + resp, err := t.client.Do(req) if err != nil { return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err) } diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index b4eae224d617fdf6ec358d52f5c1011de3df3cab..61d4fb79a614da282fb48cb23b8e0405f28d23ac 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -402,19 +402,20 @@ func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, st return false, 0, "", scanner.Err() } +var binaryExts = map[string]struct{}{ + ".exe": {}, ".dll": {}, ".so": {}, ".dylib": {}, + ".bin": {}, ".obj": {}, ".o": {}, ".a": {}, + ".zip": {}, ".tar": {}, ".gz": {}, ".bz2": {}, + ".jpg": {}, ".jpeg": {}, ".png": {}, ".gif": {}, + ".pdf": {}, ".doc": {}, ".docx": {}, ".xls": {}, + ".mp3": {}, ".mp4": {}, ".avi": {}, ".mov": {}, +} + // isBinaryFile performs a quick check to determine if a file is binary func isBinaryFile(filePath string) bool { // Check file extension first (fastest) ext := strings.ToLower(filepath.Ext(filePath)) - binaryExts := map[string]bool{ - ".exe": true, ".dll": true, ".so": true, ".dylib": true, - ".bin": true, ".obj": true, ".o": true, ".a": true, - ".zip": true, ".tar": true, ".gz": true, ".bz2": true, - ".jpg": true, ".jpeg": true, ".png": true, ".gif": true, - ".pdf": true, ".doc": true, ".docx": true, ".xls": true, - ".mp3": true, ".mp4": true, ".avi": true, ".mov": true, - } - if binaryExts[ext] { + if _, isBinary := binaryExts[ext]; isBinary { return true } diff --git a/internal/llm/tools/sourcegraph.go b/internal/llm/tools/sourcegraph.go index 0fb517512db18e4a817bf99e95971920018052d2..29518b7b818da5746d195ea8b7da521d80429962 100644 --- a/internal/llm/tools/sourcegraph.go +++ b/internal/llm/tools/sourcegraph.go @@ -8,7 +8,6 @@ import ( "io" "net/http" "strings" - "sync" "time" ) @@ -25,9 +24,7 @@ type SourcegraphResponseMetadata struct { } type sourcegraphTool struct { - client *http.Client - clientPool map[int]*http.Client - clientPoolMu sync.RWMutex + client *http.Client } const ( @@ -138,50 +135,9 @@ func NewSourcegraphTool() BaseTool { IdleConnTimeout: 90 * time.Second, }, }, - clientPool: make(map[int]*http.Client), } } -// getClientForTimeout returns a cached client for the given timeout or the default client -func (t *sourcegraphTool) getClientForTimeout(timeout int) *http.Client { - if timeout <= 0 { - return t.client - } - - maxTimeout := 120 // 2 minutes - if timeout > maxTimeout { - timeout = maxTimeout - } - - // Check if we have a cached client for this timeout - t.clientPoolMu.RLock() - if client, exists := t.clientPool[timeout]; exists { - t.clientPoolMu.RUnlock() - return client - } - t.clientPoolMu.RUnlock() - - // Create and cache a new client - t.clientPoolMu.Lock() - defer t.clientPoolMu.Unlock() - - // Double-check in case another goroutine created it - if client, exists := t.clientPool[timeout]; exists { - return client - } - - client := &http.Client{ - Timeout: time.Duration(timeout) * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 100, - MaxIdleConnsPerHost: 10, - IdleConnTimeout: 90 * time.Second, - }, - } - t.clientPool[timeout] = client - return client -} - func (t *sourcegraphTool) Info() ToolInfo { return ToolInfo{ Name: SourcegraphToolName, @@ -227,7 +183,18 @@ func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, if params.ContextWindow <= 0 { params.ContextWindow = 10 // Default context window } - client := t.getClientForTimeout(params.Timeout) + + // Handle timeout with context + requestCtx := ctx + if params.Timeout > 0 { + maxTimeout := 120 // 2 minutes + if params.Timeout > maxTimeout { + params.Timeout = maxTimeout + } + var cancel context.CancelFunc + requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second) + defer cancel() + } type graphqlRequest struct { Query string `json:"query"` @@ -248,7 +215,7 @@ func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, graphqlQuery := string(graphqlQueryBytes) req, err := http.NewRequestWithContext( - ctx, + requestCtx, "POST", "https://sourcegraph.com/.api/graphql", bytes.NewBuffer([]byte(graphqlQuery)), @@ -260,7 +227,7 @@ func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", "crush/1.0") - resp, err := client.Do(req) + resp, err := t.client.Do(req) if err != nil { return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err) } From b700ec37047bd80a0b0210135a5390911946f3a4 Mon Sep 17 00:00:00 2001 From: Raphael Amorim Date: Mon, 16 Jun 2025 16:57:26 +0200 Subject: [PATCH 4/4] feat: use range --- internal/llm/tools/grep.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index 61d4fb79a614da282fb48cb23b8e0405f28d23ac..3064ee633cf0e54bfb9d14efdd475cda15a38c85 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -434,7 +434,7 @@ func isBinaryFile(filePath string) bool { } // Check for null bytes (common in binary files) - for i := 0; i < n; i++ { + for i := range n { if buffer[i] == 0 { return true }