diff --git a/internal/llm/tools/fetch.go b/internal/llm/tools/fetch.go index 780f22a43bae7c9ec1e077c2d5878d3aeb0284ec..2662210d128f5e86e2dcbba0d262722850de4b38 100644 --- a/internal/llm/tools/fetch.go +++ b/internal/llm/tools/fetch.go @@ -7,6 +7,7 @@ import ( "io" "net/http" "strings" + "sync" "time" md "github.com/JohannesKaufmann/html-to-markdown" @@ -28,8 +29,10 @@ type FetchPermissionsParams struct { } type fetchTool struct { - client *http.Client - permissions permission.Service + client *http.Client + clientPool map[int]*http.Client + clientPoolMu sync.RWMutex + permissions permission.Service } const ( @@ -69,11 +72,57 @@ func NewFetchTool(permissions permission.Service) BaseTool { return &fetchTool{ client: &http.Client{ Timeout: 30 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, }, + clientPool: make(map[int]*http.Client), permissions: permissions, } } +// getClientForTimeout returns a cached client for the given timeout or the default client +func (t *fetchTool) getClientForTimeout(timeout int) *http.Client { + if timeout <= 0 { + return t.client + } + + maxTimeout := 120 // 2 minutes + if timeout > maxTimeout { + timeout = maxTimeout + } + + // Check if we have a cached client for this timeout + t.clientPoolMu.RLock() + if client, exists := t.clientPool[timeout]; exists { + t.clientPoolMu.RUnlock() + return client + } + t.clientPoolMu.RUnlock() + + // Create and cache a new client + t.clientPoolMu.Lock() + defer t.clientPoolMu.Unlock() + + // Double-check in case another goroutine created it + if client, exists := t.clientPool[timeout]; exists { + return client + } + + client := &http.Client{ + Timeout: time.Duration(timeout) * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, + } + t.clientPool[timeout] = client + return client +} + func (t *fetchTool) Info() ToolInfo { return ToolInfo{ Name: FetchToolName, @@ -136,16 +185,7 @@ func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error return ToolResponse{}, permission.ErrorPermissionDenied } - client := t.client - if params.Timeout > 0 { - maxTimeout := 120 // 2 minutes - if params.Timeout > maxTimeout { - params.Timeout = maxTimeout - } - client = &http.Client{ - Timeout: time.Duration(params.Timeout) * time.Second, - } - } + client := t.getClientForTimeout(params.Timeout) req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil) if err != nil { diff --git a/internal/llm/tools/grep.go b/internal/llm/tools/grep.go index e6992a4f8c23dc440898298d8f0e5d880b2fdc53..b4eae224d617fdf6ec358d52f5c1011de3df3cab 100644 --- a/internal/llm/tools/grep.go +++ b/internal/llm/tools/grep.go @@ -5,6 +5,7 @@ import ( "context" "encoding/json" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -377,6 +378,11 @@ func searchFilesWithRegex(pattern, rootPath, include string) ([]grepMatch, error } func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, string, error) { + // Quick binary file detection + if isBinaryFile(filePath) { + return false, 0, "", nil + } + file, err := os.Open(filePath) if err != nil { return false, 0, "", err @@ -396,6 +402,46 @@ func fileContainsPattern(filePath string, pattern *regexp.Regexp) (bool, int, st return false, 0, "", scanner.Err() } +// isBinaryFile performs a quick check to determine if a file is binary +func isBinaryFile(filePath string) bool { + // Check file extension first (fastest) + ext := strings.ToLower(filepath.Ext(filePath)) + binaryExts := map[string]bool{ + ".exe": true, ".dll": true, ".so": true, ".dylib": true, + ".bin": true, ".obj": true, ".o": true, ".a": true, + ".zip": true, ".tar": true, ".gz": true, ".bz2": true, + ".jpg": true, ".jpeg": true, ".png": true, ".gif": true, + ".pdf": true, ".doc": true, ".docx": true, ".xls": true, + ".mp3": true, ".mp4": true, ".avi": true, ".mov": true, + } + if binaryExts[ext] { + return true + } + + // Quick content check for files without clear extensions + file, err := os.Open(filePath) + if err != nil { + return false // If we can't open it, let the caller handle the error + } + defer file.Close() + + // Read first 512 bytes to check for null bytes + buffer := make([]byte, 512) + n, err := file.Read(buffer) + if err != nil && err != io.EOF { + return false + } + + // Check for null bytes (common in binary files) + for i := 0; i < n; i++ { + if buffer[i] == 0 { + return true + } + } + + return false +} + func globToRegex(glob string) string { regexPattern := strings.ReplaceAll(glob, ".", "\\.") regexPattern = strings.ReplaceAll(regexPattern, "*", ".*") diff --git a/internal/llm/tools/sourcegraph.go b/internal/llm/tools/sourcegraph.go index f62e6a961bed962088e0e40670a4276f16174187..1496baf6ae148368570057567bd4ad396ce40d3c 100644 --- a/internal/llm/tools/sourcegraph.go +++ b/internal/llm/tools/sourcegraph.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "strings" + "sync" "time" ) @@ -24,7 +25,9 @@ type SourcegraphResponseMetadata struct { } type sourcegraphTool struct { - client *http.Client + client *http.Client + clientPool map[int]*http.Client + clientPoolMu sync.RWMutex } const ( @@ -129,10 +132,56 @@ func NewSourcegraphTool() BaseTool { return &sourcegraphTool{ client: &http.Client{ Timeout: 30 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, }, + clientPool: make(map[int]*http.Client), } } +// getClientForTimeout returns a cached client for the given timeout or the default client +func (t *sourcegraphTool) getClientForTimeout(timeout int) *http.Client { + if timeout <= 0 { + return t.client + } + + maxTimeout := 120 // 2 minutes + if timeout > maxTimeout { + timeout = maxTimeout + } + + // Check if we have a cached client for this timeout + t.clientPoolMu.RLock() + if client, exists := t.clientPool[timeout]; exists { + t.clientPoolMu.RUnlock() + return client + } + t.clientPoolMu.RUnlock() + + // Create and cache a new client + t.clientPoolMu.Lock() + defer t.clientPoolMu.Unlock() + + // Double-check in case another goroutine created it + if client, exists := t.clientPool[timeout]; exists { + return client + } + + client := &http.Client{ + Timeout: time.Duration(timeout) * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + }, + } + t.clientPool[timeout] = client + return client +} + func (t *sourcegraphTool) Info() ToolInfo { return ToolInfo{ Name: SourcegraphToolName, @@ -178,16 +227,7 @@ func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, if params.ContextWindow <= 0 { params.ContextWindow = 10 // Default context window } - client := t.client - if params.Timeout > 0 { - maxTimeout := 120 // 2 minutes - if params.Timeout > maxTimeout { - params.Timeout = maxTimeout - } - client = &http.Client{ - Timeout: time.Duration(params.Timeout) * time.Second, - } - } + client := t.getClientForTimeout(params.Timeout) type graphqlRequest struct { Query string `json:"query"`