1package tools
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "net/http"
10 "strings"
11 "sync"
12 "time"
13)
14
// SourcegraphParams is the JSON input decoded by the sourcegraph tool's Run
// method.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty value.
	Query string `json:"query"`
	// Count is the requested number of results. Run replaces values <= 0
	// with 10 and caps values above 20 at 20.
	Count int `json:"count,omitempty"`
	// ContextWindow is the number of lines of context rendered around each
	// match. Run replaces values <= 0 with 10.
	ContextWindow int `json:"context_window,omitempty"`
	// Timeout is the request timeout in seconds. Values <= 0 select the
	// tool's default HTTP client; values above 120 are clamped to 120.
	Timeout int `json:"timeout,omitempty"`
}
21
// SourcegraphResponseMetadata is a JSON-serializable summary of a search
// response.
//
// NOTE(review): nothing in the visible part of this file constructs or reads
// this type; the field meanings below are inferred from their names only and
// should be confirmed against callers.
type SourcegraphResponseMetadata struct {
	// NumberOfMatches is presumably the number of matches found.
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated presumably reports whether the result list was cut short.
	Truncated bool `json:"truncated"`
}
26
// sourcegraphTool implements the Sourcegraph search tool. It holds the HTTP
// clients used to talk to the Sourcegraph GraphQL endpoint.
type sourcegraphTool struct {
	// client is the default HTTP client, used when the caller supplies no
	// positive timeout.
	client *http.Client
	// clientPool caches HTTP clients keyed by their timeout in seconds.
	clientPool map[int]*http.Client
	// clientPoolMu guards clientPool.
	clientPoolMu sync.RWMutex
}
32
const (
	// SourcegraphToolName is the name under which the tool is registered;
	// Info reports it as ToolInfo.Name.
	SourcegraphToolName = "sourcegraph"
	// sourcegraphToolDescription is the usage text reported by Info as
	// ToolInfo.Description. It is runtime text, so edit it with care.
	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.

WHEN TO USE THIS TOOL:
- Use when you need to find code examples or implementations across public repositories
- Helpful for researching how others have solved similar problems
- Useful for discovering patterns and best practices in open source code

HOW TO USE:
- Provide a search query using Sourcegraph's query syntax
- Optionally specify the number of results to return (default: 10)
- Optionally set a timeout for the request

QUERY SYNTAX:
- Basic search: "fmt.Println" searches for exact matches
- File filters: "file:.go fmt.Println" limits to Go files
- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
- Language filters: "lang:go fmt.Println" limits to Go code
- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
- Quoted strings: "\"exact phrase\"" for exact phrase matching
- Exclude filters: "-file:test" or "-repo:forks" to exclude matches

ADVANCED FILTERS:
- Repository filters:
 * "repo:name" - Match repositories with name containing "name"
 * "repo:^github\.com/org/repo$" - Exact repository match
 * "repo:org/repo@branch" - Search specific branch
 * "repo:org/repo rev:branch" - Alternative branch syntax
 * "-repo:name" - Exclude repositories
 * "fork:yes" or "fork:only" - Include or only show forks
 * "archived:yes" or "archived:only" - Include or only show archived repos
 * "visibility:public" or "visibility:private" - Filter by visibility

- File filters:
 * "file:\.js$" - Files with .js extension
 * "file:internal/" - Files in internal directory
 * "-file:test" - Exclude test files
 * "file:has.content(Copyright)" - Files containing "Copyright"
 * "file:has.contributor([email protected])" - Files with specific contributor

- Content filters:
 * "content:\"exact string\"" - Search for exact string
 * "-content:\"unwanted\"" - Exclude files with unwanted content
 * "case:yes" - Case-sensitive search

- Type filters:
 * "type:symbol" - Search for symbols (functions, classes, etc.)
 * "type:file" - Search file content only
 * "type:path" - Search filenames only
 * "type:diff" - Search code changes
 * "type:commit" - Search commit messages

- Commit/diff search:
 * "after:\"1 month ago\"" - Commits after date
 * "before:\"2023-01-01\"" - Commits before date
 * "author:name" - Commits by author
 * "message:\"fix bug\"" - Commits with message

- Result selection:
 * "select:repo" - Show only repository names
 * "select:file" - Show only file paths
 * "select:content" - Show only matching content
 * "select:symbol" - Show only matching symbols

- Result control:
 * "count:100" - Return up to 100 results
 * "count:all" - Return all results
 * "timeout:30s" - Set search timeout

EXAMPLES:
- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow

BOOLEAN OPERATORS:
- "term1 AND term2" - Results containing both terms
- "term1 OR term2" - Results containing either term
- "term1 NOT term2" - Results with term1 but not term2
- "term1 and (term2 or term3)" - Grouping with parentheses

LIMITATIONS:
- Only searches public repositories
- Rate limits may apply
- Complex queries may take longer to execute
- Maximum of 20 results per query

TIPS:
- Use specific file extensions to narrow results
- Add repo: filters for more targeted searches
- Use type:symbol to find function/method definitions
- Use type:file to find relevant files`
)
130
131func NewSourcegraphTool() BaseTool {
132 return &sourcegraphTool{
133 client: &http.Client{
134 Timeout: 30 * time.Second,
135 Transport: &http.Transport{
136 MaxIdleConns: 100,
137 MaxIdleConnsPerHost: 10,
138 IdleConnTimeout: 90 * time.Second,
139 },
140 },
141 clientPool: make(map[int]*http.Client),
142 }
143}
144
145// getClientForTimeout returns a cached client for the given timeout or the default client
146func (t *sourcegraphTool) getClientForTimeout(timeout int) *http.Client {
147 if timeout <= 0 {
148 return t.client
149 }
150
151 maxTimeout := 120 // 2 minutes
152 if timeout > maxTimeout {
153 timeout = maxTimeout
154 }
155
156 // Check if we have a cached client for this timeout
157 t.clientPoolMu.RLock()
158 if client, exists := t.clientPool[timeout]; exists {
159 t.clientPoolMu.RUnlock()
160 return client
161 }
162 t.clientPoolMu.RUnlock()
163
164 // Create and cache a new client
165 t.clientPoolMu.Lock()
166 defer t.clientPoolMu.Unlock()
167
168 // Double-check in case another goroutine created it
169 if client, exists := t.clientPool[timeout]; exists {
170 return client
171 }
172
173 client := &http.Client{
174 Timeout: time.Duration(timeout) * time.Second,
175 Transport: &http.Transport{
176 MaxIdleConns: 100,
177 MaxIdleConnsPerHost: 10,
178 IdleConnTimeout: 90 * time.Second,
179 },
180 }
181 t.clientPool[timeout] = client
182 return client
183}
184
185func (t *sourcegraphTool) Info() ToolInfo {
186 return ToolInfo{
187 Name: SourcegraphToolName,
188 Description: sourcegraphToolDescription,
189 Parameters: map[string]any{
190 "query": map[string]any{
191 "type": "string",
192 "description": "The Sourcegraph search query",
193 },
194 "count": map[string]any{
195 "type": "number",
196 "description": "Optional number of results to return (default: 10, max: 20)",
197 },
198 "context_window": map[string]any{
199 "type": "number",
200 "description": "The context around the match to return (default: 10 lines)",
201 },
202 "timeout": map[string]any{
203 "type": "number",
204 "description": "Optional timeout in seconds (max 120)",
205 },
206 },
207 Required: []string{"query"},
208 }
209}
210
211func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
212 var params SourcegraphParams
213 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
214 return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
215 }
216
217 if params.Query == "" {
218 return NewTextErrorResponse("Query parameter is required"), nil
219 }
220
221 if params.Count <= 0 {
222 params.Count = 10
223 } else if params.Count > 20 {
224 params.Count = 20 // Limit to 20 results
225 }
226
227 if params.ContextWindow <= 0 {
228 params.ContextWindow = 10 // Default context window
229 }
230 client := t.getClientForTimeout(params.Timeout)
231
232 type graphqlRequest struct {
233 Query string `json:"query"`
234 Variables struct {
235 Query string `json:"query"`
236 } `json:"variables"`
237 }
238
239 request := graphqlRequest{
240 Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
241 }
242 request.Variables.Query = params.Query
243
244 graphqlQueryBytes, err := json.Marshal(request)
245 if err != nil {
246 return ToolResponse{}, fmt.Errorf("failed to marshal GraphQL request: %w", err)
247 }
248 graphqlQuery := string(graphqlQueryBytes)
249
250 req, err := http.NewRequestWithContext(
251 ctx,
252 "POST",
253 "https://sourcegraph.com/.api/graphql",
254 bytes.NewBuffer([]byte(graphqlQuery)),
255 )
256 if err != nil {
257 return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
258 }
259
260 req.Header.Set("Content-Type", "application/json")
261 req.Header.Set("User-Agent", "crush/1.0")
262
263 resp, err := client.Do(req)
264 if err != nil {
265 return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
266 }
267 defer resp.Body.Close()
268
269 if resp.StatusCode != http.StatusOK {
270 body, _ := io.ReadAll(resp.Body)
271 if len(body) > 0 {
272 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
273 }
274
275 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
276 }
277 body, err := io.ReadAll(resp.Body)
278 if err != nil {
279 return ToolResponse{}, fmt.Errorf("failed to read response body: %w", err)
280 }
281
282 var result map[string]any
283 if err = json.Unmarshal(body, &result); err != nil {
284 return ToolResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
285 }
286
287 formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
288 if err != nil {
289 return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
290 }
291
292 return NewTextResponse(formattedResults), nil
293}
294
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// result is the response body decoded into a generic map. contextWindow is
// the number of file lines to show before and after each matched line when
// the file content is present in the response.
//
// If the response carries a non-empty "errors" list, only those error
// messages are rendered. An error is returned when the "data", "search" or
// "results" objects are missing. At most the first 10 entries of the result
// list are considered, and entries that are not FileMatch objects or that
// lack a repository or file are skipped.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// Numbers decoded into map[string]any arrive as float64; a missing or
	// mistyped value falls back to the zero value.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	for i, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		// The heading uses the entry's position in the (truncated) result
		// list, so skipped entries leave gaps in the numbering.
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", i+1, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		lineMatches, _ := fileMatch["lineMatches"].([]any)
		if len(lineMatches) == 0 {
			continue
		}

		// Split the file once per result rather than once per line match.
		// With no content, lines stays nil and only the preview is shown.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}

			lineNumber, _ := lineMatch["lineNumber"].(float64)
			preview, _ := lineMatch["preview"].(string)

			writeSourcegraphLineMatch(&buffer, lines, int(lineNumber), contextWindow, preview)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch writes one fenced block containing the matched
// line's preview surrounded by up to contextWindow lines of context taken
// from lines.
//
// NOTE(review): lineNumber is treated as 1-based when indexing into lines
// (the matched line corresponds to lines[lineNumber-1]); confirm this against
// the Sourcegraph API schema.
func writeSourcegraphLineMatch(buffer *strings.Builder, lines []string, lineNumber, contextWindow int, preview string) {
	buffer.WriteString("```\n")

	// Context before the match, clamped to the start of the file.
	beforeEnd := min(lineNumber-1, len(lines))
	for j := max(0, lineNumber-contextWindow-1); j < beforeEnd; j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	fmt.Fprintf(buffer, "%d| %s\n", lineNumber, preview)

	// Context after the match. The start is clamped to 0 so that a negative
	// line number in a malformed response cannot index out of range.
	afterEnd := min(lineNumber+contextWindow, len(lines))
	for j := max(0, lineNumber); j < afterEnd; j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	buffer.WriteString("```\n\n")
}