1package tools
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "net/http"
10 "strings"
11 "time"
12)
13
// SourcegraphParams is the JSON-decoded input of the sourcegraph tool.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty query.
	Query string `json:"query"`
	// Count is the requested number of results. Run replaces non-positive
	// values with 10 and caps larger values at 20.
	Count int `json:"count,omitempty"`
	// ContextWindow is the number of lines shown around each match. Run
	// replaces non-positive values with 10.
	ContextWindow int `json:"context_window,omitempty"`
	// Timeout is the request timeout in seconds. Run ignores non-positive
	// values and caps larger values at 120.
	Timeout int `json:"timeout,omitempty"`
}
20
// SourcegraphResponseMetadata carries summary information about a search
// response in JSON-serializable form.
type SourcegraphResponseMetadata struct {
	// NumberOfMatches is serialized as "number_of_matches".
	NumberOfMatches int `json:"number_of_matches"`
	// Truncated is serialized as "truncated".
	Truncated bool `json:"truncated"`
}
25
// sourcegraphTool implements the sourcegraph search tool.
type sourcegraphTool struct {
	// client is the HTTP client used to send search requests.
	client *http.Client
}
29
const (
	// SourcegraphToolName is the name under which the sourcegraph tool
	// identifies itself.
	SourcegraphToolName = "sourcegraph"

	// sourcegraphToolDescription is the usage guide returned as the tool's
	// description. It is runtime text, so wording changes alter what
	// consumers of the tool description see.
	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.

WHEN TO USE THIS TOOL:
- Use when you need to find code examples or implementations across public repositories
- Helpful for researching how others have solved similar problems
- Useful for discovering patterns and best practices in open source code

HOW TO USE:
- Provide a search query using Sourcegraph's query syntax
- Optionally specify the number of results to return (default: 10)
- Optionally set a timeout for the request

QUERY SYNTAX:
- Basic search: "fmt.Println" searches for exact matches
- File filters: "file:.go fmt.Println" limits to Go files
- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
- Language filters: "lang:go fmt.Println" limits to Go code
- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
- Quoted strings: "\"exact phrase\"" for exact phrase matching
- Exclude filters: "-file:test" or "-repo:forks" to exclude matches

ADVANCED FILTERS:
- Repository filters:
 * "repo:name" - Match repositories with name containing "name"
 * "repo:^github\.com/org/repo$" - Exact repository match
 * "repo:org/repo@branch" - Search specific branch
 * "repo:org/repo rev:branch" - Alternative branch syntax
 * "-repo:name" - Exclude repositories
 * "fork:yes" or "fork:only" - Include or only show forks
 * "archived:yes" or "archived:only" - Include or only show archived repos
 * "visibility:public" or "visibility:private" - Filter by visibility

- File filters:
 * "file:\.js$" - Files with .js extension
 * "file:internal/" - Files in internal directory
 * "-file:test" - Exclude test files
 * "file:has.content(Copyright)" - Files containing "Copyright"
 * "file:has.contributor(user@example.com)" - Files with specific contributor

- Content filters:
 * "content:\"exact string\"" - Search for exact string
 * "-content:\"unwanted\"" - Exclude files with unwanted content
 * "case:yes" - Case-sensitive search

- Type filters:
 * "type:symbol" - Search for symbols (functions, classes, etc.)
 * "type:file" - Search file content only
 * "type:path" - Search filenames only
 * "type:diff" - Search code changes
 * "type:commit" - Search commit messages

- Commit/diff search:
 * "after:\"1 month ago\"" - Commits after date
 * "before:\"2023-01-01\"" - Commits before date
 * "author:name" - Commits by author
 * "message:\"fix bug\"" - Commits with message

- Result selection:
 * "select:repo" - Show only repository names
 * "select:file" - Show only file paths
 * "select:content" - Show only matching content
 * "select:symbol" - Show only matching symbols

- Result control:
 * "count:100" - Return up to 100 results
 * "count:all" - Return all results
 * "timeout:30s" - Set search timeout

EXAMPLES:
- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow

BOOLEAN OPERATORS:
- "term1 AND term2" - Results containing both terms
- "term1 OR term2" - Results containing either term
- "term1 NOT term2" - Results with term1 but not term2
- "term1 and (term2 or term3)" - Grouping with parentheses

LIMITATIONS:
- Only searches public repositories
- Rate limits may apply
- Complex queries may take longer to execute
- Maximum of 20 results per query

TIPS:
- Use specific file extensions to narrow results
- Add repo: filters for more targeted searches
- Use type:symbol to find function/method definitions
- Use type:file to find relevant files`
)
127
128func NewSourcegraphTool() BaseTool {
129 return &sourcegraphTool{
130 client: &http.Client{
131 Timeout: 30 * time.Second,
132 Transport: &http.Transport{
133 MaxIdleConns: 100,
134 MaxIdleConnsPerHost: 10,
135 IdleConnTimeout: 90 * time.Second,
136 },
137 },
138 }
139}
140
141func (t *sourcegraphTool) Name() string {
142 return SourcegraphToolName
143}
144
145func (t *sourcegraphTool) Info() ToolInfo {
146 return ToolInfo{
147 Name: SourcegraphToolName,
148 Description: sourcegraphToolDescription,
149 Parameters: map[string]any{
150 "query": map[string]any{
151 "type": "string",
152 "description": "The Sourcegraph search query",
153 },
154 "count": map[string]any{
155 "type": "number",
156 "description": "Optional number of results to return (default: 10, max: 20)",
157 },
158 "context_window": map[string]any{
159 "type": "number",
160 "description": "The context around the match to return (default: 10 lines)",
161 },
162 "timeout": map[string]any{
163 "type": "number",
164 "description": "Optional timeout in seconds (max 120)",
165 },
166 },
167 Required: []string{"query"},
168 }
169}
170
171func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
172 var params SourcegraphParams
173 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
174 return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
175 }
176
177 if params.Query == "" {
178 return NewTextErrorResponse("Query parameter is required"), nil
179 }
180
181 if params.Count <= 0 {
182 params.Count = 10
183 } else if params.Count > 20 {
184 params.Count = 20 // Limit to 20 results
185 }
186
187 if params.ContextWindow <= 0 {
188 params.ContextWindow = 10 // Default context window
189 }
190
191 // Handle timeout with context
192 requestCtx := ctx
193 if params.Timeout > 0 {
194 maxTimeout := 120 // 2 minutes
195 if params.Timeout > maxTimeout {
196 params.Timeout = maxTimeout
197 }
198 var cancel context.CancelFunc
199 requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
200 defer cancel()
201 }
202
203 type graphqlRequest struct {
204 Query string `json:"query"`
205 Variables struct {
206 Query string `json:"query"`
207 } `json:"variables"`
208 }
209
210 request := graphqlRequest{
211 Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
212 }
213 request.Variables.Query = params.Query
214
215 graphqlQueryBytes, err := json.Marshal(request)
216 if err != nil {
217 return ToolResponse{}, fmt.Errorf("failed to marshal GraphQL request: %w", err)
218 }
219 graphqlQuery := string(graphqlQueryBytes)
220
221 req, err := http.NewRequestWithContext(
222 requestCtx,
223 "POST",
224 "https://sourcegraph.com/.api/graphql",
225 bytes.NewBuffer([]byte(graphqlQuery)),
226 )
227 if err != nil {
228 return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
229 }
230
231 req.Header.Set("Content-Type", "application/json")
232 req.Header.Set("User-Agent", "crush/1.0")
233
234 resp, err := t.client.Do(req)
235 if err != nil {
236 return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
237 }
238 defer resp.Body.Close()
239
240 if resp.StatusCode != http.StatusOK {
241 body, _ := io.ReadAll(resp.Body)
242 if len(body) > 0 {
243 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
244 }
245
246 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
247 }
248 body, err := io.ReadAll(resp.Body)
249 if err != nil {
250 return ToolResponse{}, fmt.Errorf("failed to read response body: %w", err)
251 }
252
253 var result map[string]any
254 if err = json.Unmarshal(body, &result); err != nil {
255 return ToolResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
256 }
257
258 formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
259 if err != nil {
260 return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
261 }
262
263 return NewTextResponse(formattedResults), nil
264}
265
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// result is the response body decoded into generic maps. When it carries a
// non-empty top-level "errors" array, only the error messages are rendered and
// a nil error is returned. Otherwise data.search.results must be present; an
// error is returned when any of those levels is missing or has the wrong type.
//
// At most 10 entries of the result list are considered. Only entries whose
// __typename is "FileMatch" and that carry both repository and file objects
// are rendered, and rendered results are numbered consecutively. Each line
// match is printed in a fenced block with up to contextWindow lines of file
// content on either side when the file content is available; otherwise only
// the preview line is printed.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// encoding/json decodes JSON numbers into float64 for untyped targets;
	// absent or mistyped counters fall back to zero.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	contextWindow = max(contextWindow, 0)

	// rendered counts the results actually written so headings stay
	// consecutive even when entries are skipped.
	rendered := 0
	for _, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}
		lineMatches, _ := fileMatch["lineMatches"].([]any)

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		rendered++
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", rendered, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		// Split the file once per result rather than once per line match.
		// lines stays nil without content, which yields preview-only blocks.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}

			lineNumber, _ := lineMatch["lineNumber"].(float64)
			preview, _ := lineMatch["preview"].(string)

			writeSourcegraphLineMatch(&buffer, lines, int(lineNumber), preview, contextWindow)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch writes one fenced block for a single line match:
// up to contextWindow lines before the match, the preview, and up to
// contextWindow lines after it. Lines are labelled with 1-based numbers.
//
// matchIdx is the match's 0-based index into lines; Sourcegraph's GraphQL
// schema documents LineMatch.lineNumber as 0-based. lines may be nil, in which
// case only the preview is written.
func writeSourcegraphLineMatch(b *strings.Builder, lines []string, matchIdx int, preview string, contextWindow int) {
	b.WriteString("```\n")

	for j := max(matchIdx-contextWindow, 0); j < matchIdx && j < len(lines); j++ {
		fmt.Fprintf(b, "%d| %s\n", j+1, lines[j])
	}

	fmt.Fprintf(b, "%d| %s\n", matchIdx+1, preview)

	// The bound is written as a distance so a very large contextWindow
	// cannot overflow an end index.
	for j := max(matchIdx+1, 0); j < len(lines) && j-matchIdx <= contextWindow; j++ {
		fmt.Fprintf(b, "%d| %s\n", j+1, lines[j])
	}

	b.WriteString("```\n\n")
}