sourcegraph.go

  1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"net/http"
 10	"strings"
 11	"time"
 12)
 13
 14type SourcegraphParams struct {
 15	Query         string `json:"query"`
 16	Count         int    `json:"count,omitempty"`
 17	ContextWindow int    `json:"context_window,omitempty"`
 18	Timeout       int    `json:"timeout,omitempty"`
 19}
 20
 21type sourcegraphTool struct {
 22	client *http.Client
 23}
 24
 25const (
 26	SourcegraphToolName        = "sourcegraph"
 27	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.
 28
 29WHEN TO USE THIS TOOL:
 30- Use when you need to find code examples or implementations across public repositories
 31- Helpful for researching how others have solved similar problems
 32- Useful for discovering patterns and best practices in open source code
 33
 34HOW TO USE:
 35- Provide a search query using Sourcegraph's query syntax
 36- Optionally specify the number of results to return (default: 10)
 37- Optionally set a timeout for the request
 38
 39QUERY SYNTAX:
 40- Basic search: "fmt.Println" searches for exact matches
 41- File filters: "file:.go fmt.Println" limits to Go files
 42- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
 43- Language filters: "lang:go fmt.Println" limits to Go code
 44- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
 45- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
 46- Quoted strings: "\"exact phrase\"" for exact phrase matching
 47- Exclude filters: "-file:test" or "-repo:forks" to exclude matches
 48
 49ADVANCED FILTERS:
 50- Repository filters:
 51  * "repo:name" - Match repositories with name containing "name"
 52  * "repo:^github\.com/org/repo$" - Exact repository match
 53  * "repo:org/repo@branch" - Search specific branch
 54  * "repo:org/repo rev:branch" - Alternative branch syntax
 55  * "-repo:name" - Exclude repositories
 56  * "fork:yes" or "fork:only" - Include or only show forks
 57  * "archived:yes" or "archived:only" - Include or only show archived repos
 58  * "visibility:public" or "visibility:private" - Filter by visibility
 59
 60- File filters:
 61  * "file:\.js$" - Files with .js extension
 62  * "file:internal/" - Files in internal directory
 63  * "-file:test" - Exclude test files
 64  * "file:has.content(Copyright)" - Files containing "Copyright"
 65  * "file:has.contributor([email protected])" - Files with specific contributor
 66
 67- Content filters:
 68  * "content:\"exact string\"" - Search for exact string
 69  * "-content:\"unwanted\"" - Exclude files with unwanted content
 70  * "case:yes" - Case-sensitive search
 71
 72- Type filters:
 73  * "type:symbol" - Search for symbols (functions, classes, etc.)
 74  * "type:file" - Search file content only
 75  * "type:path" - Search filenames only
 76  * "type:diff" - Search code changes
 77  * "type:commit" - Search commit messages
 78
 79- Commit/diff search:
 80  * "after:\"1 month ago\"" - Commits after date
 81  * "before:\"2023-01-01\"" - Commits before date
 82  * "author:name" - Commits by author
 83  * "message:\"fix bug\"" - Commits with message
 84
 85- Result selection:
 86  * "select:repo" - Show only repository names
 87  * "select:file" - Show only file paths
 88  * "select:content" - Show only matching content
 89  * "select:symbol" - Show only matching symbols
 90
 91- Result control:
 92  * "count:100" - Return up to 100 results
 93  * "count:all" - Return all results
 94  * "timeout:30s" - Set search timeout
 95
 96EXAMPLES:
 97- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
 98- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
 99- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
100- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
101- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
102- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow
103
104BOOLEAN OPERATORS:
105- "term1 AND term2" - Results containing both terms
106- "term1 OR term2" - Results containing either term
107- "term1 NOT term2" - Results with term1 but not term2
108- "term1 and (term2 or term3)" - Grouping with parentheses
109
110LIMITATIONS:
111- Only searches public repositories
112- Rate limits may apply
113- Complex queries may take longer to execute
114- Maximum of 20 results per query
115
116TIPS:
117- Use specific file extensions to narrow results
118- Add repo: filters for more targeted searches
119- Use type:symbol to find function/method definitions
120- Use type:file to find relevant files`
121)
122
123func NewSourcegraphTool() BaseTool {
124	return &sourcegraphTool{
125		client: &http.Client{
126			Timeout: 30 * time.Second,
127		},
128	}
129}
130
131func (t *sourcegraphTool) Info() ToolInfo {
132	return ToolInfo{
133		Name:        SourcegraphToolName,
134		Description: sourcegraphToolDescription,
135		Parameters: map[string]any{
136			"query": map[string]any{
137				"type":        "string",
138				"description": "The Sourcegraph search query",
139			},
140			"count": map[string]any{
141				"type":        "number",
142				"description": "Optional number of results to return (default: 10, max: 20)",
143			},
144			"context_window": map[string]any{
145				"type":        "number",
146				"description": "The context around the match to return (default: 10 lines)",
147			},
148			"timeout": map[string]any{
149				"type":        "number",
150				"description": "Optional timeout in seconds (max 120)",
151			},
152		},
153		Required: []string{"query"},
154	}
155}
156
157func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
158	var params SourcegraphParams
159	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
160		return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
161	}
162
163	if params.Query == "" {
164		return NewTextErrorResponse("Query parameter is required"), nil
165	}
166
167	if params.Count <= 0 {
168		params.Count = 10
169	} else if params.Count > 20 {
170		params.Count = 20 // Limit to 20 results
171	}
172
173	if params.ContextWindow <= 0 {
174		params.ContextWindow = 10 // Default context window
175	}
176	client := t.client
177	if params.Timeout > 0 {
178		maxTimeout := 120 // 2 minutes
179		if params.Timeout > maxTimeout {
180			params.Timeout = maxTimeout
181		}
182		client = &http.Client{
183			Timeout: time.Duration(params.Timeout) * time.Second,
184		}
185	}
186
187	type graphqlRequest struct {
188		Query     string `json:"query"`
189		Variables struct {
190			Query string `json:"query"`
191		} `json:"variables"`
192	}
193
194	request := graphqlRequest{
195		Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
196	}
197	request.Variables.Query = params.Query
198
199	graphqlQueryBytes, err := json.Marshal(request)
200	if err != nil {
201		return NewTextErrorResponse("Failed to create GraphQL request: " + err.Error()), nil
202	}
203	graphqlQuery := string(graphqlQueryBytes)
204
205	req, err := http.NewRequestWithContext(
206		ctx,
207		"POST",
208		"https://sourcegraph.com/.api/graphql",
209		bytes.NewBuffer([]byte(graphqlQuery)),
210	)
211	if err != nil {
212		return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
213	}
214
215	req.Header.Set("Content-Type", "application/json")
216	req.Header.Set("User-Agent", "termai/1.0")
217
218	resp, err := client.Do(req)
219	if err != nil {
220		return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
221	}
222	defer resp.Body.Close()
223
224	if resp.StatusCode != http.StatusOK {
225		body, _ := io.ReadAll(resp.Body)
226		if len(body) > 0 {
227			return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
228		}
229
230		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
231	}
232	body, err := io.ReadAll(resp.Body)
233	if err != nil {
234		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
235	}
236
237	var result map[string]any
238	if err = json.Unmarshal(body, &result); err != nil {
239		return NewTextErrorResponse("Failed to parse response: " + err.Error()), nil
240	}
241
242	formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
243	if err != nil {
244		return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
245	}
246
247	return NewTextResponse(formattedResults), nil
248}
249
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// result is the JSON response decoded into generic maps. contextWindow is the
// number of file lines shown before and after every matching line; negative
// values are treated as 0.
//
// If the response carries a non-empty top-level "errors" list, the error
// messages are rendered and returned with a nil error. An error is returned
// only when "data", "data.search" or "data.search.results" is missing or has
// the wrong type. At most the first 10 entries of the result list are
// considered, and only entries whose __typename is "FileMatch" and that carry
// both a repository and a file are rendered; headings are numbered
// consecutively over the rendered entries.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// encoding/json decodes every JSON number into float64.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	contextWindow = max(contextWindow, 0)

	rendered := 0
	for _, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}
		lineMatches, _ := fileMatch["lineMatches"].([]any)

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		rendered++
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", rendered, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		// Split the file once per result, not once per line match.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}
			writeSourcegraphLineMatch(&buffer, lineMatch, lines, contextWindow)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch appends one fenced block for lineMatch to buffer:
// up to contextWindow lines before the match, the match preview, and up to
// contextWindow lines after it. lines is the file content split on "\n"; when
// it is empty only the preview line is written.
func writeSourcegraphLineMatch(buffer *strings.Builder, lineMatch map[string]any, lines []string, contextWindow int) {
	lineNumber, _ := lineMatch["lineNumber"].(float64)
	preview, _ := lineMatch["preview"].(string)

	// The Sourcegraph GraphQL schema documents LineMatch.lineNumber as
	// 0-based, so it is directly the index into lines; the number shown to
	// the reader is index+1.
	index := int(lineNumber)

	buffer.WriteString("```\n")

	if len(lines) == 0 {
		fmt.Fprintf(buffer, "%d| %s\n", index+1, preview)
		buffer.WriteString("```\n\n")
		return
	}

	for j := max(0, index-contextWindow); j < index && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	// The extra space after the bar sets the matching line apart from its
	// context.
	fmt.Fprintf(buffer, "%d|  %s\n", index+1, preview)

	for j := max(0, index+1); j <= index+contextWindow && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	buffer.WriteString("```\n\n")
}