sourcegraph.go

  1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"net/http"
 10	"strings"
 11	"time"
 12)
 13
const (
	// SourcegraphToolName is the identifier reported as ToolInfo.Name by
	// (*sourcegraphTool).Info.
	SourcegraphToolName        = "sourcegraph"
	// sourcegraphToolDescription is the usage guide reported as
	// ToolInfo.Description by (*sourcegraphTool).Info. It is runtime text:
	// it documents when to use the tool, its parameters, and the Sourcegraph
	// query syntax.
	//
	// NOTE(review): the "file:has.contributor(...)" example below reads
	// "[email protected]", which looks like an e-mail address mangled by an
	// obfuscation filter — confirm against the intended text.
	// NOTE(review): the text advertises up to 20 results per query, while
	// formatSourcegraphResults truncates its output to 10 results.
	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.

WHEN TO USE THIS TOOL:
- Use when you need to find code examples or implementations across public repositories
- Helpful for researching how others have solved similar problems
- Useful for discovering patterns and best practices in open source code

HOW TO USE:
- Provide a search query using Sourcegraph's query syntax
- Optionally specify the number of results to return (default: 10)
- Optionally set a timeout for the request

QUERY SYNTAX:
- Basic search: "fmt.Println" searches for exact matches
- File filters: "file:.go fmt.Println" limits to Go files
- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
- Language filters: "lang:go fmt.Println" limits to Go code
- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
- Quoted strings: "\"exact phrase\"" for exact phrase matching
- Exclude filters: "-file:test" or "-repo:forks" to exclude matches

ADVANCED FILTERS:
- Repository filters:
  * "repo:name" - Match repositories with name containing "name"
  * "repo:^github\.com/org/repo$" - Exact repository match
  * "repo:org/repo@branch" - Search specific branch
  * "repo:org/repo rev:branch" - Alternative branch syntax
  * "-repo:name" - Exclude repositories
  * "fork:yes" or "fork:only" - Include or only show forks
  * "archived:yes" or "archived:only" - Include or only show archived repos
  * "visibility:public" or "visibility:private" - Filter by visibility

- File filters:
  * "file:\.js$" - Files with .js extension
  * "file:internal/" - Files in internal directory
  * "-file:test" - Exclude test files
  * "file:has.content(Copyright)" - Files containing "Copyright"
  * "file:has.contributor([email protected])" - Files with specific contributor

- Content filters:
  * "content:\"exact string\"" - Search for exact string
  * "-content:\"unwanted\"" - Exclude files with unwanted content
  * "case:yes" - Case-sensitive search

- Type filters:
  * "type:symbol" - Search for symbols (functions, classes, etc.)
  * "type:file" - Search file content only
  * "type:path" - Search filenames only
  * "type:diff" - Search code changes
  * "type:commit" - Search commit messages

- Commit/diff search:
  * "after:\"1 month ago\"" - Commits after date
  * "before:\"2023-01-01\"" - Commits before date
  * "author:name" - Commits by author
  * "message:\"fix bug\"" - Commits with message

- Result selection:
  * "select:repo" - Show only repository names
  * "select:file" - Show only file paths
  * "select:content" - Show only matching content
  * "select:symbol" - Show only matching symbols

- Result control:
  * "count:100" - Return up to 100 results
  * "count:all" - Return all results
  * "timeout:30s" - Set search timeout

EXAMPLES:
- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow

BOOLEAN OPERATORS:
- "term1 AND term2" - Results containing both terms
- "term1 OR term2" - Results containing either term
- "term1 NOT term2" - Results with term1 but not term2
- "term1 and (term2 or term3)" - Grouping with parentheses

LIMITATIONS:
- Only searches public repositories
- Rate limits may apply
- Complex queries may take longer to execute
- Maximum of 20 results per query

TIPS:
- Use specific file extensions to narrow results
- Add repo: filters for more targeted searches
- Use type:symbol to find function/method definitions
- Use type:file to find relevant files
- For more details on query syntax, visit: https://docs.sourcegraph.com/code_search/queries`
)
112
// SourcegraphParams is the JSON input that (*sourcegraphTool).Run decodes
// from ToolCall.Input.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty value.
	Query         string `json:"query"`
	// Count is the requested number of results. Run replaces values <= 0
	// with 10 and caps larger values at 20.
	Count         int    `json:"count,omitempty"`
	// ContextWindow is the number of file lines shown on each side of a
	// match. Run replaces values <= 0 with 10.
	ContextWindow int    `json:"context_window,omitempty"`
	// Timeout is the HTTP request timeout in seconds. When positive, Run
	// caps it at 120 and uses it instead of the tool's default client
	// timeout.
	Timeout       int    `json:"timeout,omitempty"`
}
119
// sourcegraphTool is the Sourcegraph code-search tool; its Info and Run
// methods are defined below.
type sourcegraphTool struct {
	// client is the HTTP client Run uses for calls that do not request
	// their own timeout.
	client *http.Client
}
123
124func NewSourcegraphTool() BaseTool {
125	return &sourcegraphTool{
126		client: &http.Client{
127			Timeout: 30 * time.Second,
128		},
129	}
130}
131
132func (t *sourcegraphTool) Info() ToolInfo {
133	return ToolInfo{
134		Name:        SourcegraphToolName,
135		Description: sourcegraphToolDescription,
136		Parameters: map[string]any{
137			"query": map[string]any{
138				"type":        "string",
139				"description": "The Sourcegraph search query",
140			},
141			"count": map[string]any{
142				"type":        "number",
143				"description": "Optional number of results to return (default: 10, max: 20)",
144			},
145			"context_window": map[string]any{
146				"type":        "number",
147				"description": "The context around the match to return (default: 10 lines)",
148			},
149			"timeout": map[string]any{
150				"type":        "number",
151				"description": "Optional timeout in seconds (max 120)",
152			},
153		},
154		Required: []string{"query"},
155	}
156}
157
158func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
159	var params SourcegraphParams
160	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
161		return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
162	}
163
164	if params.Query == "" {
165		return NewTextErrorResponse("Query parameter is required"), nil
166	}
167
168	// Set default count if not specified
169	if params.Count <= 0 {
170		params.Count = 10
171	} else if params.Count > 20 {
172		params.Count = 20 // Limit to 20 results
173	}
174
175	if params.ContextWindow <= 0 {
176		params.ContextWindow = 10 // Default context window
177	}
178	client := t.client
179	if params.Timeout > 0 {
180		maxTimeout := 120 // 2 minutes
181		if params.Timeout > maxTimeout {
182			params.Timeout = maxTimeout
183		}
184		client = &http.Client{
185			Timeout: time.Duration(params.Timeout) * time.Second,
186		}
187	}
188
189	// GraphQL query for Sourcegraph search
190	// Create a properly escaped JSON structure
191	type graphqlRequest struct {
192		Query     string `json:"query"`
193		Variables struct {
194			Query string `json:"query"`
195		} `json:"variables"`
196	}
197
198	request := graphqlRequest{
199		Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
200	}
201	request.Variables.Query = params.Query
202
203	// Marshal to JSON to ensure proper escaping
204	graphqlQueryBytes, err := json.Marshal(request)
205	if err != nil {
206		return NewTextErrorResponse("Failed to create GraphQL request: " + err.Error()), nil
207	}
208	graphqlQuery := string(graphqlQueryBytes)
209
210	// Create request to Sourcegraph API
211	req, err := http.NewRequestWithContext(
212		ctx,
213		"POST",
214		"https://sourcegraph.com/.api/graphql",
215		bytes.NewBuffer([]byte(graphqlQuery)),
216	)
217	if err != nil {
218		return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
219	}
220
221	req.Header.Set("Content-Type", "application/json")
222	req.Header.Set("User-Agent", "termai/1.0")
223
224	resp, err := client.Do(req)
225	if err != nil {
226		return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
227	}
228	defer resp.Body.Close()
229
230	if resp.StatusCode != http.StatusOK {
231		// log the error response
232		body, _ := io.ReadAll(resp.Body)
233		if len(body) > 0 {
234			return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
235		}
236
237		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
238	}
239	body, err := io.ReadAll(resp.Body)
240	if err != nil {
241		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
242	}
243
244	// Parse the GraphQL response
245	var result map[string]any
246	if err = json.Unmarshal(body, &result); err != nil {
247		return NewTextErrorResponse("Failed to parse response: " + err.Error()), nil
248	}
249
250	// Format the results in a readable way
251	formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
252	if err != nil {
253		return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
254	}
255
256	return NewTextResponse(formattedResults), nil
257}
258
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// result is the top-level JSON object of the response. contextWindow is the
// number of file lines to show on each side of every match when the file
// content is present in the response.
//
// If the response carries a non-empty top-level "errors" array, only those
// error messages are rendered and no error is returned. An error is returned
// when the data.search.results path is missing or has the wrong type.
//
// At most the first 10 entries of the result list are considered, and only
// entries whose __typename is "FileMatch" are rendered.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	// Check for errors in the GraphQL response
	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	// Walk down to data.search.results.
	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// Search metadata. JSON numbers decode as float64; absent fields read as zero.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	// Limit to 10 results
	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	// shown numbers only the results actually rendered, so skipped entries
	// do not leave gaps in the "Result N" headings.
	shown := 0
	for _, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		// Skip non-FileMatch results
		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		shown++
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", shown, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		// Split the file once per result rather than once per line match.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		lineMatches, _ := fileMatch["lineMatches"].([]any)
		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}
			writeSourcegraphLineMatch(&buffer, lines, lineMatch, contextWindow)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch appends one fenced code block for a single line
// match. lines holds the file's content split on "\n" and is empty when the
// content is unavailable, in which case only the preview is written.
//
// Sourcegraph reports LineMatch.lineNumber 0-based, so it is used directly as
// an index into lines and shown as lineNumber+1.
func writeSourcegraphLineMatch(buffer *strings.Builder, lines []string, lineMatch map[string]any, contextWindow int) {
	rawLine, _ := lineMatch["lineNumber"].(float64)
	preview, _ := lineMatch["preview"].(string)

	index := max(0, int(rawLine)) // 0-based index of the matching line

	buffer.WriteString("```\n")

	if len(lines) == 0 {
		// File content is not available: just show the preview.
		fmt.Fprintf(buffer, "%d| %s\n", index+1, preview)
		buffer.WriteString("```\n\n")
		return
	}

	// Context before the match.
	for j := max(0, index-contextWindow); j < index && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	// The matching line; the extra space after the bar marks it.
	fmt.Fprintf(buffer, "%d|  %s\n", index+1, preview)

	// Context after the match.
	for j := index + 1; j <= index+contextWindow && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	buffer.WriteString("```\n\n")
}