sourcegraph.go

  1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"net/http"
 10	"strings"
 11	"time"
 12)
 13
// Identity and model-facing description of the Sourcegraph search tool.
//
// SourcegraphToolName is reported as ToolInfo.Name by Info, and
// sourcegraphToolDescription as ToolInfo.Description. The description is a raw
// string literal, so backslashes and escaped quotes inside it are emitted
// verbatim.
//
// NOTE(review): the "file:has.contributor([email protected])" example looks like
// an obfuscated e-mail placeholder rather than intended text — confirm what the
// example should read.
// NOTE(review): the description advertises a maximum of 20 results, while
// formatSourcegraphResults only renders the first 10 — confirm which limit is
// intended.
const (
	SourcegraphToolName        = "sourcegraph"
	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.

WHEN TO USE THIS TOOL:
- Use when you need to find code examples or implementations across public repositories
- Helpful for researching how others have solved similar problems
- Useful for discovering patterns and best practices in open source code

HOW TO USE:
- Provide a search query using Sourcegraph's query syntax
- Optionally specify the number of results to return (default: 10)
- Optionally set a timeout for the request

QUERY SYNTAX:
- Basic search: "fmt.Println" searches for exact matches
- File filters: "file:.go fmt.Println" limits to Go files
- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
- Language filters: "lang:go fmt.Println" limits to Go code
- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
- Quoted strings: "\"exact phrase\"" for exact phrase matching
- Exclude filters: "-file:test" or "-repo:forks" to exclude matches

ADVANCED FILTERS:
- Repository filters:
  * "repo:name" - Match repositories with name containing "name"
  * "repo:^github\.com/org/repo$" - Exact repository match
  * "repo:org/repo@branch" - Search specific branch
  * "repo:org/repo rev:branch" - Alternative branch syntax
  * "-repo:name" - Exclude repositories
  * "fork:yes" or "fork:only" - Include or only show forks
  * "archived:yes" or "archived:only" - Include or only show archived repos
  * "visibility:public" or "visibility:private" - Filter by visibility

- File filters:
  * "file:\.js$" - Files with .js extension
  * "file:internal/" - Files in internal directory
  * "-file:test" - Exclude test files
  * "file:has.content(Copyright)" - Files containing "Copyright"
  * "file:has.contributor([email protected])" - Files with specific contributor

- Content filters:
  * "content:\"exact string\"" - Search for exact string
  * "-content:\"unwanted\"" - Exclude files with unwanted content
  * "case:yes" - Case-sensitive search

- Type filters:
  * "type:symbol" - Search for symbols (functions, classes, etc.)
  * "type:file" - Search file content only
  * "type:path" - Search filenames only
  * "type:diff" - Search code changes
  * "type:commit" - Search commit messages

- Commit/diff search:
  * "after:\"1 month ago\"" - Commits after date
  * "before:\"2023-01-01\"" - Commits before date
  * "author:name" - Commits by author
  * "message:\"fix bug\"" - Commits with message

- Result selection:
  * "select:repo" - Show only repository names
  * "select:file" - Show only file paths
  * "select:content" - Show only matching content
  * "select:symbol" - Show only matching symbols

- Result control:
  * "count:100" - Return up to 100 results
  * "count:all" - Return all results
  * "timeout:30s" - Set search timeout

EXAMPLES:
- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow

BOOLEAN OPERATORS:
- "term1 AND term2" - Results containing both terms
- "term1 OR term2" - Results containing either term
- "term1 NOT term2" - Results with term1 but not term2
- "term1 and (term2 or term3)" - Grouping with parentheses

LIMITATIONS:
- Only searches public repositories
- Rate limits may apply
- Complex queries may take longer to execute
- Maximum of 20 results per query

TIPS:
- Use specific file extensions to narrow results
- Add repo: filters for more targeted searches
- Use type:symbol to find function/method definitions
- Use type:file to find relevant files
- For more details on query syntax, visit: https://docs.sourcegraph.com/code_search/queries`
)
112
// SourcegraphParams is the JSON input accepted by the sourcegraph tool; Run
// decodes ToolCall.Input into it.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty value.
	Query   string `json:"query"`
	// Count is the requested number of results. Run replaces values <= 0 with
	// 10 and clamps values above 20 to 20.
	Count   int    `json:"count,omitempty"`
	// Timeout is the request timeout in seconds. Values <= 0 leave the tool's
	// default client timeout in effect; Run clamps larger values to 120.
	Timeout int    `json:"timeout,omitempty"`
}
118
// SourcegraphPermissionsParams carries the same fields and JSON tags as
// SourcegraphParams.
//
// NOTE(review): nothing in this file references the type; presumably it is the
// payload shown when asking for permission to run a search — confirm against
// callers.
type SourcegraphPermissionsParams struct {
	// Query is the Sourcegraph search query.
	Query   string `json:"query"`
	// Count is the requested number of results.
	Count   int    `json:"count,omitempty"`
	// Timeout is the request timeout in seconds.
	Timeout int    `json:"timeout,omitempty"`
}
124
// sourcegraphTool implements the sourcegraph search tool (see Info and Run).
type sourcegraphTool struct {
	// client is the default HTTP client used by Run; NewSourcegraphTool
	// configures it with a 30-second timeout.
	client *http.Client
}
128
129func NewSourcegraphTool() BaseTool {
130	return &sourcegraphTool{
131		client: &http.Client{
132			Timeout: 30 * time.Second,
133		},
134	}
135}
136
137func (t *sourcegraphTool) Info() ToolInfo {
138	return ToolInfo{
139		Name:        SourcegraphToolName,
140		Description: sourcegraphToolDescription,
141		Parameters: map[string]any{
142			"query": map[string]any{
143				"type":        "string",
144				"description": "The Sourcegraph search query",
145			},
146			"count": map[string]any{
147				"type":        "number",
148				"description": "Optional number of results to return (default: 10, max: 20)",
149			},
150			"timeout": map[string]any{
151				"type":        "number",
152				"description": "Optional timeout in seconds (max 120)",
153			},
154		},
155		Required: []string{"query"},
156	}
157}
158
159func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
160	var params SourcegraphParams
161	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
162		return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
163	}
164
165	if params.Query == "" {
166		return NewTextErrorResponse("Query parameter is required"), nil
167	}
168
169	// Set default count if not specified
170	if params.Count <= 0 {
171		params.Count = 10
172	} else if params.Count > 20 {
173		params.Count = 20 // Limit to 20 results
174	}
175
176	client := t.client
177	if params.Timeout > 0 {
178		maxTimeout := 120 // 2 minutes
179		if params.Timeout > maxTimeout {
180			params.Timeout = maxTimeout
181		}
182		client = &http.Client{
183			Timeout: time.Duration(params.Timeout) * time.Second,
184		}
185	}
186
187	// GraphQL query for Sourcegraph search
188	// Create a properly escaped JSON structure
189	type graphqlRequest struct {
190		Query     string `json:"query"`
191		Variables struct {
192			Query string `json:"query"`
193		} `json:"variables"`
194	}
195
196	request := graphqlRequest{
197		Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: standard ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
198	}
199	request.Variables.Query = params.Query
200
201	// Marshal to JSON to ensure proper escaping
202	graphqlQueryBytes, err := json.Marshal(request)
203	if err != nil {
204		return NewTextErrorResponse("Failed to create GraphQL request: " + err.Error()), nil
205	}
206	graphqlQuery := string(graphqlQueryBytes)
207
208	// Create request to Sourcegraph API
209	req, err := http.NewRequestWithContext(
210		ctx,
211		"POST",
212		"https://sourcegraph.com/.api/graphql",
213		bytes.NewBuffer([]byte(graphqlQuery)),
214	)
215	if err != nil {
216		return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
217	}
218
219	req.Header.Set("Content-Type", "application/json")
220	req.Header.Set("User-Agent", "termai/1.0")
221
222	resp, err := client.Do(req)
223	if err != nil {
224		return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
225	}
226	defer resp.Body.Close()
227
228	if resp.StatusCode != http.StatusOK {
229		// log the error response
230		body, _ := io.ReadAll(resp.Body)
231		if len(body) > 0 {
232			return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
233		}
234
235		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
236	}
237	body, err := io.ReadAll(resp.Body)
238	if err != nil {
239		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
240	}
241
242	// Parse the GraphQL response
243	var result map[string]any
244	if err = json.Unmarshal(body, &result); err != nil {
245		return NewTextErrorResponse("Failed to parse response: " + err.Error()), nil
246	}
247
248	// Format the results in a readable way
249	formattedResults, err := formatSourcegraphResults(result)
250	if err != nil {
251		return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
252	}
253
254	return NewTextResponse(formattedResults), nil
255}
256
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// If the response carries a non-empty top-level "errors" array, the error
// messages are listed and no search data is read. Otherwise the function
// expects data.search.results to be present and returns an error when any of
// those levels is missing. At most the first 10 entries of the result list are
// considered; entries that are not FileMatch objects, or that lack repository
// or file information, are skipped. Each rendered result gets a numbered
// heading, its URL when present, and one fenced block per line match showing
// the match with up to 10 lines of context on either side when the file
// content is available.
func formatSourcegraphResults(result map[string]any) (string, error) {
	var buffer strings.Builder

	// GraphQL reports failures in a top-level "errors" array.
	if errs, ok := result["errors"].([]any); ok && len(errs) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, e := range errs {
			errMap, ok := e.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	// Walk down to data.search.results.
	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// Search metadata. encoding/json decodes JSON numbers into float64 when
	// the target is any; absent fields fall back to zero values.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	// Only the first maxResults entries are considered for display.
	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	// shown counts rendered results so headings stay contiguous even when
	// some entries are skipped.
	shown := 0
	for _, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		// Skip non-FileMatch results
		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}
		lineMatches, _ := fileMatch["lineMatches"].([]any)

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		shown++
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", shown, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		// Split the file once per result rather than once per line match.
		// lines stays nil when no content was returned.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}

			lineNumber, _ := lineMatch["lineNumber"].(float64)
			preview, _ := lineMatch["preview"].(string)
			writeSourcegraphLineMatch(&buffer, lines, int(lineNumber), preview)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch writes one fenced block for a single line match.
//
// lineIndex is the 0-based line index of the match (Sourcegraph's GraphQL
// schema documents LineMatch.lineNumber as 0-based); it is printed 1-based.
// When lines is non-empty, up to 10 lines of context before and after the
// match are included and the matching line is set off with an extra space
// after the "|". When lines is empty only the preview is written.
func writeSourcegraphLineMatch(buffer *strings.Builder, lines []string, lineIndex int, preview string) {
	const contextLines = 10

	buffer.WriteString("```\n")

	// Context before the match.
	for j := max(0, lineIndex-contextLines); j < lineIndex && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	// The matching line itself.
	if len(lines) > 0 {
		fmt.Fprintf(buffer, "%d|  %s\n", lineIndex+1, preview)
	} else {
		fmt.Fprintf(buffer, "%d| %s\n", lineIndex+1, preview)
	}

	// Context after the match; max guards against a negative index.
	for j := max(0, lineIndex+1); j <= lineIndex+contextLines && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	buffer.WriteString("```\n\n")
}