sourcegraph.go

  1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"net/http"
 10	"strings"
 11	"sync"
 12	"time"
 13)
 14
// SourcegraphParams holds the arguments of a sourcegraph tool call, decoded
// from the call's JSON input.
type SourcegraphParams struct {
	// Query is the Sourcegraph search query. Run rejects an empty query.
	Query         string `json:"query"`
	// Count is the requested number of results. Run defaults it to 10 when
	// it is not positive and caps it at 20.
	Count         int    `json:"count,omitempty"`
	// ContextWindow is the number of lines of context shown around each match.
	// Run defaults it to 10 when it is not positive.
	ContextWindow int    `json:"context_window,omitempty"`
	// Timeout is the request timeout in seconds. A non-positive value selects
	// the default client; larger values are capped at 120 by getClientForTimeout.
	Timeout       int    `json:"timeout,omitempty"`
}
 21
// SourcegraphResponseMetadata carries summary information about a search
// response.
// NOTE(review): nothing in the visible part of this file populates or reads
// this type; presumably it is attached to tool responses elsewhere — confirm.
type SourcegraphResponseMetadata struct {
	// NumberOfMatches is serialized as "number_of_matches".
	NumberOfMatches int  `json:"number_of_matches"`
	// Truncated is serialized as "truncated".
	Truncated       bool `json:"truncated"`
}
 26
// sourcegraphTool implements the sourcegraph search tool. It holds a default
// HTTP client plus a cache of additional clients keyed by timeout.
type sourcegraphTool struct {
	// client is the default client, used when no positive timeout is requested.
	client       *http.Client
	// clientPool caches one client per requested timeout, keyed by the timeout
	// in seconds.
	clientPool   map[int]*http.Client
	// clientPoolMu guards clientPool.
	clientPoolMu sync.RWMutex
}
 32
const (
	// SourcegraphToolName is the name the tool reports through Info.
	SourcegraphToolName        = "sourcegraph"
	// sourcegraphToolDescription is the usage text the tool reports through
	// Info. It documents the query syntax, filters and limitations.
	// NOTE(review): the text advertises up to 20 results per query, while
	// formatSourcegraphResults renders at most 10 — confirm which limit is
	// intended.
	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.

WHEN TO USE THIS TOOL:
- Use when you need to find code examples or implementations across public repositories
- Helpful for researching how others have solved similar problems
- Useful for discovering patterns and best practices in open source code

HOW TO USE:
- Provide a search query using Sourcegraph's query syntax
- Optionally specify the number of results to return (default: 10)
- Optionally set a timeout for the request

QUERY SYNTAX:
- Basic search: "fmt.Println" searches for exact matches
- File filters: "file:.go fmt.Println" limits to Go files
- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
- Language filters: "lang:go fmt.Println" limits to Go code
- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
- Quoted strings: "\"exact phrase\"" for exact phrase matching
- Exclude filters: "-file:test" or "-repo:forks" to exclude matches

ADVANCED FILTERS:
- Repository filters:
  * "repo:name" - Match repositories with name containing "name"
  * "repo:^github\.com/org/repo$" - Exact repository match
  * "repo:org/repo@branch" - Search specific branch
  * "repo:org/repo rev:branch" - Alternative branch syntax
  * "-repo:name" - Exclude repositories
  * "fork:yes" or "fork:only" - Include or only show forks
  * "archived:yes" or "archived:only" - Include or only show archived repos
  * "visibility:public" or "visibility:private" - Filter by visibility

- File filters:
  * "file:\.js$" - Files with .js extension
  * "file:internal/" - Files in internal directory
  * "-file:test" - Exclude test files
  * "file:has.content(Copyright)" - Files containing "Copyright"
  * "file:has.contributor([email protected])" - Files with specific contributor

- Content filters:
  * "content:\"exact string\"" - Search for exact string
  * "-content:\"unwanted\"" - Exclude files with unwanted content
  * "case:yes" - Case-sensitive search

- Type filters:
  * "type:symbol" - Search for symbols (functions, classes, etc.)
  * "type:file" - Search file content only
  * "type:path" - Search filenames only
  * "type:diff" - Search code changes
  * "type:commit" - Search commit messages

- Commit/diff search:
  * "after:\"1 month ago\"" - Commits after date
  * "before:\"2023-01-01\"" - Commits before date
  * "author:name" - Commits by author
  * "message:\"fix bug\"" - Commits with message

- Result selection:
  * "select:repo" - Show only repository names
  * "select:file" - Show only file paths
  * "select:content" - Show only matching content
  * "select:symbol" - Show only matching symbols

- Result control:
  * "count:100" - Return up to 100 results
  * "count:all" - Return all results
  * "timeout:30s" - Set search timeout

EXAMPLES:
- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow

BOOLEAN OPERATORS:
- "term1 AND term2" - Results containing both terms
- "term1 OR term2" - Results containing either term
- "term1 NOT term2" - Results with term1 but not term2
- "term1 and (term2 or term3)" - Grouping with parentheses

LIMITATIONS:
- Only searches public repositories
- Rate limits may apply
- Complex queries may take longer to execute
- Maximum of 20 results per query

TIPS:
- Use specific file extensions to narrow results
- Add repo: filters for more targeted searches
- Use type:symbol to find function/method definitions
- Use type:file to find relevant files`
)
130
131func NewSourcegraphTool() BaseTool {
132	return &sourcegraphTool{
133		client: &http.Client{
134			Timeout: 30 * time.Second,
135			Transport: &http.Transport{
136				MaxIdleConns:        100,
137				MaxIdleConnsPerHost: 10,
138				IdleConnTimeout:     90 * time.Second,
139			},
140		},
141		clientPool: make(map[int]*http.Client),
142	}
143}
144
145// getClientForTimeout returns a cached client for the given timeout or the default client
146func (t *sourcegraphTool) getClientForTimeout(timeout int) *http.Client {
147	if timeout <= 0 {
148		return t.client
149	}
150
151	maxTimeout := 120 // 2 minutes
152	if timeout > maxTimeout {
153		timeout = maxTimeout
154	}
155
156	// Check if we have a cached client for this timeout
157	t.clientPoolMu.RLock()
158	if client, exists := t.clientPool[timeout]; exists {
159		t.clientPoolMu.RUnlock()
160		return client
161	}
162	t.clientPoolMu.RUnlock()
163
164	// Create and cache a new client
165	t.clientPoolMu.Lock()
166	defer t.clientPoolMu.Unlock()
167
168	// Double-check in case another goroutine created it
169	if client, exists := t.clientPool[timeout]; exists {
170		return client
171	}
172
173	client := &http.Client{
174		Timeout: time.Duration(timeout) * time.Second,
175		Transport: &http.Transport{
176			MaxIdleConns:        100,
177			MaxIdleConnsPerHost: 10,
178			IdleConnTimeout:     90 * time.Second,
179		},
180	}
181	t.clientPool[timeout] = client
182	return client
183}
184
185func (t *sourcegraphTool) Info() ToolInfo {
186	return ToolInfo{
187		Name:        SourcegraphToolName,
188		Description: sourcegraphToolDescription,
189		Parameters: map[string]any{
190			"query": map[string]any{
191				"type":        "string",
192				"description": "The Sourcegraph search query",
193			},
194			"count": map[string]any{
195				"type":        "number",
196				"description": "Optional number of results to return (default: 10, max: 20)",
197			},
198			"context_window": map[string]any{
199				"type":        "number",
200				"description": "The context around the match to return (default: 10 lines)",
201			},
202			"timeout": map[string]any{
203				"type":        "number",
204				"description": "Optional timeout in seconds (max 120)",
205			},
206		},
207		Required: []string{"query"},
208	}
209}
210
211func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
212	var params SourcegraphParams
213	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
214		return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
215	}
216
217	if params.Query == "" {
218		return NewTextErrorResponse("Query parameter is required"), nil
219	}
220
221	if params.Count <= 0 {
222		params.Count = 10
223	} else if params.Count > 20 {
224		params.Count = 20 // Limit to 20 results
225	}
226
227	if params.ContextWindow <= 0 {
228		params.ContextWindow = 10 // Default context window
229	}
230	client := t.getClientForTimeout(params.Timeout)
231
232	type graphqlRequest struct {
233		Query     string `json:"query"`
234		Variables struct {
235			Query string `json:"query"`
236		} `json:"variables"`
237	}
238
239	request := graphqlRequest{
240		Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
241	}
242	request.Variables.Query = params.Query
243
244	graphqlQueryBytes, err := json.Marshal(request)
245	if err != nil {
246		return ToolResponse{}, fmt.Errorf("failed to marshal GraphQL request: %w", err)
247	}
248	graphqlQuery := string(graphqlQueryBytes)
249
250	req, err := http.NewRequestWithContext(
251		ctx,
252		"POST",
253		"https://sourcegraph.com/.api/graphql",
254		bytes.NewBuffer([]byte(graphqlQuery)),
255	)
256	if err != nil {
257		return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
258	}
259
260	req.Header.Set("Content-Type", "application/json")
261	req.Header.Set("User-Agent", "crush/1.0")
262
263	resp, err := client.Do(req)
264	if err != nil {
265		return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
266	}
267	defer resp.Body.Close()
268
269	if resp.StatusCode != http.StatusOK {
270		body, _ := io.ReadAll(resp.Body)
271		if len(body) > 0 {
272			return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
273		}
274
275		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
276	}
277	body, err := io.ReadAll(resp.Body)
278	if err != nil {
279		return ToolResponse{}, fmt.Errorf("failed to read response body: %w", err)
280	}
281
282	var result map[string]any
283	if err = json.Unmarshal(body, &result); err != nil {
284		return ToolResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
285	}
286
287	formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
288	if err != nil {
289		return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
290	}
291
292	return NewTextResponse(formattedResults), nil
293}
294
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as Markdown.
//
// result is the response body decoded into a generic map. contextWindow is
// the number of file lines shown before and after each matched line when the
// response includes the file content.
//
// If the response has a non-empty "errors" list, the error messages are
// rendered and returned with a nil error. An error is returned only when the
// data/search/results nesting is missing. At most the first 10 results are
// considered, and only entries whose __typename is "FileMatch" are rendered.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// encoding/json decodes JSON numbers into float64 when the target is any.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	for i, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}
		lineMatches, _ := fileMatch["lineMatches"].([]any)

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		// The number is the entry's position in the response, so skipped
		// non-FileMatch entries leave gaps in the numbering.
		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", i+1, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		// Split the file once per result rather than once per line match.
		var lines []string
		if fileContent != "" && len(lineMatches) > 0 {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}

			lineNumber, _ := lineMatch["lineNumber"].(float64)
			preview, _ := lineMatch["preview"].(string)
			writeSourcegraphLineMatch(&buffer, lines, int(lineNumber), preview, contextWindow)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch writes one fenced block for a single line match.
// With no file lines available it writes only the preview; otherwise it writes
// up to contextWindow lines before the match, the preview, and up to
// contextWindow lines after it.
//
// NOTE(review): lineNumber is treated as 1-based here. Sourcegraph's GraphQL
// schema appears to document LineMatch.lineNumber as 0-based — confirm, since
// that would shift the labels by one and repeat the matched line.
func writeSourcegraphLineMatch(buffer *strings.Builder, lines []string, lineNumber int, preview string, contextWindow int) {
	buffer.WriteString("```\n")

	if len(lines) == 0 {
		fmt.Fprintf(buffer, "%d| %s\n", lineNumber, preview)
		buffer.WriteString("```\n\n")
		return
	}

	// startLine is 1-based and never below 1, so the index below is never
	// negative.
	startLine := max(1, lineNumber-contextWindow)
	for j := startLine - 1; j < lineNumber-1 && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	// The matched line uses two spaces after the bar, unlike context lines.
	fmt.Fprintf(buffer, "%d|  %s\n", lineNumber, preview)

	// Clamp the start index to 0 so a negative line number from a malformed
	// response cannot index out of range.
	endLine := lineNumber + contextWindow
	for j := max(lineNumber, 0); j < endLine && j < len(lines); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	buffer.WriteString("```\n\n")
}