1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"fmt"
  8	"io"
  9	"net/http"
 10	"strings"
 11	"time"
 12)
 13
 14type SourcegraphParams struct {
 15	Query         string `json:"query"`
 16	Count         int    `json:"count,omitempty"`
 17	ContextWindow int    `json:"context_window,omitempty"`
 18	Timeout       int    `json:"timeout,omitempty"`
 19}
 20
 21type SourcegraphResponseMetadata struct {
 22	NumberOfMatches int  `json:"number_of_matches"`
 23	Truncated       bool `json:"truncated"`
 24}
 25
 26type sourcegraphTool struct {
 27	client *http.Client
 28}
 29
 30const (
 31	SourcegraphToolName        = "sourcegraph"
 32	sourcegraphToolDescription = `Search code across public repositories using Sourcegraph's GraphQL API.
 33
 34WHEN TO USE THIS TOOL:
 35- Use when you need to find code examples or implementations across public repositories
 36- Helpful for researching how others have solved similar problems
 37- Useful for discovering patterns and best practices in open source code
 38
 39HOW TO USE:
 40- Provide a search query using Sourcegraph's query syntax
 41- Optionally specify the number of results to return (default: 10)
 42- Optionally set a timeout for the request
 43
 44QUERY SYNTAX:
 45- Basic search: "fmt.Println" searches for exact matches
 46- File filters: "file:.go fmt.Println" limits to Go files
 47- Repository filters: "repo:^github\.com/golang/go$ fmt.Println" limits to specific repos
 48- Language filters: "lang:go fmt.Println" limits to Go code
 49- Boolean operators: "fmt.Println AND log.Fatal" for combined terms
 50- Regular expressions: "fmt\.(Print|Printf|Println)" for pattern matching
 51- Quoted strings: "\"exact phrase\"" for exact phrase matching
 52- Exclude filters: "-file:test" or "-repo:forks" to exclude matches
 53
 54ADVANCED FILTERS:
 55- Repository filters:
 56  * "repo:name" - Match repositories with name containing "name"
 57  * "repo:^github\.com/org/repo$" - Exact repository match
 58  * "repo:org/repo@branch" - Search specific branch
 59  * "repo:org/repo rev:branch" - Alternative branch syntax
 60  * "-repo:name" - Exclude repositories
 61  * "fork:yes" or "fork:only" - Include or only show forks
 62  * "archived:yes" or "archived:only" - Include or only show archived repos
 63  * "visibility:public" or "visibility:private" - Filter by visibility
 64
 65- File filters:
 66  * "file:\.js$" - Files with .js extension
 67  * "file:internal/" - Files in internal directory
 68  * "-file:test" - Exclude test files
 69  * "file:has.content(Copyright)" - Files containing "Copyright"
 70  * "file:has.contributor([email protected])" - Files with specific contributor
 71
 72- Content filters:
 73  * "content:\"exact string\"" - Search for exact string
 74  * "-content:\"unwanted\"" - Exclude files with unwanted content
 75  * "case:yes" - Case-sensitive search
 76
 77- Type filters:
 78  * "type:symbol" - Search for symbols (functions, classes, etc.)
 79  * "type:file" - Search file content only
 80  * "type:path" - Search filenames only
 81  * "type:diff" - Search code changes
 82  * "type:commit" - Search commit messages
 83
 84- Commit/diff search:
 85  * "after:\"1 month ago\"" - Commits after date
 86  * "before:\"2023-01-01\"" - Commits before date
 87  * "author:name" - Commits by author
 88  * "message:\"fix bug\"" - Commits with message
 89
 90- Result selection:
 91  * "select:repo" - Show only repository names
 92  * "select:file" - Show only file paths
 93  * "select:content" - Show only matching content
 94  * "select:symbol" - Show only matching symbols
 95
 96- Result control:
 97  * "count:100" - Return up to 100 results
 98  * "count:all" - Return all results
 99  * "timeout:30s" - Set search timeout
100
101EXAMPLES:
102- "file:.go context.WithTimeout" - Find Go code using context.WithTimeout
103- "lang:typescript useState type:symbol" - Find TypeScript React useState hooks
104- "repo:^github\.com/kubernetes/kubernetes$ pod list type:file" - Find Kubernetes files related to pod listing
105- "repo:sourcegraph/sourcegraph$ after:\"3 months ago\" type:diff database" - Recent changes to database code
106- "file:Dockerfile (alpine OR ubuntu) -content:alpine:latest" - Dockerfiles with specific base images
107- "repo:has.path(\.py) file:requirements.txt tensorflow" - Python projects using TensorFlow
108
109BOOLEAN OPERATORS:
110- "term1 AND term2" - Results containing both terms
111- "term1 OR term2" - Results containing either term
112- "term1 NOT term2" - Results with term1 but not term2
113- "term1 and (term2 or term3)" - Grouping with parentheses
114
115LIMITATIONS:
116- Only searches public repositories
117- Rate limits may apply
118- Complex queries may take longer to execute
119- Maximum of 20 results per query
120
121TIPS:
122- Use specific file extensions to narrow results
123- Add repo: filters for more targeted searches
124- Use type:symbol to find function/method definitions
125- Use type:file to find relevant files`
126)
127
128func NewSourcegraphTool() BaseTool {
129	return &sourcegraphTool{
130		client: &http.Client{
131			Timeout: 30 * time.Second,
132			Transport: &http.Transport{
133				MaxIdleConns:        100,
134				MaxIdleConnsPerHost: 10,
135				IdleConnTimeout:     90 * time.Second,
136			},
137		},
138	}
139}
140
141func (t *sourcegraphTool) Name() string {
142	return SourcegraphToolName
143}
144
145func (t *sourcegraphTool) Info() ToolInfo {
146	return ToolInfo{
147		Name:        SourcegraphToolName,
148		Description: sourcegraphToolDescription,
149		Parameters: map[string]any{
150			"query": map[string]any{
151				"type":        "string",
152				"description": "The Sourcegraph search query",
153			},
154			"count": map[string]any{
155				"type":        "number",
156				"description": "Optional number of results to return (default: 10, max: 20)",
157			},
158			"context_window": map[string]any{
159				"type":        "number",
160				"description": "The context around the match to return (default: 10 lines)",
161			},
162			"timeout": map[string]any{
163				"type":        "number",
164				"description": "Optional timeout in seconds (max 120)",
165			},
166		},
167		Required: []string{"query"},
168	}
169}
170
171func (t *sourcegraphTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
172	var params SourcegraphParams
173	if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
174		return NewTextErrorResponse("Failed to parse sourcegraph parameters: " + err.Error()), nil
175	}
176
177	if params.Query == "" {
178		return NewTextErrorResponse("Query parameter is required"), nil
179	}
180
181	if params.Count <= 0 {
182		params.Count = 10
183	} else if params.Count > 20 {
184		params.Count = 20 // Limit to 20 results
185	}
186
187	if params.ContextWindow <= 0 {
188		params.ContextWindow = 10 // Default context window
189	}
190
191	// Handle timeout with context
192	requestCtx := ctx
193	if params.Timeout > 0 {
194		maxTimeout := 120 // 2 minutes
195		if params.Timeout > maxTimeout {
196			params.Timeout = maxTimeout
197		}
198		var cancel context.CancelFunc
199		requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
200		defer cancel()
201	}
202
203	type graphqlRequest struct {
204		Query     string `json:"query"`
205		Variables struct {
206			Query string `json:"query"`
207		} `json:"variables"`
208	}
209
210	request := graphqlRequest{
211		Query: "query Search($query: String!) { search(query: $query, version: V2, patternType: keyword ) { results { matchCount, limitHit, resultCount, approximateResultCount, missing { name }, timedout { name }, indexUnavailable, results { __typename, ... on FileMatch { repository { name }, file { path, url, content }, lineMatches { preview, lineNumber, offsetAndLengths } } } } } }",
212	}
213	request.Variables.Query = params.Query
214
215	graphqlQueryBytes, err := json.Marshal(request)
216	if err != nil {
217		return ToolResponse{}, fmt.Errorf("failed to marshal GraphQL request: %w", err)
218	}
219	graphqlQuery := string(graphqlQueryBytes)
220
221	req, err := http.NewRequestWithContext(
222		requestCtx,
223		"POST",
224		"https://sourcegraph.com/.api/graphql",
225		bytes.NewBuffer([]byte(graphqlQuery)),
226	)
227	if err != nil {
228		return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
229	}
230
231	req.Header.Set("Content-Type", "application/json")
232	req.Header.Set("User-Agent", "crush/1.0")
233
234	resp, err := t.client.Do(req)
235	if err != nil {
236		return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
237	}
238	defer resp.Body.Close()
239
240	if resp.StatusCode != http.StatusOK {
241		body, _ := io.ReadAll(resp.Body)
242		if len(body) > 0 {
243			return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d, response: %s", resp.StatusCode, string(body))), nil
244		}
245
246		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
247	}
248	body, err := io.ReadAll(resp.Body)
249	if err != nil {
250		return ToolResponse{}, fmt.Errorf("failed to read response body: %w", err)
251	}
252
253	var result map[string]any
254	if err = json.Unmarshal(body, &result); err != nil {
255		return ToolResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
256	}
257
258	formattedResults, err := formatSourcegraphResults(result, params.ContextWindow)
259	if err != nil {
260		return NewTextErrorResponse("Failed to format results: " + err.Error()), nil
261	}
262
263	return NewTextResponse(formattedResults), nil
264}
265
// formatSourcegraphResults renders a decoded Sourcegraph GraphQL search
// response as markdown.
//
// result is the response body decoded into generic JSON values, so numbers
// are float64. contextWindow is the number of file lines shown on each side
// of every match.
//
// If the response carries a non-empty "errors" list, the error messages are
// rendered and returned with a nil error. An error is returned only when
// data, data.search or data.search.results is missing. At most 10 results
// are rendered, and entries that are not FileMatch objects are skipped.
func formatSourcegraphResults(result map[string]any, contextWindow int) (string, error) {
	var buffer strings.Builder

	if apiErrors, ok := result["errors"].([]any); ok && len(apiErrors) > 0 {
		buffer.WriteString("## Sourcegraph API Error\n\n")
		for _, apiErr := range apiErrors {
			errMap, ok := apiErr.(map[string]any)
			if !ok {
				continue
			}
			if message, ok := errMap["message"].(string); ok {
				fmt.Fprintf(&buffer, "- %s\n", message)
			}
		}
		return buffer.String(), nil
	}

	data, ok := result["data"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing data field")
	}

	search, ok := data["search"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing search field")
	}

	searchResults, ok := search["results"].(map[string]any)
	if !ok {
		return "", fmt.Errorf("invalid response format: missing results field")
	}

	// Missing or mistyped counters fall back to their zero values.
	matchCount, _ := searchResults["matchCount"].(float64)
	resultCount, _ := searchResults["resultCount"].(float64)
	limitHit, _ := searchResults["limitHit"].(bool)

	buffer.WriteString("# Sourcegraph Search Results\n\n")
	fmt.Fprintf(&buffer, "Found %d matches across %d results\n", int(matchCount), int(resultCount))

	if limitHit {
		buffer.WriteString("(Result limit reached, try a more specific query)\n")
	}

	buffer.WriteString("\n")

	results, ok := searchResults["results"].([]any)
	if !ok || len(results) == 0 {
		buffer.WriteString("No results found. Try a different query.\n")
		return buffer.String(), nil
	}

	const maxResults = 10
	if len(results) > maxResults {
		results = results[:maxResults]
	}

	for i, res := range results {
		fileMatch, ok := res.(map[string]any)
		if !ok {
			continue
		}

		if typeName, _ := fileMatch["__typename"].(string); typeName != "FileMatch" {
			continue
		}

		repo, _ := fileMatch["repository"].(map[string]any)
		file, _ := fileMatch["file"].(map[string]any)
		if repo == nil || file == nil {
			continue
		}

		repoName, _ := repo["name"].(string)
		filePath, _ := file["path"].(string)
		fileURL, _ := file["url"].(string)
		fileContent, _ := file["content"].(string)

		fmt.Fprintf(&buffer, "## Result %d: %s/%s\n\n", i+1, repoName, filePath)

		if fileURL != "" {
			fmt.Fprintf(&buffer, "URL: %s\n\n", fileURL)
		}

		lineMatches, _ := fileMatch["lineMatches"].([]any)
		if len(lineMatches) == 0 {
			continue
		}

		// Split the file once; every match of this file shares the lines.
		// lines stays nil when the response carries no file content.
		var lines []string
		if fileContent != "" {
			lines = strings.Split(fileContent, "\n")
		}

		for _, lm := range lineMatches {
			lineMatch, ok := lm.(map[string]any)
			if !ok {
				continue
			}

			lineNumber, _ := lineMatch["lineNumber"].(float64)
			preview, _ := lineMatch["preview"].(string)

			writeSourcegraphLineMatch(&buffer, lines, int(lineNumber), preview, contextWindow)
		}
	}

	return buffer.String(), nil
}

// writeSourcegraphLineMatch writes one fenced code block for a single match.
//
// index is the match position as reported by Sourcegraph, which numbers
// lines from 0. It therefore indexes lines directly, and the displayed line
// number is index+1. When lines is nil only the preview is written. Otherwise
// up to contextWindow lines before and after the match are included, and the
// matched line is set apart by an extra space after the "|".
func writeSourcegraphLineMatch(buffer *strings.Builder, lines []string, index int, preview string, contextWindow int) {
	buffer.WriteString("```\n")
	defer buffer.WriteString("```\n\n")

	if lines == nil {
		fmt.Fprintf(buffer, "%d| %s\n", index+1, preview)
		return
	}

	// Leading context: the contextWindow lines just above the match.
	for j := max(0, index-contextWindow); j < min(index, len(lines)); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}

	fmt.Fprintf(buffer, "%d|  %s\n", index+1, preview)

	// Trailing context: the contextWindow lines just below the match. The
	// lower bound is clamped so a bogus negative index cannot address
	// outside the slice.
	for j := max(0, index+1); j < min(index+1+contextWindow, len(lines)); j++ {
		fmt.Fprintf(buffer, "%d| %s\n", j+1, lines[j])
	}
}