1package claudetool
2
3import (
4 "context"
5 _ "embed"
6 "encoding/json"
7 "fmt"
8 "log/slog"
9 "os/exec"
10 "strings"
11
12 "shelley.exe.dev/llm"
13)
14
// LLMServiceProvider defines the interface for getting LLM services.
type LLMServiceProvider interface {
	// GetService returns the LLM service for the given model ID, or an
	// error if that model is not available.
	GetService(modelID string) (llm.Service, error)
	// GetAvailableModels lists the model IDs this provider can serve.
	GetAvailableModels() []string
}
20
// KeywordTool provides keyword search functionality: it runs ripgrep over a
// working directory and uses an LLM to filter the raw matches for relevance.
type KeywordTool struct {
	// llmProvider supplies the model used for relevance filtering.
	llmProvider LLMServiceProvider
	// workingDir is the shared mutable working directory. It is left nil by
	// NewKeywordTool; NOTE(review): keywordRun calls workingDir.Get(), so
	// confirm MutableWorkingDir.Get handles a nil receiver.
	workingDir *MutableWorkingDir
}
26
27// NewKeywordTool creates a new keyword tool with the given LLM provider
28func NewKeywordTool(provider LLMServiceProvider) *KeywordTool {
29 return &KeywordTool{llmProvider: provider}
30}
31
32// NewKeywordToolWithWorkingDir creates a new keyword tool with the given LLM provider and shared working directory
33func NewKeywordToolWithWorkingDir(provider LLMServiceProvider, wd *MutableWorkingDir) *KeywordTool {
34 return &KeywordTool{llmProvider: provider, workingDir: wd}
35}
36
// Tool returns the LLM tool definition, wiring the keyword_search name,
// description, input schema, and run function together.
func (k *KeywordTool) Tool() *llm.Tool {
	return &llm.Tool{
		Name:        keywordName,
		Description: keywordDescription,
		InputSchema: llm.MustSchema(keywordInputSchema),
		Run:         k.keywordRun,
	}
}
46
const (
	// keywordName is the tool name exposed to the LLM.
	keywordName = "keyword_search"
	// keywordDescription tells the model when (and when not) to use this tool.
	keywordDescription = `
keyword_search locates files with a search-and-filter approach.
Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions.

Effective use:
- Provide a detailed query for accurate relevance ranking
- Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall)
- Order search terms by importance (most important first)
- Supports regex search terms for flexible matching

IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, stack traces, filenames, or symbols. Use direct approaches (rg, cat, etc.) instead.
`

	// keywordInputSchema is the JSON schema for the tool's input payload;
	// it unmarshals into keywordInput.
	// If you modify this, update the termui template for prettier rendering.
	keywordInputSchema = `
{
  "type": "object",
  "required": [
    "query",
    "search_terms"
  ],
  "properties": {
    "query": {
      "type": "string",
      "description": "A detailed statement of what you're trying to find or learn."
    },
    "search_terms": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "List of search terms in descending order of importance."
    }
  }
}
`
)
86
// keywordInput mirrors keywordInputSchema: the decoded tool input.
type keywordInput struct {
	Query       string   `json:"query"`        // detailed statement of what the caller wants to find
	SearchTerms []string `json:"search_terms"` // terms in descending order of importance
}
91
// keywordSystemPrompt is the system prompt for the relevance-filtering LLM
// request, embedded from keyword_system_prompt.txt at build time.
//
//go:embed keyword_system_prompt.txt
var keywordSystemPrompt string
94
// FindRepoRoot returns the root of the git repository containing wd, as
// reported by `git rev-parse --show-toplevel`. It returns an error when wd
// is not inside a git work tree or git cannot be run.
// TODO: cwd handling here and throughout.
func FindRepoRoot(wd string) (string, error) {
	cmd := exec.Command("git", "rev-parse", "--show-toplevel")
	cmd.Dir = wd
	out, err := cmd.Output()
	if err != nil {
		// Output captures git's stderr in ExitError.Stderr; surface it so the
		// caller can see why git failed (e.g. "not a git repository").
		if exitErr, ok := err.(*exec.ExitError); ok && len(exitErr.Stderr) > 0 {
			return "", fmt.Errorf("failed to find git repository root: %w: %s", err, strings.TrimSpace(string(exitErr.Stderr)))
		}
		return "", fmt.Errorf("failed to find git repository root: %w", err)
	}
	return strings.TrimSpace(string(out)), nil
}
106
107// keywordRun is the main implementation using the LLM provider
108func (k *KeywordTool) keywordRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
109 var input keywordInput
110 if err := json.Unmarshal(m, &input); err != nil {
111 return llm.ErrorToolOut(err)
112 }
113 wd := k.workingDir.Get()
114 root, err := FindRepoRoot(wd)
115 if err == nil {
116 wd = root
117 }
118 slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd)
119
120 // first remove stopwords
121 var keep []string
122 for _, term := range input.SearchTerms {
123 out, err := ripgrep(ctx, wd, []string{term})
124 if err != nil {
125 return llm.ErrorToolOut(err)
126 }
127 if len(out) > 64*1024 {
128 slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out))
129 continue
130 }
131 keep = append(keep, term)
132 }
133
134 if len(keep) == 0 {
135 return llm.ToolOut{LLMContent: llm.TextContent("each of those search terms yielded too many results")}
136 }
137
138 // peel off keywords until we get a result that fits in the query window
139 var out string
140 for {
141 var err error
142 out, err = ripgrep(ctx, wd, keep)
143 if err != nil {
144 return llm.ErrorToolOut(err)
145 }
146 if len(out) < 128*1024 {
147 break
148 }
149 keep = keep[:len(keep)-1]
150 }
151
152 // Select the best available LLM service
153 llmService, err := k.selectBestLLM(k.llmProvider)
154 if err != nil {
155 return llm.ErrorfToolOut("failed to get LLM service: %w", err)
156 }
157
158 // Create the filtering request
159 system := []llm.SystemContent{
160 {Type: "text", Text: strings.TrimSpace(keywordSystemPrompt)},
161 }
162
163 initialMessage := llm.Message{
164 Role: llm.MessageRoleUser,
165 Content: []llm.Content{
166 llm.StringContent("<pwd>\n" + wd + "\n</pwd>"),
167 llm.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"),
168 llm.StringContent("<query>\n" + input.Query + "\n</query>"),
169 },
170 }
171
172 req := &llm.Request{
173 Messages: []llm.Message{initialMessage},
174 System: system,
175 }
176
177 resp, err := llmService.Do(ctx, req)
178 if err != nil {
179 return llm.ErrorfToolOut("failed to send relevance filtering message: %w", err)
180 }
181 if len(resp.Content) != 1 {
182 return llm.ErrorfToolOut("unexpected number of messages (%d) in relevance filtering response: %v", len(resp.Content), resp.Content)
183 }
184
185 filtered := resp.Content[0].Text
186
187 slog.InfoContext(ctx, "keyword search results processed",
188 "bytes", len(out),
189 "lines", strings.Count(out, "\n"),
190 "files", strings.Count(out, "\n\n"),
191 "query", input.Query,
192 "filtered", filtered,
193 )
194
195 return llm.ToolOut{LLMContent: llm.TextContent(resp.Content[0].Text)}
196}
197
// ripgrep runs rg in wd, searching case-insensitively for any of the given
// terms (each passed as a separate -e pattern) with 10 lines of context,
// line numbers, and filenames. An exit status of 1 (no matches) is reported
// as the string "no matches found" rather than an error.
func ripgrep(ctx context.Context, wd string, terms []string) (string, error) {
	args := make([]string, 0, 5+2*len(terms))
	args = append(args, "-C", "10", "-i", "--line-number", "--with-filename")
	for _, term := range terms {
		args = append(args, "-e", term)
	}
	cmd := exec.CommandContext(ctx, "rg", args...)
	cmd.Dir = wd
	out, err := cmd.CombinedOutput()
	if err == nil {
		return string(out), nil
	}
	// ripgrep returns exit code 1 when no matches are found, which is not an error for us
	if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
		return "no matches found", nil
	}
	return "", fmt.Errorf("search failed: %v\n%s", err, out)
}
216
217// selectBestLLM selects the best available LLM service for keyword search
218func (k *KeywordTool) selectBestLLM(provider LLMServiceProvider) (llm.Service, error) {
219 // Preferred models in order of preference for keyword search (fast, cheap models preferred)
220 preferredModels := []string{"qwen3-coder-fireworks", "gpt-5-thinking-mini", "gpt5-mini", "claude-sonnet-4.5", "predictable"}
221
222 for _, model := range preferredModels {
223 svc, err := provider.GetService(model)
224 if err == nil {
225 return svc, nil
226 }
227 }
228
229 // If no preferred model is available, try any available model
230 available := provider.GetAvailableModels()
231 if len(available) > 0 {
232 return provider.GetService(available[0])
233 }
234
235 return nil, fmt.Errorf("no LLM services available")
236}