keyword.go

  1package claudetool
  2
  3import (
  4	"context"
  5	_ "embed"
  6	"encoding/json"
  7	"fmt"
  8	"log/slog"
  9	"os/exec"
 10	"strings"
 11
 12	"shelley.exe.dev/llm"
 13)
 14
// LLMServiceProvider defines the interface for getting LLM services.
// KeywordTool uses it (via selectBestLLM) to obtain the model that filters
// search results.
type LLMServiceProvider interface {
	// GetService returns the service for modelID. selectBestLLM treats a
	// non-nil error as "this model is unavailable" and tries the next candidate.
	GetService(modelID string) (llm.Service, error)
	// GetAvailableModels returns model IDs; selectBestLLM passes the first
	// entry to GetService when none of its preferred models can be obtained.
	GetAvailableModels() []string
}
 20
// KeywordTool provides keyword search functionality.
//
// llmProvider supplies the LLM service that filters ripgrep output for
// relevance. workingDir is the directory searches are run from; it is only set
// by NewKeywordToolWithWorkingDir and is nil when the tool is built with
// NewKeywordTool.
type KeywordTool struct {
	llmProvider LLMServiceProvider
	workingDir  *MutableWorkingDir
}
 26
 27// NewKeywordTool creates a new keyword tool with the given LLM provider
 28func NewKeywordTool(provider LLMServiceProvider) *KeywordTool {
 29	return &KeywordTool{llmProvider: provider}
 30}
 31
 32// NewKeywordToolWithWorkingDir creates a new keyword tool with the given LLM provider and shared working directory
 33func NewKeywordToolWithWorkingDir(provider LLMServiceProvider, wd *MutableWorkingDir) *KeywordTool {
 34	return &KeywordTool{llmProvider: provider, workingDir: wd}
 35}
 36
 37// Tool returns the LLM tool definition
 38func (k *KeywordTool) Tool() *llm.Tool {
 39	return &llm.Tool{
 40		Name:        keywordName,
 41		Description: keywordDescription,
 42		InputSchema: llm.MustSchema(keywordInputSchema),
 43		Run:         k.keywordRun,
 44	}
 45}
 46
 47const (
 48	keywordName        = "keyword_search"
 49	keywordDescription = `
 50keyword_search locates files with a search-and-filter approach.
 51Use when navigating unfamiliar codebases with only conceptual understanding or vague user questions.
 52
 53Effective use:
 54- Provide a detailed query for accurate relevance ranking
 55- Prefer MANY SPECIFIC terms over FEW GENERAL ones (high precision beats high recall)
 56- Order search terms by importance (most important first)
 57- Supports regex search terms for flexible matching
 58
 59IMPORTANT: Do NOT use this tool if you have precise information like log lines, error messages, stack traces, filenames, or symbols. Use direct approaches (rg, cat, etc.) instead.
 60`
 61
 62	// If you modify this, update the termui template for prettier rendering.
 63	keywordInputSchema = `
 64{
 65  "type": "object",
 66  "required": [
 67    "query",
 68    "search_terms"
 69  ],
 70  "properties": {
 71    "query": {
 72      "type": "string",
 73      "description": "A detailed statement of what you're trying to find or learn."
 74    },
 75    "search_terms": {
 76      "type": "array",
 77      "items": {
 78        "type": "string"
 79      },
 80      "description": "List of search terms in descending order of importance."
 81    }
 82  }
 83}
 84`
 85)
 86
// keywordInput is the decoded tool input; its JSON field names match the
// properties declared in keywordInputSchema. Query is the statement of what the
// caller wants to find, and SearchTerms are the patterns handed to ripgrep,
// which the schema asks to be listed in descending order of importance.
type keywordInput struct {
	Query       string   `json:"query"`
	SearchTerms []string `json:"search_terms"`
}
 91
// keywordSystemPrompt holds the contents of keyword_system_prompt.txt, embedded
// at build time. keywordRun sends it, whitespace-trimmed, as the system prompt
// of the relevance-filtering request.
//
//go:embed keyword_system_prompt.txt
var keywordSystemPrompt string
 94
 95// FindRepoRoot attempts to find the git repository root from the current directory
 96func FindRepoRoot(wd string) (string, error) {
 97	cmd := exec.Command("git", "rev-parse", "--show-toplevel")
 98	cmd.Dir = wd
 99	out, err := cmd.Output()
100	// todo: cwd here and throughout
101	if err != nil {
102		return "", fmt.Errorf("failed to find git repository root: %w", err)
103	}
104	return strings.TrimSpace(string(out)), nil
105}
106
107// keywordRun is the main implementation using the LLM provider
108func (k *KeywordTool) keywordRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
109	var input keywordInput
110	if err := json.Unmarshal(m, &input); err != nil {
111		return llm.ErrorToolOut(err)
112	}
113	wd := k.workingDir.Get()
114	root, err := FindRepoRoot(wd)
115	if err == nil {
116		wd = root
117	}
118	slog.InfoContext(ctx, "keyword search input", "query", input.Query, "keywords", input.SearchTerms, "wd", wd)
119
120	// first remove stopwords
121	var keep []string
122	for _, term := range input.SearchTerms {
123		out, err := ripgrep(ctx, wd, []string{term})
124		if err != nil {
125			return llm.ErrorToolOut(err)
126		}
127		if len(out) > 64*1024 {
128			slog.InfoContext(ctx, "keyword search result too large", "term", term, "bytes", len(out))
129			continue
130		}
131		keep = append(keep, term)
132	}
133
134	if len(keep) == 0 {
135		return llm.ToolOut{LLMContent: llm.TextContent("each of those search terms yielded too many results")}
136	}
137
138	// peel off keywords until we get a result that fits in the query window
139	var out string
140	for {
141		var err error
142		out, err = ripgrep(ctx, wd, keep)
143		if err != nil {
144			return llm.ErrorToolOut(err)
145		}
146		if len(out) < 128*1024 {
147			break
148		}
149		keep = keep[:len(keep)-1]
150	}
151
152	// Select the best available LLM service
153	llmService, err := k.selectBestLLM(k.llmProvider)
154	if err != nil {
155		return llm.ErrorfToolOut("failed to get LLM service: %w", err)
156	}
157
158	// Create the filtering request
159	system := []llm.SystemContent{
160		{Type: "text", Text: strings.TrimSpace(keywordSystemPrompt)},
161	}
162
163	initialMessage := llm.Message{
164		Role: llm.MessageRoleUser,
165		Content: []llm.Content{
166			llm.StringContent("<pwd>\n" + wd + "\n</pwd>"),
167			llm.StringContent("<ripgrep_results>\n" + out + "\n</ripgrep_results>"),
168			llm.StringContent("<query>\n" + input.Query + "\n</query>"),
169		},
170	}
171
172	req := &llm.Request{
173		Messages: []llm.Message{initialMessage},
174		System:   system,
175	}
176
177	resp, err := llmService.Do(ctx, req)
178	if err != nil {
179		return llm.ErrorfToolOut("failed to send relevance filtering message: %w", err)
180	}
181	if len(resp.Content) != 1 {
182		return llm.ErrorfToolOut("unexpected number of messages (%d) in relevance filtering response: %v", len(resp.Content), resp.Content)
183	}
184
185	filtered := resp.Content[0].Text
186
187	slog.InfoContext(ctx, "keyword search results processed",
188		"bytes", len(out),
189		"lines", strings.Count(out, "\n"),
190		"files", strings.Count(out, "\n\n"),
191		"query", input.Query,
192		"filtered", filtered,
193	)
194
195	return llm.ToolOut{LLMContent: llm.TextContent(resp.Content[0].Text)}
196}
197
// ripgrep runs `rg` in wd, case-insensitively, with 10 lines of context, line
// numbers and file names, searching for any of terms (each passed as its own
// -e pattern). It returns the command's combined stdout and stderr.
//
// An exit status of 1 (no matches) is not an error: the literal string
// "no matches found" is returned instead. Any other failure, including an
// empty terms list, is returned as an error that wraps the underlying cause
// and, for command failures, includes the command's output.
func ripgrep(ctx context.Context, wd string, terms []string) (string, error) {
	if len(terms) == 0 {
		// rg cannot run without a pattern; fail early rather than spawning a
		// process just to collect its usage error.
		return "", fmt.Errorf("search failed: no search terms provided")
	}

	args := make([]string, 0, 5+2*len(terms))
	args = append(args, "-C", "10", "-i", "--line-number", "--with-filename")
	for _, term := range terms {
		// Passing each term via -e keeps patterns that start with '-' from
		// being parsed as flags.
		args = append(args, "-e", term)
	}

	cmd := exec.CommandContext(ctx, "rg", args...)
	cmd.Dir = wd
	out, err := cmd.CombinedOutput()
	if err != nil {
		// ripgrep returns exit code 1 when no matches are found, which is not an error for us
		if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 {
			return "no matches found", nil
		}
		// %w keeps the cause available to errors.Is / errors.As in callers.
		return "", fmt.Errorf("search failed: %w\n%s", err, out)
	}
	return string(out), nil
}
216
217// selectBestLLM selects the best available LLM service for keyword search
218func (k *KeywordTool) selectBestLLM(provider LLMServiceProvider) (llm.Service, error) {
219	// Preferred models in order of preference for keyword search (fast, cheap models preferred)
220	preferredModels := []string{"qwen3-coder-fireworks", "gpt-5-thinking-mini", "gpt5-mini", "claude-sonnet-4.5", "predictable"}
221
222	for _, model := range preferredModels {
223		svc, err := provider.GetService(model)
224		if err == nil {
225			return svc, nil
226		}
227	}
228
229	// If no preferred model is available, try any available model
230	available := provider.GetAvailableModels()
231	if len(available) > 0 {
232		return provider.GetService(available[0])
233	}
234
235	return nil, fmt.Errorf("no LLM services available")
236}