diff --git a/README.md b/README.md index 0003c9223283cdfd59ea40b804d3856dd7923db9..6a57c7934d0714cd4e0ae3f30fab108d03196b98 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ - **LSP-Enhanced:** Crush uses LSPs for additional context, just like you do - **Extensible:** add capabilities via MCPs (`http`, `stdio`, and `sse`) - **Works Everywhere:** first-class support in every terminal on macOS, Linux, Windows (PowerShell and WSL), Android, FreeBSD, OpenBSD, and NetBSD -- **Industrial Grade:** built on the Charm ecosystem, powering 25k+ applications, from leading open source projects, to business-critical infrastructure +- **Industrial Grade:** built on the Charm ecosystem, powering 25k+ applications, from leading open source projects to business-critical infrastructure ## Installation diff --git a/internal/agent/agent.go b/internal/agent/agent.go index 3fe13094666b1b8ff249459ef8674e61e03c9f2a..c916cfd886372ab86f6d1fbb0e8b7bde2c87dabb 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -820,7 +820,6 @@ func (a *sessionAgent) generateTitle(ctx context.Context, sessionID string, user // Clean up title. var title string title = strings.ReplaceAll(resp.Response.Content.Text(), "\n", " ") - slog.Info("generated title", "title", title) // Remove thinking tags if present. 
title = thinkTagRegex.ReplaceAllString(title, "") diff --git a/internal/agent/coordinator.go b/internal/agent/coordinator.go index dbc9b822db0c0d29fa616303c91199de666c716d..777d5b60fa6be1efd62731c6f52b3aaa6c98498e 100644 --- a/internal/agent/coordinator.go +++ b/internal/agent/coordinator.go @@ -20,6 +20,7 @@ import ( "github.com/charmbracelet/crush/internal/agent/hyper" "github.com/charmbracelet/crush/internal/agent/prompt" "github.com/charmbracelet/crush/internal/agent/tools" + "github.com/charmbracelet/crush/internal/agent/tools/mcp" "github.com/charmbracelet/crush/internal/config" "github.com/charmbracelet/crush/internal/csync" "github.com/charmbracelet/crush/internal/history" @@ -412,6 +413,11 @@ func (c *coordinator) buildTools(ctx context.Context, agent config.Agent) ([]fan } } + // Wait for MCP initialization to complete before reading MCP tools. + if err := mcp.WaitForInit(ctx); err != nil { + return nil, fmt.Errorf("failed to wait for MCP initialization: %w", err) + } + for _, tool := range tools.GetMCPTools(c.permissions, c.cfg.WorkingDir()) { if agent.AllowedMCP == nil { // No MCP restrictions diff --git a/internal/agent/tools/mcp/init.go b/internal/agent/tools/mcp/init.go index bb43f7f157dc1cf2d094354a4e709e0beb1f52b6..e1e7d609efc86d0dcb510fa5963552f7d487a134 100644 --- a/internal/agent/tools/mcp/init.go +++ b/internal/agent/tools/mcp/init.go @@ -29,6 +29,8 @@ var ( sessions = csync.NewMap[string, *mcp.ClientSession]() states = csync.NewMap[string, ClientInfo]() broker = pubsub.NewBroker[Event]() + initOnce sync.Once + initDone = make(chan struct{}) ) // State represents the current state of an MCP client @@ -197,6 +199,18 @@ func Initialize(ctx context.Context, permissions permission.Service, cfg *config }(name, m) } wg.Wait() + initOnce.Do(func() { close(initDone) }) +} + +// WaitForInit blocks until MCP initialization is complete. +// If Initialize was never called, this blocks until ctx is done and returns ctx.Err(). 
+func WaitForInit(ctx context.Context) error { + select { + case <-initDone: + return nil + case <-ctx.Done(): + return ctx.Err() + } } func getOrRenewClient(ctx context.Context, name string) (*mcp.ClientSession, error) { diff --git a/internal/agent/tools/search.go b/internal/agent/tools/search.go index 64c3219f169b1c8ce8284b86203e84bfb19d0e59..9df7be8764ab952a23f25d624f72748696a86aac 100644 --- a/internal/agent/tools/search.go +++ b/internal/agent/tools/search.go @@ -4,10 +4,13 @@ import ( "context" "fmt" "io" + "math/rand/v2" "net/http" "net/url" "slices" "strings" + "sync" + "time" "golang.org/x/net/html" ) @@ -20,28 +23,41 @@ type SearchResult struct { Position int } -// searchDuckDuckGo performs a web search using DuckDuckGo's HTML endpoint. +var userAgents = []string{ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 
Safari/537.36 Edg/131.0.0.0", +} + +var acceptLanguages = []string{ + "en-US,en;q=0.9", + "en-US,en;q=0.9,es;q=0.8", + "en-GB,en;q=0.9,en-US;q=0.8", + "en-US,en;q=0.5", + "en-CA,en;q=0.9,en-US;q=0.8", +} + func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, maxResults int) ([]SearchResult, error) { if maxResults <= 0 { maxResults = 10 } - formData := url.Values{} - formData.Set("q", query) - formData.Set("b", "") - formData.Set("kl", "") + searchURL := "https://lite.duckduckgo.com/lite/?q=" + url.QueryEscape(query) - req, err := http.NewRequestWithContext(ctx, "POST", "https://html.duckduckgo.com/html", strings.NewReader(formData.Encode())) + req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) if err != nil { return nil, fmt.Errorf("failed to create request: %w", err) } - req.Header.Set("Content-Type", "application/x-www-form-urlencoded") - req.Header.Set("User-Agent", BrowserUserAgent) - req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") - req.Header.Set("Accept-Language", "en-US,en;q=0.5") - req.Header.Set("Accept-Encoding", "gzip, deflate") - req.Header.Set("Referer", "https://duckduckgo.com/") + setRandomizedHeaders(req) resp, err := client.Do(req) if err != nil { @@ -49,10 +65,8 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, ma } defer resp.Body.Close() - // Accept both 200 (OK) and 202 (Accepted). - // DuckDuckGo may still return 202 for rate limiting or bot detection. 
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted { - return nil, fmt.Errorf("search failed with status code: %d (DuckDuckGo may be rate limiting requests)", resp.StatusCode) + return nil, fmt.Errorf("search failed with status code: %d", resp.StatusCode) } body, err := io.ReadAll(resp.Body) @@ -60,85 +74,90 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, ma return nil, fmt.Errorf("failed to read response: %w", err) } - return parseSearchResults(string(body), maxResults) + return parseLiteSearchResults(string(body), maxResults) } -// parseSearchResults extracts search results from DuckDuckGo HTML response. -func parseSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) { +func setRandomizedHeaders(req *http.Request) { + req.Header.Set("User-Agent", userAgents[rand.IntN(len(userAgents))]) + req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + req.Header.Set("Accept-Language", acceptLanguages[rand.IntN(len(acceptLanguages))]) + req.Header.Set("Accept-Encoding", "identity") + req.Header.Set("Connection", "keep-alive") + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + req.Header.Set("Cache-Control", "max-age=0") + if rand.IntN(2) == 0 { + req.Header.Set("DNT", "1") + } +} + +func parseLiteSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) { doc, err := html.Parse(strings.NewReader(htmlContent)) if err != nil { return nil, fmt.Errorf("failed to parse HTML: %w", err) } var results []SearchResult - var traverse func(*html.Node) + var currentResult *SearchResult + var traverse func(*html.Node) traverse = func(n *html.Node) { - if n.Type == html.ElementNode && n.Data == "div" && hasClass(n, "result") { - result := extractResult(n) - if result != nil && 
result.Link != "" && !strings.Contains(result.Link, "y.js") { - result.Position = len(results) + 1 - results = append(results, *result) - if len(results) >= maxResults { - return + if n.Type == html.ElementNode { + if n.Data == "a" && hasClass(n, "result-link") { + if currentResult != nil && currentResult.Link != "" { + currentResult.Position = len(results) + 1 + results = append(results, *currentResult) + if len(results) >= maxResults { + return + } + } + currentResult = &SearchResult{Title: getTextContent(n)} + for _, attr := range n.Attr { + if attr.Key == "href" { + currentResult.Link = cleanDuckDuckGoURL(attr.Val) + break + } } } + if n.Data == "td" && hasClass(n, "result-snippet") && currentResult != nil { + currentResult.Snippet = getTextContent(n) + } } - for c := n.FirstChild; c != nil && len(results) < maxResults; c = c.NextSibling { + for c := n.FirstChild; c != nil; c = c.NextSibling { + if len(results) >= maxResults { + return + } traverse(c) } } traverse(doc) + + if currentResult != nil && currentResult.Link != "" && len(results) < maxResults { + currentResult.Position = len(results) + 1 + results = append(results, *currentResult) + } + return results, nil } -// hasClass checks if an HTML node has a specific class. func hasClass(n *html.Node, class string) bool { for _, attr := range n.Attr { if attr.Key == "class" { - return slices.Contains(strings.Fields(attr.Val), class) - } - } - return false -} - -// extractResult extracts a search result from a result div node. -func extractResult(n *html.Node) *SearchResult { - result := &SearchResult{} - - var traverse func(*html.Node) - traverse = func(node *html.Node) { - if node.Type == html.ElementNode { - // Look for title link. - if node.Data == "a" && hasClass(node, "result__a") { - result.Title = getTextContent(node) - for _, attr := range node.Attr { - if attr.Key == "href" { - result.Link = cleanDuckDuckGoURL(attr.Val) - break - } - } - } - // Look for snippet. 
- if node.Data == "a" && hasClass(node, "result__snippet") { - result.Snippet = getTextContent(node) + if slices.Contains(strings.Fields(attr.Val), class) { + return true } } - for c := node.FirstChild; c != nil; c = c.NextSibling { - traverse(c) - } } - - traverse(n) - return result + return false } -// getTextContent extracts all text content from a node and its children. func getTextContent(n *html.Node) string { var text strings.Builder var traverse func(*html.Node) - traverse = func(node *html.Node) { if node.Type == html.TextNode { text.WriteString(node.Data) @@ -147,22 +166,18 @@ func getTextContent(n *html.Node) string { traverse(c) } } - traverse(n) return strings.TrimSpace(text.String()) } -// cleanDuckDuckGoURL extracts the actual URL from DuckDuckGo's redirect URL. func cleanDuckDuckGoURL(rawURL string) string { if strings.HasPrefix(rawURL, "//duckduckgo.com/l/?uddg=") { - // Extract the actual URL from the redirect. if idx := strings.Index(rawURL, "uddg="); idx != -1 { encoded := rawURL[idx+5:] if ampIdx := strings.Index(encoded, "&"); ampIdx != -1 { encoded = encoded[:ampIdx] } - decoded, err := url.QueryUnescape(encoded) - if err == nil { + if decoded, err := url.QueryUnescape(encoded); err == nil { return decoded } } @@ -170,20 +185,35 @@ func cleanDuckDuckGoURL(rawURL string) string { return rawURL } -// formatSearchResults formats search results for LLM consumption. func formatSearchResults(results []SearchResult) string { if len(results) == 0 { - return "No results were found for your search query. This could be due to DuckDuckGo's bot detection or the query returned no matches. Please try rephrasing your search or try again in a few minutes." + return "No results found. Try rephrasing your search." } var sb strings.Builder sb.WriteString(fmt.Sprintf("Found %d search results:\n\n", len(results))) - for _, result := range results { sb.WriteString(fmt.Sprintf("%d. 
%s\n", result.Position, result.Title)) sb.WriteString(fmt.Sprintf(" URL: %s\n", result.Link)) sb.WriteString(fmt.Sprintf(" Summary: %s\n\n", result.Snippet)) } - return sb.String() } + +var ( + lastSearchMu sync.Mutex + lastSearchTime time.Time +) + +// maybeDelaySearch adds a random delay if the last search was recent. +func maybeDelaySearch() { + lastSearchMu.Lock() + defer lastSearchMu.Unlock() + + minGap := time.Duration(500+rand.IntN(1500)) * time.Millisecond + elapsed := time.Since(lastSearchTime) + if elapsed < minGap { + time.Sleep(minGap - elapsed) + } + lastSearchTime = time.Now() +} diff --git a/internal/agent/tools/web_search.go b/internal/agent/tools/web_search.go index b604c9051b4f5b0039431c01bea0b150a318740e..5ce9280c013cdd100f6d7734c969723b21e7e3bf 100644 --- a/internal/agent/tools/web_search.go +++ b/internal/agent/tools/web_search.go @@ -3,6 +3,7 @@ package tools import ( "context" _ "embed" + "log/slog" "net/http" "time" @@ -41,7 +42,9 @@ func NewWebSearchTool(client *http.Client) fantasy.AgentTool { maxResults = 20 } + maybeDelaySearch() results, err := searchDuckDuckGo(ctx, client, params.Query, maxResults) + slog.Debug("Web search completed", "query", params.Query, "results", len(results), "err", err) if err != nil { return fantasy.NewTextErrorResponse("Failed to search: " + err.Error()), nil }