1package tools
2
3import (
4 "context"
5 "fmt"
6 "io"
7 "net/http"
8 "net/url"
9 "slices"
10 "strings"
11
12 "golang.org/x/net/html"
13)
14
// SearchResult is one entry scraped from a DuckDuckGo HTML results page.
type SearchResult struct {
	// Title is the text of the result's heading link.
	Title string
	// Link is the target URL taken from the heading link's href.
	Link string
	// Snippet is the text of the result's summary element.
	Snippet string
	// Position is the 1-based rank of the result within the returned list.
	Position int
}
22
23// searchDuckDuckGo performs a web search using DuckDuckGo's HTML endpoint.
24func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, maxResults int) ([]SearchResult, error) {
25 if maxResults <= 0 {
26 maxResults = 10
27 }
28
29 formData := url.Values{}
30 formData.Set("q", query)
31 formData.Set("b", "")
32 formData.Set("kl", "")
33
34 req, err := http.NewRequestWithContext(ctx, "POST", "https://html.duckduckgo.com/html", strings.NewReader(formData.Encode()))
35 if err != nil {
36 return nil, fmt.Errorf("failed to create request: %w", err)
37 }
38
39 req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
40 req.Header.Set("User-Agent", BrowserUserAgent)
41 req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
42 req.Header.Set("Accept-Language", "en-US,en;q=0.5")
43 req.Header.Set("Accept-Encoding", "gzip, deflate")
44 req.Header.Set("Referer", "https://duckduckgo.com/")
45
46 resp, err := client.Do(req)
47 if err != nil {
48 return nil, fmt.Errorf("failed to execute search: %w", err)
49 }
50 defer resp.Body.Close()
51
52 // Accept both 200 (OK) and 202 (Accepted).
53 // DuckDuckGo may still return 202 for rate limiting or bot detection.
54 if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
55 return nil, fmt.Errorf("search failed with status code: %d (DuckDuckGo may be rate limiting requests)", resp.StatusCode)
56 }
57
58 body, err := io.ReadAll(resp.Body)
59 if err != nil {
60 return nil, fmt.Errorf("failed to read response: %w", err)
61 }
62
63 return parseSearchResults(string(body), maxResults)
64}
65
66// parseSearchResults extracts search results from DuckDuckGo HTML response.
67func parseSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) {
68 doc, err := html.Parse(strings.NewReader(htmlContent))
69 if err != nil {
70 return nil, fmt.Errorf("failed to parse HTML: %w", err)
71 }
72
73 var results []SearchResult
74 var traverse func(*html.Node)
75
76 traverse = func(n *html.Node) {
77 if n.Type == html.ElementNode && n.Data == "div" && hasClass(n, "result") {
78 result := extractResult(n)
79 if result != nil && result.Link != "" && !strings.Contains(result.Link, "y.js") {
80 result.Position = len(results) + 1
81 results = append(results, *result)
82 if len(results) >= maxResults {
83 return
84 }
85 }
86 }
87 for c := n.FirstChild; c != nil && len(results) < maxResults; c = c.NextSibling {
88 traverse(c)
89 }
90 }
91
92 traverse(doc)
93 return results, nil
94}
95
96// hasClass checks if an HTML node has a specific class.
97func hasClass(n *html.Node, class string) bool {
98 for _, attr := range n.Attr {
99 if attr.Key == "class" {
100 return slices.Contains(strings.Fields(attr.Val), class)
101 }
102 }
103 return false
104}
105
106// extractResult extracts a search result from a result div node.
107func extractResult(n *html.Node) *SearchResult {
108 result := &SearchResult{}
109
110 var traverse func(*html.Node)
111 traverse = func(node *html.Node) {
112 if node.Type == html.ElementNode {
113 // Look for title link.
114 if node.Data == "a" && hasClass(node, "result__a") {
115 result.Title = getTextContent(node)
116 for _, attr := range node.Attr {
117 if attr.Key == "href" {
118 result.Link = cleanDuckDuckGoURL(attr.Val)
119 break
120 }
121 }
122 }
123 // Look for snippet.
124 if node.Data == "a" && hasClass(node, "result__snippet") {
125 result.Snippet = getTextContent(node)
126 }
127 }
128 for c := node.FirstChild; c != nil; c = c.NextSibling {
129 traverse(c)
130 }
131 }
132
133 traverse(n)
134 return result
135}
136
137// getTextContent extracts all text content from a node and its children.
138func getTextContent(n *html.Node) string {
139 var text strings.Builder
140 var traverse func(*html.Node)
141
142 traverse = func(node *html.Node) {
143 if node.Type == html.TextNode {
144 text.WriteString(node.Data)
145 }
146 for c := node.FirstChild; c != nil; c = c.NextSibling {
147 traverse(c)
148 }
149 }
150
151 traverse(n)
152 return strings.TrimSpace(text.String())
153}
154
// cleanDuckDuckGoURL extracts the actual URL from DuckDuckGo's redirect URL.
//
// Redirect links have the form "//duckduckgo.com/l/?uddg=<escaped target>&...".
// The scheme-relative form as well as explicit "http:" / "https:" forms are
// recognized, and the uddg parameter may appear anywhere in the query string.
// The query-unescaped value of the first uddg parameter is returned. Any input
// that is not such a redirect, has no uddg parameter, or whose uddg value
// cannot be unescaped is returned unchanged.
func cleanDuckDuckGoURL(rawURL string) string {
	const redirectPrefix = "//duckduckgo.com/l/?"

	// Drop an explicit scheme so absolute and scheme-relative links share one
	// code path.
	rest := rawURL
	for _, scheme := range []string{"https:", "http:"} {
		if trimmed, ok := strings.CutPrefix(rest, scheme); ok {
			rest = trimmed
			break
		}
	}

	query, ok := strings.CutPrefix(rest, redirectPrefix)
	if !ok {
		return rawURL
	}

	for _, pair := range strings.Split(query, "&") {
		encoded, ok := strings.CutPrefix(pair, "uddg=")
		if !ok {
			continue
		}
		decoded, err := url.QueryUnescape(encoded)
		if err != nil {
			// Malformed escape: keep the redirect link rather than guess.
			return rawURL
		}
		return decoded
	}

	return rawURL
}
172
173// formatSearchResults formats search results for LLM consumption.
174func formatSearchResults(results []SearchResult) string {
175 if len(results) == 0 {
176 return "No results were found for your search query. This could be due to DuckDuckGo's bot detection or the query returned no matches. Please try rephrasing your search or try again in a few minutes."
177 }
178
179 var sb strings.Builder
180 sb.WriteString(fmt.Sprintf("Found %d search results:\n\n", len(results)))
181
182 for _, result := range results {
183 sb.WriteString(fmt.Sprintf("%d. %s\n", result.Position, result.Title))
184 sb.WriteString(fmt.Sprintf(" URL: %s\n", result.Link))
185 sb.WriteString(fmt.Sprintf(" Summary: %s\n\n", result.Snippet))
186 }
187
188 return sb.String()
189}