1package tools
2
3import (
4 "context"
5 "fmt"
6 "io"
7 "net/http"
8 "net/url"
9 "slices"
10 "strings"
11
12 "golang.org/x/net/html"
13)
14
// SearchResult is one entry scraped from a DuckDuckGo HTML results page.
type SearchResult struct {
	// Title is the trimmed text of the result's title anchor (class "result__a").
	Title string
	// Link is the href of the title anchor after cleanDuckDuckGoURL has
	// unwrapped any DuckDuckGo redirect.
	Link string
	// Snippet is the trimmed text of the anchor with class "result__snippet".
	Snippet string
	// Position is the 1-based rank assigned by parseSearchResults in the
	// order results are accepted.
	Position int
}
22
23// searchDuckDuckGo performs a web search using DuckDuckGo's HTML endpoint.
24func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, maxResults int) ([]SearchResult, error) {
25 if maxResults <= 0 {
26 maxResults = 10
27 }
28
29 formData := url.Values{}
30 formData.Set("q", query)
31 formData.Set("b", "")
32 formData.Set("kl", "")
33
34 req, err := http.NewRequestWithContext(ctx, "POST", "https://html.duckduckgo.com/html", strings.NewReader(formData.Encode()))
35 if err != nil {
36 return nil, fmt.Errorf("failed to create request: %w", err)
37 }
38
39 req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
40 req.Header.Set("User-Agent", BrowserUserAgent)
41
42 resp, err := client.Do(req)
43 if err != nil {
44 return nil, fmt.Errorf("failed to execute search: %w", err)
45 }
46 defer resp.Body.Close()
47
48 if resp.StatusCode != http.StatusOK {
49 return nil, fmt.Errorf("search failed with status code: %d", resp.StatusCode)
50 }
51
52 body, err := io.ReadAll(resp.Body)
53 if err != nil {
54 return nil, fmt.Errorf("failed to read response: %w", err)
55 }
56
57 return parseSearchResults(string(body), maxResults)
58}
59
60// parseSearchResults extracts search results from DuckDuckGo HTML response.
61func parseSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) {
62 doc, err := html.Parse(strings.NewReader(htmlContent))
63 if err != nil {
64 return nil, fmt.Errorf("failed to parse HTML: %w", err)
65 }
66
67 var results []SearchResult
68 var traverse func(*html.Node)
69
70 traverse = func(n *html.Node) {
71 if n.Type == html.ElementNode && n.Data == "div" && hasClass(n, "result") {
72 result := extractResult(n)
73 if result != nil && result.Link != "" && !strings.Contains(result.Link, "y.js") {
74 result.Position = len(results) + 1
75 results = append(results, *result)
76 if len(results) >= maxResults {
77 return
78 }
79 }
80 }
81 for c := n.FirstChild; c != nil && len(results) < maxResults; c = c.NextSibling {
82 traverse(c)
83 }
84 }
85
86 traverse(doc)
87 return results, nil
88}
89
90// hasClass checks if an HTML node has a specific class.
91func hasClass(n *html.Node, class string) bool {
92 for _, attr := range n.Attr {
93 if attr.Key == "class" {
94 return slices.Contains(strings.Fields(attr.Val), class)
95 }
96 }
97 return false
98}
99
100// extractResult extracts a search result from a result div node.
101func extractResult(n *html.Node) *SearchResult {
102 result := &SearchResult{}
103
104 var traverse func(*html.Node)
105 traverse = func(node *html.Node) {
106 if node.Type == html.ElementNode {
107 // Look for title link.
108 if node.Data == "a" && hasClass(node, "result__a") {
109 result.Title = getTextContent(node)
110 for _, attr := range node.Attr {
111 if attr.Key == "href" {
112 result.Link = cleanDuckDuckGoURL(attr.Val)
113 break
114 }
115 }
116 }
117 // Look for snippet.
118 if node.Data == "a" && hasClass(node, "result__snippet") {
119 result.Snippet = getTextContent(node)
120 }
121 }
122 for c := node.FirstChild; c != nil; c = c.NextSibling {
123 traverse(c)
124 }
125 }
126
127 traverse(n)
128 return result
129}
130
131// getTextContent extracts all text content from a node and its children.
132func getTextContent(n *html.Node) string {
133 var text strings.Builder
134 var traverse func(*html.Node)
135
136 traverse = func(node *html.Node) {
137 if node.Type == html.TextNode {
138 text.WriteString(node.Data)
139 }
140 for c := node.FirstChild; c != nil; c = c.NextSibling {
141 traverse(c)
142 }
143 }
144
145 traverse(n)
146 return strings.TrimSpace(text.String())
147}
148
// cleanDuckDuckGoURL extracts the actual URL from DuckDuckGo's redirect URL.
//
// A redirect link has the form "//duckduckgo.com/l/?...uddg=<escaped-url>...",
// optionally preceded by "http:" or "https:". The value of the first "uddg"
// query parameter is query-unescaped and returned. The parameter may appear
// at any position in the query string.
//
// rawURL is returned unchanged when it is not such a redirect link, when it
// has no "uddg" parameter, or when the parameter's value cannot be unescaped.
func cleanDuckDuckGoURL(rawURL string) string {
	// Drop an explicit scheme so absolute and protocol-relative links share
	// the same prefix check below.
	rest := rawURL
	for _, scheme := range []string{"https:", "http:"} {
		if after, ok := strings.CutPrefix(rawURL, scheme); ok {
			rest = after
			break
		}
	}

	query, ok := strings.CutPrefix(rest, "//duckduckgo.com/l/?")
	if !ok {
		return rawURL
	}

	for _, param := range strings.Split(query, "&") {
		encoded, ok := strings.CutPrefix(param, "uddg=")
		if !ok {
			continue
		}
		decoded, err := url.QueryUnescape(encoded)
		if err != nil {
			// Malformed escape: keep the original link rather than guess.
			return rawURL
		}
		return decoded
	}
	return rawURL
}
166
167// formatSearchResults formats search results for LLM consumption.
168func formatSearchResults(results []SearchResult) string {
169 if len(results) == 0 {
170 return "No results were found for your search query. This could be due to DuckDuckGo's bot detection or the query returned no matches. Please try rephrasing your search or try again in a few minutes."
171 }
172
173 var sb strings.Builder
174 sb.WriteString(fmt.Sprintf("Found %d search results:\n\n", len(results)))
175
176 for _, result := range results {
177 sb.WriteString(fmt.Sprintf("%d. %s\n", result.Position, result.Title))
178 sb.WriteString(fmt.Sprintf(" URL: %s\n", result.Link))
179 sb.WriteString(fmt.Sprintf(" Summary: %s\n\n", result.Snippet))
180 }
181
182 return sb.String()
183}