search.go

  1package tools
  2
  3import (
  4	"context"
  5	"fmt"
  6	"io"
  7	"math/rand/v2"
  8	"net/http"
  9	"net/url"
 10	"slices"
 11	"strings"
 12	"sync"
 13	"time"
 14
 15	"golang.org/x/net/html"
 16)
 17
 18// SearchResult represents a single search result from DuckDuckGo.
 19type SearchResult struct {
 20	Title    string
 21	Link     string
 22	Snippet  string
 23	Position int
 24}
 25
 26var userAgents = []string{
 27	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
 28	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
 29	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
 30	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
 31	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
 32	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
 33	"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
 34	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
 35	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
 36	"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
 37	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
 38}
 39
 40var acceptLanguages = []string{
 41	"en-US,en;q=0.9",
 42	"en-US,en;q=0.9,es;q=0.8",
 43	"en-GB,en;q=0.9,en-US;q=0.8",
 44	"en-US,en;q=0.5",
 45	"en-CA,en;q=0.9,en-US;q=0.8",
 46}
 47
 48func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, maxResults int) ([]SearchResult, error) {
 49	if maxResults <= 0 {
 50		maxResults = 10
 51	}
 52
 53	searchURL := "https://lite.duckduckgo.com/lite/?q=" + url.QueryEscape(query)
 54
 55	req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
 56	if err != nil {
 57		return nil, fmt.Errorf("failed to create request: %w", err)
 58	}
 59
 60	setRandomizedHeaders(req)
 61
 62	resp, err := client.Do(req)
 63	if err != nil {
 64		return nil, fmt.Errorf("failed to execute search: %w", err)
 65	}
 66	defer resp.Body.Close()
 67
 68	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
 69		return nil, fmt.Errorf("search failed with status code: %d", resp.StatusCode)
 70	}
 71
 72	body, err := io.ReadAll(resp.Body)
 73	if err != nil {
 74		return nil, fmt.Errorf("failed to read response: %w", err)
 75	}
 76
 77	return parseLiteSearchResults(string(body), maxResults)
 78}
 79
 80func setRandomizedHeaders(req *http.Request) {
 81	req.Header.Set("User-Agent", userAgents[rand.IntN(len(userAgents))])
 82	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
 83	req.Header.Set("Accept-Language", acceptLanguages[rand.IntN(len(acceptLanguages))])
 84	req.Header.Set("Accept-Encoding", "identity")
 85	req.Header.Set("Connection", "keep-alive")
 86	req.Header.Set("Upgrade-Insecure-Requests", "1")
 87	req.Header.Set("Sec-Fetch-Dest", "document")
 88	req.Header.Set("Sec-Fetch-Mode", "navigate")
 89	req.Header.Set("Sec-Fetch-Site", "none")
 90	req.Header.Set("Sec-Fetch-User", "?1")
 91	req.Header.Set("Cache-Control", "max-age=0")
 92	if rand.IntN(2) == 0 {
 93		req.Header.Set("DNT", "1")
 94	}
 95}
 96
 97func parseLiteSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) {
 98	doc, err := html.Parse(strings.NewReader(htmlContent))
 99	if err != nil {
100		return nil, fmt.Errorf("failed to parse HTML: %w", err)
101	}
102
103	var results []SearchResult
104	var currentResult *SearchResult
105
106	var traverse func(*html.Node)
107	traverse = func(n *html.Node) {
108		if n.Type == html.ElementNode {
109			if n.Data == "a" && hasClass(n, "result-link") {
110				if currentResult != nil && currentResult.Link != "" {
111					currentResult.Position = len(results) + 1
112					results = append(results, *currentResult)
113					if len(results) >= maxResults {
114						return
115					}
116				}
117				currentResult = &SearchResult{Title: getTextContent(n)}
118				for _, attr := range n.Attr {
119					if attr.Key == "href" {
120						currentResult.Link = cleanDuckDuckGoURL(attr.Val)
121						break
122					}
123				}
124			}
125			if n.Data == "td" && hasClass(n, "result-snippet") && currentResult != nil {
126				currentResult.Snippet = getTextContent(n)
127			}
128		}
129		for c := n.FirstChild; c != nil; c = c.NextSibling {
130			if len(results) >= maxResults {
131				return
132			}
133			traverse(c)
134		}
135	}
136
137	traverse(doc)
138
139	if currentResult != nil && currentResult.Link != "" && len(results) < maxResults {
140		currentResult.Position = len(results) + 1
141		results = append(results, *currentResult)
142	}
143
144	return results, nil
145}
146
147func hasClass(n *html.Node, class string) bool {
148	for _, attr := range n.Attr {
149		if attr.Key == "class" {
150			if slices.Contains(strings.Fields(attr.Val), class) {
151				return true
152			}
153		}
154	}
155	return false
156}
157
158func getTextContent(n *html.Node) string {
159	var text strings.Builder
160	var traverse func(*html.Node)
161	traverse = func(node *html.Node) {
162		if node.Type == html.TextNode {
163			text.WriteString(node.Data)
164		}
165		for c := node.FirstChild; c != nil; c = c.NextSibling {
166			traverse(c)
167		}
168	}
169	traverse(n)
170	return strings.TrimSpace(text.String())
171}
172
// cleanDuckDuckGoURL unwraps a DuckDuckGo redirect link. When rawURL starts
// with "//duckduckgo.com/l/?uddg=", the value of the uddg parameter (up to the
// next "&") is query-unescaped and returned. Any other input, or a value that
// cannot be unescaped, is returned unchanged.
func cleanDuckDuckGoURL(rawURL string) string {
	const redirectPrefix = "//duckduckgo.com/l/?uddg="

	rest, ok := strings.CutPrefix(rawURL, redirectPrefix)
	if !ok {
		return rawURL
	}

	// Drop any parameters that follow the target.
	target, _, _ := strings.Cut(rest, "&")

	decoded, err := url.QueryUnescape(target)
	if err != nil {
		return rawURL
	}
	return decoded
}
187
188func formatSearchResults(results []SearchResult) string {
189	if len(results) == 0 {
190		return "No results found. Try rephrasing your search."
191	}
192
193	var sb strings.Builder
194	sb.WriteString(fmt.Sprintf("Found %d search results:\n\n", len(results)))
195	for _, result := range results {
196		sb.WriteString(fmt.Sprintf("%d. %s\n", result.Position, result.Title))
197		sb.WriteString(fmt.Sprintf("   URL: %s\n", result.Link))
198		sb.WriteString(fmt.Sprintf("   Summary: %s\n\n", result.Snippet))
199	}
200	return sb.String()
201}
202
203var (
204	lastSearchMu   sync.Mutex
205	lastSearchTime time.Time
206)
207
208// maybeDelaySearch adds a random delay if the last search was recent.
209func maybeDelaySearch() {
210	lastSearchMu.Lock()
211	defer lastSearchMu.Unlock()
212
213	minGap := time.Duration(500+rand.IntN(1500)) * time.Millisecond
214	elapsed := time.Since(lastSearchTime)
215	if elapsed < minGap {
216		time.Sleep(minGap - elapsed)
217	}
218	lastSearchTime = time.Now()
219}