@@ -4,10 +4,11 @@ import (
"context"
"fmt"
"io"
+ "math/rand/v2"
"net/http"
"net/url"
- "slices"
"strings"
+ "time"
"golang.org/x/net/html"
)
@@ -20,28 +21,41 @@ type SearchResult struct {
Position int
}
-// searchDuckDuckGo performs a web search using DuckDuckGo's HTML endpoint.
+var userAgents = []string{
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1 Safari/605.1.15",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Safari/605.1.15",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
+}
+
+var acceptLanguages = []string{
+ "en-US,en;q=0.9",
+ "en-US,en;q=0.9,es;q=0.8",
+ "en-GB,en;q=0.9,en-US;q=0.8",
+ "en-US,en;q=0.5",
+ "en-CA,en;q=0.9,en-US;q=0.8",
+}
+
func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, maxResults int) ([]SearchResult, error) {
if maxResults <= 0 {
maxResults = 10
}
- formData := url.Values{}
- formData.Set("q", query)
- formData.Set("b", "")
- formData.Set("kl", "")
+ searchURL := "https://lite.duckduckgo.com/lite/?q=" + url.QueryEscape(query)
- req, err := http.NewRequestWithContext(ctx, "POST", "https://html.duckduckgo.com/html", strings.NewReader(formData.Encode()))
+ req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil)
if err != nil {
return nil, fmt.Errorf("failed to create request: %w", err)
}
- req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
- req.Header.Set("User-Agent", BrowserUserAgent)
- req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
- req.Header.Set("Accept-Language", "en-US,en;q=0.5")
- req.Header.Set("Accept-Encoding", "gzip, deflate")
- req.Header.Set("Referer", "https://duckduckgo.com/")
+ setRandomizedHeaders(req)
resp, err := client.Do(req)
if err != nil {
@@ -49,10 +63,8 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, ma
}
defer resp.Body.Close()
- // Accept both 200 (OK) and 202 (Accepted).
- // DuckDuckGo may still return 202 for rate limiting or bot detection.
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
- return nil, fmt.Errorf("search failed with status code: %d (DuckDuckGo may be rate limiting requests)", resp.StatusCode)
+ return nil, fmt.Errorf("search failed with status code: %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
@@ -60,85 +72,92 @@ func searchDuckDuckGo(ctx context.Context, client *http.Client, query string, ma
return nil, fmt.Errorf("failed to read response: %w", err)
}
- return parseSearchResults(string(body), maxResults)
+ return parseLiteSearchResults(string(body), maxResults)
+}
+
+func setRandomizedHeaders(req *http.Request) {
+ req.Header.Set("User-Agent", userAgents[rand.IntN(len(userAgents))])
+ req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+ req.Header.Set("Accept-Language", acceptLanguages[rand.IntN(len(acceptLanguages))])
+ req.Header.Set("Accept-Encoding", "identity")
+ req.Header.Set("Connection", "keep-alive")
+ req.Header.Set("Upgrade-Insecure-Requests", "1")
+ req.Header.Set("Sec-Fetch-Dest", "document")
+ req.Header.Set("Sec-Fetch-Mode", "navigate")
+ req.Header.Set("Sec-Fetch-Site", "none")
+ req.Header.Set("Sec-Fetch-User", "?1")
+ req.Header.Set("Cache-Control", "max-age=0")
+ if rand.IntN(2) == 0 {
+ req.Header.Set("DNT", "1")
+ }
}
-// parseSearchResults extracts search results from DuckDuckGo HTML response.
-func parseSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) {
+func parseLiteSearchResults(htmlContent string, maxResults int) ([]SearchResult, error) {
doc, err := html.Parse(strings.NewReader(htmlContent))
if err != nil {
return nil, fmt.Errorf("failed to parse HTML: %w", err)
}
var results []SearchResult
- var traverse func(*html.Node)
+ var currentResult *SearchResult
+ var traverse func(*html.Node)
traverse = func(n *html.Node) {
- if n.Type == html.ElementNode && n.Data == "div" && hasClass(n, "result") {
- result := extractResult(n)
- if result != nil && result.Link != "" && !strings.Contains(result.Link, "y.js") {
- result.Position = len(results) + 1
- results = append(results, *result)
- if len(results) >= maxResults {
- return
+ if n.Type == html.ElementNode {
+ if n.Data == "a" && hasClass(n, "result-link") {
+ if currentResult != nil && currentResult.Link != "" {
+ currentResult.Position = len(results) + 1
+ results = append(results, *currentResult)
+ if len(results) >= maxResults {
+ return
+ }
}
+ currentResult = &SearchResult{Title: getTextContent(n)}
+ for _, attr := range n.Attr {
+ if attr.Key == "href" {
+ currentResult.Link = cleanDuckDuckGoURL(attr.Val)
+ break
+ }
+ }
+ }
+ if n.Data == "td" && hasClass(n, "result-snippet") && currentResult != nil {
+ currentResult.Snippet = getTextContent(n)
}
}
- for c := n.FirstChild; c != nil && len(results) < maxResults; c = c.NextSibling {
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if len(results) >= maxResults {
+ return
+ }
traverse(c)
}
}
traverse(doc)
+
+ if currentResult != nil && currentResult.Link != "" && len(results) < maxResults {
+ currentResult.Position = len(results) + 1
+ results = append(results, *currentResult)
+ }
+
return results, nil
}
-// hasClass checks if an HTML node has a specific class.
func hasClass(n *html.Node, class string) bool {
for _, attr := range n.Attr {
if attr.Key == "class" {
- return slices.Contains(strings.Fields(attr.Val), class)
- }
- }
- return false
-}
-
-// extractResult extracts a search result from a result div node.
-func extractResult(n *html.Node) *SearchResult {
- result := &SearchResult{}
-
- var traverse func(*html.Node)
- traverse = func(node *html.Node) {
- if node.Type == html.ElementNode {
- // Look for title link.
- if node.Data == "a" && hasClass(node, "result__a") {
- result.Title = getTextContent(node)
- for _, attr := range node.Attr {
- if attr.Key == "href" {
- result.Link = cleanDuckDuckGoURL(attr.Val)
- break
- }
+ for _, c := range strings.Fields(attr.Val) {
+ if c == class {
+ return true
}
}
- // Look for snippet.
- if node.Data == "a" && hasClass(node, "result__snippet") {
- result.Snippet = getTextContent(node)
- }
- }
- for c := node.FirstChild; c != nil; c = c.NextSibling {
- traverse(c)
}
}
-
- traverse(n)
- return result
+ return false
}
-// getTextContent extracts all text content from a node and its children.
func getTextContent(n *html.Node) string {
var text strings.Builder
var traverse func(*html.Node)
-
traverse = func(node *html.Node) {
if node.Type == html.TextNode {
text.WriteString(node.Data)
@@ -147,22 +166,18 @@ func getTextContent(n *html.Node) string {
traverse(c)
}
}
-
traverse(n)
return strings.TrimSpace(text.String())
}
-// cleanDuckDuckGoURL extracts the actual URL from DuckDuckGo's redirect URL.
func cleanDuckDuckGoURL(rawURL string) string {
if strings.HasPrefix(rawURL, "//duckduckgo.com/l/?uddg=") {
- // Extract the actual URL from the redirect.
if idx := strings.Index(rawURL, "uddg="); idx != -1 {
encoded := rawURL[idx+5:]
if ampIdx := strings.Index(encoded, "&"); ampIdx != -1 {
encoded = encoded[:ampIdx]
}
- decoded, err := url.QueryUnescape(encoded)
- if err == nil {
+ if decoded, err := url.QueryUnescape(encoded); err == nil {
return decoded
}
}
@@ -170,20 +185,22 @@ func cleanDuckDuckGoURL(rawURL string) string {
return rawURL
}
-// formatSearchResults formats search results for LLM consumption.
func formatSearchResults(results []SearchResult) string {
if len(results) == 0 {
- return "No results were found for your search query. This could be due to DuckDuckGo's bot detection or the query returned no matches. Please try rephrasing your search or try again in a few minutes."
+ return "No results found. Try rephrasing your search."
}
var sb strings.Builder
sb.WriteString(fmt.Sprintf("Found %d search results:\n\n", len(results)))
-
for _, result := range results {
sb.WriteString(fmt.Sprintf("%d. %s\n", result.Position, result.Title))
sb.WriteString(fmt.Sprintf(" URL: %s\n", result.Link))
sb.WriteString(fmt.Sprintf(" Summary: %s\n\n", result.Snippet))
}
-
return sb.String()
}
+
+// AddSearchDelay blocks the calling goroutine for a random 500-1999 ms between consecutive searches.
+func AddSearchDelay() {
+ time.Sleep(time.Duration(500+rand.IntN(1500)) * time.Millisecond)
+}