1package tools
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "io"
10 "net/http"
11 "strings"
12 "unicode/utf8"
13
14 md "github.com/JohannesKaufmann/html-to-markdown"
15)
16
17// FetchURLAndConvert fetches a URL and converts HTML content to markdown.
18func FetchURLAndConvert(ctx context.Context, client *http.Client, url string) (string, error) {
19 req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
20 if err != nil {
21 return "", fmt.Errorf("failed to create request: %w", err)
22 }
23
24 req.Header.Set("User-Agent", "crush/1.0")
25
26 resp, err := client.Do(req)
27 if err != nil {
28 return "", fmt.Errorf("failed to fetch URL: %w", err)
29 }
30 defer resp.Body.Close()
31
32 if resp.StatusCode != http.StatusOK {
33 return "", fmt.Errorf("request failed with status code: %d", resp.StatusCode)
34 }
35
36 maxSize := int64(5 * 1024 * 1024) // 5MB
37 body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
38 if err != nil {
39 return "", fmt.Errorf("failed to read response body: %w", err)
40 }
41
42 content := string(body)
43
44 if !utf8.ValidString(content) {
45 return "", errors.New("response content is not valid UTF-8")
46 }
47
48 contentType := resp.Header.Get("Content-Type")
49
50 // Convert HTML to markdown for better AI processing.
51 if strings.Contains(contentType, "text/html") {
52 markdown, err := ConvertHTMLToMarkdown(content)
53 if err != nil {
54 return "", fmt.Errorf("failed to convert HTML to markdown: %w", err)
55 }
56 content = markdown
57 } else if strings.Contains(contentType, "application/json") || strings.Contains(contentType, "text/json") {
58 // Format JSON for better readability.
59 formatted, err := FormatJSON(content)
60 if err == nil {
61 content = formatted
62 }
63 // If formatting fails, keep original content.
64 }
65
66 return content, nil
67}
68
69// ConvertHTMLToMarkdown converts HTML content to markdown format.
70func ConvertHTMLToMarkdown(html string) (string, error) {
71 converter := md.NewConverter("", true, nil)
72
73 markdown, err := converter.ConvertString(html)
74 if err != nil {
75 return "", err
76 }
77
78 return markdown, nil
79}
80
// FormatJSON re-encodes a single JSON document with indentation.
//
// content must hold exactly one JSON value, optionally surrounded by
// whitespace; anything else (empty input, malformed JSON, or extra data after
// the value) yields an error and an empty string.
//
// Numbers are carried through as their original literals rather than being
// converted to float64, so large integers keep every digit. Characters such as
// '&', '<' and '>' inside strings are written as-is instead of being replaced
// by \u00XX escapes. Object keys are emitted in sorted order and the output
// ends with a newline.
func FormatJSON(content string) (string, error) {
	decoder := json.NewDecoder(strings.NewReader(content))
	// Keep numeric literals verbatim; decoding into float64 would silently
	// round integers beyond 2^53.
	decoder.UseNumber()

	var data interface{}
	if err := decoder.Decode(&data); err != nil {
		return "", err
	}

	// Decode stops after the first value; make sure nothing but whitespace
	// follows it, matching the strictness of json.Unmarshal.
	if _, err := decoder.Token(); !errors.Is(err, io.EOF) {
		return "", errors.New("invalid JSON: unexpected data after top-level value")
	}

	var buf bytes.Buffer
	encoder := json.NewEncoder(&buf)
	// The output is meant to be read, not embedded in HTML, so do not escape
	// '&', '<' and '>'.
	encoder.SetEscapeHTML(false)
	encoder.SetIndent("", " ")
	if err := encoder.Encode(data); err != nil {
		return "", err
	}

	return buf.String(), nil
}