1package tools
 2
 3import (
 4	"bytes"
 5	"context"
 6	"encoding/json"
 7	"errors"
 8	"fmt"
 9	"io"
10	"net/http"
11	"strings"
12	"unicode/utf8"
13
14	md "github.com/JohannesKaufmann/html-to-markdown"
15)
16
17// FetchURLAndConvert fetches a URL and converts HTML content to markdown.
18func FetchURLAndConvert(ctx context.Context, client *http.Client, url string) (string, error) {
19	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
20	if err != nil {
21		return "", fmt.Errorf("failed to create request: %w", err)
22	}
23
24	req.Header.Set("User-Agent", "crush/1.0")
25
26	resp, err := client.Do(req)
27	if err != nil {
28		return "", fmt.Errorf("failed to fetch URL: %w", err)
29	}
30	defer resp.Body.Close()
31
32	if resp.StatusCode != http.StatusOK {
33		return "", fmt.Errorf("request failed with status code: %d", resp.StatusCode)
34	}
35
36	maxSize := int64(5 * 1024 * 1024) // 5MB
37	body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
38	if err != nil {
39		return "", fmt.Errorf("failed to read response body: %w", err)
40	}
41
42	content := string(body)
43
44	if !utf8.ValidString(content) {
45		return "", errors.New("response content is not valid UTF-8")
46	}
47
48	contentType := resp.Header.Get("Content-Type")
49
50	// Convert HTML to markdown for better AI processing.
51	if strings.Contains(contentType, "text/html") {
52		markdown, err := ConvertHTMLToMarkdown(content)
53		if err != nil {
54			return "", fmt.Errorf("failed to convert HTML to markdown: %w", err)
55		}
56		content = markdown
57	} else if strings.Contains(contentType, "application/json") || strings.Contains(contentType, "text/json") {
58		// Format JSON for better readability.
59		formatted, err := FormatJSON(content)
60		if err == nil {
61			content = formatted
62		}
63		// If formatting fails, keep original content.
64	}
65
66	return content, nil
67}
68
69// ConvertHTMLToMarkdown converts HTML content to markdown format.
70func ConvertHTMLToMarkdown(html string) (string, error) {
71	converter := md.NewConverter("", true, nil)
72
73	markdown, err := converter.ConvertString(html)
74	if err != nil {
75		return "", err
76	}
77
78	return markdown, nil
79}
80
// FormatJSON re-indents JSON content using two spaces per nesting level.
//
// The text is reformatted token by token instead of being decoded and
// re-encoded, so number literals keep their exact digits (large integers are
// not rounded through float64), object keys keep their original order, and
// characters such as '<', '>' and '&' are not rewritten as \u escapes.
//
// The returned string ends with a single newline. An error is returned, along
// with an empty string, when content is not valid JSON.
func FormatJSON(content string) (string, error) {
	var buf bytes.Buffer

	// json.Indent copies trailing whitespace through verbatim, so strip the
	// surrounding whitespace first to keep the output shape predictable.
	if err := json.Indent(&buf, bytes.TrimSpace([]byte(content)), "", "  "); err != nil {
		return "", err
	}
	buf.WriteByte('\n')

	return buf.String(), nil
}