1package tools
2
3import (
4 "context"
5 "fmt"
6 "io"
7 "net/http"
8 "strings"
9 "time"
10 "unicode/utf8"
11
12 md "github.com/JohannesKaufmann/html-to-markdown"
13 "github.com/PuerkitoBio/goquery"
14 "github.com/charmbracelet/crush/internal/ai"
15 "github.com/charmbracelet/crush/internal/permission"
16)
17
// FetchParams is the argument payload accepted by the fetch tool.
type FetchParams struct {
	// URL is the address to download.
	URL string `json:"url" description:"The URL to fetch content from"`

	// Format selects how the response is returned: text, markdown, or html.
	Format string `json:"format" description:"The format to return the content in (text, markdown, or html)"`

	// Timeout is an optional request timeout in seconds.
	Timeout int `json:"timeout,omitempty" description:"Optional timeout in seconds (max 120)"`
}
23
// FetchPermissionsParams carries the fetch arguments attached to a permission
// request. Its fields mirror FetchParams (same names, types and order) so a
// FetchParams value can be converted to it directly.
type FetchPermissionsParams struct {
	// URL is the address the tool wants to download.
	URL string `json:"url"`

	// Format is the requested output format.
	Format string `json:"format"`

	// Timeout is the requested timeout in seconds; omitted from JSON when zero.
	Timeout int `json:"timeout,omitempty"`
}
29
// FetchToolName is the name under which the fetch tool is registered.
const FetchToolName = "fetch"
33
34func NewFetchTool(permissions permission.Service, workingDir string) ai.AgentTool {
35 client := &http.Client{
36 Timeout: 30 * time.Second,
37 Transport: &http.Transport{
38 MaxIdleConns: 100,
39 MaxIdleConnsPerHost: 10,
40 IdleConnTimeout: 90 * time.Second,
41 },
42 }
43 return ai.NewTypedToolFunc(
44 FetchToolName,
45 `Fetches content from a URL and returns it in the specified format.
46
47WHEN TO USE THIS TOOL:
48- Use when you need to download content from a URL
49- Helpful for retrieving documentation, API responses, or web content
50- Useful for getting external information to assist with tasks
51
52HOW TO USE:
53- Provide the URL to fetch content from
54- Specify the desired output format (text, markdown, or html)
55- Optionally set a timeout for the request
56
57FEATURES:
58- Supports three output formats: text, markdown, and html
59- Automatically handles HTTP redirects
60- Sets reasonable timeouts to prevent hanging
61- Validates input parameters before making requests
62
63LIMITATIONS:
64- Maximum response size is 5MB
65- Only supports HTTP and HTTPS protocols
66- Cannot handle authentication or cookies
67- Some websites may block automated requests
68
69TIPS:
70- Use text format for plain text content or simple API responses
71- Use markdown format for content that should be rendered with formatting
72- Use html format when you need the raw HTML structure
73- Set appropriate timeouts for potentially slow websites`,
74 func(ctx context.Context, params FetchParams, call ai.ToolCall) (ai.ToolResponse, error) {
75 if params.URL == "" {
76 return ai.NewTextErrorResponse("URL parameter is required"), nil
77 }
78
79 format := strings.ToLower(params.Format)
80 if format != "text" && format != "markdown" && format != "html" {
81 return ai.NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
82 }
83
84 if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
85 return ai.NewTextErrorResponse("URL must start with http:// or https://"), nil
86 }
87
88 sessionID, messageID := GetContextValues(ctx)
89 if sessionID == "" || messageID == "" {
90 return ai.ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
91 }
92
93 granted := permissions.Request(
94 permission.CreatePermissionRequest{
95 SessionID: sessionID,
96 Path: workingDir,
97 ToolCallID: call.ID,
98 ToolName: FetchToolName,
99 Action: "fetch",
100 Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
101 Params: FetchPermissionsParams(params),
102 },
103 )
104
105 if !granted {
106 return ai.ToolResponse{}, permission.ErrorPermissionDenied
107 }
108
109 // Handle timeout with context
110 requestCtx := ctx
111 if params.Timeout > 0 {
112 maxTimeout := 120 // 2 minutes
113 if params.Timeout > maxTimeout {
114 params.Timeout = maxTimeout
115 }
116 var cancel context.CancelFunc
117 requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
118 defer cancel()
119 }
120
121 req, err := http.NewRequestWithContext(requestCtx, "GET", params.URL, nil)
122 if err != nil {
123 return ai.ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
124 }
125
126 req.Header.Set("User-Agent", "crush/1.0")
127
128 resp, err := client.Do(req)
129 if err != nil {
130 return ai.ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
131 }
132 defer resp.Body.Close()
133
134 if resp.StatusCode != http.StatusOK {
135 return ai.NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
136 }
137
138 maxSize := int64(5 * 1024 * 1024) // 5MB
139 body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
140 if err != nil {
141 return ai.NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
142 }
143
144 content := string(body)
145
146 isValidUt8 := utf8.ValidString(content)
147 if !isValidUt8 {
148 return ai.NewTextErrorResponse("Response content is not valid UTF-8"), nil
149 }
150 contentType := resp.Header.Get("Content-Type")
151
152 switch format {
153 case "text":
154 if strings.Contains(contentType, "text/html") {
155 text, err := extractTextFromHTML(content)
156 if err != nil {
157 return ai.NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
158 }
159 content = text
160 }
161
162 case "markdown":
163 if strings.Contains(contentType, "text/html") {
164 markdown, err := convertHTMLToMarkdown(content)
165 if err != nil {
166 return ai.NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
167 }
168 content = markdown
169 }
170
171 content = "```\n" + content + "\n```"
172
173 case "html":
174 // return only the body of the HTML document
175 if strings.Contains(contentType, "text/html") {
176 doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
177 if err != nil {
178 return ai.NewTextErrorResponse("Failed to parse HTML: " + err.Error()), nil
179 }
180 body, err := doc.Find("body").Html()
181 if err != nil {
182 return ai.NewTextErrorResponse("Failed to extract body from HTML: " + err.Error()), nil
183 }
184 if body == "" {
185 return ai.NewTextErrorResponse("No body content found in HTML"), nil
186 }
187 content = "<html>\n<body>\n" + body + "\n</body>\n</html>"
188 }
189 }
190 // calculate byte size of content
191 contentSize := int64(len(content))
192 if contentSize > MaxReadSize {
193 content = content[:MaxReadSize]
194 content += fmt.Sprintf("\n\n[Content truncated to %d bytes]", MaxReadSize)
195 }
196
197 return ai.NewTextResponse(content), nil
198 })
199}
200
201func extractTextFromHTML(html string) (string, error) {
202 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
203 if err != nil {
204 return "", err
205 }
206
207 text := doc.Find("body").Text()
208 text = strings.Join(strings.Fields(text), " ")
209
210 return text, nil
211}
212
213func convertHTMLToMarkdown(html string) (string, error) {
214 converter := md.NewConverter("", true, nil)
215
216 markdown, err := converter.ConvertString(html)
217 if err != nil {
218 return "", err
219 }
220
221 return markdown, nil
222}