1package tools
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "io"
8 "net/http"
9 "strings"
10 "sync"
11 "time"
12
13 md "github.com/JohannesKaufmann/html-to-markdown"
14 "github.com/PuerkitoBio/goquery"
15 "github.com/charmbracelet/crush/internal/config"
16 "github.com/charmbracelet/crush/internal/permission"
17)
18
// FetchParams is the JSON input decoded by the fetch tool's Run method.
type FetchParams struct {
	// URL is the address to fetch. Run rejects it when it is empty or does
	// not start with "http://" or "https://".
	URL string `json:"url"`

	// Format selects the output representation: "text", "markdown" or
	// "html". Run lower-cases the value before validating it.
	Format string `json:"format"`

	// Timeout is an optional request timeout in seconds. Values <= 0 select
	// the tool's default client.
	Timeout int `json:"timeout,omitempty"`
}
24
// FetchPermissionsParams is the payload attached to the permission request
// issued before a fetch. Its fields match FetchParams one-for-one (same names,
// types and order), which is what allows Run to build it with a plain type
// conversion; keep the two structs in sync.
type FetchPermissionsParams struct {
	// URL is the address the tool is asking permission to fetch.
	URL string `json:"url"`

	// Format is the requested output format, as supplied by the caller.
	Format string `json:"format"`

	// Timeout is the requested timeout in seconds; omitted from JSON when zero.
	Timeout int `json:"timeout,omitempty"`
}
30
31type fetchTool struct {
32 client *http.Client
33 clientPool map[int]*http.Client
34 clientPoolMu sync.RWMutex
35 permissions permission.Service
36}
37
const (
	// FetchToolName is the name under which the fetch tool is registered; it
	// is reported by Info and attached to permission requests.
	FetchToolName = "fetch"

	// fetchToolDescription is the usage guide returned as the tool's
	// description in Info.
	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.

WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks

HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request

FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests

LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests

TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
70
71func NewFetchTool(permissions permission.Service) BaseTool {
72 return &fetchTool{
73 client: &http.Client{
74 Timeout: 30 * time.Second,
75 Transport: &http.Transport{
76 MaxIdleConns: 100,
77 MaxIdleConnsPerHost: 10,
78 IdleConnTimeout: 90 * time.Second,
79 },
80 },
81 clientPool: make(map[int]*http.Client),
82 permissions: permissions,
83 }
84}
85
86// getClientForTimeout returns a cached client for the given timeout or the default client
87func (t *fetchTool) getClientForTimeout(timeout int) *http.Client {
88 if timeout <= 0 {
89 return t.client
90 }
91
92 maxTimeout := 120 // 2 minutes
93 if timeout > maxTimeout {
94 timeout = maxTimeout
95 }
96
97 // Check if we have a cached client for this timeout
98 t.clientPoolMu.RLock()
99 if client, exists := t.clientPool[timeout]; exists {
100 t.clientPoolMu.RUnlock()
101 return client
102 }
103 t.clientPoolMu.RUnlock()
104
105 // Create and cache a new client
106 t.clientPoolMu.Lock()
107 defer t.clientPoolMu.Unlock()
108
109 // Double-check in case another goroutine created it
110 if client, exists := t.clientPool[timeout]; exists {
111 return client
112 }
113
114 client := &http.Client{
115 Timeout: time.Duration(timeout) * time.Second,
116 Transport: &http.Transport{
117 MaxIdleConns: 100,
118 MaxIdleConnsPerHost: 10,
119 IdleConnTimeout: 90 * time.Second,
120 },
121 }
122 t.clientPool[timeout] = client
123 return client
124}
125
126func (t *fetchTool) Info() ToolInfo {
127 return ToolInfo{
128 Name: FetchToolName,
129 Description: fetchToolDescription,
130 Parameters: map[string]any{
131 "url": map[string]any{
132 "type": "string",
133 "description": "The URL to fetch content from",
134 },
135 "format": map[string]any{
136 "type": "string",
137 "description": "The format to return the content in (text, markdown, or html)",
138 "enum": []string{"text", "markdown", "html"},
139 },
140 "timeout": map[string]any{
141 "type": "number",
142 "description": "Optional timeout in seconds (max 120)",
143 },
144 },
145 Required: []string{"url", "format"},
146 }
147}
148
149func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
150 var params FetchParams
151 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
152 return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
153 }
154
155 if params.URL == "" {
156 return NewTextErrorResponse("URL parameter is required"), nil
157 }
158
159 format := strings.ToLower(params.Format)
160 if format != "text" && format != "markdown" && format != "html" {
161 return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
162 }
163
164 if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
165 return NewTextErrorResponse("URL must start with http:// or https://"), nil
166 }
167
168 sessionID, messageID := GetContextValues(ctx)
169 if sessionID == "" || messageID == "" {
170 return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
171 }
172
173 p := t.permissions.Request(
174 permission.CreatePermissionRequest{
175 SessionID: sessionID,
176 Path: config.WorkingDirectory(),
177 ToolName: FetchToolName,
178 Action: "fetch",
179 Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
180 Params: FetchPermissionsParams(params),
181 },
182 )
183
184 if !p {
185 return ToolResponse{}, permission.ErrorPermissionDenied
186 }
187
188 client := t.getClientForTimeout(params.Timeout)
189
190 req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
191 if err != nil {
192 return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
193 }
194
195 req.Header.Set("User-Agent", "crush/1.0")
196
197 resp, err := client.Do(req)
198 if err != nil {
199 return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
200 }
201 defer resp.Body.Close()
202
203 if resp.StatusCode != http.StatusOK {
204 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
205 }
206
207 maxSize := int64(5 * 1024 * 1024) // 5MB
208 body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
209 if err != nil {
210 return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
211 }
212
213 content := string(body)
214 contentType := resp.Header.Get("Content-Type")
215
216 switch format {
217 case "text":
218 if strings.Contains(contentType, "text/html") {
219 text, err := extractTextFromHTML(content)
220 if err != nil {
221 return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
222 }
223 return NewTextResponse(text), nil
224 }
225 return NewTextResponse(content), nil
226
227 case "markdown":
228 if strings.Contains(contentType, "text/html") {
229 markdown, err := convertHTMLToMarkdown(content)
230 if err != nil {
231 return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
232 }
233 return NewTextResponse(markdown), nil
234 }
235
236 return NewTextResponse("```\n" + content + "\n```"), nil
237
238 case "html":
239 return NewTextResponse(content), nil
240
241 default:
242 return NewTextResponse(content), nil
243 }
244}
245
246func extractTextFromHTML(html string) (string, error) {
247 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
248 if err != nil {
249 return "", err
250 }
251
252 text := doc.Text()
253 text = strings.Join(strings.Fields(text), " ")
254
255 return text, nil
256}
257
258func convertHTMLToMarkdown(html string) (string, error) {
259 converter := md.NewConverter("", true, nil)
260
261 markdown, err := converter.ConvertString(html)
262 if err != nil {
263 return "", err
264 }
265
266 return markdown, nil
267}