1package tools
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "io"
8 "net/http"
9 "strings"
10 "time"
11
12 md "github.com/JohannesKaufmann/html-to-markdown"
13 "github.com/PuerkitoBio/goquery"
14 "github.com/charmbracelet/crush/internal/permission"
15)
16
// FetchParams is the JSON input accepted by the fetch tool.
type FetchParams struct {
	// URL is the address to fetch.
	URL string `json:"url"`
	// Format selects the output representation: text, markdown, or html.
	Format string `json:"format"`
	// Timeout is an optional request timeout in seconds; zero means unset.
	Timeout int `json:"timeout,omitempty"`
}
22
// FetchPermissionsParams is the parameter payload attached to a fetch
// permission request. Its fields mirror the fetch tool's input parameters.
type FetchPermissionsParams struct {
	// URL is the address the tool wants to fetch.
	URL string `json:"url"`
	// Format is the requested output representation.
	Format string `json:"format"`
	// Timeout is the requested timeout in seconds; zero means unset.
	Timeout int `json:"timeout,omitempty"`
}
28
29type fetchTool struct {
30 client *http.Client
31 permissions permission.Service
32 workingDir string
33}
34
const (
	// FetchToolName is the identifier under which the fetch tool is exposed.
	FetchToolName = "fetch"

	// fetchToolDescription is the usage guidance returned as the tool's
	// description in its ToolInfo.
	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.

WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks

HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request

FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests

LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests

TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
67
68func NewFetchTool(permissions permission.Service, workingDir string) BaseTool {
69 return &fetchTool{
70 client: &http.Client{
71 Timeout: 30 * time.Second,
72 Transport: &http.Transport{
73 MaxIdleConns: 100,
74 MaxIdleConnsPerHost: 10,
75 IdleConnTimeout: 90 * time.Second,
76 },
77 },
78 permissions: permissions,
79 workingDir: workingDir,
80 }
81}
82
83func (t *fetchTool) Name() string {
84 return FetchToolName
85}
86
87func (t *fetchTool) Info() ToolInfo {
88 return ToolInfo{
89 Name: FetchToolName,
90 Description: fetchToolDescription,
91 Parameters: map[string]any{
92 "url": map[string]any{
93 "type": "string",
94 "description": "The URL to fetch content from",
95 },
96 "format": map[string]any{
97 "type": "string",
98 "description": "The format to return the content in (text, markdown, or html)",
99 "enum": []string{"text", "markdown", "html"},
100 },
101 "timeout": map[string]any{
102 "type": "number",
103 "description": "Optional timeout in seconds (max 120)",
104 },
105 },
106 Required: []string{"url", "format"},
107 }
108}
109
110func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
111 var params FetchParams
112 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
113 return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
114 }
115
116 if params.URL == "" {
117 return NewTextErrorResponse("URL parameter is required"), nil
118 }
119
120 format := strings.ToLower(params.Format)
121 if format != "text" && format != "markdown" && format != "html" {
122 return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
123 }
124
125 if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
126 return NewTextErrorResponse("URL must start with http:// or https://"), nil
127 }
128
129 sessionID, messageID := GetContextValues(ctx)
130 if sessionID == "" || messageID == "" {
131 return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
132 }
133
134 p := t.permissions.Request(
135 permission.CreatePermissionRequest{
136 SessionID: sessionID,
137 Path: t.workingDir,
138 ToolName: FetchToolName,
139 Action: "fetch",
140 Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
141 Params: FetchPermissionsParams(params),
142 },
143 )
144
145 if !p {
146 return ToolResponse{}, permission.ErrorPermissionDenied
147 }
148
149 // Handle timeout with context
150 requestCtx := ctx
151 if params.Timeout > 0 {
152 maxTimeout := 120 // 2 minutes
153 if params.Timeout > maxTimeout {
154 params.Timeout = maxTimeout
155 }
156 var cancel context.CancelFunc
157 requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
158 defer cancel()
159 }
160
161 req, err := http.NewRequestWithContext(requestCtx, "GET", params.URL, nil)
162 if err != nil {
163 return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
164 }
165
166 req.Header.Set("User-Agent", "crush/1.0")
167
168 resp, err := t.client.Do(req)
169 if err != nil {
170 return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
171 }
172 defer resp.Body.Close()
173
174 if resp.StatusCode != http.StatusOK {
175 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
176 }
177
178 maxSize := int64(5 * 1024 * 1024) // 5MB
179 body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
180 if err != nil {
181 return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
182 }
183
184 content := string(body)
185 contentType := resp.Header.Get("Content-Type")
186
187 switch format {
188 case "text":
189 if strings.Contains(contentType, "text/html") {
190 text, err := extractTextFromHTML(content)
191 if err != nil {
192 return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
193 }
194 return NewTextResponse(text), nil
195 }
196 return NewTextResponse(content), nil
197
198 case "markdown":
199 if strings.Contains(contentType, "text/html") {
200 markdown, err := convertHTMLToMarkdown(content)
201 if err != nil {
202 return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
203 }
204 return NewTextResponse(markdown), nil
205 }
206
207 return NewTextResponse("```\n" + content + "\n```"), nil
208
209 case "html":
210 return NewTextResponse(content), nil
211
212 default:
213 return NewTextResponse(content), nil
214 }
215}
216
217func extractTextFromHTML(html string) (string, error) {
218 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
219 if err != nil {
220 return "", err
221 }
222
223 text := doc.Text()
224 text = strings.Join(strings.Fields(text), " ")
225
226 return text, nil
227}
228
229func convertHTMLToMarkdown(html string) (string, error) {
230 converter := md.NewConverter("", true, nil)
231
232 markdown, err := converter.ConvertString(html)
233 if err != nil {
234 return "", err
235 }
236
237 return markdown, nil
238}