fetch.go

  1package tools
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"fmt"
  7	"io"
  8	"net/http"
  9	"strings"
 10	"time"
 11
 12	md "github.com/JohannesKaufmann/html-to-markdown"
 13	"github.com/PuerkitoBio/goquery"
 14	"github.com/charmbracelet/crush/internal/permission"
 15)
 16
 17type FetchParams struct {
 18	URL     string `json:"url"`
 19	Format  string `json:"format"`
 20	Timeout int    `json:"timeout,omitempty"`
 21}
 22
 23type FetchPermissionsParams struct {
 24	URL     string `json:"url"`
 25	Format  string `json:"format"`
 26	Timeout int    `json:"timeout,omitempty"`
 27}
 28
 29type fetchTool struct {
 30	client      *http.Client
 31	permissions permission.Service
 32	workingDir  string
 33}
 34
 35const (
 36	FetchToolName        = "fetch"
 37	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
 38
 39WHEN TO USE THIS TOOL:
 40- Use when you need to download content from a URL
 41- Helpful for retrieving documentation, API responses, or web content
 42- Useful for getting external information to assist with tasks
 43
 44HOW TO USE:
 45- Provide the URL to fetch content from
 46- Specify the desired output format (text, markdown, or html)
 47- Optionally set a timeout for the request
 48
 49FEATURES:
 50- Supports three output formats: text, markdown, and html
 51- Automatically handles HTTP redirects
 52- Sets reasonable timeouts to prevent hanging
 53- Validates input parameters before making requests
 54
 55LIMITATIONS:
 56- Maximum response size is 5MB
 57- Only supports HTTP and HTTPS protocols
 58- Cannot handle authentication or cookies
 59- Some websites may block automated requests
 60
 61TIPS:
 62- Use text format for plain text content or simple API responses
 63- Use markdown format for content that should be rendered with formatting
 64- Use html format when you need the raw HTML structure
 65- Set appropriate timeouts for potentially slow websites`
 66)
 67
 68func NewFetchTool(permissions permission.Service, workingDir string) BaseTool {
 69	return &fetchTool{
 70		client: &http.Client{
 71			Timeout: 30 * time.Second,
 72			Transport: &http.Transport{
 73				MaxIdleConns:        100,
 74				MaxIdleConnsPerHost: 10,
 75				IdleConnTimeout:     90 * time.Second,
 76			},
 77		},
 78		permissions: permissions,
 79		workingDir:  workingDir,
 80	}
 81}
 82
 83func (t *fetchTool) Name() string {
 84	return FetchToolName
 85}
 86
 87func (t *fetchTool) Info() ToolInfo {
 88	return ToolInfo{
 89		Name:        FetchToolName,
 90		Description: fetchToolDescription,
 91		Parameters: map[string]any{
 92			"url": map[string]any{
 93				"type":        "string",
 94				"description": "The URL to fetch content from",
 95			},
 96			"format": map[string]any{
 97				"type":        "string",
 98				"description": "The format to return the content in (text, markdown, or html)",
 99				"enum":        []string{"text", "markdown", "html"},
100			},
101			"timeout": map[string]any{
102				"type":        "number",
103				"description": "Optional timeout in seconds (max 120)",
104			},
105		},
106		Required: []string{"url", "format"},
107	}
108}
109
110func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
111	var params FetchParams
112	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
113		return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
114	}
115
116	if params.URL == "" {
117		return NewTextErrorResponse("URL parameter is required"), nil
118	}
119
120	format := strings.ToLower(params.Format)
121	if format != "text" && format != "markdown" && format != "html" {
122		return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
123	}
124
125	if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
126		return NewTextErrorResponse("URL must start with http:// or https://"), nil
127	}
128
129	sessionID, messageID := GetContextValues(ctx)
130	if sessionID == "" || messageID == "" {
131		return ToolResponse{}, fmt.Errorf("session ID and message ID are required for creating a new file")
132	}
133
134	p := t.permissions.Request(
135		permission.CreatePermissionRequest{
136			SessionID:   sessionID,
137			Path:        t.workingDir,
138			ToolName:    FetchToolName,
139			Action:      "fetch",
140			Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
141			Params:      FetchPermissionsParams(params),
142		},
143	)
144
145	if !p {
146		return ToolResponse{}, permission.ErrorPermissionDenied
147	}
148
149	// Handle timeout with context
150	requestCtx := ctx
151	if params.Timeout > 0 {
152		maxTimeout := 120 // 2 minutes
153		if params.Timeout > maxTimeout {
154			params.Timeout = maxTimeout
155		}
156		var cancel context.CancelFunc
157		requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
158		defer cancel()
159	}
160
161	req, err := http.NewRequestWithContext(requestCtx, "GET", params.URL, nil)
162	if err != nil {
163		return ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
164	}
165
166	req.Header.Set("User-Agent", "crush/1.0")
167
168	resp, err := t.client.Do(req)
169	if err != nil {
170		return ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
171	}
172	defer resp.Body.Close()
173
174	if resp.StatusCode != http.StatusOK {
175		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
176	}
177
178	maxSize := int64(5 * 1024 * 1024) // 5MB
179	body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
180	if err != nil {
181		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
182	}
183
184	content := string(body)
185	contentType := resp.Header.Get("Content-Type")
186
187	switch format {
188	case "text":
189		if strings.Contains(contentType, "text/html") {
190			text, err := extractTextFromHTML(content)
191			if err != nil {
192				return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
193			}
194			return NewTextResponse(text), nil
195		}
196		return NewTextResponse(content), nil
197
198	case "markdown":
199		if strings.Contains(contentType, "text/html") {
200			markdown, err := convertHTMLToMarkdown(content)
201			if err != nil {
202				return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
203			}
204			return NewTextResponse(markdown), nil
205		}
206
207		return NewTextResponse("```\n" + content + "\n```"), nil
208
209	case "html":
210		return NewTextResponse(content), nil
211
212	default:
213		return NewTextResponse(content), nil
214	}
215}
216
217func extractTextFromHTML(html string) (string, error) {
218	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
219	if err != nil {
220		return "", err
221	}
222
223	text := doc.Text()
224	text = strings.Join(strings.Fields(text), " ")
225
226	return text, nil
227}
228
229func convertHTMLToMarkdown(html string) (string, error) {
230	converter := md.NewConverter("", true, nil)
231
232	markdown, err := converter.ConvertString(html)
233	if err != nil {
234		return "", err
235	}
236
237	return markdown, nil
238}