fetch.go

  1package tools
  2
  3import (
  4	"bytes"
  5	"context"
  6	_ "embed"
  7	"encoding/json"
  8	"fmt"
  9	"io"
 10	"net/http"
 11	"sort"
 12	"strings"
 13	"time"
 14	"unicode/utf8"
 15
 16	"charm.land/fantasy"
 17	md "github.com/JohannesKaufmann/html-to-markdown"
 18	"github.com/PuerkitoBio/goquery"
 19	"github.com/charmbracelet/crush/internal/permission"
 20	"github.com/itchyny/gojq"
 21)
 22
const (
	FetchToolName = "fetch"         // name the tool is registered under and reports in permission requests
	MaxFetchSize  = 1 * 1024 * 1024 // 1MB; cap on bytes read from a response body and on returned content
	// jqHintThreshold is the response size above which fetch will
	// append a trailing [crush-hint: ...] banner nudging the caller
	// toward the `jq` parameter when the body looks like JSON and no
	// filter was provided. Appended (not prepended) so that any
	// downstream consumer that parses the body from the start still
	// sees valid JSON up to the banner.
	jqHintThreshold = 50 * 1024 // 50 KB
)
 34
// fetchDescription holds the embedded contents of fetch.md. NewFetchTool
// passes it through FirstLineDescription to obtain the tool description.
//
//go:embed fetch.md
var fetchDescription []byte
 37
 38func NewFetchTool(permissions permission.Service, workingDir string, client *http.Client) fantasy.AgentTool {
 39	if client == nil {
 40		transport := http.DefaultTransport.(*http.Transport).Clone()
 41		transport.MaxIdleConns = 100
 42		transport.MaxIdleConnsPerHost = 10
 43		transport.IdleConnTimeout = 90 * time.Second
 44
 45		client = &http.Client{
 46			Timeout:   30 * time.Second,
 47			Transport: transport,
 48		}
 49	}
 50
 51	return fantasy.NewParallelAgentTool(
 52		FetchToolName,
 53		FirstLineDescription(fetchDescription),
 54		func(ctx context.Context, params FetchParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
 55			if params.URL == "" {
 56				return fantasy.NewTextErrorResponse("URL parameter is required"), nil
 57			}
 58
 59			// When a jq expression is provided, format is ignored. We
 60			// skip validation entirely in that case and normalize format
 61			// to "text" so any later code paths inspecting it see a
 62			// valid value. Without jq, format must be one of the
 63			// supported values.
 64			format := strings.ToLower(params.Format)
 65			if params.JQ != "" {
 66				format = "text"
 67			} else if format != "text" && format != "markdown" && format != "html" {
 68				return fantasy.NewTextErrorResponse(
 69					"Format must be one of: text, markdown, html. " +
 70						"For JSON responses, set the `jq` parameter to filter " +
 71						"server-side — format then becomes optional " +
 72						"(e.g. fetch(url=..., jq=\"length\")).",
 73				), nil
 74			}
 75
 76			if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
 77				return fantasy.NewTextErrorResponse("URL must start with http:// or https://"), nil
 78			}
 79
 80			sessionID := GetSessionFromContext(ctx)
 81			if sessionID == "" {
 82				return fantasy.ToolResponse{}, fmt.Errorf("session ID is required for creating a new file")
 83			}
 84
 85			p, err := permissions.Request(ctx,
 86				permission.CreatePermissionRequest{
 87					SessionID:   sessionID,
 88					Path:        workingDir,
 89					ToolCallID:  call.ID,
 90					ToolName:    FetchToolName,
 91					Action:      "fetch",
 92					Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
 93					Params:      FetchPermissionsParams(params),
 94				},
 95			)
 96			if err != nil {
 97				return fantasy.ToolResponse{}, err
 98			}
 99			if !p {
100				return fantasy.ToolResponse{}, permission.ErrorPermissionDenied
101			}
102
103			// maxFetchTimeoutSeconds is the maximum allowed timeout for fetch requests (2 minutes)
104			const maxFetchTimeoutSeconds = 120
105
106			// Handle timeout with context
107			requestCtx := ctx
108			if params.Timeout > 0 {
109				if params.Timeout > maxFetchTimeoutSeconds {
110					params.Timeout = maxFetchTimeoutSeconds
111				}
112				var cancel context.CancelFunc
113				requestCtx, cancel = context.WithTimeout(ctx, time.Duration(params.Timeout)*time.Second)
114				defer cancel()
115			}
116
117			req, err := http.NewRequestWithContext(requestCtx, "GET", params.URL, nil)
118			if err != nil {
119				return fantasy.ToolResponse{}, fmt.Errorf("failed to create request: %w", err)
120			}
121
122			req.Header.Set("User-Agent", "crush/1.0")
123
124			resp, err := client.Do(req)
125			if err != nil {
126				return fantasy.ToolResponse{}, fmt.Errorf("failed to fetch URL: %w", err)
127			}
128			defer resp.Body.Close()
129
130			if resp.StatusCode != http.StatusOK {
131				return fantasy.NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
132			}
133
134			body, err := io.ReadAll(io.LimitReader(resp.Body, MaxFetchSize))
135			if err != nil {
136				return fantasy.NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
137			}
138
139			content := string(body)
140
141			validUTF8 := utf8.ValidString(content)
142			if !validUTF8 {
143				return fantasy.NewTextErrorResponse("Response content is not valid UTF-8"), nil
144			}
145			contentType := resp.Header.Get("Content-Type")
146
147			// If a jq expression was provided, parse the body as JSON,
148			// apply the filter, and return the result directly (format is
149			// ignored).
150			if params.JQ != "" {
151				filtered, err := applyJQ(content, params.JQ)
152				if err != nil {
153					return fantasy.NewTextErrorResponse("jq: " + err.Error()), nil
154				}
155				return fantasy.NewTextResponse(filtered), nil
156			}
157
158			largeJSONWithoutFilter := format == "text" &&
159				len(body) > jqHintThreshold &&
160				looksLikeJSON(contentType, body)
161
162			switch format {
163			case "text":
164				if strings.Contains(contentType, "text/html") {
165					text, err := extractTextFromHTML(content)
166					if err != nil {
167						return fantasy.NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
168					}
169					content = text
170				}
171
172			case "markdown":
173				if strings.Contains(contentType, "text/html") {
174					markdown, err := convertHTMLToMarkdown(content)
175					if err != nil {
176						return fantasy.NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
177					}
178					content = markdown
179				}
180
181				content = "```\n" + content + "\n```"
182
183			case "html":
184				// return only the body of the HTML document
185				if strings.Contains(contentType, "text/html") {
186					doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
187					if err != nil {
188						return fantasy.NewTextErrorResponse("Failed to parse HTML: " + err.Error()), nil
189					}
190					body, err := doc.Find("body").Html()
191					if err != nil {
192						return fantasy.NewTextErrorResponse("Failed to extract body from HTML: " + err.Error()), nil
193					}
194					if body == "" {
195						return fantasy.NewTextErrorResponse("No body content found in HTML"), nil
196					}
197					content = "<html>\n<body>\n" + body + "\n</body>\n</html>"
198				}
199			}
200			// truncate content if it exceeds max read size
201			if int64(len(content)) >= MaxFetchSize {
202				content = content[:MaxFetchSize]
203				content += fmt.Sprintf("\n\n[Content truncated to %d bytes]", MaxFetchSize)
204			}
205
206			// Append the jq hint last so it's always at the true end of
207			// the response, even if the format switch above rewrote the
208			// content (HTML extraction, markdown wrapping, etc.) and
209			// after MaxFetchSize truncation.
210			if largeJSONWithoutFilter {
211				content += fmt.Sprintf(
212					"\n\n[crush-hint: response body is %d bytes of JSON. "+
213						"Prefer re-calling fetch() with a `jq` expression to "+
214						"filter server-side (e.g. jq=\"length\", jq=\"[.[].name]\") "+
215						"instead of loading the full payload into context.]",
216					len(body),
217				)
218			}
219
220			return fantasy.NewTextResponse(content), nil
221		})
222}
223
224func extractTextFromHTML(html string) (string, error) {
225	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
226	if err != nil {
227		return "", err
228	}
229
230	text := doc.Find("body").Text()
231	text = strings.Join(strings.Fields(text), " ")
232
233	return text, nil
234}
235
236func convertHTMLToMarkdown(html string) (string, error) {
237	converter := md.NewConverter("", true, nil)
238
239	markdown, err := converter.ConvertString(html)
240	if err != nil {
241		return "", err
242	}
243
244	return markdown, nil
245}
246
247// applyJQ parses body as JSON and runs the given jq expression against it,
248// returning pretty-printed results joined by newlines. Multiple top-level
249// JSON values in the body are supported (each is filtered independently).
250//
251// When the filter errors against the actual shape of the body, the error
252// is annotated with a short shape description so the caller (usually an
253// LLM) can fix the filter on the next attempt instead of guessing.
254func applyJQ(body, expr string) (string, error) {
255	query, err := gojq.Parse(expr)
256	if err != nil {
257		return "", fmt.Errorf("parse: %w", err)
258	}
259	code, err := gojq.Compile(query)
260	if err != nil {
261		return "", fmt.Errorf("compile: %w", err)
262	}
263
264	dec := json.NewDecoder(strings.NewReader(body))
265	dec.UseNumber()
266	var inputs []any
267	for {
268		var v any
269		if err := dec.Decode(&v); err != nil {
270			if err == io.EOF {
271				break
272			}
273			return "", fmt.Errorf("invalid JSON: %w", err)
274		}
275		inputs = append(inputs, v)
276	}
277	if len(inputs) == 0 {
278		return "", fmt.Errorf("empty response body")
279	}
280
281	var out strings.Builder
282	for _, in := range inputs {
283		iter := code.Run(in)
284		for {
285			v, ok := iter.Next()
286			if !ok {
287				break
288			}
289			if e, ok := v.(error); ok {
290				return "", fmt.Errorf("%w (input shape: %s)", e, describeShape(in))
291			}
292			bs, err := json.MarshalIndent(v, "", "  ")
293			if err != nil {
294				return "", err
295			}
296			out.Write(bs)
297			out.WriteByte('\n')
298		}
299	}
300	return strings.TrimRight(out.String(), "\n"), nil
301}
302
// describeShape summarizes the structure of a decoded JSON value in a few
// words. It is used in jq error messages to show what the body looks like
// without echoing the payload itself.
//
// Scalars are named by JSON type. A non-empty array reports its length and
// the shape of its first element. A non-empty object lists its keys in
// sorted order, showing at most eight and marking any overflow with "...".
// Any other Go type is reported as "unknown" along with its type name.
func describeShape(v any) string {
	const keyLimit = 8

	switch val := v.(type) {
	case nil:
		return "null"
	case bool:
		return "boolean"
	case json.Number:
		return "number"
	case string:
		return "string"
	case []any:
		if n := len(val); n > 0 {
			return fmt.Sprintf("array of %d items; first item is %s", n, describeShape(val[0]))
		}
		return "empty array"
	case map[string]any:
		if len(val) == 0 {
			return "empty object"
		}
		names := make([]string, 0, len(val))
		for name := range val {
			names = append(names, name)
		}
		// Map iteration order is random; sort for a stable message.
		sort.Strings(names)
		overflow := ""
		if len(names) > keyLimit {
			names, overflow = names[:keyLimit], ", ..."
		}
		return fmt.Sprintf("object with keys: %s%s", strings.Join(names, ", "), overflow)
	default:
		return fmt.Sprintf("unknown (%T)", v)
	}
}
340
// looksLikeJSON is a cheap heuristic for "this response is probably JSON".
// It returns true when the Content-Type header mentions "json" (compared
// case-insensitively) or, failing that, when the first byte of body after
// leading spaces, tabs, and line breaks opens a JSON object or array.
func looksLikeJSON(contentType string, body []byte) bool {
	if header := strings.ToLower(contentType); strings.Contains(header, "json") {
		return true
	}

	head := bytes.TrimLeft(body, " \t\r\n")
	if len(head) == 0 {
		return false
	}
	switch head[0] {
	case '{', '[':
		return true
	default:
		return false
	}
}