@@ -1,12 +1,14 @@
package tools
import (
+ "bytes"
"context"
_ "embed"
"encoding/json"
"fmt"
"io"
"net/http"
+ "sort"
"strings"
"time"
"unicode/utf8"
@@ -21,6 +23,13 @@ import (
const (
FetchToolName = "fetch"
MaxFetchSize = 1 * 1024 * 1024 // 1MB
+ // jqHintThreshold is the response size above which fetch will
+ // append a trailing [crush-hint: ...] banner nudging the caller
+ // toward the `jq` parameter when the body looks like JSON and no
+ // filter was provided. Appended (not prepended) so that any
+ // downstream consumer that parses the body from the start still
+ // sees valid JSON up to the banner.
+ jqHintThreshold = 50 * 1024 // 50 KB
)
//go:embed fetch.md
@@ -47,9 +56,21 @@ func NewFetchTool(permissions permission.Service, workingDir string, client *htt
return fantasy.NewTextErrorResponse("URL parameter is required"), nil
}
+ // When a jq expression is provided, format is ignored. We
+ // skip validation entirely in that case and normalize format
+ // to "text" so any later code paths inspecting it see a
+ // valid value. Without jq, format must be one of the
+ // supported values.
format := strings.ToLower(params.Format)
- if format != "text" && format != "markdown" && format != "html" {
- return fantasy.NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
+ if params.JQ != "" {
+ format = "text"
+ } else if format != "text" && format != "markdown" && format != "html" {
+ return fantasy.NewTextErrorResponse(
+ "Format must be one of: text, markdown, html. " +
+ "For JSON responses, set the `jq` parameter to filter " +
+ "server-side — format then becomes optional " +
+ "(e.g. fetch(url=..., jq=\"length\")).",
+ ), nil
}
if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
@@ -134,6 +155,10 @@ func NewFetchTool(permissions permission.Service, workingDir string, client *htt
return fantasy.NewTextResponse(filtered), nil
}
+ largeJSONWithoutFilter := format == "text" &&
+ len(body) > jqHintThreshold &&
+ looksLikeJSON(contentType, body)
+
switch format {
case "text":
if strings.Contains(contentType, "text/html") {
@@ -178,6 +203,20 @@ func NewFetchTool(permissions permission.Service, workingDir string, client *htt
content += fmt.Sprintf("\n\n[Content truncated to %d bytes]", MaxFetchSize)
}
+ // Append the jq hint last so it's always at the true end of
+ // the response, even if the format switch above rewrote the
+ // content (HTML extraction, markdown wrapping, etc.) and
+ // after MaxFetchSize truncation.
+ if largeJSONWithoutFilter {
+ content += fmt.Sprintf(
+ "\n\n[crush-hint: response body is %d bytes of JSON. "+
+ "Prefer re-calling fetch() with a `jq` expression to "+
+ "filter server-side (e.g. jq=\"length\", jq=\"[.[].name]\") "+
+ "instead of loading the full payload into context.]",
+ len(body),
+ )
+ }
+
return fantasy.NewTextResponse(content), nil
})
}
@@ -208,6 +247,10 @@ func convertHTMLToMarkdown(html string) (string, error) {
// applyJQ parses body as JSON and runs the given jq expression against it,
// returning pretty-printed results joined by newlines. Multiple top-level
// JSON values in the body are supported (each is filtered independently).
+//
+// When the filter errors against the actual shape of the body, the error
+// is annotated with a short shape description so the caller (usually an
+// LLM) can fix the filter on the next attempt instead of guessing.
func applyJQ(body, expr string) (string, error) {
query, err := gojq.Parse(expr)
if err != nil {
@@ -244,7 +287,7 @@ func applyJQ(body, expr string) (string, error) {
break
}
if e, ok := v.(error); ok {
- return "", e
+ return "", fmt.Errorf("%w (input shape: %s)", e, describeShape(in))
}
bs, err := json.MarshalIndent(v, "", " ")
if err != nil {
@@ -256,3 +299,51 @@ func applyJQ(body, expr string) (string, error) {
}
return strings.TrimRight(out.String(), "\n"), nil
}
+
+// describeShape returns a short, human-readable description of v. Used in
+// jq error messages so the caller can see what the body actually looks
+// like without us dumping the whole payload back.
+func describeShape(v any) string {
+ switch x := v.(type) {
+ case nil:
+ return "null"
+ case bool:
+ return "boolean"
+ case json.Number:
+ return "number"
+ case string:
+ return "string"
+ case []any:
+ if len(x) == 0 {
+ return "empty array"
+ }
+ return fmt.Sprintf("array of %d items; first item is %s", len(x), describeShape(x[0]))
+ case map[string]any:
+ if len(x) == 0 {
+ return "empty object"
+ }
+ keys := make([]string, 0, len(x))
+ for k := range x {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+ const maxKeys = 8
+ suffix := ""
+ if len(keys) > maxKeys {
+ keys = keys[:maxKeys]
+ suffix = ", ..."
+ }
+ return fmt.Sprintf("object with keys: %s%s", strings.Join(keys, ", "), suffix)
+ }
+ return fmt.Sprintf("unknown (%T)", v)
+}
+
+// looksLikeJSON reports whether body is most likely JSON based on the
+// Content-Type header and/or the first non-whitespace byte.
+func looksLikeJSON(contentType string, body []byte) bool {
+ if strings.Contains(strings.ToLower(contentType), "json") {
+ return true
+ }
+ trimmed := bytes.TrimLeft(body, " \t\r\n")
+ return len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[')
+}
@@ -1,11 +1,52 @@
package tools
import (
+ "context"
+ "encoding/json"
+ "net/http"
+ "net/http/httptest"
+ "strings"
"testing"
+ "charm.land/fantasy"
+ "github.com/charmbracelet/crush/internal/permission"
+ "github.com/charmbracelet/crush/internal/pubsub"
"github.com/stretchr/testify/require"
)
+type mockFetchPermissionService struct {
+ *pubsub.Broker[permission.PermissionRequest]
+}
+
+func (m *mockFetchPermissionService) Request(ctx context.Context, req permission.CreatePermissionRequest) (bool, error) {
+ return true, nil
+}
+
+func (m *mockFetchPermissionService) Grant(req permission.PermissionRequest) {}
+func (m *mockFetchPermissionService) Deny(req permission.PermissionRequest) {}
+func (m *mockFetchPermissionService) GrantPersistent(req permission.PermissionRequest) {}
+func (m *mockFetchPermissionService) AutoApproveSession(sessionID string) {}
+func (m *mockFetchPermissionService) SetSkipRequests(skip bool) {}
+func (m *mockFetchPermissionService) SkipRequests() bool { return false }
+func (m *mockFetchPermissionService) SubscribeNotifications(ctx context.Context) <-chan pubsub.Event[permission.PermissionNotification] {
+ return make(<-chan pubsub.Event[permission.PermissionNotification])
+}
+
+func newFetchToolForTest() fantasy.AgentTool {
+ permissions := &mockFetchPermissionService{Broker: pubsub.NewBroker[permission.PermissionRequest]()}
+ return NewFetchTool(permissions, "/tmp", http.DefaultClient)
+}
+
+func runFetchTool(t *testing.T, tool fantasy.AgentTool, params FetchParams) fantasy.ToolResponse {
+ t.Helper()
+ input, err := json.Marshal(params)
+ require.NoError(t, err)
+ ctx := context.WithValue(context.Background(), SessionIDContextKey, "test-session")
+ resp, err := tool.Run(ctx, fantasy.ToolCall{ID: "t", Name: FetchToolName, Input: string(input)})
+ require.NoError(t, err)
+ return resp
+}
+
func TestApplyJQ(t *testing.T) {
t.Parallel()
@@ -69,3 +110,250 @@ func TestApplyJQErrors(t *testing.T) {
_, err = applyJQ(``, `.`)
require.Error(t, err)
}
+
+// TestApplyJQShapeHint verifies that when a jq filter fails because it
+// assumed the wrong top-level shape, the error message includes a
+// describeShape() hint so the caller can self-correct.
+func TestApplyJQShapeHint(t *testing.T) {
+ t.Parallel()
+
+ // Filter assumes an object but body is an array. This is the exact
+ // failure mode observed with kimi-k2.5 in the eval harness:
+ // `.providers[]` against a top-level array.
+ _, err := applyJQ(`[{"id":"a"},{"id":"b"}]`, `.providers[]`)
+ require.Error(t, err)
+ require.Contains(t, err.Error(), "input shape:")
+ require.Contains(t, err.Error(), "array of 2 items")
+ require.Contains(t, err.Error(), "object with keys: id")
+
+ // Filter assumes an array index but body is an object.
+ _, err = applyJQ(`{"data":{"x":1},"meta":{}}`, `.[0]`)
+ require.Error(t, err)
+ require.Contains(t, err.Error(), "input shape:")
+ require.Contains(t, err.Error(), "object with keys: data, meta")
+}
+
+func TestDescribeShape(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ json string
+ want string
+ }{
+ {"null", `null`, "null"},
+ {"bool", `true`, "boolean"},
+ {"number", `42`, "number"},
+ {"string", `"hi"`, "string"},
+ {"empty array", `[]`, "empty array"},
+ {"empty object", `{}`, "empty object"},
+ {"array of objects", `[{"a":1,"b":2},{"a":3}]`, "array of 2 items; first item is object with keys: a, b"},
+ {"object with keys", `{"zebra":1,"apple":2,"mango":3}`, "object with keys: apple, mango, zebra"},
+ {
+ "object truncates keys",
+ `{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9,"j":10}`,
+ "object with keys: a, b, c, d, e, f, g, h, ...",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ var v any
+ dec := json.NewDecoder(strings.NewReader(tt.json))
+ dec.UseNumber()
+ require.NoError(t, dec.Decode(&v))
+ require.Equal(t, tt.want, describeShape(v))
+ })
+ }
+}
+
+func TestLooksLikeJSON(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ contentType string
+ body string
+ want bool
+ }{
+ {"json content type", "application/json", "garbage", true},
+ {"json content type uppercase", "Application/JSON; charset=utf-8", "garbage", true},
+ {"array body", "text/plain", "[1,2,3]", true},
+ {"object body", "text/plain", `{"a":1}`, true},
+ {"leading whitespace", "text/plain", "\n \t[1]", true},
+ {"html body", "text/html", "<html></html>", false},
+ {"plain text", "text/plain", "hello world", false},
+ {"empty", "", "", false},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ require.Equal(t, tt.want, looksLikeJSON(tt.contentType, []byte(tt.body)))
+ })
+ }
+}
+
+// TestFetchToolJQHintPlacement verifies that when fetch returns a large
+// JSON body without a jq filter:
+//
+// 1. The hint banner is appended (not prepended).
+// 2. The content up to the banner is the unmodified original JSON and
+// parses cleanly — this is the "agent pipes response to jq" path we
+// want to protect from regressions.
+// 3. Small JSON bodies do NOT get the banner (no unnecessary noise).
+// 4. Non-JSON bodies (even if large) do NOT get the banner.
+// 5. When jq is set, there is no banner and format validation is skipped.
+func TestFetchToolJQHintPlacement(t *testing.T) {
+ t.Parallel()
+
+ // Build a JSON body larger than jqHintThreshold.
+ items := make([]map[string]any, 3000)
+ for i := range items {
+ items[i] = map[string]any{"id": i, "name": "item"}
+ }
+ largeJSON, err := json.Marshal(items)
+ require.NoError(t, err)
+ require.Greater(t, len(largeJSON), jqHintThreshold, "fixture must exceed threshold")
+
+ smallJSON := []byte(`[{"id":1,"name":"item"},{"id":2,"name":"item"}]`)
+ require.Less(t, len(smallJSON), jqHintThreshold)
+
+ largeText := strings.Repeat("lorem ipsum dolor sit amet ", 3000)
+ require.Greater(t, len(largeText), jqHintThreshold)
+
+ tests := []struct {
+ name string
+ contentType string
+ body []byte
+ params FetchParams
+ wantBanner bool
+ wantErr bool
+ }{
+ {
+ name: "large JSON without jq gets trailing banner",
+ contentType: "application/json",
+ body: largeJSON,
+ params: FetchParams{Format: "text"},
+ wantBanner: true,
+ },
+ {
+ name: "large JSON with jq has no banner",
+ contentType: "application/json",
+ body: largeJSON,
+ params: FetchParams{JQ: "length"},
+ wantBanner: false,
+ },
+ {
+ name: "large JSON with jq and no format still works",
+ contentType: "application/json",
+ body: largeJSON,
+ params: FetchParams{JQ: "length"}, // note: Format unset
+ wantBanner: false,
+ },
+ {
+ name: "small JSON gets no banner",
+ contentType: "application/json",
+ body: smallJSON,
+ params: FetchParams{Format: "text"},
+ wantBanner: false,
+ },
+ {
+ name: "large non-JSON text gets no banner",
+ contentType: "text/plain",
+ body: []byte(largeText),
+ params: FetchParams{Format: "text"},
+ wantBanner: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", tt.contentType)
+ w.Write(tt.body)
+ }))
+ t.Cleanup(srv.Close)
+
+ tool := newFetchToolForTest()
+ params := tt.params
+ params.URL = srv.URL
+
+ resp := runFetchTool(t, tool, params)
+ require.False(t, resp.IsError, "unexpected error: %s", resp.Content)
+
+ if tt.wantBanner {
+ require.Contains(t, resp.Content, "[crush-hint:")
+ // Banner must be trailing, not leading.
+ require.False(t, strings.HasPrefix(resp.Content, "[crush-hint:"),
+ "banner must be appended, not prepended")
+ // Critical regression guard: the content BEFORE the banner
+ // must be the original JSON body, still parseable.
+ bannerIdx := strings.LastIndex(resp.Content, "\n\n[crush-hint:")
+ require.Greater(t, bannerIdx, 0, "banner not found in expected form")
+ jsonPortion := resp.Content[:bannerIdx]
+ var parsed any
+ require.NoError(t, json.Unmarshal([]byte(jsonPortion), &parsed),
+ "content before banner must be valid JSON")
+ } else {
+ require.NotContains(t, resp.Content, "[crush-hint:")
+ }
+ })
+ }
+}
+
+// TestFetchToolFormatOptionalWithJQ verifies that format is optional
+// (defaults to text) when jq is set, so callers don't get bounced on
+// format="json" or missing format when they're using jq.
+func TestFetchToolFormatOptionalWithJQ(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte(`[{"id":1},{"id":2},{"id":3}]`))
+ }))
+ t.Cleanup(srv.Close)
+
+ tool := newFetchToolForTest()
+
+ // No format at all.
+ resp := runFetchTool(t, tool, FetchParams{URL: srv.URL, JQ: "length"})
+ require.False(t, resp.IsError, "expected success, got: %s", resp.Content)
+ require.Equal(t, "3", resp.Content)
+
+ // format="json" — historically rejected, should now pass because jq is set.
+ resp = runFetchTool(t, tool, FetchParams{URL: srv.URL, Format: "json", JQ: "length"})
+ require.False(t, resp.IsError, "expected success with format=json + jq, got: %s", resp.Content)
+ require.Equal(t, "3", resp.Content)
+
+ // Sanity: invalid format WITHOUT jq still rejected.
+ resp = runFetchTool(t, tool, FetchParams{URL: srv.URL, Format: "json"})
+ require.True(t, resp.IsError, "invalid format without jq should still error")
+ require.Contains(t, resp.Content, "jq")
+}
+
+// TestFetchToolShapeHintSurfaces verifies that a wrong-shape jq filter
+// against a fetched body returns an error whose message includes the
+// (input shape: ...) hint, so the LLM has enough info to self-correct.
+func TestFetchToolShapeHintSurfaces(t *testing.T) {
+ t.Parallel()
+
+ srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ w.Write([]byte(`[{"id":"a"},{"id":"b"}]`))
+ }))
+ t.Cleanup(srv.Close)
+
+ tool := newFetchToolForTest()
+ resp := runFetchTool(t, tool, FetchParams{
+ URL: srv.URL,
+ JQ: ".providers[].id",
+ })
+ require.True(t, resp.IsError)
+ require.Contains(t, resp.Content, "jq:")
+ require.Contains(t, resp.Content, "input shape:")
+ require.Contains(t, resp.Content, "array of 2 items")
+}