From a38fe8e3a5ee0d1a1cc6f28e23b4d0266d5c3265 Mon Sep 17 00:00:00 2001 From: Christian Rocha Date: Thu, 16 Apr 2026 17:21:27 -0400 Subject: [PATCH] chore(jq,fetch): allow fetch to pass into jq, improve jq skill By allowing fetch to send results directly into jq we can prevent a lot of tokens from entering the context, and compress several tool calls. --- internal/agent/tools/fetch.go | 65 ++++++++++++++++++++++++++ internal/agent/tools/fetch.md | 8 +++- internal/agent/tools/fetch_test.go | 71 +++++++++++++++++++++++++++++ internal/agent/tools/fetch_types.go | 2 + internal/skills/builtin/jq/SKILL.md | 18 +++++++- 5 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 internal/agent/tools/fetch_test.go diff --git a/internal/agent/tools/fetch.go b/internal/agent/tools/fetch.go index 90db1a179980e8fa3f65491c28123b0b62e797fb..90c8c9fd6d411bda0b1a8cf605bbde437e4e6007 100644 --- a/internal/agent/tools/fetch.go +++ b/internal/agent/tools/fetch.go @@ -3,6 +3,7 @@ package tools import ( "context" _ "embed" + "encoding/json" "fmt" "io" "net/http" @@ -14,6 +15,7 @@ import ( md "github.com/JohannesKaufmann/html-to-markdown" "github.com/PuerkitoBio/goquery" "github.com/charmbracelet/crush/internal/permission" + "github.com/itchyny/gojq" ) const ( @@ -121,6 +123,17 @@ func NewFetchTool(permissions permission.Service, workingDir string, client *htt } contentType := resp.Header.Get("Content-Type") + // If a jq expression was provided, parse the body as JSON, + // apply the filter, and return the result directly (format is + // ignored). 
+ if params.JQ != "" { + filtered, err := applyJQ(content, params.JQ) + if err != nil { + return fantasy.NewTextErrorResponse("jq: " + err.Error()), nil + } + return fantasy.NewTextResponse(filtered), nil + } + switch format { case "text": if strings.Contains(contentType, "text/html") { @@ -191,3 +204,55 @@ func convertHTMLToMarkdown(html string) (string, error) { return markdown, nil } + +// applyJQ parses body as JSON and runs the given jq expression against it, +// returning pretty-printed results joined by newlines. Multiple top-level +// JSON values in the body are supported (each is filtered independently). +func applyJQ(body, expr string) (string, error) { + query, err := gojq.Parse(expr) + if err != nil { + return "", fmt.Errorf("parse: %w", err) + } + code, err := gojq.Compile(query) + if err != nil { + return "", fmt.Errorf("compile: %w", err) + } + + dec := json.NewDecoder(strings.NewReader(body)) + dec.UseNumber() + var inputs []any + for { + var v any + if err := dec.Decode(&v); err != nil { + if err == io.EOF { + break + } + return "", fmt.Errorf("invalid JSON: %w", err) + } + inputs = append(inputs, v) + } + if len(inputs) == 0 { + return "", fmt.Errorf("empty response body") + } + + var out strings.Builder + for _, in := range inputs { + iter := code.Run(in) + for { + v, ok := iter.Next() + if !ok { + break + } + if e, ok := v.(error); ok { + return "", e + } + bs, err := json.MarshalIndent(v, "", " ") + if err != nil { + return "", err + } + out.Write(bs) + out.WriteByte('\n') + } + } + return strings.TrimRight(out.String(), "\n"), nil +} diff --git a/internal/agent/tools/fetch.md b/internal/agent/tools/fetch.md index 1dabf06bfe847ec92295f0bf054902b78c08fb40..700de5b95a261698e4e636670292b00a1e6110bb 100644 --- a/internal/agent/tools/fetch.md +++ b/internal/agent/tools/fetch.md @@ -1,4 +1,4 @@ -Fetch raw content from a URL as text, markdown, or html (max 5MB); no AI processing. For analysis or extraction use agentic_fetch. 
+Fetch raw content from a URL as text, markdown, or html (max 5MB); no AI processing. Optional `jq` parameter filters JSON responses inside the tool, after download and before the result is returned — use it for counting, extracting, or aggregating API data instead of loading the full payload. For analysis or extraction of prose/HTML use agentic_fetch. Use this tool when you need: @@ -7,6 +7,7 @@ Use this tool when you need: - HTML/text/markdown content without interpretation - Simple, fast content retrieval without analysis - To save tokens by avoiding AI processing +- To count, sum, or extract fields from a JSON API response (use the `jq` parameter) DO NOT use this tool when you need to: - Extract specific information from a webpage (use agentic_fetch instead) @@ -18,6 +19,11 @@ DO NOT use this tool when you need to: - Provide URL to fetch content from - Specify desired output format (text, markdown, or html) - Optional timeout for request +- Optional `jq` expression to filter JSON responses. When set, the body is parsed as JSON and the expression is applied inside the tool before the result is returned; `format` is ignored. 
Examples: + - `jq: "length"` — count items in a top-level array + - `jq: "[.[].name]"` — extract names from an array of objects + - `jq: "[.[].models | length] | add"` — sum nested array lengths + - `jq: ".data | keys"` — list keys of a nested object diff --git a/internal/agent/tools/fetch_test.go b/internal/agent/tools/fetch_test.go new file mode 100644 index 0000000000000000000000000000000000000000..88689b1292f51b887f80a9361f7e5c5b6a5833c4 --- /dev/null +++ b/internal/agent/tools/fetch_test.go @@ -0,0 +1,71 @@ +package tools + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestApplyJQ(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + body string + expr string + want string + }{ + { + name: "length of array", + body: `[1,2,3,4,5]`, + expr: `length`, + want: `5`, + }, + { + name: "extract field", + body: `{"name":"crush","version":"1.0"}`, + expr: `.name`, + want: `"crush"`, + }, + { + name: "count objects in array", + body: `[{"id":"a"},{"id":"b"},{"id":"c"}]`, + expr: `length`, + want: `3`, + }, + { + name: "sum nested array lengths", + body: `[{"models":[1,2]},{"models":[3,4,5]},{"models":[6]}]`, + expr: `[.[].models | length] | add`, + want: `6`, + }, + { + name: "extract names", + body: `[{"name":"a"},{"name":"b"}]`, + expr: `[.[].name]`, + want: "[\n \"a\",\n \"b\"\n]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got, err := applyJQ(tt.body, tt.expr) + require.NoError(t, err) + require.Equal(t, tt.want, got) + }) + } +} + +func TestApplyJQErrors(t *testing.T) { + t.Parallel() + + _, err := applyJQ(`not json`, `.`) + require.Error(t, err) + + _, err = applyJQ(`[1,2,3]`, `|||`) + require.Error(t, err) + + _, err = applyJQ(``, `.`) + require.Error(t, err) +} diff --git a/internal/agent/tools/fetch_types.go b/internal/agent/tools/fetch_types.go index 98bcb3b6010fb6d76e316afaa5b2581a942e1463..5245f967a6ebb52f01d6fe1ef0d0a14d9f5c5e97 100644 --- 
a/internal/agent/tools/fetch_types.go +++ b/internal/agent/tools/fetch_types.go @@ -40,6 +40,7 @@ type FetchParams struct { URL string `json:"url" description:"The URL to fetch content from"` Format string `json:"format" description:"The format to return the content in (text, markdown, or html)"` Timeout int `json:"timeout,omitempty" description:"Optional timeout in seconds (max 120)"` + JQ string `json:"jq,omitempty" description:"Optional jq expression to apply to the fetched content (assumes JSON). When set, the response body is parsed as JSON and filtered server-side; format is ignored. Use for counting, extracting, or reshaping JSON API responses without loading the full payload into context."` } // FetchPermissionsParams defines the permission parameters for the simple fetch tool. @@ -47,4 +48,5 @@ type FetchPermissionsParams struct { URL string `json:"url"` Format string `json:"format"` Timeout int `json:"timeout,omitempty"` + JQ string `json:"jq,omitempty"` } diff --git a/internal/skills/builtin/jq/SKILL.md b/internal/skills/builtin/jq/SKILL.md index 6d7be404fe4455586dad14ee13221b5422b588b8..18bd5c3ce3663d9684c1567baaf597c552bff6eb 100644 --- a/internal/skills/builtin/jq/SKILL.md +++ b/internal/skills/builtin/jq/SKILL.md @@ -1,6 +1,6 @@ --- name: jq -description: Use when the user needs to query, filter, reshape, extract, create, or construct JSON data — including API responses, config files, log output, or any structured data — or when helping the user write or debug JSON transformations. +description: Use when the user needs to query, filter, reshape, extract, create, construct, count, sum, or aggregate JSON data — including API responses, config files, log output, or any structured data — or when helping the user write or debug JSON transformations, or when answering "how many", "how much", "which", or "what are the" questions over JSON or arrays. 
--- # jq — Built-in JSON Processor @@ -95,3 +95,19 @@ jq -n --arg msg hello '{"message": $msg}' - Use `try` to suppress errors on missing keys: `jq 'try .foo.bar'` - Use `// "default"` for fallback values: `jq '.name // "unknown"'` - Use `@csv`, `@tsv`, `@base64`, `@html`, `@uri` for format strings. + +## Filtering remote JSON with `fetch` + +The `fetch` tool accepts an optional `jq` parameter that applies a jq +expression to the response body inside the tool, before it is returned. Prefer it over pulling entire +JSON payloads into context — it's faster, cheaper, and avoids manual +counting mistakes. + +```text +fetch(url="https://api.example.com/items", format="text", jq="length") +fetch(url="https://api.example.com/items", format="text", jq="[.[].name]") +fetch(url="https://catwalk.charm.sh/v2/providers", format="text", + jq="[.[].models | length] | add") +``` + +When `jq` is set, `format` is ignored and the body is parsed as JSON.