feat(prompt): require loading appropriate skills before acting

Christian Rocha created

Models frequently skipped viewing a skill's SKILL.md and inferred the
skill's behavior from its description alone; making the load mandatory
in <critical_rules> prevents models from taking that shortcut.

Change summary

internal/agent/templates/coder.md.tpl                                   | 19 
internal/agent/testdata/TestCoderAgent/glm-5.1/bash_tool.yaml           | 28 
internal/agent/testdata/TestCoderAgent/glm-5.1/download_tool.yaml       | 22 
internal/agent/testdata/TestCoderAgent/glm-5.1/fetch_tool.yaml          | 24 
internal/agent/testdata/TestCoderAgent/glm-5.1/glob_tool.yaml           | 28 
internal/agent/testdata/TestCoderAgent/glm-5.1/grep_tool.yaml           |  3 
internal/agent/testdata/TestCoderAgent/glm-5.1/ls_tool.yaml             | 20 
internal/agent/testdata/TestCoderAgent/glm-5.1/multiedit_tool.yaml      | 26 
internal/agent/testdata/TestCoderAgent/glm-5.1/parallel_tool_calls.yaml | 28 
internal/agent/testdata/TestCoderAgent/glm-5.1/read_a_file.yaml         | 18 
internal/agent/testdata/TestCoderAgent/glm-5.1/simple_test.yaml         | 12 
internal/agent/testdata/TestCoderAgent/glm-5.1/sourcegraph_tool.yaml    | 28 
internal/agent/testdata/TestCoderAgent/glm-5.1/update_a_file.yaml       | 26 
internal/agent/testdata/TestCoderAgent/glm-5.1/write_tool.yaml          | 22 
14 files changed, 155 insertions(+), 149 deletions(-)

Detailed changes

internal/agent/templates/coder.md.tpl 🔗

@@ -16,6 +16,7 @@ These rules override everything else. Follow them strictly:
 11. **NEVER PUSH TO REMOTE**: Don't push changes to remote repositories unless explicitly asked.
 12. **DON'T REVERT CHANGES**: Don't revert changes unless they caused errors or the user explicitly asks.
 13. **TOOL CONSTRAINTS**: Only use documented tools. Never attempt 'apply_patch' or 'apply_diff' - they don't exist. Use 'edit' or 'multiedit' instead.
+14. **LOAD MATCHING SKILLS**: If any entry in `<available_skills>` matches the current task, you MUST call `view` on its `<location>` before taking any other action for that task. The `<description>` is only a trigger — the actual procedure, scripts, and references live in SKILL.md. Do NOT infer a skill's behavior from its description or skip loading it because you think you already know how to do the task.
 </critical_rules>
 
 <communication_style>
@@ -376,12 +377,20 @@ Diagnostics (lint/typecheck) included in tool output.
 {{.AvailSkillXML}}
 
 <skills_usage>
-When a user task matches a skill's description, read the skill's SKILL.md file to get full instructions.
-Skills are activated by reading their **exact** location path as shown above using the View tool. Always pass the location value directly to the View tool's file_path parameter — never guess, modify, or construct skill paths yourself.
-Builtin skills (type=builtin) have virtual location identifiers starting with "crush://skills/". The "crush://" prefix is NOT a URL or network address — it is a special internal identifier that the View tool understands natively. Pass them verbatim to the View tool. Do not treat them as URLs, MCP resources, or filesystem paths.
+The `<description>` of each skill is a TRIGGER — it tells you *when* a skill applies. It is NOT a specification of what the skill does or how to do it. The procedure, scripts, commands, references, and required flags live only in the SKILL.md body. You do not know what a skill actually does until you have read its SKILL.md.
+
+MANDATORY activation flow:
+1. Scan `<available_skills>` against the current user task.
+2. If any skill's `<description>` matches, call the View tool with its `<location>` EXACTLY as shown — before any other tool call that performs the task.
+3. Read the entire SKILL.md and follow its instructions.
+4. Only then execute the task, using the skill's prescribed commands/tools.
+
+Do NOT skip step 2 because you think you already know how to do the task. Do NOT infer a skill's behavior from its name or description. If you find yourself about to run `bash`, `edit`, or any other tool that performs the task for a skill-eligible request without first having viewed the SKILL.md, stop and load the skill first.
+
+Builtin skills (type=builtin) use virtual `crush://skills/...` location identifiers. The "crush://" prefix is NOT a URL, network address, or MCP resource — it is a special internal identifier the View tool understands natively. Pass the `<location>` verbatim to View.
+
 Do not use MCP tools (including read_mcp_resource) to load skills.
-Follow the skill's instructions to complete the task.
-If a skill mentions scripts, references, or assets, they are placed in the same folder as the skill itself (e.g., scripts/, references/, assets/ subdirectories within the skill's folder).
+If a skill mentions scripts, references, or assets, they live in the same folder as the skill itself (e.g., scripts/, references/, assets/ subdirectories within the skill's folder).
 </skills_usage>
 {{end}}
 

internal/agent/testdata/TestCoderAgent/glm-5.1/bash_tool.yaml 🔗

@@ -25,29 +25,29 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"B"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"B"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"ash"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"ash"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" script"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" command"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" to"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" to"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" create"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" create"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" test"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" test"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".txt"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".txt"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" hello"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" hello"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" bash"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" bash"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-d398f349fdddf325c9b169a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":208,"total_tokens":427}}
+      data: {"id":"chatcmpl-d5a6e284964b0a3f2a16b22b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":43,"completion_tokens":169,"total_tokens":388}}
 
       data: [DONE]
 
@@ -56,15 +56,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 2.437881292s
+    duration: 8.619412125s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32514
+    content_length: 33261
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/download_tool.yaml 🔗

@@ -25,23 +25,23 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Download"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Download"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Save"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Save"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" example"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" example"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".txt"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".txt"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" from"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" from"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" URL"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" URL"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-94f2b9d7e0f03b37bafdabe7","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":14,"completion_tokens":99,"total_tokens":321}}
+      data: {"id":"chatcmpl-c0f235c7fa42d37e1166b625","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":46,"completion_tokens":209,"total_tokens":431}}
 
       data: [DONE]
 
@@ -50,15 +50,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 1.431387417s
+    duration: 9.787436125s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32539
+    content_length: 33286
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/fetch_tool.yaml 🔗

@@ -25,25 +25,25 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Check"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Check"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" example"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" example"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".html"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".html"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" presence"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" presence"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" of"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" of"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" John"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" John"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Doe"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Doe"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-b5ecccdabaf2739cf6c865ed","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":3,"completion_tokens":202,"total_tokens":429}}
+      data: {"id":"chatcmpl-bb4548940b23c618051ce46c","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":51,"completion_tokens":122,"total_tokens":349}}
 
       data: [DONE]
 
@@ -52,15 +52,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 2.462296417s
+    duration: 5.694187416s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32557
+    content_length: 33304
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/glob_tool.yaml 🔗

@@ -25,29 +25,27 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Glob"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Finding"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" to"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ."},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" find"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"go"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" all"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Files"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ."},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Glob"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" files"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Current"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" current"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Directory"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" directory"},"finish_reason":null}]}
-
-      data: {"id":"chatcmpl-be405350a6a14c0be30ea59b","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":3,"completion_tokens":321,"total_tokens":532}}
+      data: {"id":"chatcmpl-ebbbf3e8177bc9dac4cf975f","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":35,"completion_tokens":85,"total_tokens":296}}
 
       data: [DONE]
 
@@ -56,15 +54,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 3.449660583s
+    duration: 4.936793125s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32475
+    content_length: 33222
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/grep_tool.yaml 🔗

@@ -6,9 +6,9 @@ interactions:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 812
+    content_length: 33220
     host: ""
-    body: '{"messages":[{"content":"You will generate a short title based on the first message a user begins a conversation with.\n\n<rules>\n- Keep the title in the same language that the user wrote their message in.\n- Ensure it is not more than 50 characters long.\n- The title should be a summary of the user''s message.\n- It should be one line long.\n- Do not use quotes or colons.\n- The entire text you return will be used as the title.\n- Never return anything that is more than one sentence (one line) long.\n</rules>\n\n /no_think","role":"system"},{"content":"Generate a concise title for the following content:\n\nuse grep to search for the word ''package'' in go files\n <think>\n\n</think>","role":"user"}],"model":"gpt-oss-120b","max_completion_tokens":40,"stream_options":{"include_usage":true},"stream":true}'

internal/agent/testdata/TestCoderAgent/glm-5.1/ls_tool.yaml 🔗

@@ -25,21 +25,21 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"List"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Listing"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" current"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" current"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" directory"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" directory"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" files"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" files"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ls"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ls"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-808ff38e4347f4a1cff78d6a","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":1,"completion_tokens":67,"total_tokens":276}}
+      data: {"id":"chatcmpl-329b23fc74f060bf504aa822","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":33,"completion_tokens":106,"total_tokens":315}}
 
       data: [DONE]
 
@@ -48,15 +48,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 1.046801209s
+    duration: 1.387848083s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32467
+    content_length: 33214
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/multiedit_tool.yaml 🔗

@@ -25,25 +25,27 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Replace"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Replace"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" greeting"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" greeting"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" add"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" add"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" comment"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" comment"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" main"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" mult"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"ied"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-e79e8b371d7ee5b55bed5aeb","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":7,"completion_tokens":188,"total_tokens":419}}
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"it"},"finish_reason":null}]}
+
+      data: {"id":"chatcmpl-2325276afb054a7e67328b09","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":55,"completion_tokens":92,"total_tokens":323}}
 
       data: [DONE]
 
@@ -52,15 +54,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 2.188941416s
+    duration: 1.174051791s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32553
+    content_length: 33300
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/parallel_tool_calls.yaml 🔗

@@ -25,27 +25,29 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Parallel"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Run"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" glob"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" glob"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ."},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ."},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"go"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" files"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" files"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" and"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ls"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" ls"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" directory"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-01717753dc1a38b96633c031","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":330,"total_tokens":558}}
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" parallel"},"finish_reason":null}]}
+
+      data: {"id":"chatcmpl-2e855cfd07f4df61a8fe1fc4","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":36,"completion_tokens":205,"total_tokens":433}}
 
       data: [DONE]
 
@@ -54,15 +56,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 3.63693525s
+    duration: 2.384251s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32564
+    content_length: 33311
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/read_a_file.yaml 🔗

@@ -25,19 +25,19 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Reading"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Reading"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" the"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" the"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Go"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".mod"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Mod"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" file"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" File"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-64e6912ac69feb86e3e0e006","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":11,"completion_tokens":82,"total_tokens":285}}
+      data: {"id":"chatcmpl-cfe0235dd83acfdb27434619","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":27,"completion_tokens":73,"total_tokens":276}}
 
       data: [DONE]
 
@@ -46,15 +46,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 1.298010542s
+    duration: 1.120596042s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32437
+    content_length: 33184
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/simple_test.yaml 🔗

@@ -25,13 +25,11 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-5b7e249615a472bf74485690","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-77cc6d39d8676a0150259c04","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-5b7e249615a472bf74485690","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Simple"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-77cc6d39d8676a0150259c04","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Greeting"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-5b7e249615a472bf74485690","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Greeting"},"finish_reason":null}]}
-
-      data: {"id":"chatcmpl-5b7e249615a472bf74485690","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":8,"completion_tokens":92,"total_tokens":292}}
+      data: {"id":"chatcmpl-77cc6d39d8676a0150259c04","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":24,"completion_tokens":48,"total_tokens":248}}
 
       data: [DONE]
 
@@ -40,15 +38,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 1.498550375s
+    duration: 1.697065792s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32427
+    content_length: 33174
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/sourcegraph_tool.yaml 🔗

@@ -25,29 +25,29 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Search"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Search"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" for"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" func"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" func"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" main"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" main"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" in"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Go"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" repos"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" repos"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Source"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" Source"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"graph"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"graph"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-302ede2cd604cba8426a8de2","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":4,"completion_tokens":237,"total_tokens":449}}
+      data: {"id":"chatcmpl-e6c46acec51f6245887eff57","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":36,"completion_tokens":293,"total_tokens":505}}
 
       data: [DONE]
 
@@ -56,15 +56,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 2.742813917s
+    duration: 3.248102625s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32487
+    content_length: 33234
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/update_a_file.yaml 🔗

@@ -25,27 +25,25 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Update"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Update"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" main"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" main"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".go"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".go"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" print"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" to"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" to"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" print"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" say"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" hello"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" hello"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" from"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" from"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" crush"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" crush"},"finish_reason":null}]}
-
-      data: {"id":"chatcmpl-119f3bd1ab434e07234a80a5","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":5,"completion_tokens":83,"total_tokens":296}}
+      data: {"id":"chatcmpl-a8e05e7c620b8d32791e3e2d","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":37,"completion_tokens":102,"total_tokens":315}}
 
       data: [DONE]
 
@@ -54,15 +52,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 1.174858458s
+    duration: 1.28689725s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32493
+    content_length: 33240
     host: ""

internal/agent/testdata/TestCoderAgent/glm-5.1/write_tool.yaml 🔗

@@ -25,23 +25,23 @@ interactions:
     content_length: -1
     uncompressed: true
     body: |+
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Create"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":"Create"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" config"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" config"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".json"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":".json"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" with"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" specified"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" specified"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" JSON"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" JSON"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" content"},"finish_reason":null}]}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{"content":" content"},"finish_reason":null}]}
 
-      data: {"id":"chatcmpl-ffece83c09caeb767476cda3","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":3,"completion_tokens":229,"total_tokens":456}}
+      data: {"id":"chatcmpl-a822e8d61c93835b48c2bad1","object":"chat.completion.chunk","created":0,"model":"gpt-oss-120b","choices":[{"index":0,"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":51,"completion_tokens":94,"total_tokens":321}}
 
       data: [DONE]
 
@@ -50,15 +50,15 @@ interactions:
       - text/event-stream
     status: 200 OK
     code: 200
-    duration: 2.674125375s
+    duration: 1.269441625s
 - id: 1
   request:
     proto: HTTP/1.1
     proto_major: 1
     proto_minor: 1
-    content_length: 32530
+    content_length: 33277
     host: ""