1package server
2
3import (
4 "context"
5 "encoding/json"
6 "log/slog"
7 "net/http"
8 "net/http/httptest"
9 "os"
10 "strings"
11 "testing"
12 "time"
13
14 "shelley.exe.dev/claudetool"
15 "shelley.exe.dev/db"
16 "shelley.exe.dev/db/generated"
17 "shelley.exe.dev/llm"
18 "shelley.exe.dev/loop"
19)
20
21// TestCancelAfterToolCompletesCreatesDuplicateToolResult reproduces the bug where
22// cancelling a conversation after a tool has already completed creates a duplicate
23// tool_result for the same tool_use_id.
24//
25// The bug is in CancelConversation's search logic: it finds the first tool_use in
26// the last assistant message and immediately breaks without checking if that tool
27// already has a result. This causes it to create a cancelled tool_result even when
28// the tool already completed successfully.
29//
30// This leads to the Anthropic API error:
31// "each tool_use must have a single result. Found multiple `tool_result` blocks with id: ..."
32func TestCancelAfterToolCompletesCreatesDuplicateToolResult(t *testing.T) {
33 database, cleanup := setupTestDB(t)
34 defer cleanup()
35
36 predictableService := loop.NewPredictableService()
37 llmManager := &testLLMManager{service: predictableService}
38 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
39
40 toolSetConfig := claudetool.ToolSetConfig{EnableBrowser: false}
41 server := NewServer(database, llmManager, toolSetConfig, logger, true, "", "predictable", "", nil)
42
43 // Create conversation
44 conversation, err := database.CreateConversation(context.Background(), nil, true, nil, nil)
45 if err != nil {
46 t.Fatalf("failed to create conversation: %v", err)
47 }
48 conversationID := conversation.ConversationID
49
50 // Start a conversation with a fast tool call that completes quickly
51 chatReq := ChatRequest{
52 Message: "bash: echo hello",
53 Model: "predictable",
54 }
55 chatBody, _ := json.Marshal(chatReq)
56
57 req := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(chatBody)))
58 req.Header.Set("Content-Type", "application/json")
59 w := httptest.NewRecorder()
60
61 server.handleChatConversation(w, req, conversationID)
62 if w.Code != http.StatusAccepted {
63 t.Fatalf("expected status 202, got %d: %s", w.Code, w.Body.String())
64 }
65
66 // Wait for the tool to complete - this is important!
67 // The bash command "echo hello" should complete very quickly
68 deadline := time.Now().Add(5 * time.Second)
69 var toolResultFound bool
70 for time.Now().Before(deadline) {
71 var messages []generated.Message
72 err := database.Queries(context.Background(), func(q *generated.Queries) error {
73 var qerr error
74 messages, qerr = q.ListMessages(context.Background(), conversationID)
75 return qerr
76 })
77 if err != nil {
78 t.Fatalf("failed to get messages: %v", err)
79 }
80
81 // Look for a tool_result message
82 for _, msg := range messages {
83 if msg.Type != string(db.MessageTypeUser) || msg.LlmData == nil {
84 continue
85 }
86 var llmMsg llm.Message
87 if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
88 continue
89 }
90 for _, content := range llmMsg.Content {
91 if content.Type == llm.ContentTypeToolResult && !content.ToolError {
92 // Found a successful tool result
93 toolResultFound = true
94 break
95 }
96 }
97 if toolResultFound {
98 break
99 }
100 }
101 if toolResultFound {
102 break
103 }
104 time.Sleep(50 * time.Millisecond)
105 }
106
107 if !toolResultFound {
108 t.Fatal("tool result was not found - tool didn't complete")
109 }
110
111 // Now cancel the conversation AFTER the tool has completed
112 // This should NOT create a new tool_result because the tool already finished
113 cancelReq := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/cancel", nil)
114 cancelW := httptest.NewRecorder()
115
116 server.handleCancelConversation(cancelW, cancelReq, conversationID)
117 if cancelW.Code != http.StatusOK {
118 t.Fatalf("cancel: expected status 200, got %d: %s", cancelW.Code, cancelW.Body.String())
119 }
120
121 // Wait for agent to stop working (cancel to process)
122 deadline = time.Now().Add(5 * time.Second)
123 for time.Now().Before(deadline) {
124 if !server.IsAgentWorking(conversationID) {
125 break
126 }
127 time.Sleep(10 * time.Millisecond)
128 }
129
130 // Check the messages to see if there are duplicate tool_results for the same tool_use_id
131 var messages []generated.Message
132 err = database.Queries(context.Background(), func(q *generated.Queries) error {
133 var qerr error
134 messages, qerr = q.ListMessages(context.Background(), conversationID)
135 return qerr
136 })
137 if err != nil {
138 t.Fatalf("failed to get messages after cancel: %v", err)
139 }
140
141 // Count tool_results by tool_use_id
142 toolResultsByID := make(map[string]int)
143 for _, msg := range messages {
144 if msg.LlmData == nil {
145 continue
146 }
147 var llmMsg llm.Message
148 if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
149 continue
150 }
151 for _, content := range llmMsg.Content {
152 if content.Type == llm.ContentTypeToolResult && content.ToolUseID != "" {
153 toolResultsByID[content.ToolUseID]++
154 }
155 }
156 }
157
158 // Check for duplicates - this is the bug!
159 for toolID, count := range toolResultsByID {
160 if count > 1 {
161 t.Errorf("BUG: found %d tool_results for tool_use_id %s (expected 1)", count, toolID)
162 }
163 }
164
165 // Clear requests to get a clean slate for the next request
166 predictableService.ClearRequests()
167
168 // Now try to continue the conversation - this should trigger the API error
169 // if duplicates exist
170 resumeReq := ChatRequest{
171 Message: "echo: test after cancel",
172 Model: "predictable",
173 }
174 resumeBody, _ := json.Marshal(resumeReq)
175
176 resumeChatReq := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(resumeBody)))
177 resumeChatReq.Header.Set("Content-Type", "application/json")
178 resumeW := httptest.NewRecorder()
179
180 server.handleChatConversation(resumeW, resumeChatReq, conversationID)
181 if resumeW.Code != http.StatusAccepted {
182 t.Fatalf("resume: expected status 202, got %d: %s", resumeW.Code, resumeW.Body.String())
183 }
184
185 // Wait for agent to stop working
186 deadline = time.Now().Add(5 * time.Second)
187 for time.Now().Before(deadline) {
188 if !server.IsAgentWorking(conversationID) {
189 break
190 }
191 time.Sleep(10 * time.Millisecond)
192 }
193
194 // Check the last request sent to the LLM for duplicate tool_results
195 lastRequest := predictableService.GetLastRequest()
196 if lastRequest == nil {
197 t.Fatal("no request was sent to the LLM")
198 }
199
200 // Count tool_results in the request by tool_use_id
201 requestToolResultsByID := make(map[string]int)
202 for _, msg := range lastRequest.Messages {
203 for _, content := range msg.Content {
204 if content.Type == llm.ContentTypeToolResult && content.ToolUseID != "" {
205 requestToolResultsByID[content.ToolUseID]++
206 }
207 }
208 }
209
210 // Check for duplicates in the request - this would cause the Anthropic API error
211 for toolID, count := range requestToolResultsByID {
212 if count > 1 {
213 t.Errorf("BUG: LLM request contains %d tool_results for tool_use_id %s (expected 1). "+
214 "This would cause Anthropic API error: 'each tool_use must have a single result'",
215 count, toolID)
216 }
217 }
218}