1package server
2
3import (
4 "context"
5 "encoding/json"
6 "log/slog"
7 "net/http"
8 "net/http/httptest"
9 "os"
10 "strings"
11 "testing"
12 "time"
13
14 "shelley.exe.dev/claudetool"
15 "shelley.exe.dev/db"
16 "shelley.exe.dev/llm"
17 "shelley.exe.dev/loop"
18)
19
20// TestOrphanToolResultAfterCancellation reproduces the bug where a tool_result
21// is written after CancelConversation has already written an end-turn message.
22//
23// This leads to the Anthropic API error:
24// "unexpected `tool_use_id` found in `tool_result` blocks: <id>.
25// Each `tool_result` block must have a corresponding `tool_use` block in the previous message."
26//
27// The sequence is:
28// 1. LLM returns assistant message with tool_use X
29// 2. Tool X starts executing
30// 3. User cancels
31// 4. CancelConversation writes:
32// - user message with cancelled tool_result X
33// - assistant message with end-turn "[Operation cancelled]"
34//
35// 5. Tool X completes and writes its result AFTER the cancel messages
36// 6. DB now has:
37// - assistant with tool_use X
38// - user with tool_result X (cancelled)
39// - assistant end-turn
40// - user with tool_result X (actual) <- ORPHAN - references X but previous msg has no tool_use!
41func TestOrphanToolResultAfterCancellation(t *testing.T) {
42 database, cleanup := setupTestDB(t)
43 defer cleanup()
44
45 predictableService := loop.NewPredictableService()
46 llmManager := &testLLMManager{service: predictableService}
47 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
48
49 toolSetConfig := claudetool.ToolSetConfig{EnableBrowser: false}
50 server := NewServer(database, llmManager, toolSetConfig, logger, true, "", "predictable", "", nil)
51
52 // Create conversation
53 conversation, err := database.CreateConversation(context.Background(), nil, true, nil, nil)
54 if err != nil {
55 t.Fatalf("failed to create conversation: %v", err)
56 }
57 conversationID := conversation.ConversationID
58
59 // Manually create the problematic message sequence in the database
60 // This simulates the race condition where a tool result is written after cancellation
61
62 toolUseID := "toolu_test_orphan_12345"
63
64 // Message 1: User message "run something"
65 userMsg1 := llm.Message{
66 Role: llm.MessageRoleUser,
67 Content: []llm.Content{
68 {Type: llm.ContentTypeText, Text: "bash: echo hello"},
69 },
70 }
71 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
72 ConversationID: conversationID,
73 Type: db.MessageTypeUser,
74 LLMData: userMsg1,
75 UsageData: llm.Usage{},
76 }); err != nil {
77 t.Fatalf("failed to create user message: %v", err)
78 }
79
80 // Message 2: Assistant message with tool_use
81 assistantMsg1 := llm.Message{
82 Role: llm.MessageRoleAssistant,
83 Content: []llm.Content{
84 {Type: llm.ContentTypeText, Text: "I'll run the command"},
85 {
86 ID: toolUseID,
87 Type: llm.ContentTypeToolUse,
88 ToolName: "bash",
89 ToolInput: json.RawMessage(`{"command": "echo hello"}`),
90 },
91 },
92 }
93 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
94 ConversationID: conversationID,
95 Type: db.MessageTypeAgent,
96 LLMData: assistantMsg1,
97 UsageData: llm.Usage{},
98 }); err != nil {
99 t.Fatalf("failed to create assistant message: %v", err)
100 }
101
102 // Message 3: User message with cancelled tool_result (from CancelConversation)
103 now := time.Now()
104 cancelledToolResult := llm.Message{
105 Role: llm.MessageRoleUser,
106 Content: []llm.Content{
107 {
108 Type: llm.ContentTypeToolResult,
109 ToolUseID: toolUseID,
110 ToolError: true,
111 ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "Tool execution cancelled by user"}},
112 ToolUseStartTime: &now,
113 ToolUseEndTime: &now,
114 },
115 },
116 }
117 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
118 ConversationID: conversationID,
119 Type: db.MessageTypeUser,
120 LLMData: cancelledToolResult,
121 UsageData: llm.Usage{},
122 }); err != nil {
123 t.Fatalf("failed to create cancelled tool_result message: %v", err)
124 }
125
126 // Message 4: Assistant end-turn message (from CancelConversation)
127 endTurnMsg := llm.Message{
128 Role: llm.MessageRoleAssistant,
129 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "[Operation cancelled]"}},
130 EndOfTurn: true,
131 }
132 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
133 ConversationID: conversationID,
134 Type: db.MessageTypeAgent,
135 LLMData: endTurnMsg,
136 UsageData: llm.Usage{},
137 }); err != nil {
138 t.Fatalf("failed to create end-turn message: %v", err)
139 }
140
141 // Message 5: ORPHAN - User message with actual tool_result (written after cancel due to race)
142 // This references the tool_use from message 2, but the previous message (4) has no tool_use!
143 actualToolResult := llm.Message{
144 Role: llm.MessageRoleUser,
145 Content: []llm.Content{
146 {
147 Type: llm.ContentTypeToolResult,
148 ToolUseID: toolUseID,
149 ToolError: false,
150 ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "hello\n"}},
151 ToolUseStartTime: &now,
152 ToolUseEndTime: &now,
153 },
154 },
155 }
156 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
157 ConversationID: conversationID,
158 Type: db.MessageTypeUser,
159 LLMData: actualToolResult,
160 UsageData: llm.Usage{},
161 }); err != nil {
162 t.Fatalf("failed to create orphan tool_result message: %v", err)
163 }
164
165 // Now try to resume the conversation
166 // This should trigger the Anthropic API error if we don't fix the orphan tool_result
167 resumeReq := ChatRequest{
168 Message: "echo: continue",
169 Model: "predictable",
170 }
171 resumeBody, _ := json.Marshal(resumeReq)
172
173 req := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(resumeBody)))
174 req.Header.Set("Content-Type", "application/json")
175 w := httptest.NewRecorder()
176
177 server.handleChatConversation(w, req, conversationID)
178 if w.Code != http.StatusAccepted {
179 t.Fatalf("expected status 202, got %d: %s", w.Code, w.Body.String())
180 }
181
182 // Wait for the request to be processed
183 time.Sleep(300 * time.Millisecond)
184
185 // Check the last request sent to the LLM for orphan tool_results
186 lastRequest := predictableService.GetLastRequest()
187 if lastRequest == nil {
188 t.Fatal("no request was sent to the LLM")
189 }
190
191 // Check that orphan tool_results have been removed
192 // An orphan tool_result is one that references a tool_use_id that doesn't exist
193 // in the immediately preceding assistant message
194
195 var previousAssistantToolUses map[string]bool
196 for i, msg := range lastRequest.Messages {
197 if msg.Role == llm.MessageRoleAssistant {
198 // Track all tool_use IDs in this assistant message
199 previousAssistantToolUses = make(map[string]bool)
200 for _, content := range msg.Content {
201 if content.Type == llm.ContentTypeToolUse {
202 previousAssistantToolUses[content.ID] = true
203 }
204 }
205 } else if msg.Role == llm.MessageRoleUser {
206 // Check if any tool_results reference IDs not in previous assistant message
207 for _, content := range msg.Content {
208 if content.Type == llm.ContentTypeToolResult {
209 if previousAssistantToolUses != nil && !previousAssistantToolUses[content.ToolUseID] {
210 t.Errorf("BUG: Found orphan tool_result at message index %d with ToolUseID=%s that doesn't match any tool_use in the previous assistant message. "+
211 "This would cause Anthropic API error: 'Each tool_result block must have a corresponding tool_use block in the previous message'",
212 i, content.ToolUseID)
213 }
214 }
215 }
216 // Clear previousAssistantToolUses since user messages reset the expectation
217 previousAssistantToolUses = nil
218 }
219 }
220
221 t.Logf("LLM request has %d messages - test verified orphan tool_results are handled", len(lastRequest.Messages))
222}
223
224// TestOrphanToolResultFiltering tests that orphan tool_results are filtered out
225// even when they appear in the middle of the conversation
226func TestOrphanToolResultFiltering(t *testing.T) {
227 database, cleanup := setupTestDB(t)
228 defer cleanup()
229
230 predictableService := loop.NewPredictableService()
231 llmManager := &testLLMManager{service: predictableService}
232 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
233
234 server := NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, true, "", "predictable", "", nil)
235
236 conversation, err := database.CreateConversation(context.Background(), nil, true, nil, nil)
237 if err != nil {
238 t.Fatalf("failed to create conversation: %v", err)
239 }
240 conversationID := conversation.ConversationID
241
242 // Create a conversation where there's an orphan tool_result in the middle
243 // followed by valid messages
244
245 // Message 1: User message
246 userMsg1 := llm.Message{
247 Role: llm.MessageRoleUser,
248 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
249 }
250 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
251 ConversationID: conversationID,
252 Type: db.MessageTypeUser,
253 LLMData: userMsg1,
254 }); err != nil {
255 t.Fatalf("failed to create message: %v", err)
256 }
257
258 // Message 2: Assistant response with end_of_turn (no tool_use)
259 assistantMsg := llm.Message{
260 Role: llm.MessageRoleAssistant,
261 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hi there!"}},
262 EndOfTurn: true,
263 }
264 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
265 ConversationID: conversationID,
266 Type: db.MessageTypeAgent,
267 LLMData: assistantMsg,
268 }); err != nil {
269 t.Fatalf("failed to create message: %v", err)
270 }
271
272 // Message 3: ORPHAN tool_result - previous assistant has no tool_use!
273 now := time.Now()
274 orphanResult := llm.Message{
275 Role: llm.MessageRoleUser,
276 Content: []llm.Content{
277 {
278 Type: llm.ContentTypeToolResult,
279 ToolUseID: "toolu_orphan_xyz",
280 ToolError: false,
281 ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "orphan result"}},
282 ToolUseStartTime: &now,
283 ToolUseEndTime: &now,
284 },
285 },
286 }
287 if _, err := database.CreateMessage(context.Background(), db.CreateMessageParams{
288 ConversationID: conversationID,
289 Type: db.MessageTypeUser,
290 LLMData: orphanResult,
291 }); err != nil {
292 t.Fatalf("failed to create orphan message: %v", err)
293 }
294
295 // Now try to chat
296 chatReq := ChatRequest{
297 Message: "echo: test",
298 Model: "predictable",
299 }
300 chatBody, _ := json.Marshal(chatReq)
301
302 req := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(chatBody)))
303 req.Header.Set("Content-Type", "application/json")
304 w := httptest.NewRecorder()
305
306 server.handleChatConversation(w, req, conversationID)
307 if w.Code != http.StatusAccepted {
308 t.Fatalf("expected status 202, got %d: %s", w.Code, w.Body.String())
309 }
310
311 time.Sleep(300 * time.Millisecond)
312
313 lastRequest := predictableService.GetLastRequest()
314 if lastRequest == nil {
315 t.Fatal("no request was sent to the LLM")
316 }
317
318 // Verify no orphan tool_results in the request
319 var prevToolUses map[string]bool
320 for i, msg := range lastRequest.Messages {
321 if msg.Role == llm.MessageRoleAssistant {
322 prevToolUses = make(map[string]bool)
323 for _, content := range msg.Content {
324 if content.Type == llm.ContentTypeToolUse {
325 prevToolUses[content.ID] = true
326 }
327 }
328 } else if msg.Role == llm.MessageRoleUser {
329 for _, content := range msg.Content {
330 if content.Type == llm.ContentTypeToolResult {
331 if prevToolUses != nil && !prevToolUses[content.ToolUseID] {
332 t.Errorf("BUG: Found orphan tool_result at message index %d", i)
333 }
334 }
335 }
336 prevToolUses = nil
337 }
338 }
339}