// duplicate_tool_result_test.go

  1package server
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"log/slog"
  7	"net/http"
  8	"net/http/httptest"
  9	"os"
 10	"strings"
 11	"testing"
 12	"time"
 13
 14	"shelley.exe.dev/claudetool"
 15	"shelley.exe.dev/db"
 16	"shelley.exe.dev/db/generated"
 17	"shelley.exe.dev/llm"
 18	"shelley.exe.dev/loop"
 19)
 20
 21// TestCancelAfterToolCompletesCreatesDuplicateToolResult reproduces the bug where
 22// cancelling a conversation after a tool has already completed creates a duplicate
 23// tool_result for the same tool_use_id.
 24//
 25// The bug is in CancelConversation's search logic: it finds the first tool_use in
 26// the last assistant message and immediately breaks without checking if that tool
 27// already has a result. This causes it to create a cancelled tool_result even when
 28// the tool already completed successfully.
 29//
 30// This leads to the Anthropic API error:
 31// "each tool_use must have a single result. Found multiple `tool_result` blocks with id: ..."
 32func TestCancelAfterToolCompletesCreatesDuplicateToolResult(t *testing.T) {
 33	database, cleanup := setupTestDB(t)
 34	defer cleanup()
 35
 36	predictableService := loop.NewPredictableService()
 37	llmManager := &testLLMManager{service: predictableService}
 38	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
 39
 40	toolSetConfig := claudetool.ToolSetConfig{EnableBrowser: false}
 41	server := NewServer(database, llmManager, toolSetConfig, logger, true, "", "predictable", "", nil)
 42
 43	// Create conversation
 44	conversation, err := database.CreateConversation(context.Background(), nil, true, nil, nil)
 45	if err != nil {
 46		t.Fatalf("failed to create conversation: %v", err)
 47	}
 48	conversationID := conversation.ConversationID
 49
 50	// Start a conversation with a fast tool call that completes quickly
 51	chatReq := ChatRequest{
 52		Message: "bash: echo hello",
 53		Model:   "predictable",
 54	}
 55	chatBody, _ := json.Marshal(chatReq)
 56
 57	req := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(chatBody)))
 58	req.Header.Set("Content-Type", "application/json")
 59	w := httptest.NewRecorder()
 60
 61	server.handleChatConversation(w, req, conversationID)
 62	if w.Code != http.StatusAccepted {
 63		t.Fatalf("expected status 202, got %d: %s", w.Code, w.Body.String())
 64	}
 65
 66	// Wait for the tool to complete - this is important!
 67	// The bash command "echo hello" should complete very quickly
 68	deadline := time.Now().Add(5 * time.Second)
 69	var toolResultFound bool
 70	for time.Now().Before(deadline) {
 71		var messages []generated.Message
 72		err := database.Queries(context.Background(), func(q *generated.Queries) error {
 73			var qerr error
 74			messages, qerr = q.ListMessages(context.Background(), conversationID)
 75			return qerr
 76		})
 77		if err != nil {
 78			t.Fatalf("failed to get messages: %v", err)
 79		}
 80
 81		// Look for a tool_result message
 82		for _, msg := range messages {
 83			if msg.Type != string(db.MessageTypeUser) || msg.LlmData == nil {
 84				continue
 85			}
 86			var llmMsg llm.Message
 87			if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
 88				continue
 89			}
 90			for _, content := range llmMsg.Content {
 91				if content.Type == llm.ContentTypeToolResult && !content.ToolError {
 92					// Found a successful tool result
 93					toolResultFound = true
 94					break
 95				}
 96			}
 97			if toolResultFound {
 98				break
 99			}
100		}
101		if toolResultFound {
102			break
103		}
104		time.Sleep(50 * time.Millisecond)
105	}
106
107	if !toolResultFound {
108		t.Fatal("tool result was not found - tool didn't complete")
109	}
110
111	// Now cancel the conversation AFTER the tool has completed
112	// This should NOT create a new tool_result because the tool already finished
113	cancelReq := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/cancel", nil)
114	cancelW := httptest.NewRecorder()
115
116	server.handleCancelConversation(cancelW, cancelReq, conversationID)
117	if cancelW.Code != http.StatusOK {
118		t.Fatalf("cancel: expected status 200, got %d: %s", cancelW.Code, cancelW.Body.String())
119	}
120
121	// Wait for agent to stop working (cancel to process)
122	deadline = time.Now().Add(5 * time.Second)
123	for time.Now().Before(deadline) {
124		if !server.IsAgentWorking(conversationID) {
125			break
126		}
127		time.Sleep(10 * time.Millisecond)
128	}
129
130	// Check the messages to see if there are duplicate tool_results for the same tool_use_id
131	var messages []generated.Message
132	err = database.Queries(context.Background(), func(q *generated.Queries) error {
133		var qerr error
134		messages, qerr = q.ListMessages(context.Background(), conversationID)
135		return qerr
136	})
137	if err != nil {
138		t.Fatalf("failed to get messages after cancel: %v", err)
139	}
140
141	// Count tool_results by tool_use_id
142	toolResultsByID := make(map[string]int)
143	for _, msg := range messages {
144		if msg.LlmData == nil {
145			continue
146		}
147		var llmMsg llm.Message
148		if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
149			continue
150		}
151		for _, content := range llmMsg.Content {
152			if content.Type == llm.ContentTypeToolResult && content.ToolUseID != "" {
153				toolResultsByID[content.ToolUseID]++
154			}
155		}
156	}
157
158	// Check for duplicates - this is the bug!
159	for toolID, count := range toolResultsByID {
160		if count > 1 {
161			t.Errorf("BUG: found %d tool_results for tool_use_id %s (expected 1)", count, toolID)
162		}
163	}
164
165	// Clear requests to get a clean slate for the next request
166	predictableService.ClearRequests()
167
168	// Now try to continue the conversation - this should trigger the API error
169	// if duplicates exist
170	resumeReq := ChatRequest{
171		Message: "echo: test after cancel",
172		Model:   "predictable",
173	}
174	resumeBody, _ := json.Marshal(resumeReq)
175
176	resumeChatReq := httptest.NewRequest("POST", "/api/conversation/"+conversationID+"/chat", strings.NewReader(string(resumeBody)))
177	resumeChatReq.Header.Set("Content-Type", "application/json")
178	resumeW := httptest.NewRecorder()
179
180	server.handleChatConversation(resumeW, resumeChatReq, conversationID)
181	if resumeW.Code != http.StatusAccepted {
182		t.Fatalf("resume: expected status 202, got %d: %s", resumeW.Code, resumeW.Body.String())
183	}
184
185	// Wait for agent to stop working
186	deadline = time.Now().Add(5 * time.Second)
187	for time.Now().Before(deadline) {
188		if !server.IsAgentWorking(conversationID) {
189			break
190		}
191		time.Sleep(10 * time.Millisecond)
192	}
193
194	// Check the last request sent to the LLM for duplicate tool_results
195	lastRequest := predictableService.GetLastRequest()
196	if lastRequest == nil {
197		t.Fatal("no request was sent to the LLM")
198	}
199
200	// Count tool_results in the request by tool_use_id
201	requestToolResultsByID := make(map[string]int)
202	for _, msg := range lastRequest.Messages {
203		for _, content := range msg.Content {
204			if content.Type == llm.ContentTypeToolResult && content.ToolUseID != "" {
205				requestToolResultsByID[content.ToolUseID]++
206			}
207		}
208	}
209
210	// Check for duplicates in the request - this would cause the Anthropic API error
211	for toolID, count := range requestToolResultsByID {
212		if count > 1 {
213			t.Errorf("BUG: LLM request contains %d tool_results for tool_use_id %s (expected 1). "+
214				"This would cause Anthropic API error: 'each tool_use must have a single result'",
215				count, toolID)
216		}
217	}
218}