1package loop
2
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"shelley.exe.dev/claudetool"
	"shelley.exe.dev/gitstate"
	"shelley.exe.dev/llm"
)
20
21func TestNewLoop(t *testing.T) {
22 history := []llm.Message{
23 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
24 }
25 tools := []*llm.Tool{}
26 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
27 return nil
28 }
29
30 loop := NewLoop(Config{
31 LLM: NewPredictableService(),
32 History: history,
33 Tools: tools,
34 RecordMessage: recordFunc,
35 })
36 if loop == nil {
37 t.Fatal("NewLoop returned nil")
38 }
39
40 if len(loop.history) != 1 {
41 t.Errorf("expected history length 1, got %d", len(loop.history))
42 }
43
44 if len(loop.messageQueue) != 0 {
45 t.Errorf("expected empty message queue, got %d", len(loop.messageQueue))
46 }
47}
48
49func TestQueueUserMessage(t *testing.T) {
50 loop := NewLoop(Config{
51 LLM: NewPredictableService(),
52 History: []llm.Message{},
53 Tools: []*llm.Tool{},
54 })
55
56 message := llm.Message{
57 Role: llm.MessageRoleUser,
58 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Test message"}},
59 }
60
61 loop.QueueUserMessage(message)
62
63 loop.mu.Lock()
64 queueLen := len(loop.messageQueue)
65 loop.mu.Unlock()
66
67 if queueLen != 1 {
68 t.Errorf("expected message queue length 1, got %d", queueLen)
69 }
70}
71
72func TestPredictableService(t *testing.T) {
73 service := NewPredictableService()
74
75 // Test simple hello response
76 ctx := context.Background()
77 req := &llm.Request{
78 Messages: []llm.Message{
79 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
80 },
81 }
82
83 resp, err := service.Do(ctx, req)
84 if err != nil {
85 t.Fatalf("predictable service Do failed: %v", err)
86 }
87
88 if resp.Role != llm.MessageRoleAssistant {
89 t.Errorf("expected assistant role, got %v", resp.Role)
90 }
91
92 if len(resp.Content) == 0 {
93 t.Error("expected non-empty content")
94 }
95
96 if resp.Content[0].Type != llm.ContentTypeText {
97 t.Errorf("expected text content, got %v", resp.Content[0].Type)
98 }
99
100 if resp.Content[0].Text != "Well, hi there!" {
101 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
102 }
103}
104
105func TestPredictableServiceEcho(t *testing.T) {
106 service := NewPredictableService()
107
108 ctx := context.Background()
109 req := &llm.Request{
110 Messages: []llm.Message{
111 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "echo: foo"}}},
112 },
113 }
114
115 resp, err := service.Do(ctx, req)
116 if err != nil {
117 t.Fatalf("echo test failed: %v", err)
118 }
119
120 if resp.Content[0].Text != "foo" {
121 t.Errorf("expected 'foo', got '%s'", resp.Content[0].Text)
122 }
123
124 // Test another echo
125 req.Messages[0].Content[0].Text = "echo: hello world"
126 resp, err = service.Do(ctx, req)
127 if err != nil {
128 t.Fatalf("echo hello world test failed: %v", err)
129 }
130
131 if resp.Content[0].Text != "hello world" {
132 t.Errorf("expected 'hello world', got '%s'", resp.Content[0].Text)
133 }
134}
135
136func TestPredictableServiceBashTool(t *testing.T) {
137 service := NewPredictableService()
138
139 ctx := context.Background()
140 req := &llm.Request{
141 Messages: []llm.Message{
142 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: ls -la"}}},
143 },
144 }
145
146 resp, err := service.Do(ctx, req)
147 if err != nil {
148 t.Fatalf("bash tool test failed: %v", err)
149 }
150
151 if resp.StopReason != llm.StopReasonToolUse {
152 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
153 }
154
155 if len(resp.Content) != 2 {
156 t.Errorf("expected 2 content items (text + tool_use), got %d", len(resp.Content))
157 }
158
159 // Find the tool use content
160 var toolUseContent *llm.Content
161 for _, content := range resp.Content {
162 if content.Type == llm.ContentTypeToolUse {
163 toolUseContent = &content
164 break
165 }
166 }
167
168 if toolUseContent == nil {
169 t.Fatal("no tool use content found")
170 }
171
172 if toolUseContent.ToolName != "bash" {
173 t.Errorf("expected tool name 'bash', got '%s'", toolUseContent.ToolName)
174 }
175
176 // Check tool input contains the command
177 var toolInput map[string]interface{}
178 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
179 t.Fatalf("failed to parse tool input: %v", err)
180 }
181
182 if toolInput["command"] != "ls -la" {
183 t.Errorf("expected command 'ls -la', got '%v'", toolInput["command"])
184 }
185}
186
187func TestPredictableServiceDefaultResponse(t *testing.T) {
188 service := NewPredictableService()
189
190 ctx := context.Background()
191 req := &llm.Request{
192 Messages: []llm.Message{
193 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "some unknown input"}}},
194 },
195 }
196
197 resp, err := service.Do(ctx, req)
198 if err != nil {
199 t.Fatalf("default response test failed: %v", err)
200 }
201
202 if resp.Content[0].Text != "edit predictable.go to add a response for that one..." {
203 t.Errorf("unexpected default response: %s", resp.Content[0].Text)
204 }
205}
206
207func TestPredictableServiceDelay(t *testing.T) {
208 service := NewPredictableService()
209
210 ctx := context.Background()
211 req := &llm.Request{
212 Messages: []llm.Message{
213 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "delay: 0.1"}}},
214 },
215 }
216
217 start := time.Now()
218 resp, err := service.Do(ctx, req)
219 elapsed := time.Since(start)
220
221 if err != nil {
222 t.Fatalf("delay test failed: %v", err)
223 }
224
225 if elapsed < 100*time.Millisecond {
226 t.Errorf("expected delay of at least 100ms, got %v", elapsed)
227 }
228
229 if resp.Content[0].Text != "Delayed for 0.1 seconds" {
230 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
231 }
232}
233
234func TestLoopWithPredictableService(t *testing.T) {
235 var recordedMessages []llm.Message
236 var recordedUsages []llm.Usage
237
238 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
239 recordedMessages = append(recordedMessages, message)
240 recordedUsages = append(recordedUsages, usage)
241 return nil
242 }
243
244 service := NewPredictableService()
245 loop := NewLoop(Config{
246 LLM: service,
247 History: []llm.Message{},
248 Tools: []*llm.Tool{},
249 RecordMessage: recordFunc,
250 })
251
252 // Queue a user message that triggers a known response
253 userMessage := llm.Message{
254 Role: llm.MessageRoleUser,
255 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
256 }
257 loop.QueueUserMessage(userMessage)
258
259 // Run the loop with a short timeout
260 ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
261 defer cancel()
262
263 err := loop.Go(ctx)
264 if err != context.DeadlineExceeded {
265 t.Errorf("expected context deadline exceeded, got %v", err)
266 }
267
268 // Check that messages were recorded
269 if len(recordedMessages) < 1 {
270 t.Errorf("expected at least 1 recorded message, got %d", len(recordedMessages))
271 }
272
273 // Check usage tracking
274 usage := loop.GetUsage()
275 if usage.IsZero() {
276 t.Error("expected non-zero usage")
277 }
278}
279
280func TestLoopWithTools(t *testing.T) {
281 var toolCalls []string
282
283 testTool := &llm.Tool{
284 Name: "bash",
285 Description: "A test bash tool",
286 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"command": {"type": "string"}}}`),
287 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
288 toolCalls = append(toolCalls, string(input))
289 return llm.ToolOut{
290 LLMContent: []llm.Content{
291 {Type: llm.ContentTypeText, Text: "Command executed successfully"},
292 },
293 }
294 },
295 }
296
297 service := NewPredictableService()
298 loop := NewLoop(Config{
299 LLM: service,
300 History: []llm.Message{},
301 Tools: []*llm.Tool{testTool},
302 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
303 return nil
304 },
305 })
306
307 // Queue a user message that triggers the bash tool
308 userMessage := llm.Message{
309 Role: llm.MessageRoleUser,
310 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}},
311 }
312 loop.QueueUserMessage(userMessage)
313
314 // Run the loop with a short timeout
315 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
316 defer cancel()
317
318 err := loop.Go(ctx)
319 if err != context.DeadlineExceeded {
320 t.Errorf("expected context deadline exceeded, got %v", err)
321 }
322
323 // Check that the tool was called
324 if len(toolCalls) != 1 {
325 t.Errorf("expected 1 tool call, got %d", len(toolCalls))
326 }
327
328 if toolCalls[0] != `{"command":"echo hello"}` {
329 t.Errorf("unexpected tool call input: %s", toolCalls[0])
330 }
331}
332
333func TestGetHistory(t *testing.T) {
334 initialHistory := []llm.Message{
335 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
336 }
337
338 loop := NewLoop(Config{
339 LLM: NewPredictableService(),
340 History: initialHistory,
341 Tools: []*llm.Tool{},
342 })
343
344 history := loop.GetHistory()
345 if len(history) != 1 {
346 t.Errorf("expected history length 1, got %d", len(history))
347 }
348
349 // Modify returned slice to ensure it's a copy
350 history[0].Content[0].Text = "Modified"
351
352 // Original should be unchanged
353 original := loop.GetHistory()
354 if original[0].Content[0].Text != "Hello" {
355 t.Error("GetHistory should return a copy, not the original slice")
356 }
357}
358
359func TestLoopWithKeywordTool(t *testing.T) {
360 // Test that keyword tool doesn't crash with nil pointer dereference
361 service := NewPredictableService()
362
363 var messages []llm.Message
364 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
365 messages = append(messages, message)
366 return nil
367 }
368
369 // Add a mock keyword tool that doesn't actually search
370 tools := []*llm.Tool{
371 {
372 Name: "keyword_search",
373 Description: "Mock keyword search",
374 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
375 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
376 // Simple mock implementation
377 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
378 },
379 },
380 }
381
382 loop := NewLoop(Config{
383 LLM: service,
384 History: []llm.Message{},
385 Tools: tools,
386 RecordMessage: recordMessage,
387 })
388
389 // Send a user message that will trigger the default response
390 userMessage := llm.Message{
391 Role: llm.MessageRoleUser,
392 Content: []llm.Content{
393 {Type: llm.ContentTypeText, Text: "Please search for some files"},
394 },
395 }
396
397 loop.QueueUserMessage(userMessage)
398
399 // Process one turn
400 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
401 defer cancel()
402
403 err := loop.ProcessOneTurn(ctx)
404 if err != nil {
405 t.Fatalf("ProcessOneTurn failed: %v", err)
406 }
407
408 // Verify we got expected messages
409 // Note: User messages are recorded by ConversationManager, not by Loop,
410 // so we only expect the assistant response to be recorded here
411 if len(messages) < 1 {
412 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
413 }
414
415 // Should have assistant response
416 if messages[0].Role != llm.MessageRoleAssistant {
417 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
418 }
419}
420
421func TestLoopWithActualKeywordTool(t *testing.T) {
422 // Test that actual keyword tool works with Loop
423 service := NewPredictableService()
424
425 var messages []llm.Message
426 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
427 messages = append(messages, message)
428 return nil
429 }
430
431 // Use the actual keyword tool from claudetool package
432 // Note: We need to import it first
433 tools := []*llm.Tool{
434 // Add a simplified keyword tool to avoid file system dependencies in tests
435 {
436 Name: "keyword_search",
437 Description: "Search for files by keyword",
438 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
439 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
440 // Simple mock implementation - no context dependencies
441 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
442 },
443 },
444 }
445
446 loop := NewLoop(Config{
447 LLM: service,
448 History: []llm.Message{},
449 Tools: tools,
450 RecordMessage: recordMessage,
451 })
452
453 // Send a user message that will trigger the default response
454 userMessage := llm.Message{
455 Role: llm.MessageRoleUser,
456 Content: []llm.Content{
457 {Type: llm.ContentTypeText, Text: "Please search for some files"},
458 },
459 }
460
461 loop.QueueUserMessage(userMessage)
462
463 // Process one turn
464 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
465 defer cancel()
466
467 err := loop.ProcessOneTurn(ctx)
468 if err != nil {
469 t.Fatalf("ProcessOneTurn failed: %v", err)
470 }
471
472 // Verify we got expected messages
473 // Note: User messages are recorded by ConversationManager, not by Loop,
474 // so we only expect the assistant response to be recorded here
475 if len(messages) < 1 {
476 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
477 }
478
479 // Should have assistant response
480 if messages[0].Role != llm.MessageRoleAssistant {
481 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
482 }
483
484 t.Log("Keyword tool test passed - no nil pointer dereference occurred")
485}
486
487func TestKeywordToolWithLLMProvider(t *testing.T) {
488 // Create a temp directory with a test file to search
489 tempDir := t.TempDir()
490 testFile := filepath.Join(tempDir, "test.txt")
491 if err := os.WriteFile(testFile, []byte("this is a test file\n"), 0o644); err != nil {
492 t.Fatal(err)
493 }
494
495 // Create a predictable service for testing
496 predictableService := NewPredictableService()
497
498 // Create a simple LLM provider for testing
499 llmProvider := &testLLMProvider{
500 service: predictableService,
501 models: []string{"predictable"},
502 }
503
504 // Create keyword tool with provider - use temp dir instead of /
505 keywordTool := claudetool.NewKeywordToolWithWorkingDir(llmProvider, claudetool.NewMutableWorkingDir(tempDir))
506 tool := keywordTool.Tool()
507
508 // Test input
509 input := `{"query": "test search", "search_terms": ["test"]}`
510
511 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
512 defer cancel()
513 result := tool.Run(ctx, json.RawMessage(input))
514
515 // Should get a result without error (even though ripgrep will fail in test environment)
516 // The important thing is that it doesn't crash with nil pointer dereference
517 if result.Error != nil {
518 t.Logf("Expected error in test environment (no ripgrep): %v", result.Error)
519 // This is expected in test environment
520 } else {
521 t.Log("Keyword tool executed successfully")
522 if len(result.LLMContent) == 0 {
523 t.Error("Expected some content in result")
524 }
525 }
526}
527
528// testLLMProvider implements LLMServiceProvider for testing
// testLLMProvider implements LLMServiceProvider for testing
type testLLMProvider struct {
	service llm.Service // service handed back for any model ID listed in models
	models  []string    // model IDs this provider claims to support
}
533
534func (t *testLLMProvider) GetService(modelID string) (llm.Service, error) {
535 for _, model := range t.models {
536 if model == modelID {
537 return t.service, nil
538 }
539 }
540 return nil, fmt.Errorf("model %s not available", modelID)
541}
542
// GetAvailableModels reports the model IDs this provider supports.
func (t *testLLMProvider) GetAvailableModels() []string {
	return t.models
}
546
// TestInsertMissingToolResults exercises insertMissingToolResults on the
// common two-message shape: an assistant message (possibly containing
// tool_use items) followed by a user message. After the call, the test
// inspects the final message: a dangling tool_use must have gained a
// synthetic error tool_result; a satisfied or absent tool_use must leave the
// message untouched.
func TestInsertMissingToolResults(t *testing.T) {
	tests := []struct {
		name     string
		messages []llm.Message // conversation handed to insertMissingToolResults
		wantLen  int           // expected content count of the final message afterwards
		wantText string        // text of the synthetic tool_result, if one is expected
	}{
		{
			name: "no missing tool results",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Let me help you"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Thanks"},
					},
				},
			},
			wantLen:  1,
			wantText: "", // No synthetic result expected
		},
		{
			name: "missing tool result - should insert synthetic result",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Error occurred"},
					},
				},
			},
			wantLen:  2, // Should have synthetic tool_result + error message
			wantText: "not executed; retry possible",
		},
		{
			name: "multiple missing tool results",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use multiple tools"},
						{Type: llm.ContentTypeToolUse, ID: "tool_1", ToolName: "bash"},
						{Type: llm.ContentTypeToolUse, ID: "tool_2", ToolName: "read"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Error occurred"},
					},
				},
			},
			wantLen: 3, // Should have 2 synthetic tool_results + error message
		},
		{
			name: "has tool results - should not insert",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{
							Type:       llm.ContentTypeToolResult,
							ToolUseID:  "tool_123",
							ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "result"}},
						},
					},
				},
			},
			wantLen: 1, // Should not insert anything
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			loop := NewLoop(Config{
				LLM:     NewPredictableService(),
				History: []llm.Message{},
			})

			req := &llm.Request{
				Messages: tt.messages,
			}

			loop.insertMissingToolResults(req)

			// Synthetic results are expected in the final message.
			got := req.Messages[len(req.Messages)-1]
			if len(got.Content) != tt.wantLen {
				t.Errorf("expected %d content items, got %d", tt.wantLen, len(got.Content))
			}

			if tt.wantText != "" {
				// Find the synthetic tool result
				found := false
				for _, c := range got.Content {
					if c.Type == llm.ContentTypeToolResult && len(c.ToolResult) > 0 {
						if c.ToolResult[0].Text == tt.wantText {
							found = true
							// Synthetic results must be flagged as errors.
							if !c.ToolError {
								t.Error("synthetic tool result should have ToolError=true")
							}
							break
						}
					}
				}
				if !found {
					t.Errorf("expected to find synthetic tool result with text %q", tt.wantText)
				}
			}
		})
	}
}
677
// TestInsertMissingToolResultsWithEdgeCases covers the less common shapes:
// a tool_use hidden behind a later assistant message, a tool_use several
// messages back, and degenerate inputs (empty list, single message, roles in
// the "wrong" order). The degenerate cases assert only that the function
// neither panics nor modifies the request.
func TestInsertMissingToolResultsWithEdgeCases(t *testing.T) {
	// Test for the bug: when an assistant error message is recorded after a tool_use
	// but before tool execution, the tool_use is "hidden" from insertMissingToolResults
	// because it only checks the last two messages.
	t.Run("tool_use hidden by subsequent assistant message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		// Scenario:
		// 1. LLM responds with tool_use
		// 2. Something fails, error message recorded (assistant message)
		// 3. User sends new message
		// The tool_use in message 0 is never followed by a tool_result
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll run a command"},
						{Type: llm.ContentTypeToolUse, ID: "tool_hidden", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "LLM request failed: some error"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Please try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The function should have inserted a tool_result for tool_hidden
		// It should be inserted as a user message after the assistant message with tool_use
		// Since we can't insert in the middle, we need to ensure the history is valid

		// Check that there's a tool_result for tool_hidden somewhere in the messages
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_hidden" {
					found = true
					if !c.ToolError {
						t.Error("synthetic tool result should have ToolError=true")
					}
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_hidden - the bug is that tool_use is hidden by subsequent assistant message")
		}
	})

	// Test for tool_use in earlier message (not the second-to-last)
	t.Run("tool_use in earlier message without result", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Do something"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_earlier", ToolName: "bash"},
					},
				},
				// Missing: user message with tool_result for tool_earlier
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Something went wrong"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// Should have inserted a tool_result for tool_earlier
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_earlier" {
					found = true
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_earlier")
		}
	})

	t.Run("empty message list", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{},
		}

		loop.insertMissingToolResults(req)
		// Should not panic
	})

	t.Run("single message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not panic, should not modify
		if len(req.Messages[0].Content) != 1 {
			t.Error("should not modify single message")
		}
	})

	t.Run("wrong role order - user then assistant", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
				{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not modify when roles are wrong order
		if len(req.Messages[1].Content) != 1 {
			t.Error("should not modify when roles are in wrong order")
		}
	})
}
849
// TestInsertMissingToolResults_EmptyAssistantContent checks handling of
// assistant messages with empty content: a mid-conversation empty assistant
// message gets a "(no response)" text placeholder, while a trailing empty
// assistant message (which the API permits) and non-empty messages are left
// alone.
func TestInsertMissingToolResults_EmptyAssistantContent(t *testing.T) {
	// Test for the bug: when an assistant message has empty content (can happen when
	// the model ends its turn without producing any output), we need to add placeholder
	// content if it's not the last message. Otherwise the API will reject with:
	// "messages.N: all messages must have non-empty content except for the optional
	// final assistant message"

	t.Run("empty assistant content in middle of conversation", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "run git fetch"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"}},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{{
						Type:       llm.ContentTypeToolResult,
						ToolUseID:  "tool1",
						ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "success"}},
					}},
				},
				{
					// Empty assistant message - this can happen when model ends turn without output
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "next question"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message (index 3) should now have placeholder content
		if len(req.Messages[3].Content) == 0 {
			t.Error("expected placeholder content to be added to empty assistant message")
		}
		if req.Messages[3].Content[0].Type != llm.ContentTypeText {
			t.Error("expected placeholder to be text content")
		}
		if req.Messages[3].Content[0].Text != "(no response)" {
			t.Errorf("expected placeholder text '(no response)', got %q", req.Messages[3].Content[0].Text)
		}
	})

	t.Run("empty assistant content at end of conversation - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					// Empty assistant message at end is allowed by the API
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message at the end should NOT be modified
		// because the API allows empty content for the final assistant message
		if len(req.Messages[1].Content) != 0 {
			t.Error("expected final empty assistant message to remain empty")
		}
	})

	t.Run("non-empty assistant content - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi there"}},
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "goodbye"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The assistant message should not be modified
		if len(req.Messages[1].Content) != 1 {
			t.Errorf("expected assistant message to have 1 content item, got %d", len(req.Messages[1].Content))
		}
		if req.Messages[1].Content[0].Text != "hi there" {
			t.Errorf("expected assistant message text 'hi there', got %q", req.Messages[1].Content[0].Text)
		}
	})
}
972
// TestGitStateTracking verifies that the loop captures an initial git state
// at construction, reports no change after a turn with no repo activity, and
// fires OnGitStateChange exactly once after a new commit. Relies on the
// runGit test helper (defined elsewhere in this package).
func TestGitStateTracking(t *testing.T) {
	// Create a test repo
	tmpDir := t.TempDir()

	// Initialize git repo
	runGit(t, tmpDir, "init")
	runGit(t, tmpDir, "config", "user.email", "test@test.com")
	runGit(t, tmpDir, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(tmpDir, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "initial")

	// Track git state changes. The callback may run on the loop's goroutine,
	// so guard the slice with a mutex.
	var mu sync.Mutex
	var gitStateChanges []*gitstate.GitState

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			mu.Lock()
			gitStateChanges = append(gitStateChanges, state)
			mu.Unlock()
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Verify initial state was captured
	if loop.lastGitState == nil {
		t.Fatal("expected initial git state to be captured")
	}
	if !loop.lastGitState.IsRepo {
		t.Error("expected IsRepo to be true")
	}

	// Process a turn (no state change should occur)
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	// No state change should have occurred
	mu.Lock()
	numChanges := len(gitStateChanges)
	mu.Unlock()
	if numChanges != 0 {
		t.Errorf("expected no git state changes, got %d", numChanges)
	}

	// Now make a commit
	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "update")

	// Process another turn - this should detect the commit change
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello again"}},
	})

	err = loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	// Now a state change should have been detected
	mu.Lock()
	numChanges = len(gitStateChanges)
	mu.Unlock()
	if numChanges != 1 {
		t.Errorf("expected 1 git state change, got %d", numChanges)
	}
}
1065
1066func TestGitStateTrackingWorktree(t *testing.T) {
1067 tmpDir, err := filepath.EvalSymlinks(t.TempDir())
1068 if err != nil {
1069 t.Fatal(err)
1070 }
1071 mainRepo := filepath.Join(tmpDir, "main")
1072 worktreeDir := filepath.Join(tmpDir, "worktree")
1073
1074 // Create main repo
1075 if err := os.MkdirAll(mainRepo, 0o755); err != nil {
1076 t.Fatal(err)
1077 }
1078 runGit(t, mainRepo, "init")
1079 runGit(t, mainRepo, "config", "user.email", "test@test.com")
1080 runGit(t, mainRepo, "config", "user.name", "Test")
1081
1082 // Create initial commit
1083 testFile := filepath.Join(mainRepo, "test.txt")
1084 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1085 t.Fatal(err)
1086 }
1087 runGit(t, mainRepo, "add", ".")
1088 runGit(t, mainRepo, "commit", "-m", "initial")
1089
1090 // Create a worktree
1091 runGit(t, mainRepo, "worktree", "add", "-b", "feature", worktreeDir)
1092
1093 // Track git state changes in the worktree
1094 var mu sync.Mutex
1095 var gitStateChanges []*gitstate.GitState
1096
1097 loop := NewLoop(Config{
1098 LLM: NewPredictableService(),
1099 History: []llm.Message{},
1100 WorkingDir: worktreeDir,
1101 GetWorkingDir: func() string { return worktreeDir },
1102 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1103 mu.Lock()
1104 gitStateChanges = append(gitStateChanges, state)
1105 mu.Unlock()
1106 },
1107 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1108 return nil
1109 },
1110 })
1111
1112 // Verify initial state
1113 if loop.lastGitState == nil {
1114 t.Fatal("expected initial git state to be captured")
1115 }
1116 if loop.lastGitState.Branch != "feature" {
1117 t.Errorf("expected branch 'feature', got %q", loop.lastGitState.Branch)
1118 }
1119 if loop.lastGitState.Worktree != worktreeDir {
1120 t.Errorf("expected worktree %q, got %q", worktreeDir, loop.lastGitState.Worktree)
1121 }
1122
1123 // Make a commit in the worktree
1124 worktreeFile := filepath.Join(worktreeDir, "feature.txt")
1125 if err := os.WriteFile(worktreeFile, []byte("feature content"), 0o644); err != nil {
1126 t.Fatal(err)
1127 }
1128 runGit(t, worktreeDir, "add", ".")
1129 runGit(t, worktreeDir, "commit", "-m", "feature commit")
1130
1131 // Process a turn to detect the change
1132 loop.QueueUserMessage(llm.Message{
1133 Role: llm.MessageRoleUser,
1134 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1135 })
1136
1137 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1138 defer cancel()
1139
1140 err = loop.ProcessOneTurn(ctx)
1141 if err != nil {
1142 t.Fatalf("ProcessOneTurn failed: %v", err)
1143 }
1144
1145 mu.Lock()
1146 numChanges := len(gitStateChanges)
1147 mu.Unlock()
1148
1149 if numChanges != 1 {
1150 t.Errorf("expected 1 git state change in worktree, got %d", numChanges)
1151 }
1152}
1153
// runGit executes git with the given arguments in dir, failing the test on any
// error. Commit invocations get --no-verify injected so repository hooks
// cannot interfere with the test.
func runGit(t *testing.T, dir string, args ...string) {
	t.Helper()
	if len(args) > 0 && args[0] == "commit" {
		args = append([]string{"commit", "--no-verify"}, args[1:]...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	if out, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("git %v failed: %v\n%s", args, err, out)
	}
}
1169
1170func TestPredictableServiceTokenContextWindow(t *testing.T) {
1171 service := NewPredictableService()
1172 window := service.TokenContextWindow()
1173 if window != 200000 {
1174 t.Errorf("expected TokenContextWindow to return 200000, got %d", window)
1175 }
1176}
1177
1178func TestPredictableServiceMaxImageDimension(t *testing.T) {
1179 service := NewPredictableService()
1180 dimension := service.MaxImageDimension()
1181 if dimension != 2000 {
1182 t.Errorf("expected MaxImageDimension to return 2000, got %d", dimension)
1183 }
1184}
1185
1186func TestPredictableServiceThinkTool(t *testing.T) {
1187 service := NewPredictableService()
1188
1189 ctx := context.Background()
1190 req := &llm.Request{
1191 Messages: []llm.Message{
1192 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "think: This is a test thought"}}},
1193 },
1194 }
1195
1196 resp, err := service.Do(ctx, req)
1197 if err != nil {
1198 t.Fatalf("think tool test failed: %v", err)
1199 }
1200
1201 if resp.StopReason != llm.StopReasonToolUse {
1202 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1203 }
1204
1205 // Find the tool use content
1206 var toolUseContent *llm.Content
1207 for _, content := range resp.Content {
1208 if content.Type == llm.ContentTypeToolUse && content.ToolName == "think" {
1209 toolUseContent = &content
1210 break
1211 }
1212 }
1213
1214 if toolUseContent == nil {
1215 t.Fatal("no think tool use content found")
1216 }
1217
1218 // Check tool input contains the thoughts
1219 var toolInput map[string]interface{}
1220 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1221 t.Fatalf("failed to parse tool input: %v", err)
1222 }
1223
1224 if toolInput["thoughts"] != "This is a test thought" {
1225 t.Errorf("expected thoughts 'This is a test thought', got '%v'", toolInput["thoughts"])
1226 }
1227}
1228
1229func TestPredictableServicePatchTool(t *testing.T) {
1230 service := NewPredictableService()
1231
1232 ctx := context.Background()
1233 req := &llm.Request{
1234 Messages: []llm.Message{
1235 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch: /tmp/test.txt"}}},
1236 },
1237 }
1238
1239 resp, err := service.Do(ctx, req)
1240 if err != nil {
1241 t.Fatalf("patch tool test failed: %v", err)
1242 }
1243
1244 if resp.StopReason != llm.StopReasonToolUse {
1245 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1246 }
1247
1248 // Find the tool use content
1249 var toolUseContent *llm.Content
1250 for _, content := range resp.Content {
1251 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1252 toolUseContent = &content
1253 break
1254 }
1255 }
1256
1257 if toolUseContent == nil {
1258 t.Fatal("no patch tool use content found")
1259 }
1260
1261 // Check tool input contains the file path
1262 var toolInput map[string]interface{}
1263 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1264 t.Fatalf("failed to parse tool input: %v", err)
1265 }
1266
1267 if toolInput["path"] != "/tmp/test.txt" {
1268 t.Errorf("expected path '/tmp/test.txt', got '%v'", toolInput["path"])
1269 }
1270}
1271
1272func TestPredictableServiceMalformedPatchTool(t *testing.T) {
1273 service := NewPredictableService()
1274
1275 ctx := context.Background()
1276 req := &llm.Request{
1277 Messages: []llm.Message{
1278 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch bad json"}}},
1279 },
1280 }
1281
1282 resp, err := service.Do(ctx, req)
1283 if err != nil {
1284 t.Fatalf("malformed patch tool test failed: %v", err)
1285 }
1286
1287 if resp.StopReason != llm.StopReasonToolUse {
1288 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1289 }
1290
1291 // Find the tool use content
1292 var toolUseContent *llm.Content
1293 for _, content := range resp.Content {
1294 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1295 toolUseContent = &content
1296 break
1297 }
1298 }
1299
1300 if toolUseContent == nil {
1301 t.Fatal("no patch tool use content found")
1302 }
1303
1304 // Check that the tool input is malformed JSON (as expected)
1305 toolInputStr := string(toolUseContent.ToolInput)
1306 if !strings.Contains(toolInputStr, "parameter name") {
1307 t.Errorf("expected malformed JSON in tool input, got: %s", toolInputStr)
1308 }
1309}
1310
1311func TestPredictableServiceError(t *testing.T) {
1312 service := NewPredictableService()
1313
1314 ctx := context.Background()
1315 req := &llm.Request{
1316 Messages: []llm.Message{
1317 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "error: test error"}}},
1318 },
1319 }
1320
1321 resp, err := service.Do(ctx, req)
1322 if err == nil {
1323 t.Fatal("expected error, got nil")
1324 }
1325
1326 if !strings.Contains(err.Error(), "predictable error: test error") {
1327 t.Errorf("expected error message to contain 'predictable error: test error', got: %v", err)
1328 }
1329
1330 if resp != nil {
1331 t.Error("expected response to be nil when error occurs")
1332 }
1333}
1334
1335func TestPredictableServiceRequestTracking(t *testing.T) {
1336 service := NewPredictableService()
1337
1338 // Initially no requests
1339 requests := service.GetRecentRequests()
1340 if requests != nil {
1341 t.Errorf("expected nil requests initially, got %v", requests)
1342 }
1343
1344 lastReq := service.GetLastRequest()
1345 if lastReq != nil {
1346 t.Errorf("expected nil last request initially, got %v", lastReq)
1347 }
1348
1349 // Make a request
1350 ctx := context.Background()
1351 req := &llm.Request{
1352 Messages: []llm.Message{
1353 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
1354 },
1355 }
1356
1357 _, err := service.Do(ctx, req)
1358 if err != nil {
1359 t.Fatalf("Do failed: %v", err)
1360 }
1361
1362 // Check that request was tracked
1363 requests = service.GetRecentRequests()
1364 if len(requests) != 1 {
1365 t.Errorf("expected 1 request, got %d", len(requests))
1366 }
1367
1368 lastReq = service.GetLastRequest()
1369 if lastReq == nil {
1370 t.Fatal("expected last request to be non-nil")
1371 }
1372
1373 if len(lastReq.Messages) != 1 {
1374 t.Errorf("expected 1 message in last request, got %d", len(lastReq.Messages))
1375 }
1376
1377 // Test clearing requests
1378 service.ClearRequests()
1379 requests = service.GetRecentRequests()
1380 if requests != nil {
1381 t.Errorf("expected nil requests after clearing, got %v", requests)
1382 }
1383
1384 lastReq = service.GetLastRequest()
1385 if lastReq != nil {
1386 t.Errorf("expected nil last request after clearing, got %v", lastReq)
1387 }
1388
1389 // Test that only last 10 requests are kept
1390 for i := 0; i < 15; i++ {
1391 testReq := &llm.Request{
1392 Messages: []llm.Message{
1393 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: fmt.Sprintf("test %d", i)}}},
1394 },
1395 }
1396 _, err := service.Do(ctx, testReq)
1397 if err != nil {
1398 t.Fatalf("Do failed on iteration %d: %v", i, err)
1399 }
1400 }
1401
1402 requests = service.GetRecentRequests()
1403 if len(requests) != 10 {
1404 t.Errorf("expected 10 requests (last 10), got %d", len(requests))
1405 }
1406
1407 // Check that we have requests 5-14 (0-indexed)
1408 for i, req := range requests {
1409 expectedText := fmt.Sprintf("test %d", i+5)
1410 if len(req.Messages) == 0 || len(req.Messages[0].Content) == 0 {
1411 t.Errorf("request %d has no content", i)
1412 continue
1413 }
1414 if req.Messages[0].Content[0].Text != expectedText {
1415 t.Errorf("expected request %d to have text '%s', got '%s'", i, expectedText, req.Messages[0].Content[0].Text)
1416 }
1417 }
1418}
1419
1420func TestPredictableServiceScreenshotTool(t *testing.T) {
1421 service := NewPredictableService()
1422
1423 ctx := context.Background()
1424 req := &llm.Request{
1425 Messages: []llm.Message{
1426 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "screenshot: .test-class"}}},
1427 },
1428 }
1429
1430 resp, err := service.Do(ctx, req)
1431 if err != nil {
1432 t.Fatalf("screenshot tool test failed: %v", err)
1433 }
1434
1435 if resp.StopReason != llm.StopReasonToolUse {
1436 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1437 }
1438
1439 // Find the tool use content
1440 var toolUseContent *llm.Content
1441 for _, content := range resp.Content {
1442 if content.Type == llm.ContentTypeToolUse && content.ToolName == "browser_take_screenshot" {
1443 toolUseContent = &content
1444 break
1445 }
1446 }
1447
1448 if toolUseContent == nil {
1449 t.Fatal("no screenshot tool use content found")
1450 }
1451
1452 // Check tool input contains the selector
1453 var toolInput map[string]interface{}
1454 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1455 t.Fatalf("failed to parse tool input: %v", err)
1456 }
1457
1458 if toolInput["selector"] != ".test-class" {
1459 t.Errorf("expected selector '.test-class', got '%v'", toolInput["selector"])
1460 }
1461}
1462
1463func TestPredictableServiceToolSmorgasbord(t *testing.T) {
1464 service := NewPredictableService()
1465
1466 ctx := context.Background()
1467 req := &llm.Request{
1468 Messages: []llm.Message{
1469 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "tool smorgasbord"}}},
1470 },
1471 }
1472
1473 resp, err := service.Do(ctx, req)
1474 if err != nil {
1475 t.Fatalf("tool smorgasbord test failed: %v", err)
1476 }
1477
1478 if resp.StopReason != llm.StopReasonToolUse {
1479 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1480 }
1481
1482 // Count the tool use contents
1483 toolUseCount := 0
1484 for _, content := range resp.Content {
1485 if content.Type == llm.ContentTypeToolUse {
1486 toolUseCount++
1487 }
1488 }
1489
1490 // Should have at least several tool uses
1491 if toolUseCount < 5 {
1492 t.Errorf("expected at least 5 tool uses, got %d", toolUseCount)
1493 }
1494}
1495
1496func TestProcessLLMRequestError(t *testing.T) {
1497 // Test error handling when LLM service returns an error
1498 errorService := &errorLLMService{err: fmt.Errorf("test LLM error")}
1499
1500 var recordedMessages []llm.Message
1501 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1502 recordedMessages = append(recordedMessages, message)
1503 return nil
1504 }
1505
1506 loop := NewLoop(Config{
1507 LLM: errorService,
1508 History: []llm.Message{},
1509 Tools: []*llm.Tool{},
1510 RecordMessage: recordFunc,
1511 })
1512
1513 // Queue a user message
1514 userMessage := llm.Message{
1515 Role: llm.MessageRoleUser,
1516 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1517 }
1518 loop.QueueUserMessage(userMessage)
1519
1520 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1521 defer cancel()
1522
1523 err := loop.ProcessOneTurn(ctx)
1524 if err == nil {
1525 t.Fatal("expected error from ProcessOneTurn, got nil")
1526 }
1527
1528 if !strings.Contains(err.Error(), "LLM request failed") {
1529 t.Errorf("expected error to contain 'LLM request failed', got: %v", err)
1530 }
1531
1532 // Check that error message was recorded
1533 if len(recordedMessages) < 1 {
1534 t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1535 }
1536
1537 if recordedMessages[0].Role != llm.MessageRoleAssistant {
1538 t.Errorf("expected recorded message to be assistant role, got %s", recordedMessages[0].Role)
1539 }
1540
1541 if len(recordedMessages[0].Content) != 1 {
1542 t.Fatalf("expected 1 content item in recorded message, got %d", len(recordedMessages[0].Content))
1543 }
1544
1545 if recordedMessages[0].Content[0].Type != llm.ContentTypeText {
1546 t.Errorf("expected text content, got %s", recordedMessages[0].Content[0].Type)
1547 }
1548
1549 if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1550 t.Errorf("expected error message to contain 'LLM request failed', got: %s", recordedMessages[0].Content[0].Text)
1551 }
1552
1553 // Verify EndOfTurn is set so the agent working state is properly updated
1554 if !recordedMessages[0].EndOfTurn {
1555 t.Error("expected error message to have EndOfTurn=true so agent working state is updated")
1556 }
1557}
1558
1559// errorLLMService is a test LLM service that always returns an error
1560type errorLLMService struct {
1561 err error
1562}
1563
1564func (e *errorLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1565 return nil, e.err
1566}
1567
1568func (e *errorLLMService) TokenContextWindow() int {
1569 return 200000
1570}
1571
1572func (e *errorLLMService) MaxImageDimension() int {
1573 return 2000
1574}
1575
1576// retryableLLMService fails with a retryable error a specified number of times, then succeeds
1577type retryableLLMService struct {
1578 failuresRemaining int
1579 callCount int
1580 mu sync.Mutex
1581}
1582
1583func (r *retryableLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1584 r.mu.Lock()
1585 r.callCount++
1586 if r.failuresRemaining > 0 {
1587 r.failuresRemaining--
1588 r.mu.Unlock()
1589 return nil, fmt.Errorf("connection error: EOF")
1590 }
1591 r.mu.Unlock()
1592 return &llm.Response{
1593 Content: []llm.Content{
1594 {Type: llm.ContentTypeText, Text: "Success after retry"},
1595 },
1596 StopReason: llm.StopReasonEndTurn,
1597 }, nil
1598}
1599
1600func (r *retryableLLMService) TokenContextWindow() int {
1601 return 200000
1602}
1603
1604func (r *retryableLLMService) MaxImageDimension() int {
1605 return 2000
1606}
1607
1608func (r *retryableLLMService) getCallCount() int {
1609 r.mu.Lock()
1610 defer r.mu.Unlock()
1611 return r.callCount
1612}
1613
1614func TestLLMRequestRetryOnEOF(t *testing.T) {
1615 // Test that LLM requests are retried on EOF errors
1616 retryService := &retryableLLMService{failuresRemaining: 1}
1617
1618 var recordedMessages []llm.Message
1619 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1620 recordedMessages = append(recordedMessages, message)
1621 return nil
1622 }
1623
1624 loop := NewLoop(Config{
1625 LLM: retryService,
1626 History: []llm.Message{},
1627 Tools: []*llm.Tool{},
1628 RecordMessage: recordFunc,
1629 })
1630
1631 // Queue a user message
1632 userMessage := llm.Message{
1633 Role: llm.MessageRoleUser,
1634 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1635 }
1636 loop.QueueUserMessage(userMessage)
1637
1638 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1639 defer cancel()
1640
1641 err := loop.ProcessOneTurn(ctx)
1642 if err != nil {
1643 t.Fatalf("expected no error after retry, got: %v", err)
1644 }
1645
1646 // Should have been called twice (1 failure + 1 success)
1647 if retryService.getCallCount() != 2 {
1648 t.Errorf("expected 2 LLM calls (retry), got %d", retryService.getCallCount())
1649 }
1650
1651 // Check that success message was recorded
1652 if len(recordedMessages) != 1 {
1653 t.Fatalf("expected 1 recorded message (success), got %d", len(recordedMessages))
1654 }
1655
1656 if !strings.Contains(recordedMessages[0].Content[0].Text, "Success after retry") {
1657 t.Errorf("expected success message, got: %s", recordedMessages[0].Content[0].Text)
1658 }
1659}
1660
1661func TestLLMRequestRetryExhausted(t *testing.T) {
1662 // Test that after max retries, error is returned
1663 retryService := &retryableLLMService{failuresRemaining: 10} // More than maxRetries
1664
1665 var recordedMessages []llm.Message
1666 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1667 recordedMessages = append(recordedMessages, message)
1668 return nil
1669 }
1670
1671 loop := NewLoop(Config{
1672 LLM: retryService,
1673 History: []llm.Message{},
1674 Tools: []*llm.Tool{},
1675 RecordMessage: recordFunc,
1676 })
1677
1678 userMessage := llm.Message{
1679 Role: llm.MessageRoleUser,
1680 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1681 }
1682 loop.QueueUserMessage(userMessage)
1683
1684 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1685 defer cancel()
1686
1687 err := loop.ProcessOneTurn(ctx)
1688 if err == nil {
1689 t.Fatal("expected error after exhausting retries")
1690 }
1691
1692 // Should have been called maxRetries times (2)
1693 if retryService.getCallCount() != 2 {
1694 t.Errorf("expected 2 LLM calls (maxRetries), got %d", retryService.getCallCount())
1695 }
1696
1697 // Check error message was recorded
1698 if len(recordedMessages) != 1 {
1699 t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1700 }
1701
1702 if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1703 t.Errorf("expected error message, got: %s", recordedMessages[0].Content[0].Text)
1704 }
1705}
1706
1707func TestIsRetryableError(t *testing.T) {
1708 tests := []struct {
1709 name string
1710 err error
1711 retryable bool
1712 }{
1713 {"nil error", nil, false},
1714 {"io.EOF", io.EOF, true},
1715 {"io.ErrUnexpectedEOF", io.ErrUnexpectedEOF, true},
1716 {"EOF error string", fmt.Errorf("EOF"), true},
1717 {"wrapped EOF", fmt.Errorf("connection error: EOF"), true},
1718 {"connection reset", fmt.Errorf("connection reset by peer"), true},
1719 {"connection refused", fmt.Errorf("connection refused"), true},
1720 {"timeout", fmt.Errorf("i/o timeout"), true},
1721 {"api error", fmt.Errorf("rate limit exceeded"), false},
1722 {"generic error", fmt.Errorf("something went wrong"), false},
1723 }
1724
1725 for _, tt := range tests {
1726 t.Run(tt.name, func(t *testing.T) {
1727 if got := isRetryableError(tt.err); got != tt.retryable {
1728 t.Errorf("isRetryableError(%v) = %v, want %v", tt.err, got, tt.retryable)
1729 }
1730 })
1731 }
1732}
1733
1734func TestCheckGitStateChange(t *testing.T) {
1735 // Create a test repo
1736 tmpDir := t.TempDir()
1737
1738 // Initialize git repo
1739 runGit(t, tmpDir, "init")
1740 runGit(t, tmpDir, "config", "user.email", "test@test.com")
1741 runGit(t, tmpDir, "config", "user.name", "Test")
1742
1743 // Create initial commit
1744 testFile := filepath.Join(tmpDir, "test.txt")
1745 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1746 t.Fatal(err)
1747 }
1748 runGit(t, tmpDir, "add", ".")
1749 runGit(t, tmpDir, "commit", "-m", "initial")
1750
1751 // Test with nil OnGitStateChange - should not panic
1752 loop := NewLoop(Config{
1753 LLM: NewPredictableService(),
1754 History: []llm.Message{},
1755 WorkingDir: tmpDir,
1756 GetWorkingDir: func() string { return tmpDir },
1757 // OnGitStateChange is nil
1758 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1759 return nil
1760 },
1761 })
1762
1763 // This should not panic
1764 loop.checkGitStateChange(context.Background())
1765
1766 // Test with actual callback
1767 var gitStateChanges []*gitstate.GitState
1768 loop = NewLoop(Config{
1769 LLM: NewPredictableService(),
1770 History: []llm.Message{},
1771 WorkingDir: tmpDir,
1772 GetWorkingDir: func() string { return tmpDir },
1773 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1774 gitStateChanges = append(gitStateChanges, state)
1775 },
1776 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1777 return nil
1778 },
1779 })
1780
1781 // Make a change
1782 if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1783 t.Fatal(err)
1784 }
1785 runGit(t, tmpDir, "add", ".")
1786 runGit(t, tmpDir, "commit", "-m", "update")
1787
1788 // Check git state change
1789 loop.checkGitStateChange(context.Background())
1790
1791 if len(gitStateChanges) != 1 {
1792 t.Errorf("expected 1 git state change, got %d", len(gitStateChanges))
1793 }
1794
1795 // Call again - should not trigger another change since state is the same
1796 loop.checkGitStateChange(context.Background())
1797
1798 if len(gitStateChanges) != 1 {
1799 t.Errorf("expected still 1 git state change (no new changes), got %d", len(gitStateChanges))
1800 }
1801}
1802
1803func TestHandleToolCallsWithMissingTool(t *testing.T) {
1804 var recordedMessages []llm.Message
1805 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1806 recordedMessages = append(recordedMessages, message)
1807 return nil
1808 }
1809
1810 loop := NewLoop(Config{
1811 LLM: NewPredictableService(),
1812 History: []llm.Message{},
1813 Tools: []*llm.Tool{}, // No tools registered
1814 RecordMessage: recordFunc,
1815 })
1816
1817 // Create content with a tool use for a tool that doesn't exist
1818 content := []llm.Content{
1819 {
1820 ID: "test_tool_123",
1821 Type: llm.ContentTypeToolUse,
1822 ToolName: "nonexistent_tool",
1823 ToolInput: json.RawMessage(`{"test": "input"}`),
1824 },
1825 }
1826
1827 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1828 defer cancel()
1829
1830 err := loop.handleToolCalls(ctx, content)
1831 if err != nil {
1832 t.Fatalf("handleToolCalls failed: %v", err)
1833 }
1834
1835 // Should have recorded a user message with tool result
1836 if len(recordedMessages) < 1 {
1837 t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1838 }
1839
1840 msg := recordedMessages[0]
1841 if msg.Role != llm.MessageRoleUser {
1842 t.Errorf("expected user role, got %s", msg.Role)
1843 }
1844
1845 if len(msg.Content) != 1 {
1846 t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1847 }
1848
1849 toolResult := msg.Content[0]
1850 if toolResult.Type != llm.ContentTypeToolResult {
1851 t.Errorf("expected tool result content, got %s", toolResult.Type)
1852 }
1853
1854 if toolResult.ToolUseID != "test_tool_123" {
1855 t.Errorf("expected tool use ID 'test_tool_123', got %s", toolResult.ToolUseID)
1856 }
1857
1858 if !toolResult.ToolError {
1859 t.Error("expected ToolError to be true")
1860 }
1861
1862 if len(toolResult.ToolResult) != 1 {
1863 t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1864 }
1865
1866 if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1867 t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1868 }
1869
1870 expectedText := "Tool 'nonexistent_tool' not found"
1871 if toolResult.ToolResult[0].Text != expectedText {
1872 t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1873 }
1874}
1875
1876func TestHandleToolCallsWithErrorTool(t *testing.T) {
1877 var recordedMessages []llm.Message
1878 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1879 recordedMessages = append(recordedMessages, message)
1880 return nil
1881 }
1882
1883 // Create a tool that always returns an error
1884 errorTool := &llm.Tool{
1885 Name: "error_tool",
1886 Description: "A tool that always errors",
1887 InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
1888 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
1889 return llm.ErrorToolOut(fmt.Errorf("intentional test error"))
1890 },
1891 }
1892
1893 loop := NewLoop(Config{
1894 LLM: NewPredictableService(),
1895 History: []llm.Message{},
1896 Tools: []*llm.Tool{errorTool},
1897 RecordMessage: recordFunc,
1898 })
1899
1900 // Create content with a tool use that will error
1901 content := []llm.Content{
1902 {
1903 ID: "error_tool_123",
1904 Type: llm.ContentTypeToolUse,
1905 ToolName: "error_tool",
1906 ToolInput: json.RawMessage(`{}`),
1907 },
1908 }
1909
1910 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1911 defer cancel()
1912
1913 err := loop.handleToolCalls(ctx, content)
1914 if err != nil {
1915 t.Fatalf("handleToolCalls failed: %v", err)
1916 }
1917
1918 // Should have recorded a user message with tool result
1919 if len(recordedMessages) < 1 {
1920 t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1921 }
1922
1923 msg := recordedMessages[0]
1924 if msg.Role != llm.MessageRoleUser {
1925 t.Errorf("expected user role, got %s", msg.Role)
1926 }
1927
1928 if len(msg.Content) != 1 {
1929 t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1930 }
1931
1932 toolResult := msg.Content[0]
1933 if toolResult.Type != llm.ContentTypeToolResult {
1934 t.Errorf("expected tool result content, got %s", toolResult.Type)
1935 }
1936
1937 if toolResult.ToolUseID != "error_tool_123" {
1938 t.Errorf("expected tool use ID 'error_tool_123', got %s", toolResult.ToolUseID)
1939 }
1940
1941 if !toolResult.ToolError {
1942 t.Error("expected ToolError to be true")
1943 }
1944
1945 if len(toolResult.ToolResult) != 1 {
1946 t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1947 }
1948
1949 if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1950 t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1951 }
1952
1953 expectedText := "intentional test error"
1954 if toolResult.ToolResult[0].Text != expectedText {
1955 t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1956 }
1957}
1958
1959func TestMaxTokensTruncation(t *testing.T) {
1960 var mu sync.Mutex
1961 var recordedMessages []llm.Message
1962 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1963 mu.Lock()
1964 recordedMessages = append(recordedMessages, message)
1965 mu.Unlock()
1966 return nil
1967 }
1968
1969 service := NewPredictableService()
1970 loop := NewLoop(Config{
1971 LLM: service,
1972 History: []llm.Message{},
1973 Tools: []*llm.Tool{},
1974 RecordMessage: recordFunc,
1975 })
1976
1977 // Queue a user message that triggers max tokens truncation
1978 userMessage := llm.Message{
1979 Role: llm.MessageRoleUser,
1980 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
1981 }
1982 loop.QueueUserMessage(userMessage)
1983
1984 // Run the loop - it should stop after handling truncation
1985 ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
1986 defer cancel()
1987
1988 err := loop.Go(ctx)
1989 if err != context.DeadlineExceeded {
1990 t.Errorf("expected context deadline exceeded, got %v", err)
1991 }
1992
1993 // Check recorded messages
1994 mu.Lock()
1995 numMessages := len(recordedMessages)
1996 messages := make([]llm.Message, len(recordedMessages))
1997 copy(messages, recordedMessages)
1998 mu.Unlock()
1999
2000 // We should see two messages:
2001 // 1. The truncated message (with ExcludedFromContext=true) for cost tracking
2002 // 2. The truncation error message (with ErrorType=truncation)
2003 if numMessages != 2 {
2004 t.Errorf("Expected 2 recorded messages (truncated + error), got %d", numMessages)
2005 for i, msg := range messages {
2006 t.Logf("Message %d: Role=%v, EndOfTurn=%v, ExcludedFromContext=%v, ErrorType=%v",
2007 i, msg.Role, msg.EndOfTurn, msg.ExcludedFromContext, msg.ErrorType)
2008 }
2009 return
2010 }
2011
2012 // First message: truncated response (for cost tracking, excluded from context)
2013 truncatedMsg := messages[0]
2014 if truncatedMsg.Role != llm.MessageRoleAssistant {
2015 t.Errorf("Truncated message should be assistant, got %v", truncatedMsg.Role)
2016 }
2017 if !truncatedMsg.ExcludedFromContext {
2018 t.Error("Truncated message should have ExcludedFromContext=true")
2019 }
2020
2021 // Second message: truncation error
2022 errorMsg := messages[1]
2023 if errorMsg.Role != llm.MessageRoleAssistant {
2024 t.Errorf("Error message should be assistant, got %v", errorMsg.Role)
2025 }
2026 if !errorMsg.EndOfTurn {
2027 t.Error("Error message should have EndOfTurn=true")
2028 }
2029 if errorMsg.ErrorType != llm.ErrorTypeTruncation {
2030 t.Errorf("Error message should have ErrorType=truncation, got %v", errorMsg.ErrorType)
2031 }
2032 if errorMsg.ExcludedFromContext {
2033 t.Error("Error message should not be excluded from context")
2034 }
2035 if !strings.Contains(errorMsg.Content[0].Text, "SYSTEM ERROR") {
2036 t.Errorf("Error message should contain SYSTEM ERROR, got: %s", errorMsg.Content[0].Text)
2037 }
2038
2039 // Verify history contains user message + error message, but NOT the truncated response
2040 loop.mu.Lock()
2041 history := loop.history
2042 loop.mu.Unlock()
2043
2044 // History should have: user message + error message (the truncated response is NOT added to history)
2045 if len(history) != 2 {
2046 t.Errorf("History should have 2 messages (user + error), got %d", len(history))
2047 }
2048}
2049
2050//func TestInsertMissingToolResultsEdgeCases(t *testing.T) {
2051// loop := NewLoop(Config{
2052// LLM: NewPredictableService(),
2053// History: []llm.Message{},
2054// })
2055//
2056// // Test with nil request
2057// loop.insertMissingToolResults(nil) // Should not panic
2058//
2059// // Test with empty messages
2060// req := &llm.Request{Messages: []llm.Message{}}
2061// loop.insertMissingToolResults(req) // Should not panic
2062//
2063// // Test with single message
2064// req = &llm.Request{
2065// Messages: []llm.Message{
2066// {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
2067// },
2068// }
2069// loop.insertMissingToolResults(req) // Should not panic
2070// if len(req.Messages) != 1 {
2071// t.Errorf("expected 1 message, got %d", len(req.Messages))
2072// }
2073//
2074// // Test with multiple consecutive assistant messages with tool_use
2075// req = &llm.Request{
2076// Messages: []llm.Message{
2077// {
2078// Role: llm.MessageRoleAssistant,
2079// Content: []llm.Content{
2080// {Type: llm.ContentTypeText, Text: "First tool"},
2081// {Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"},
2082// },
2083// },
2084// {
2085// Role: llm.MessageRoleAssistant,
2086// Content: []llm.Content{
2087// {Type: llm.ContentTypeText, Text: "Second tool"},
2088// {Type: llm.ContentTypeToolUse, ID: "tool2", ToolName: "read"},
2089// },
2090// },
2091// {
2092// Role: llm.MessageRoleUser,
2093// Content: []llm.Content{
2094// {Type: llm.ContentTypeText, Text: "User response"},
2095// },
2096// },
2097// },
2098// }
2099//
2100// loop.insertMissingToolResults(req)
2101//
2102// // Should have inserted synthetic tool results for both tool_uses
2103// // The structure should be:
2104// // 0: First assistant message
2105// // 1: Synthetic user message with tool1 result
2106// // 2: Second assistant message
2107// // 3: Synthetic user message with tool2 result
2108// // 4: Original user message
2109// if len(req.Messages) != 5 {
2110// t.Fatalf("expected 5 messages after processing, got %d", len(req.Messages))
2111// }
2112//
2113// // Check first synthetic message
2114// if req.Messages[1].Role != llm.MessageRoleUser {
2115// t.Errorf("expected message 1 to be user role, got %s", req.Messages[1].Role)
2116// }
2117// foundTool1 := false
2118// for _, content := range req.Messages[1].Content {
2119// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool1" {
2120// foundTool1 = true
2121// break
2122// }
2123// }
2124// if !foundTool1 {
2125// t.Error("expected to find tool1 result in message 1")
2126// }
2127//
2128// // Check second synthetic message
2129// if req.Messages[3].Role != llm.MessageRoleUser {
2130// t.Errorf("expected message 3 to be user role, got %s", req.Messages[3].Role)
2131// }
2132// foundTool2 := false
2133// for _, content := range req.Messages[3].Content {
2134// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool2" {
2135// foundTool2 = true
2136// break
2137// }
2138//}
2139// if !foundTool2 {
2140// t.Error("expected to find tool2 result in message 3")
2141// }
2142//}