1package loop
2
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"shelley.exe.dev/claudetool"
	"shelley.exe.dev/gitstate"
	"shelley.exe.dev/llm"
)
20
21func TestNewLoop(t *testing.T) {
22 history := []llm.Message{
23 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
24 }
25 tools := []*llm.Tool{}
26 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
27 return nil
28 }
29
30 loop := NewLoop(Config{
31 LLM: NewPredictableService(),
32 History: history,
33 Tools: tools,
34 RecordMessage: recordFunc,
35 })
36 if loop == nil {
37 t.Fatal("NewLoop returned nil")
38 }
39
40 if len(loop.history) != 1 {
41 t.Errorf("expected history length 1, got %d", len(loop.history))
42 }
43
44 if len(loop.messageQueue) != 0 {
45 t.Errorf("expected empty message queue, got %d", len(loop.messageQueue))
46 }
47}
48
49func TestQueueUserMessage(t *testing.T) {
50 loop := NewLoop(Config{
51 LLM: NewPredictableService(),
52 History: []llm.Message{},
53 Tools: []*llm.Tool{},
54 })
55
56 message := llm.Message{
57 Role: llm.MessageRoleUser,
58 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Test message"}},
59 }
60
61 loop.QueueUserMessage(message)
62
63 loop.mu.Lock()
64 queueLen := len(loop.messageQueue)
65 loop.mu.Unlock()
66
67 if queueLen != 1 {
68 t.Errorf("expected message queue length 1, got %d", queueLen)
69 }
70}
71
72func TestPredictableService(t *testing.T) {
73 service := NewPredictableService()
74
75 // Test simple hello response
76 ctx := context.Background()
77 req := &llm.Request{
78 Messages: []llm.Message{
79 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
80 },
81 }
82
83 resp, err := service.Do(ctx, req)
84 if err != nil {
85 t.Fatalf("predictable service Do failed: %v", err)
86 }
87
88 if resp.Role != llm.MessageRoleAssistant {
89 t.Errorf("expected assistant role, got %v", resp.Role)
90 }
91
92 if len(resp.Content) == 0 {
93 t.Error("expected non-empty content")
94 }
95
96 if resp.Content[0].Type != llm.ContentTypeText {
97 t.Errorf("expected text content, got %v", resp.Content[0].Type)
98 }
99
100 if resp.Content[0].Text != "Well, hi there!" {
101 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
102 }
103}
104
105func TestPredictableServiceEcho(t *testing.T) {
106 service := NewPredictableService()
107
108 ctx := context.Background()
109 req := &llm.Request{
110 Messages: []llm.Message{
111 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "echo: foo"}}},
112 },
113 }
114
115 resp, err := service.Do(ctx, req)
116 if err != nil {
117 t.Fatalf("echo test failed: %v", err)
118 }
119
120 if resp.Content[0].Text != "foo" {
121 t.Errorf("expected 'foo', got '%s'", resp.Content[0].Text)
122 }
123
124 // Test another echo
125 req.Messages[0].Content[0].Text = "echo: hello world"
126 resp, err = service.Do(ctx, req)
127 if err != nil {
128 t.Fatalf("echo hello world test failed: %v", err)
129 }
130
131 if resp.Content[0].Text != "hello world" {
132 t.Errorf("expected 'hello world', got '%s'", resp.Content[0].Text)
133 }
134}
135
136func TestPredictableServiceBashTool(t *testing.T) {
137 service := NewPredictableService()
138
139 ctx := context.Background()
140 req := &llm.Request{
141 Messages: []llm.Message{
142 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: ls -la"}}},
143 },
144 }
145
146 resp, err := service.Do(ctx, req)
147 if err != nil {
148 t.Fatalf("bash tool test failed: %v", err)
149 }
150
151 if resp.StopReason != llm.StopReasonToolUse {
152 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
153 }
154
155 if len(resp.Content) != 2 {
156 t.Errorf("expected 2 content items (text + tool_use), got %d", len(resp.Content))
157 }
158
159 // Find the tool use content
160 var toolUseContent *llm.Content
161 for _, content := range resp.Content {
162 if content.Type == llm.ContentTypeToolUse {
163 toolUseContent = &content
164 break
165 }
166 }
167
168 if toolUseContent == nil {
169 t.Fatal("no tool use content found")
170 }
171
172 if toolUseContent.ToolName != "bash" {
173 t.Errorf("expected tool name 'bash', got '%s'", toolUseContent.ToolName)
174 }
175
176 // Check tool input contains the command
177 var toolInput map[string]interface{}
178 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
179 t.Fatalf("failed to parse tool input: %v", err)
180 }
181
182 if toolInput["command"] != "ls -la" {
183 t.Errorf("expected command 'ls -la', got '%v'", toolInput["command"])
184 }
185}
186
187func TestPredictableServiceDefaultResponse(t *testing.T) {
188 service := NewPredictableService()
189
190 ctx := context.Background()
191 req := &llm.Request{
192 Messages: []llm.Message{
193 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "some unknown input"}}},
194 },
195 }
196
197 resp, err := service.Do(ctx, req)
198 if err != nil {
199 t.Fatalf("default response test failed: %v", err)
200 }
201
202 if resp.Content[0].Text != "edit predictable.go to add a response for that one..." {
203 t.Errorf("unexpected default response: %s", resp.Content[0].Text)
204 }
205}
206
207func TestPredictableServiceDelay(t *testing.T) {
208 service := NewPredictableService()
209
210 ctx := context.Background()
211 req := &llm.Request{
212 Messages: []llm.Message{
213 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "delay: 0.1"}}},
214 },
215 }
216
217 start := time.Now()
218 resp, err := service.Do(ctx, req)
219 elapsed := time.Since(start)
220
221 if err != nil {
222 t.Fatalf("delay test failed: %v", err)
223 }
224
225 if elapsed < 100*time.Millisecond {
226 t.Errorf("expected delay of at least 100ms, got %v", elapsed)
227 }
228
229 if resp.Content[0].Text != "Delayed for 0.1 seconds" {
230 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
231 }
232}
233
234func TestLoopWithPredictableService(t *testing.T) {
235 var recordedMessages []llm.Message
236 var recordedUsages []llm.Usage
237
238 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
239 recordedMessages = append(recordedMessages, message)
240 recordedUsages = append(recordedUsages, usage)
241 return nil
242 }
243
244 service := NewPredictableService()
245 loop := NewLoop(Config{
246 LLM: service,
247 History: []llm.Message{},
248 Tools: []*llm.Tool{},
249 RecordMessage: recordFunc,
250 })
251
252 // Queue a user message that triggers a known response
253 userMessage := llm.Message{
254 Role: llm.MessageRoleUser,
255 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
256 }
257 loop.QueueUserMessage(userMessage)
258
259 // Run the loop with a short timeout
260 ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
261 defer cancel()
262
263 err := loop.Go(ctx)
264 if err != context.DeadlineExceeded {
265 t.Errorf("expected context deadline exceeded, got %v", err)
266 }
267
268 // Check that messages were recorded
269 if len(recordedMessages) < 1 {
270 t.Errorf("expected at least 1 recorded message, got %d", len(recordedMessages))
271 }
272
273 // Check usage tracking
274 usage := loop.GetUsage()
275 if usage.IsZero() {
276 t.Error("expected non-zero usage")
277 }
278}
279
280func TestLoopWithTools(t *testing.T) {
281 var toolCalls []string
282
283 testTool := &llm.Tool{
284 Name: "bash",
285 Description: "A test bash tool",
286 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"command": {"type": "string"}}}`),
287 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
288 toolCalls = append(toolCalls, string(input))
289 return llm.ToolOut{
290 LLMContent: []llm.Content{
291 {Type: llm.ContentTypeText, Text: "Command executed successfully"},
292 },
293 }
294 },
295 }
296
297 service := NewPredictableService()
298 loop := NewLoop(Config{
299 LLM: service,
300 History: []llm.Message{},
301 Tools: []*llm.Tool{testTool},
302 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
303 return nil
304 },
305 })
306
307 // Queue a user message that triggers the bash tool
308 userMessage := llm.Message{
309 Role: llm.MessageRoleUser,
310 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}},
311 }
312 loop.QueueUserMessage(userMessage)
313
314 // Run the loop with a short timeout
315 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
316 defer cancel()
317
318 err := loop.Go(ctx)
319 if err != context.DeadlineExceeded {
320 t.Errorf("expected context deadline exceeded, got %v", err)
321 }
322
323 // Check that the tool was called
324 if len(toolCalls) != 1 {
325 t.Errorf("expected 1 tool call, got %d", len(toolCalls))
326 }
327
328 if toolCalls[0] != `{"command":"echo hello"}` {
329 t.Errorf("unexpected tool call input: %s", toolCalls[0])
330 }
331}
332
333func TestGetHistory(t *testing.T) {
334 initialHistory := []llm.Message{
335 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
336 }
337
338 loop := NewLoop(Config{
339 LLM: NewPredictableService(),
340 History: initialHistory,
341 Tools: []*llm.Tool{},
342 })
343
344 history := loop.GetHistory()
345 if len(history) != 1 {
346 t.Errorf("expected history length 1, got %d", len(history))
347 }
348
349 // Modify returned slice to ensure it's a copy
350 history[0].Content[0].Text = "Modified"
351
352 // Original should be unchanged
353 original := loop.GetHistory()
354 if original[0].Content[0].Text != "Hello" {
355 t.Error("GetHistory should return a copy, not the original slice")
356 }
357}
358
359func TestLoopWithKeywordTool(t *testing.T) {
360 // Test that keyword tool doesn't crash with nil pointer dereference
361 service := NewPredictableService()
362
363 var messages []llm.Message
364 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
365 messages = append(messages, message)
366 return nil
367 }
368
369 // Add a mock keyword tool that doesn't actually search
370 tools := []*llm.Tool{
371 {
372 Name: "keyword_search",
373 Description: "Mock keyword search",
374 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
375 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
376 // Simple mock implementation
377 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
378 },
379 },
380 }
381
382 loop := NewLoop(Config{
383 LLM: service,
384 History: []llm.Message{},
385 Tools: tools,
386 RecordMessage: recordMessage,
387 })
388
389 // Send a user message that will trigger the default response
390 userMessage := llm.Message{
391 Role: llm.MessageRoleUser,
392 Content: []llm.Content{
393 {Type: llm.ContentTypeText, Text: "Please search for some files"},
394 },
395 }
396
397 loop.QueueUserMessage(userMessage)
398
399 // Process one turn
400 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
401 defer cancel()
402
403 err := loop.ProcessOneTurn(ctx)
404 if err != nil {
405 t.Fatalf("ProcessOneTurn failed: %v", err)
406 }
407
408 // Verify we got expected messages
409 // Note: User messages are recorded by ConversationManager, not by Loop,
410 // so we only expect the assistant response to be recorded here
411 if len(messages) < 1 {
412 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
413 }
414
415 // Should have assistant response
416 if messages[0].Role != llm.MessageRoleAssistant {
417 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
418 }
419}
420
421func TestLoopWithActualKeywordTool(t *testing.T) {
422 // Test that actual keyword tool works with Loop
423 service := NewPredictableService()
424
425 var messages []llm.Message
426 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
427 messages = append(messages, message)
428 return nil
429 }
430
431 // Use the actual keyword tool from claudetool package
432 // Note: We need to import it first
433 tools := []*llm.Tool{
434 // Add a simplified keyword tool to avoid file system dependencies in tests
435 {
436 Name: "keyword_search",
437 Description: "Search for files by keyword",
438 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
439 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
440 // Simple mock implementation - no context dependencies
441 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
442 },
443 },
444 }
445
446 loop := NewLoop(Config{
447 LLM: service,
448 History: []llm.Message{},
449 Tools: tools,
450 RecordMessage: recordMessage,
451 })
452
453 // Send a user message that will trigger the default response
454 userMessage := llm.Message{
455 Role: llm.MessageRoleUser,
456 Content: []llm.Content{
457 {Type: llm.ContentTypeText, Text: "Please search for some files"},
458 },
459 }
460
461 loop.QueueUserMessage(userMessage)
462
463 // Process one turn
464 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
465 defer cancel()
466
467 err := loop.ProcessOneTurn(ctx)
468 if err != nil {
469 t.Fatalf("ProcessOneTurn failed: %v", err)
470 }
471
472 // Verify we got expected messages
473 // Note: User messages are recorded by ConversationManager, not by Loop,
474 // so we only expect the assistant response to be recorded here
475 if len(messages) < 1 {
476 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
477 }
478
479 // Should have assistant response
480 if messages[0].Role != llm.MessageRoleAssistant {
481 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
482 }
483
484 t.Log("Keyword tool test passed - no nil pointer dereference occurred")
485}
486
487func TestKeywordToolWithLLMProvider(t *testing.T) {
488 // Create a temp directory with a test file to search
489 tempDir := t.TempDir()
490 testFile := filepath.Join(tempDir, "test.txt")
491 if err := os.WriteFile(testFile, []byte("this is a test file\n"), 0o644); err != nil {
492 t.Fatal(err)
493 }
494
495 // Create a predictable service for testing
496 predictableService := NewPredictableService()
497
498 // Create a simple LLM provider for testing
499 llmProvider := &testLLMProvider{
500 service: predictableService,
501 models: []string{"predictable"},
502 }
503
504 // Create keyword tool with provider - use temp dir instead of /
505 keywordTool := claudetool.NewKeywordToolWithWorkingDir(llmProvider, claudetool.NewMutableWorkingDir(tempDir))
506 tool := keywordTool.Tool()
507
508 // Test input
509 input := `{"query": "test search", "search_terms": ["test"]}`
510
511 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
512 defer cancel()
513 result := tool.Run(ctx, json.RawMessage(input))
514
515 // Should get a result without error (even though ripgrep will fail in test environment)
516 // The important thing is that it doesn't crash with nil pointer dereference
517 if result.Error != nil {
518 t.Logf("Expected error in test environment (no ripgrep): %v", result.Error)
519 // This is expected in test environment
520 } else {
521 t.Log("Keyword tool executed successfully")
522 if len(result.LLMContent) == 0 {
523 t.Error("Expected some content in result")
524 }
525 }
526}
527
// testLLMProvider implements LLMServiceProvider for testing.
// It serves the same llm.Service for every model ID listed in models and
// rejects all others.
type testLLMProvider struct {
	service llm.Service // the service handed out for any recognized model ID
	models  []string    // model IDs this provider claims to support
}
533
534func (t *testLLMProvider) GetService(modelID string) (llm.Service, error) {
535 for _, model := range t.models {
536 if model == modelID {
537 return t.service, nil
538 }
539 }
540 return nil, fmt.Errorf("model %s not available", modelID)
541}
542
// GetAvailableModels returns the provider's configured model IDs.
// It returns the internal slice itself (not a copy), which is acceptable
// for this test-only type.
func (t *testLLMProvider) GetAvailableModels() []string {
	return t.models
}
546
// TestInsertMissingToolResults is a table-driven test for
// Loop.insertMissingToolResults. Each case feeds a request whose last two
// messages may contain a tool_use without a matching tool_result, and checks
// the content of the final message afterwards:
//   - wantLen is the expected number of content items in the last message
//     (original items plus any synthetic tool_results inserted).
//   - wantText, when non-empty, is the text of a synthetic tool_result that
//     must be present and marked ToolError=true.
func TestInsertMissingToolResults(t *testing.T) {
	tests := []struct {
		name     string
		messages []llm.Message
		wantLen  int
		wantText string
	}{
		{
			// Assistant used no tools, so nothing should be inserted.
			name: "no missing tool results",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Let me help you"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Thanks"},
					},
				},
			},
			wantLen:  1,
			wantText: "", // No synthetic result expected
		},
		{
			// One tool_use with no tool_result in the following user message:
			// a synthetic error result should be inserted.
			name: "missing tool result - should insert synthetic result",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Error occurred"},
					},
				},
			},
			wantLen:  2, // Should have synthetic tool_result + error message
			wantText: "not executed; retry possible",
		},
		{
			// Two unanswered tool_uses: one synthetic result per tool_use.
			name: "multiple missing tool results",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use multiple tools"},
						{Type: llm.ContentTypeToolUse, ID: "tool_1", ToolName: "bash"},
						{Type: llm.ContentTypeToolUse, ID: "tool_2", ToolName: "read"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Error occurred"},
					},
				},
			},
			wantLen: 3, // Should have 2 synthetic tool_results + error message
		},
		{
			// The tool_use already has its tool_result: leave untouched.
			name: "has tool results - should not insert",
			messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{
							Type:       llm.ContentTypeToolResult,
							ToolUseID:  "tool_123",
							ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "result"}},
						},
					},
				},
			},
			wantLen: 1, // Should not insert anything
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			loop := NewLoop(Config{
				LLM:     NewPredictableService(),
				History: []llm.Message{},
			})

			req := &llm.Request{
				Messages: tt.messages,
			}

			loop.insertMissingToolResults(req)

			// Inspect the last message: synthetic results are prepended or
			// appended to it, so its content count reflects the insertions.
			got := req.Messages[len(req.Messages)-1]
			if len(got.Content) != tt.wantLen {
				t.Errorf("expected %d content items, got %d", tt.wantLen, len(got.Content))
			}

			if tt.wantText != "" {
				// Find the synthetic tool result
				found := false
				for _, c := range got.Content {
					if c.Type == llm.ContentTypeToolResult && len(c.ToolResult) > 0 {
						if c.ToolResult[0].Text == tt.wantText {
							found = true
							// Synthetic results must be flagged as errors so
							// the model knows the tool did not actually run.
							if !c.ToolError {
								t.Error("synthetic tool result should have ToolError=true")
							}
							break
						}
					}
				}
				if !found {
					t.Errorf("expected to find synthetic tool result with text %q", tt.wantText)
				}
			}
		})
	}
}
677
// TestInsertMissingToolResultsWithEdgeCases covers the edge cases of
// Loop.insertMissingToolResults: tool_uses that are not in the last two
// messages, empty or single-message requests, and messages in an unexpected
// role order. Each subtest builds a request, runs the repair pass, and
// asserts either that a synthetic tool_result appeared or that nothing was
// modified.
func TestInsertMissingToolResultsWithEdgeCases(t *testing.T) {
	// Test for the bug: when an assistant error message is recorded after a tool_use
	// but before tool execution, the tool_use is "hidden" from insertMissingToolResults
	// because it only checks the last two messages.
	t.Run("tool_use hidden by subsequent assistant message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		// Scenario:
		// 1. LLM responds with tool_use
		// 2. Something fails, error message recorded (assistant message)
		// 3. User sends new message
		// The tool_use in message 0 is never followed by a tool_result
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll run a command"},
						{Type: llm.ContentTypeToolUse, ID: "tool_hidden", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "LLM request failed: some error"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Please try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The function should have inserted a tool_result for tool_hidden
		// It should be inserted as a user message after the assistant message with tool_use
		// Since we can't insert in the middle, we need to ensure the history is valid

		// Check that there's a tool_result for tool_hidden somewhere in the messages
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_hidden" {
					found = true
					// Synthetic results must carry ToolError=true so the
					// model knows the tool never ran.
					if !c.ToolError {
						t.Error("synthetic tool result should have ToolError=true")
					}
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_hidden - the bug is that tool_use is hidden by subsequent assistant message")
		}
	})

	// Test for tool_use in earlier message (not the second-to-last)
	t.Run("tool_use in earlier message without result", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Do something"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_earlier", ToolName: "bash"},
					},
				},
				// Missing: user message with tool_result for tool_earlier
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Something went wrong"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// Should have inserted a tool_result for tool_earlier
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_earlier" {
					found = true
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_earlier")
		}
	})

	// Degenerate input: no messages at all. Only checks for absence of panic.
	t.Run("empty message list", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{},
		}

		loop.insertMissingToolResults(req)
		// Should not panic
	})

	// A lone user message has no tool_use to repair: must be left as-is.
	t.Run("single message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not panic, should not modify
		if len(req.Messages[0].Content) != 1 {
			t.Error("should not modify single message")
		}
	})

	// user-then-assistant (no trailing user turn): nothing to repair.
	t.Run("wrong role order - user then assistant", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
				{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not modify when roles are wrong order
		if len(req.Messages[1].Content) != 1 {
			t.Error("should not modify when roles are in wrong order")
		}
	})
}
849
// TestInsertMissingToolResults_EmptyAssistantContent verifies that
// insertMissingToolResults backfills placeholder text into empty assistant
// messages that are NOT the final message, while leaving a trailing empty
// assistant message (which the API permits) untouched.
func TestInsertMissingToolResults_EmptyAssistantContent(t *testing.T) {
	// Test for the bug: when an assistant message has empty content (can happen when
	// the model ends its turn without producing any output), we need to add placeholder
	// content if it's not the last message. Otherwise the API will reject with:
	// "messages.N: all messages must have non-empty content except for the optional
	// final assistant message"

	t.Run("empty assistant content in middle of conversation", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "run git fetch"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"}},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{{
						Type:       llm.ContentTypeToolResult,
						ToolUseID:  "tool1",
						ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "success"}},
					}},
				},
				{
					// Empty assistant message - this can happen when model ends turn without output
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "next question"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message (index 3) should now have placeholder content
		if len(req.Messages[3].Content) == 0 {
			t.Error("expected placeholder content to be added to empty assistant message")
		}
		if req.Messages[3].Content[0].Type != llm.ContentTypeText {
			t.Error("expected placeholder to be text content")
		}
		// The exact placeholder text is part of the contract with the API.
		if req.Messages[3].Content[0].Text != "(no response)" {
			t.Errorf("expected placeholder text '(no response)', got %q", req.Messages[3].Content[0].Text)
		}
	})

	t.Run("empty assistant content at end of conversation - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					// Empty assistant message at end is allowed by the API
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message at the end should NOT be modified
		// because the API allows empty content for the final assistant message
		if len(req.Messages[1].Content) != 0 {
			t.Error("expected final empty assistant message to remain empty")
		}
	})

	t.Run("non-empty assistant content - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi there"}},
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "goodbye"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The assistant message should not be modified
		if len(req.Messages[1].Content) != 1 {
			t.Errorf("expected assistant message to have 1 content item, got %d", len(req.Messages[1].Content))
		}
		if req.Messages[1].Content[0].Text != "hi there" {
			t.Errorf("expected assistant message text 'hi there', got %q", req.Messages[1].Content[0].Text)
		}
	})
}
972
// TestGitStateTracking verifies that the loop snapshots git state on
// construction, does NOT fire OnGitStateChange when nothing changed between
// turns, and fires it exactly once after a new commit is made in the
// working directory. runGit is a test helper defined elsewhere in this
// package (presumably shelling out to git — confirm there).
func TestGitStateTracking(t *testing.T) {
	// Create a test repo
	tmpDir := t.TempDir()

	// Initialize git repo
	runGit(t, tmpDir, "init")
	runGit(t, tmpDir, "config", "user.email", "test@test.com")
	runGit(t, tmpDir, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(tmpDir, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "initial")

	// Track git state changes. The mutex guards gitStateChanges because the
	// OnGitStateChange callback may run on the loop's goroutine.
	var mu sync.Mutex
	var gitStateChanges []*gitstate.GitState

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			mu.Lock()
			gitStateChanges = append(gitStateChanges, state)
			mu.Unlock()
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Verify initial state was captured
	if loop.lastGitState == nil {
		t.Fatal("expected initial git state to be captured")
	}
	if !loop.lastGitState.IsRepo {
		t.Error("expected IsRepo to be true")
	}

	// Process a turn (no state change should occur)
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	// No state change should have occurred
	mu.Lock()
	numChanges := len(gitStateChanges)
	mu.Unlock()
	if numChanges != 0 {
		t.Errorf("expected no git state changes, got %d", numChanges)
	}

	// Now make a commit
	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "update")

	// Process another turn - this should detect the commit change
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello again"}},
	})

	err = loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	// Now a state change should have been detected
	mu.Lock()
	numChanges = len(gitStateChanges)
	mu.Unlock()
	if numChanges != 1 {
		t.Errorf("expected 1 git state change, got %d", numChanges)
	}
}
1065
1066func TestGitStateTrackingWorktree(t *testing.T) {
1067 tmpDir, err := filepath.EvalSymlinks(t.TempDir())
1068 if err != nil {
1069 t.Fatal(err)
1070 }
1071 mainRepo := filepath.Join(tmpDir, "main")
1072 worktreeDir := filepath.Join(tmpDir, "worktree")
1073
1074 // Create main repo
1075 if err := os.MkdirAll(mainRepo, 0o755); err != nil {
1076 t.Fatal(err)
1077 }
1078 runGit(t, mainRepo, "init")
1079 runGit(t, mainRepo, "config", "user.email", "test@test.com")
1080 runGit(t, mainRepo, "config", "user.name", "Test")
1081
1082 // Create initial commit
1083 testFile := filepath.Join(mainRepo, "test.txt")
1084 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1085 t.Fatal(err)
1086 }
1087 runGit(t, mainRepo, "add", ".")
1088 runGit(t, mainRepo, "commit", "-m", "initial")
1089
1090 // Create a worktree
1091 runGit(t, mainRepo, "worktree", "add", "-b", "feature", worktreeDir)
1092
1093 // Track git state changes in the worktree
1094 var mu sync.Mutex
1095 var gitStateChanges []*gitstate.GitState
1096
1097 loop := NewLoop(Config{
1098 LLM: NewPredictableService(),
1099 History: []llm.Message{},
1100 WorkingDir: worktreeDir,
1101 GetWorkingDir: func() string { return worktreeDir },
1102 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1103 mu.Lock()
1104 gitStateChanges = append(gitStateChanges, state)
1105 mu.Unlock()
1106 },
1107 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1108 return nil
1109 },
1110 })
1111
1112 // Verify initial state
1113 if loop.lastGitState == nil {
1114 t.Fatal("expected initial git state to be captured")
1115 }
1116 if loop.lastGitState.Branch != "feature" {
1117 t.Errorf("expected branch 'feature', got %q", loop.lastGitState.Branch)
1118 }
1119 if loop.lastGitState.Worktree != worktreeDir {
1120 t.Errorf("expected worktree %q, got %q", worktreeDir, loop.lastGitState.Worktree)
1121 }
1122
1123 // Make a commit in the worktree
1124 worktreeFile := filepath.Join(worktreeDir, "feature.txt")
1125 if err := os.WriteFile(worktreeFile, []byte("feature content"), 0o644); err != nil {
1126 t.Fatal(err)
1127 }
1128 runGit(t, worktreeDir, "add", ".")
1129 runGit(t, worktreeDir, "commit", "-m", "feature commit")
1130
1131 // Process a turn to detect the change
1132 loop.QueueUserMessage(llm.Message{
1133 Role: llm.MessageRoleUser,
1134 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1135 })
1136
1137 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1138 defer cancel()
1139
1140 err = loop.ProcessOneTurn(ctx)
1141 if err != nil {
1142 t.Fatalf("ProcessOneTurn failed: %v", err)
1143 }
1144
1145 mu.Lock()
1146 numChanges := len(gitStateChanges)
1147 mu.Unlock()
1148
1149 if numChanges != 1 {
1150 t.Errorf("expected 1 git state change in worktree, got %d", numChanges)
1151 }
1152}
1153
// runGit executes a git command in dir and fails the test on any error,
// logging the command's combined output for diagnosis.
func runGit(t *testing.T, dir string, args ...string) {
	t.Helper()
	// Inject --no-verify into commits so repository hooks never run.
	if len(args) > 0 && args[0] == "commit" {
		args = append([]string{"commit", "--no-verify"}, args[1:]...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	if output, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("git %v failed: %v\n%s", args, err, output)
	}
}
1169
1170func TestPredictableServiceTokenContextWindow(t *testing.T) {
1171 service := NewPredictableService()
1172 window := service.TokenContextWindow()
1173 if window != 200000 {
1174 t.Errorf("expected TokenContextWindow to return 200000, got %d", window)
1175 }
1176}
1177
1178func TestPredictableServiceMaxImageDimension(t *testing.T) {
1179 service := NewPredictableService()
1180 dimension := service.MaxImageDimension()
1181 if dimension != 2000 {
1182 t.Errorf("expected MaxImageDimension to return 2000, got %d", dimension)
1183 }
1184}
1185
1186func TestPredictableServiceThinking(t *testing.T) {
1187 service := NewPredictableService()
1188
1189 ctx := context.Background()
1190 req := &llm.Request{
1191 Messages: []llm.Message{
1192 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "think: This is a test thought"}}},
1193 },
1194 }
1195
1196 resp, err := service.Do(ctx, req)
1197 if err != nil {
1198 t.Fatalf("thinking test failed: %v", err)
1199 }
1200
1201 // Now returns EndTurn since thinking is content, not a tool
1202 if resp.StopReason != llm.StopReasonEndTurn {
1203 t.Errorf("expected end turn stop reason, got %v", resp.StopReason)
1204 }
1205
1206 // Find the thinking content
1207 var thinkingContent *llm.Content
1208 for _, content := range resp.Content {
1209 if content.Type == llm.ContentTypeThinking {
1210 thinkingContent = &content
1211 break
1212 }
1213 }
1214
1215 if thinkingContent == nil {
1216 t.Fatal("no thinking content found")
1217 }
1218
1219 // Check thinking content contains the thoughts
1220 if thinkingContent.Thinking != "This is a test thought" {
1221 t.Errorf("expected thinking 'This is a test thought', got '%v'", thinkingContent.Thinking)
1222 }
1223}
1224
1225func TestPredictableServicePatchTool(t *testing.T) {
1226 service := NewPredictableService()
1227
1228 ctx := context.Background()
1229 req := &llm.Request{
1230 Messages: []llm.Message{
1231 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch: /tmp/test.txt"}}},
1232 },
1233 }
1234
1235 resp, err := service.Do(ctx, req)
1236 if err != nil {
1237 t.Fatalf("patch tool test failed: %v", err)
1238 }
1239
1240 if resp.StopReason != llm.StopReasonToolUse {
1241 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1242 }
1243
1244 // Find the tool use content
1245 var toolUseContent *llm.Content
1246 for _, content := range resp.Content {
1247 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1248 toolUseContent = &content
1249 break
1250 }
1251 }
1252
1253 if toolUseContent == nil {
1254 t.Fatal("no patch tool use content found")
1255 }
1256
1257 // Check tool input contains the file path
1258 var toolInput map[string]interface{}
1259 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1260 t.Fatalf("failed to parse tool input: %v", err)
1261 }
1262
1263 if toolInput["path"] != "/tmp/test.txt" {
1264 t.Errorf("expected path '/tmp/test.txt', got '%v'", toolInput["path"])
1265 }
1266}
1267
1268func TestPredictableServiceMalformedPatchTool(t *testing.T) {
1269 service := NewPredictableService()
1270
1271 ctx := context.Background()
1272 req := &llm.Request{
1273 Messages: []llm.Message{
1274 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch bad json"}}},
1275 },
1276 }
1277
1278 resp, err := service.Do(ctx, req)
1279 if err != nil {
1280 t.Fatalf("malformed patch tool test failed: %v", err)
1281 }
1282
1283 if resp.StopReason != llm.StopReasonToolUse {
1284 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1285 }
1286
1287 // Find the tool use content
1288 var toolUseContent *llm.Content
1289 for _, content := range resp.Content {
1290 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1291 toolUseContent = &content
1292 break
1293 }
1294 }
1295
1296 if toolUseContent == nil {
1297 t.Fatal("no patch tool use content found")
1298 }
1299
1300 // Check that the tool input is malformed JSON (as expected)
1301 toolInputStr := string(toolUseContent.ToolInput)
1302 if !strings.Contains(toolInputStr, "parameter name") {
1303 t.Errorf("expected malformed JSON in tool input, got: %s", toolInputStr)
1304 }
1305}
1306
1307func TestPredictableServiceError(t *testing.T) {
1308 service := NewPredictableService()
1309
1310 ctx := context.Background()
1311 req := &llm.Request{
1312 Messages: []llm.Message{
1313 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "error: test error"}}},
1314 },
1315 }
1316
1317 resp, err := service.Do(ctx, req)
1318 if err == nil {
1319 t.Fatal("expected error, got nil")
1320 }
1321
1322 if !strings.Contains(err.Error(), "predictable error: test error") {
1323 t.Errorf("expected error message to contain 'predictable error: test error', got: %v", err)
1324 }
1325
1326 if resp != nil {
1327 t.Error("expected response to be nil when error occurs")
1328 }
1329}
1330
1331func TestPredictableServiceRequestTracking(t *testing.T) {
1332 service := NewPredictableService()
1333
1334 // Initially no requests
1335 requests := service.GetRecentRequests()
1336 if requests != nil {
1337 t.Errorf("expected nil requests initially, got %v", requests)
1338 }
1339
1340 lastReq := service.GetLastRequest()
1341 if lastReq != nil {
1342 t.Errorf("expected nil last request initially, got %v", lastReq)
1343 }
1344
1345 // Make a request
1346 ctx := context.Background()
1347 req := &llm.Request{
1348 Messages: []llm.Message{
1349 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
1350 },
1351 }
1352
1353 _, err := service.Do(ctx, req)
1354 if err != nil {
1355 t.Fatalf("Do failed: %v", err)
1356 }
1357
1358 // Check that request was tracked
1359 requests = service.GetRecentRequests()
1360 if len(requests) != 1 {
1361 t.Errorf("expected 1 request, got %d", len(requests))
1362 }
1363
1364 lastReq = service.GetLastRequest()
1365 if lastReq == nil {
1366 t.Fatal("expected last request to be non-nil")
1367 }
1368
1369 if len(lastReq.Messages) != 1 {
1370 t.Errorf("expected 1 message in last request, got %d", len(lastReq.Messages))
1371 }
1372
1373 // Test clearing requests
1374 service.ClearRequests()
1375 requests = service.GetRecentRequests()
1376 if requests != nil {
1377 t.Errorf("expected nil requests after clearing, got %v", requests)
1378 }
1379
1380 lastReq = service.GetLastRequest()
1381 if lastReq != nil {
1382 t.Errorf("expected nil last request after clearing, got %v", lastReq)
1383 }
1384
1385 // Test that only last 10 requests are kept
1386 for i := 0; i < 15; i++ {
1387 testReq := &llm.Request{
1388 Messages: []llm.Message{
1389 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: fmt.Sprintf("test %d", i)}}},
1390 },
1391 }
1392 _, err := service.Do(ctx, testReq)
1393 if err != nil {
1394 t.Fatalf("Do failed on iteration %d: %v", i, err)
1395 }
1396 }
1397
1398 requests = service.GetRecentRequests()
1399 if len(requests) != 10 {
1400 t.Errorf("expected 10 requests (last 10), got %d", len(requests))
1401 }
1402
1403 // Check that we have requests 5-14 (0-indexed)
1404 for i, req := range requests {
1405 expectedText := fmt.Sprintf("test %d", i+5)
1406 if len(req.Messages) == 0 || len(req.Messages[0].Content) == 0 {
1407 t.Errorf("request %d has no content", i)
1408 continue
1409 }
1410 if req.Messages[0].Content[0].Text != expectedText {
1411 t.Errorf("expected request %d to have text '%s', got '%s'", i, expectedText, req.Messages[0].Content[0].Text)
1412 }
1413 }
1414}
1415
1416func TestPredictableServiceScreenshotTool(t *testing.T) {
1417 service := NewPredictableService()
1418
1419 ctx := context.Background()
1420 req := &llm.Request{
1421 Messages: []llm.Message{
1422 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "screenshot: .test-class"}}},
1423 },
1424 }
1425
1426 resp, err := service.Do(ctx, req)
1427 if err != nil {
1428 t.Fatalf("screenshot tool test failed: %v", err)
1429 }
1430
1431 if resp.StopReason != llm.StopReasonToolUse {
1432 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1433 }
1434
1435 // Find the tool use content
1436 var toolUseContent *llm.Content
1437 for _, content := range resp.Content {
1438 if content.Type == llm.ContentTypeToolUse && content.ToolName == "browser_take_screenshot" {
1439 toolUseContent = &content
1440 break
1441 }
1442 }
1443
1444 if toolUseContent == nil {
1445 t.Fatal("no screenshot tool use content found")
1446 }
1447
1448 // Check tool input contains the selector
1449 var toolInput map[string]interface{}
1450 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1451 t.Fatalf("failed to parse tool input: %v", err)
1452 }
1453
1454 if toolInput["selector"] != ".test-class" {
1455 t.Errorf("expected selector '.test-class', got '%v'", toolInput["selector"])
1456 }
1457}
1458
1459func TestPredictableServiceToolSmorgasbord(t *testing.T) {
1460 service := NewPredictableService()
1461
1462 ctx := context.Background()
1463 req := &llm.Request{
1464 Messages: []llm.Message{
1465 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "tool smorgasbord"}}},
1466 },
1467 }
1468
1469 resp, err := service.Do(ctx, req)
1470 if err != nil {
1471 t.Fatalf("tool smorgasbord test failed: %v", err)
1472 }
1473
1474 if resp.StopReason != llm.StopReasonToolUse {
1475 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1476 }
1477
1478 // Count the tool use contents
1479 toolUseCount := 0
1480 for _, content := range resp.Content {
1481 if content.Type == llm.ContentTypeToolUse {
1482 toolUseCount++
1483 }
1484 }
1485
1486 // Should have at least several tool uses
1487 if toolUseCount < 5 {
1488 t.Errorf("expected at least 5 tool uses, got %d", toolUseCount)
1489 }
1490}
1491
1492func TestProcessLLMRequestError(t *testing.T) {
1493 // Test error handling when LLM service returns an error
1494 errorService := &errorLLMService{err: fmt.Errorf("test LLM error")}
1495
1496 var recordedMessages []llm.Message
1497 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1498 recordedMessages = append(recordedMessages, message)
1499 return nil
1500 }
1501
1502 loop := NewLoop(Config{
1503 LLM: errorService,
1504 History: []llm.Message{},
1505 Tools: []*llm.Tool{},
1506 RecordMessage: recordFunc,
1507 })
1508
1509 // Queue a user message
1510 userMessage := llm.Message{
1511 Role: llm.MessageRoleUser,
1512 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1513 }
1514 loop.QueueUserMessage(userMessage)
1515
1516 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1517 defer cancel()
1518
1519 err := loop.ProcessOneTurn(ctx)
1520 if err == nil {
1521 t.Fatal("expected error from ProcessOneTurn, got nil")
1522 }
1523
1524 if !strings.Contains(err.Error(), "LLM request failed") {
1525 t.Errorf("expected error to contain 'LLM request failed', got: %v", err)
1526 }
1527
1528 // Check that error message was recorded
1529 if len(recordedMessages) < 1 {
1530 t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1531 }
1532
1533 if recordedMessages[0].Role != llm.MessageRoleAssistant {
1534 t.Errorf("expected recorded message to be assistant role, got %s", recordedMessages[0].Role)
1535 }
1536
1537 if len(recordedMessages[0].Content) != 1 {
1538 t.Fatalf("expected 1 content item in recorded message, got %d", len(recordedMessages[0].Content))
1539 }
1540
1541 if recordedMessages[0].Content[0].Type != llm.ContentTypeText {
1542 t.Errorf("expected text content, got %s", recordedMessages[0].Content[0].Type)
1543 }
1544
1545 if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1546 t.Errorf("expected error message to contain 'LLM request failed', got: %s", recordedMessages[0].Content[0].Text)
1547 }
1548
1549 // Verify EndOfTurn is set so the agent working state is properly updated
1550 if !recordedMessages[0].EndOfTurn {
1551 t.Error("expected error message to have EndOfTurn=true so agent working state is updated")
1552 }
1553}
1554
1555// errorLLMService is a test LLM service that always returns an error
1556type errorLLMService struct {
1557 err error
1558}
1559
1560func (e *errorLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1561 return nil, e.err
1562}
1563
1564func (e *errorLLMService) TokenContextWindow() int {
1565 return 200000
1566}
1567
1568func (e *errorLLMService) MaxImageDimension() int {
1569 return 2000
1570}
1571
1572// retryableLLMService fails with a retryable error a specified number of times, then succeeds
1573type retryableLLMService struct {
1574 failuresRemaining int
1575 callCount int
1576 mu sync.Mutex
1577}
1578
1579func (r *retryableLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1580 r.mu.Lock()
1581 r.callCount++
1582 if r.failuresRemaining > 0 {
1583 r.failuresRemaining--
1584 r.mu.Unlock()
1585 return nil, fmt.Errorf("connection error: EOF")
1586 }
1587 r.mu.Unlock()
1588 return &llm.Response{
1589 Content: []llm.Content{
1590 {Type: llm.ContentTypeText, Text: "Success after retry"},
1591 },
1592 StopReason: llm.StopReasonEndTurn,
1593 }, nil
1594}
1595
1596func (r *retryableLLMService) TokenContextWindow() int {
1597 return 200000
1598}
1599
1600func (r *retryableLLMService) MaxImageDimension() int {
1601 return 2000
1602}
1603
1604func (r *retryableLLMService) getCallCount() int {
1605 r.mu.Lock()
1606 defer r.mu.Unlock()
1607 return r.callCount
1608}
1609
1610func TestLLMRequestRetryOnEOF(t *testing.T) {
1611 // Test that LLM requests are retried on EOF errors
1612 retryService := &retryableLLMService{failuresRemaining: 1}
1613
1614 var recordedMessages []llm.Message
1615 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1616 recordedMessages = append(recordedMessages, message)
1617 return nil
1618 }
1619
1620 loop := NewLoop(Config{
1621 LLM: retryService,
1622 History: []llm.Message{},
1623 Tools: []*llm.Tool{},
1624 RecordMessage: recordFunc,
1625 })
1626
1627 // Queue a user message
1628 userMessage := llm.Message{
1629 Role: llm.MessageRoleUser,
1630 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1631 }
1632 loop.QueueUserMessage(userMessage)
1633
1634 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1635 defer cancel()
1636
1637 err := loop.ProcessOneTurn(ctx)
1638 if err != nil {
1639 t.Fatalf("expected no error after retry, got: %v", err)
1640 }
1641
1642 // Should have been called twice (1 failure + 1 success)
1643 if retryService.getCallCount() != 2 {
1644 t.Errorf("expected 2 LLM calls (retry), got %d", retryService.getCallCount())
1645 }
1646
1647 // Check that success message was recorded
1648 if len(recordedMessages) != 1 {
1649 t.Fatalf("expected 1 recorded message (success), got %d", len(recordedMessages))
1650 }
1651
1652 if !strings.Contains(recordedMessages[0].Content[0].Text, "Success after retry") {
1653 t.Errorf("expected success message, got: %s", recordedMessages[0].Content[0].Text)
1654 }
1655}
1656
1657func TestLLMRequestRetryExhausted(t *testing.T) {
1658 // Test that after max retries, error is returned
1659 retryService := &retryableLLMService{failuresRemaining: 10} // More than maxRetries
1660
1661 var recordedMessages []llm.Message
1662 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1663 recordedMessages = append(recordedMessages, message)
1664 return nil
1665 }
1666
1667 loop := NewLoop(Config{
1668 LLM: retryService,
1669 History: []llm.Message{},
1670 Tools: []*llm.Tool{},
1671 RecordMessage: recordFunc,
1672 })
1673
1674 userMessage := llm.Message{
1675 Role: llm.MessageRoleUser,
1676 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1677 }
1678 loop.QueueUserMessage(userMessage)
1679
1680 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1681 defer cancel()
1682
1683 err := loop.ProcessOneTurn(ctx)
1684 if err == nil {
1685 t.Fatal("expected error after exhausting retries")
1686 }
1687
1688 // Should have been called maxRetries times (2)
1689 if retryService.getCallCount() != 2 {
1690 t.Errorf("expected 2 LLM calls (maxRetries), got %d", retryService.getCallCount())
1691 }
1692
1693 // Check error message was recorded
1694 if len(recordedMessages) != 1 {
1695 t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1696 }
1697
1698 if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1699 t.Errorf("expected error message, got: %s", recordedMessages[0].Content[0].Text)
1700 }
1701}
1702
1703func TestIsRetryableError(t *testing.T) {
1704 tests := []struct {
1705 name string
1706 err error
1707 retryable bool
1708 }{
1709 {"nil error", nil, false},
1710 {"io.EOF", io.EOF, true},
1711 {"io.ErrUnexpectedEOF", io.ErrUnexpectedEOF, true},
1712 {"EOF error string", fmt.Errorf("EOF"), true},
1713 {"wrapped EOF", fmt.Errorf("connection error: EOF"), true},
1714 {"connection reset", fmt.Errorf("connection reset by peer"), true},
1715 {"connection refused", fmt.Errorf("connection refused"), true},
1716 {"timeout", fmt.Errorf("i/o timeout"), true},
1717 {"api error", fmt.Errorf("rate limit exceeded"), false},
1718 {"generic error", fmt.Errorf("something went wrong"), false},
1719 }
1720
1721 for _, tt := range tests {
1722 t.Run(tt.name, func(t *testing.T) {
1723 if got := isRetryableError(tt.err); got != tt.retryable {
1724 t.Errorf("isRetryableError(%v) = %v, want %v", tt.err, got, tt.retryable)
1725 }
1726 })
1727 }
1728}
1729
1730func TestCheckGitStateChange(t *testing.T) {
1731 // Create a test repo
1732 tmpDir := t.TempDir()
1733
1734 // Initialize git repo
1735 runGit(t, tmpDir, "init")
1736 runGit(t, tmpDir, "config", "user.email", "test@test.com")
1737 runGit(t, tmpDir, "config", "user.name", "Test")
1738
1739 // Create initial commit
1740 testFile := filepath.Join(tmpDir, "test.txt")
1741 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1742 t.Fatal(err)
1743 }
1744 runGit(t, tmpDir, "add", ".")
1745 runGit(t, tmpDir, "commit", "-m", "initial")
1746
1747 // Test with nil OnGitStateChange - should not panic
1748 loop := NewLoop(Config{
1749 LLM: NewPredictableService(),
1750 History: []llm.Message{},
1751 WorkingDir: tmpDir,
1752 GetWorkingDir: func() string { return tmpDir },
1753 // OnGitStateChange is nil
1754 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1755 return nil
1756 },
1757 })
1758
1759 // This should not panic
1760 loop.checkGitStateChange(context.Background())
1761
1762 // Test with actual callback
1763 var gitStateChanges []*gitstate.GitState
1764 loop = NewLoop(Config{
1765 LLM: NewPredictableService(),
1766 History: []llm.Message{},
1767 WorkingDir: tmpDir,
1768 GetWorkingDir: func() string { return tmpDir },
1769 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1770 gitStateChanges = append(gitStateChanges, state)
1771 },
1772 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1773 return nil
1774 },
1775 })
1776
1777 // Make a change
1778 if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1779 t.Fatal(err)
1780 }
1781 runGit(t, tmpDir, "add", ".")
1782 runGit(t, tmpDir, "commit", "-m", "update")
1783
1784 // Check git state change
1785 loop.checkGitStateChange(context.Background())
1786
1787 if len(gitStateChanges) != 1 {
1788 t.Errorf("expected 1 git state change, got %d", len(gitStateChanges))
1789 }
1790
1791 // Call again - should not trigger another change since state is the same
1792 loop.checkGitStateChange(context.Background())
1793
1794 if len(gitStateChanges) != 1 {
1795 t.Errorf("expected still 1 git state change (no new changes), got %d", len(gitStateChanges))
1796 }
1797}
1798
1799func TestHandleToolCallsWithMissingTool(t *testing.T) {
1800 var recordedMessages []llm.Message
1801 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1802 recordedMessages = append(recordedMessages, message)
1803 return nil
1804 }
1805
1806 loop := NewLoop(Config{
1807 LLM: NewPredictableService(),
1808 History: []llm.Message{},
1809 Tools: []*llm.Tool{}, // No tools registered
1810 RecordMessage: recordFunc,
1811 })
1812
1813 // Create content with a tool use for a tool that doesn't exist
1814 content := []llm.Content{
1815 {
1816 ID: "test_tool_123",
1817 Type: llm.ContentTypeToolUse,
1818 ToolName: "nonexistent_tool",
1819 ToolInput: json.RawMessage(`{"test": "input"}`),
1820 },
1821 }
1822
1823 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1824 defer cancel()
1825
1826 err := loop.handleToolCalls(ctx, content)
1827 if err != nil {
1828 t.Fatalf("handleToolCalls failed: %v", err)
1829 }
1830
1831 // Should have recorded a user message with tool result
1832 if len(recordedMessages) < 1 {
1833 t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1834 }
1835
1836 msg := recordedMessages[0]
1837 if msg.Role != llm.MessageRoleUser {
1838 t.Errorf("expected user role, got %s", msg.Role)
1839 }
1840
1841 if len(msg.Content) != 1 {
1842 t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1843 }
1844
1845 toolResult := msg.Content[0]
1846 if toolResult.Type != llm.ContentTypeToolResult {
1847 t.Errorf("expected tool result content, got %s", toolResult.Type)
1848 }
1849
1850 if toolResult.ToolUseID != "test_tool_123" {
1851 t.Errorf("expected tool use ID 'test_tool_123', got %s", toolResult.ToolUseID)
1852 }
1853
1854 if !toolResult.ToolError {
1855 t.Error("expected ToolError to be true")
1856 }
1857
1858 if len(toolResult.ToolResult) != 1 {
1859 t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1860 }
1861
1862 if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1863 t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1864 }
1865
1866 expectedText := "Tool 'nonexistent_tool' not found"
1867 if toolResult.ToolResult[0].Text != expectedText {
1868 t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1869 }
1870}
1871
1872func TestHandleToolCallsWithErrorTool(t *testing.T) {
1873 var recordedMessages []llm.Message
1874 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1875 recordedMessages = append(recordedMessages, message)
1876 return nil
1877 }
1878
1879 // Create a tool that always returns an error
1880 errorTool := &llm.Tool{
1881 Name: "error_tool",
1882 Description: "A tool that always errors",
1883 InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
1884 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
1885 return llm.ErrorToolOut(fmt.Errorf("intentional test error"))
1886 },
1887 }
1888
1889 loop := NewLoop(Config{
1890 LLM: NewPredictableService(),
1891 History: []llm.Message{},
1892 Tools: []*llm.Tool{errorTool},
1893 RecordMessage: recordFunc,
1894 })
1895
1896 // Create content with a tool use that will error
1897 content := []llm.Content{
1898 {
1899 ID: "error_tool_123",
1900 Type: llm.ContentTypeToolUse,
1901 ToolName: "error_tool",
1902 ToolInput: json.RawMessage(`{}`),
1903 },
1904 }
1905
1906 ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1907 defer cancel()
1908
1909 err := loop.handleToolCalls(ctx, content)
1910 if err != nil {
1911 t.Fatalf("handleToolCalls failed: %v", err)
1912 }
1913
1914 // Should have recorded a user message with tool result
1915 if len(recordedMessages) < 1 {
1916 t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1917 }
1918
1919 msg := recordedMessages[0]
1920 if msg.Role != llm.MessageRoleUser {
1921 t.Errorf("expected user role, got %s", msg.Role)
1922 }
1923
1924 if len(msg.Content) != 1 {
1925 t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1926 }
1927
1928 toolResult := msg.Content[0]
1929 if toolResult.Type != llm.ContentTypeToolResult {
1930 t.Errorf("expected tool result content, got %s", toolResult.Type)
1931 }
1932
1933 if toolResult.ToolUseID != "error_tool_123" {
1934 t.Errorf("expected tool use ID 'error_tool_123', got %s", toolResult.ToolUseID)
1935 }
1936
1937 if !toolResult.ToolError {
1938 t.Error("expected ToolError to be true")
1939 }
1940
1941 if len(toolResult.ToolResult) != 1 {
1942 t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1943 }
1944
1945 if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1946 t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1947 }
1948
1949 expectedText := "intentional test error"
1950 if toolResult.ToolResult[0].Text != expectedText {
1951 t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1952 }
1953}
1954
1955func TestMaxTokensTruncation(t *testing.T) {
1956 var mu sync.Mutex
1957 var recordedMessages []llm.Message
1958 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1959 mu.Lock()
1960 recordedMessages = append(recordedMessages, message)
1961 mu.Unlock()
1962 return nil
1963 }
1964
1965 service := NewPredictableService()
1966 loop := NewLoop(Config{
1967 LLM: service,
1968 History: []llm.Message{},
1969 Tools: []*llm.Tool{},
1970 RecordMessage: recordFunc,
1971 })
1972
1973 // Queue a user message that triggers max tokens truncation
1974 userMessage := llm.Message{
1975 Role: llm.MessageRoleUser,
1976 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
1977 }
1978 loop.QueueUserMessage(userMessage)
1979
1980 // Run the loop - it should stop after handling truncation
1981 ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
1982 defer cancel()
1983
1984 err := loop.Go(ctx)
1985 if err != context.DeadlineExceeded {
1986 t.Errorf("expected context deadline exceeded, got %v", err)
1987 }
1988
1989 // Check recorded messages
1990 mu.Lock()
1991 numMessages := len(recordedMessages)
1992 messages := make([]llm.Message, len(recordedMessages))
1993 copy(messages, recordedMessages)
1994 mu.Unlock()
1995
1996 // We should see two messages:
1997 // 1. The truncated message (with ExcludedFromContext=true) for cost tracking
1998 // 2. The truncation error message (with ErrorType=truncation)
1999 if numMessages != 2 {
2000 t.Errorf("Expected 2 recorded messages (truncated + error), got %d", numMessages)
2001 for i, msg := range messages {
2002 t.Logf("Message %d: Role=%v, EndOfTurn=%v, ExcludedFromContext=%v, ErrorType=%v",
2003 i, msg.Role, msg.EndOfTurn, msg.ExcludedFromContext, msg.ErrorType)
2004 }
2005 return
2006 }
2007
2008 // First message: truncated response (for cost tracking, excluded from context)
2009 truncatedMsg := messages[0]
2010 if truncatedMsg.Role != llm.MessageRoleAssistant {
2011 t.Errorf("Truncated message should be assistant, got %v", truncatedMsg.Role)
2012 }
2013 if !truncatedMsg.ExcludedFromContext {
2014 t.Error("Truncated message should have ExcludedFromContext=true")
2015 }
2016
2017 // Second message: truncation error
2018 errorMsg := messages[1]
2019 if errorMsg.Role != llm.MessageRoleAssistant {
2020 t.Errorf("Error message should be assistant, got %v", errorMsg.Role)
2021 }
2022 if !errorMsg.EndOfTurn {
2023 t.Error("Error message should have EndOfTurn=true")
2024 }
2025 if errorMsg.ErrorType != llm.ErrorTypeTruncation {
2026 t.Errorf("Error message should have ErrorType=truncation, got %v", errorMsg.ErrorType)
2027 }
2028 if errorMsg.ExcludedFromContext {
2029 t.Error("Error message should not be excluded from context")
2030 }
2031 if !strings.Contains(errorMsg.Content[0].Text, "SYSTEM ERROR") {
2032 t.Errorf("Error message should contain SYSTEM ERROR, got: %s", errorMsg.Content[0].Text)
2033 }
2034
2035 // Verify history contains user message + error message, but NOT the truncated response
2036 loop.mu.Lock()
2037 history := loop.history
2038 loop.mu.Unlock()
2039
2040 // History should have: user message + error message (the truncated response is NOT added to history)
2041 if len(history) != 2 {
2042 t.Errorf("History should have 2 messages (user + error), got %d", len(history))
2043 }
2044}
2045
2046//func TestInsertMissingToolResultsEdgeCases(t *testing.T) {
2047// loop := NewLoop(Config{
2048// LLM: NewPredictableService(),
2049// History: []llm.Message{},
2050// })
2051//
2052// // Test with nil request
2053// loop.insertMissingToolResults(nil) // Should not panic
2054//
2055// // Test with empty messages
2056// req := &llm.Request{Messages: []llm.Message{}}
2057// loop.insertMissingToolResults(req) // Should not panic
2058//
2059// // Test with single message
2060// req = &llm.Request{
2061// Messages: []llm.Message{
2062// {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
2063// },
2064// }
2065// loop.insertMissingToolResults(req) // Should not panic
2066// if len(req.Messages) != 1 {
2067// t.Errorf("expected 1 message, got %d", len(req.Messages))
2068// }
2069//
2070// // Test with multiple consecutive assistant messages with tool_use
2071// req = &llm.Request{
2072// Messages: []llm.Message{
2073// {
2074// Role: llm.MessageRoleAssistant,
2075// Content: []llm.Content{
2076// {Type: llm.ContentTypeText, Text: "First tool"},
2077// {Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"},
2078// },
2079// },
2080// {
2081// Role: llm.MessageRoleAssistant,
2082// Content: []llm.Content{
2083// {Type: llm.ContentTypeText, Text: "Second tool"},
2084// {Type: llm.ContentTypeToolUse, ID: "tool2", ToolName: "read"},
2085// },
2086// },
2087// {
2088// Role: llm.MessageRoleUser,
2089// Content: []llm.Content{
2090// {Type: llm.ContentTypeText, Text: "User response"},
2091// },
2092// },
2093// },
2094// }
2095//
2096// loop.insertMissingToolResults(req)
2097//
2098// // Should have inserted synthetic tool results for both tool_uses
2099// // The structure should be:
2100// // 0: First assistant message
2101// // 1: Synthetic user message with tool1 result
2102// // 2: Second assistant message
2103// // 3: Synthetic user message with tool2 result
2104// // 4: Original user message
2105// if len(req.Messages) != 5 {
2106// t.Fatalf("expected 5 messages after processing, got %d", len(req.Messages))
2107// }
2108//
2109// // Check first synthetic message
2110// if req.Messages[1].Role != llm.MessageRoleUser {
2111// t.Errorf("expected message 1 to be user role, got %s", req.Messages[1].Role)
2112// }
2113// foundTool1 := false
2114// for _, content := range req.Messages[1].Content {
2115// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool1" {
2116// foundTool1 = true
2117// break
2118// }
2119// }
2120// if !foundTool1 {
2121// t.Error("expected to find tool1 result in message 1")
2122// }
2123//
2124// // Check second synthetic message
2125// if req.Messages[3].Role != llm.MessageRoleUser {
2126// t.Errorf("expected message 3 to be user role, got %s", req.Messages[3].Role)
2127// }
2128// foundTool2 := false
2129// for _, content := range req.Messages[3].Content {
2130// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool2" {
2131// foundTool2 = true
2132// break
2133// }
2134//}
2135// if !foundTool2 {
2136// t.Error("expected to find tool2 result in message 3")
2137// }
2138//}