1package loop
2
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"shelley.exe.dev/claudetool"
	"shelley.exe.dev/gitstate"
	"shelley.exe.dev/llm"
)
19
20func TestNewLoop(t *testing.T) {
21 history := []llm.Message{
22 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
23 }
24 tools := []*llm.Tool{}
25 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
26 return nil
27 }
28
29 loop := NewLoop(Config{
30 LLM: NewPredictableService(),
31 History: history,
32 Tools: tools,
33 RecordMessage: recordFunc,
34 })
35 if loop == nil {
36 t.Fatal("NewLoop returned nil")
37 }
38
39 if len(loop.history) != 1 {
40 t.Errorf("expected history length 1, got %d", len(loop.history))
41 }
42
43 if len(loop.messageQueue) != 0 {
44 t.Errorf("expected empty message queue, got %d", len(loop.messageQueue))
45 }
46}
47
48func TestQueueUserMessage(t *testing.T) {
49 loop := NewLoop(Config{
50 LLM: NewPredictableService(),
51 History: []llm.Message{},
52 Tools: []*llm.Tool{},
53 })
54
55 message := llm.Message{
56 Role: llm.MessageRoleUser,
57 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Test message"}},
58 }
59
60 loop.QueueUserMessage(message)
61
62 loop.mu.Lock()
63 queueLen := len(loop.messageQueue)
64 loop.mu.Unlock()
65
66 if queueLen != 1 {
67 t.Errorf("expected message queue length 1, got %d", queueLen)
68 }
69}
70
71func TestPredictableService(t *testing.T) {
72 service := NewPredictableService()
73
74 // Test simple hello response
75 ctx := context.Background()
76 req := &llm.Request{
77 Messages: []llm.Message{
78 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
79 },
80 }
81
82 resp, err := service.Do(ctx, req)
83 if err != nil {
84 t.Fatalf("predictable service Do failed: %v", err)
85 }
86
87 if resp.Role != llm.MessageRoleAssistant {
88 t.Errorf("expected assistant role, got %v", resp.Role)
89 }
90
91 if len(resp.Content) == 0 {
92 t.Error("expected non-empty content")
93 }
94
95 if resp.Content[0].Type != llm.ContentTypeText {
96 t.Errorf("expected text content, got %v", resp.Content[0].Type)
97 }
98
99 if resp.Content[0].Text != "Well, hi there!" {
100 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
101 }
102}
103
104func TestPredictableServiceEcho(t *testing.T) {
105 service := NewPredictableService()
106
107 ctx := context.Background()
108 req := &llm.Request{
109 Messages: []llm.Message{
110 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "echo: foo"}}},
111 },
112 }
113
114 resp, err := service.Do(ctx, req)
115 if err != nil {
116 t.Fatalf("echo test failed: %v", err)
117 }
118
119 if resp.Content[0].Text != "foo" {
120 t.Errorf("expected 'foo', got '%s'", resp.Content[0].Text)
121 }
122
123 // Test another echo
124 req.Messages[0].Content[0].Text = "echo: hello world"
125 resp, err = service.Do(ctx, req)
126 if err != nil {
127 t.Fatalf("echo hello world test failed: %v", err)
128 }
129
130 if resp.Content[0].Text != "hello world" {
131 t.Errorf("expected 'hello world', got '%s'", resp.Content[0].Text)
132 }
133}
134
135func TestPredictableServiceBashTool(t *testing.T) {
136 service := NewPredictableService()
137
138 ctx := context.Background()
139 req := &llm.Request{
140 Messages: []llm.Message{
141 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: ls -la"}}},
142 },
143 }
144
145 resp, err := service.Do(ctx, req)
146 if err != nil {
147 t.Fatalf("bash tool test failed: %v", err)
148 }
149
150 if resp.StopReason != llm.StopReasonToolUse {
151 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
152 }
153
154 if len(resp.Content) != 2 {
155 t.Errorf("expected 2 content items (text + tool_use), got %d", len(resp.Content))
156 }
157
158 // Find the tool use content
159 var toolUseContent *llm.Content
160 for _, content := range resp.Content {
161 if content.Type == llm.ContentTypeToolUse {
162 toolUseContent = &content
163 break
164 }
165 }
166
167 if toolUseContent == nil {
168 t.Fatal("no tool use content found")
169 }
170
171 if toolUseContent.ToolName != "bash" {
172 t.Errorf("expected tool name 'bash', got '%s'", toolUseContent.ToolName)
173 }
174
175 // Check tool input contains the command
176 var toolInput map[string]interface{}
177 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
178 t.Fatalf("failed to parse tool input: %v", err)
179 }
180
181 if toolInput["command"] != "ls -la" {
182 t.Errorf("expected command 'ls -la', got '%v'", toolInput["command"])
183 }
184}
185
186func TestPredictableServiceDefaultResponse(t *testing.T) {
187 service := NewPredictableService()
188
189 ctx := context.Background()
190 req := &llm.Request{
191 Messages: []llm.Message{
192 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "some unknown input"}}},
193 },
194 }
195
196 resp, err := service.Do(ctx, req)
197 if err != nil {
198 t.Fatalf("default response test failed: %v", err)
199 }
200
201 if resp.Content[0].Text != "edit predictable.go to add a response for that one..." {
202 t.Errorf("unexpected default response: %s", resp.Content[0].Text)
203 }
204}
205
206func TestPredictableServiceDelay(t *testing.T) {
207 service := NewPredictableService()
208
209 ctx := context.Background()
210 req := &llm.Request{
211 Messages: []llm.Message{
212 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "delay: 0.1"}}},
213 },
214 }
215
216 start := time.Now()
217 resp, err := service.Do(ctx, req)
218 elapsed := time.Since(start)
219
220 if err != nil {
221 t.Fatalf("delay test failed: %v", err)
222 }
223
224 if elapsed < 100*time.Millisecond {
225 t.Errorf("expected delay of at least 100ms, got %v", elapsed)
226 }
227
228 if resp.Content[0].Text != "Delayed for 0.1 seconds" {
229 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
230 }
231}
232
233func TestLoopWithPredictableService(t *testing.T) {
234 var recordedMessages []llm.Message
235 var recordedUsages []llm.Usage
236
237 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
238 recordedMessages = append(recordedMessages, message)
239 recordedUsages = append(recordedUsages, usage)
240 return nil
241 }
242
243 service := NewPredictableService()
244 loop := NewLoop(Config{
245 LLM: service,
246 History: []llm.Message{},
247 Tools: []*llm.Tool{},
248 RecordMessage: recordFunc,
249 })
250
251 // Queue a user message that triggers a known response
252 userMessage := llm.Message{
253 Role: llm.MessageRoleUser,
254 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
255 }
256 loop.QueueUserMessage(userMessage)
257
258 // Run the loop with a short timeout
259 ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
260 defer cancel()
261
262 err := loop.Go(ctx)
263 if err != context.DeadlineExceeded {
264 t.Errorf("expected context deadline exceeded, got %v", err)
265 }
266
267 // Check that messages were recorded
268 if len(recordedMessages) < 1 {
269 t.Errorf("expected at least 1 recorded message, got %d", len(recordedMessages))
270 }
271
272 // Check usage tracking
273 usage := loop.GetUsage()
274 if usage.IsZero() {
275 t.Error("expected non-zero usage")
276 }
277}
278
279func TestLoopWithTools(t *testing.T) {
280 var toolCalls []string
281
282 testTool := &llm.Tool{
283 Name: "bash",
284 Description: "A test bash tool",
285 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"command": {"type": "string"}}}`),
286 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
287 toolCalls = append(toolCalls, string(input))
288 return llm.ToolOut{
289 LLMContent: []llm.Content{
290 {Type: llm.ContentTypeText, Text: "Command executed successfully"},
291 },
292 }
293 },
294 }
295
296 service := NewPredictableService()
297 loop := NewLoop(Config{
298 LLM: service,
299 History: []llm.Message{},
300 Tools: []*llm.Tool{testTool},
301 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
302 return nil
303 },
304 })
305
306 // Queue a user message that triggers the bash tool
307 userMessage := llm.Message{
308 Role: llm.MessageRoleUser,
309 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}},
310 }
311 loop.QueueUserMessage(userMessage)
312
313 // Run the loop with a short timeout
314 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
315 defer cancel()
316
317 err := loop.Go(ctx)
318 if err != context.DeadlineExceeded {
319 t.Errorf("expected context deadline exceeded, got %v", err)
320 }
321
322 // Check that the tool was called
323 if len(toolCalls) != 1 {
324 t.Errorf("expected 1 tool call, got %d", len(toolCalls))
325 }
326
327 if toolCalls[0] != `{"command":"echo hello"}` {
328 t.Errorf("unexpected tool call input: %s", toolCalls[0])
329 }
330}
331
332func TestGetHistory(t *testing.T) {
333 initialHistory := []llm.Message{
334 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
335 }
336
337 loop := NewLoop(Config{
338 LLM: NewPredictableService(),
339 History: initialHistory,
340 Tools: []*llm.Tool{},
341 })
342
343 history := loop.GetHistory()
344 if len(history) != 1 {
345 t.Errorf("expected history length 1, got %d", len(history))
346 }
347
348 // Modify returned slice to ensure it's a copy
349 history[0].Content[0].Text = "Modified"
350
351 // Original should be unchanged
352 original := loop.GetHistory()
353 if original[0].Content[0].Text != "Hello" {
354 t.Error("GetHistory should return a copy, not the original slice")
355 }
356}
357
358func TestLoopWithKeywordTool(t *testing.T) {
359 // Test that keyword tool doesn't crash with nil pointer dereference
360 service := NewPredictableService()
361
362 var messages []llm.Message
363 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
364 messages = append(messages, message)
365 return nil
366 }
367
368 // Add a mock keyword tool that doesn't actually search
369 tools := []*llm.Tool{
370 {
371 Name: "keyword_search",
372 Description: "Mock keyword search",
373 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
374 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
375 // Simple mock implementation
376 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
377 },
378 },
379 }
380
381 loop := NewLoop(Config{
382 LLM: service,
383 History: []llm.Message{},
384 Tools: tools,
385 RecordMessage: recordMessage,
386 })
387
388 // Send a user message that will trigger the default response
389 userMessage := llm.Message{
390 Role: llm.MessageRoleUser,
391 Content: []llm.Content{
392 {Type: llm.ContentTypeText, Text: "Please search for some files"},
393 },
394 }
395
396 loop.QueueUserMessage(userMessage)
397
398 // Process one turn
399 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
400 defer cancel()
401
402 err := loop.ProcessOneTurn(ctx)
403 if err != nil {
404 t.Fatalf("ProcessOneTurn failed: %v", err)
405 }
406
407 // Verify we got expected messages
408 // Note: User messages are recorded by ConversationManager, not by Loop,
409 // so we only expect the assistant response to be recorded here
410 if len(messages) < 1 {
411 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
412 }
413
414 // Should have assistant response
415 if messages[0].Role != llm.MessageRoleAssistant {
416 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
417 }
418}
419
420func TestLoopWithActualKeywordTool(t *testing.T) {
421 // Test that actual keyword tool works with Loop
422 service := NewPredictableService()
423
424 var messages []llm.Message
425 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
426 messages = append(messages, message)
427 return nil
428 }
429
430 // Use the actual keyword tool from claudetool package
431 // Note: We need to import it first
432 tools := []*llm.Tool{
433 // Add a simplified keyword tool to avoid file system dependencies in tests
434 {
435 Name: "keyword_search",
436 Description: "Search for files by keyword",
437 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
438 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
439 // Simple mock implementation - no context dependencies
440 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
441 },
442 },
443 }
444
445 loop := NewLoop(Config{
446 LLM: service,
447 History: []llm.Message{},
448 Tools: tools,
449 RecordMessage: recordMessage,
450 })
451
452 // Send a user message that will trigger the default response
453 userMessage := llm.Message{
454 Role: llm.MessageRoleUser,
455 Content: []llm.Content{
456 {Type: llm.ContentTypeText, Text: "Please search for some files"},
457 },
458 }
459
460 loop.QueueUserMessage(userMessage)
461
462 // Process one turn
463 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
464 defer cancel()
465
466 err := loop.ProcessOneTurn(ctx)
467 if err != nil {
468 t.Fatalf("ProcessOneTurn failed: %v", err)
469 }
470
471 // Verify we got expected messages
472 // Note: User messages are recorded by ConversationManager, not by Loop,
473 // so we only expect the assistant response to be recorded here
474 if len(messages) < 1 {
475 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
476 }
477
478 // Should have assistant response
479 if messages[0].Role != llm.MessageRoleAssistant {
480 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
481 }
482
483 t.Log("Keyword tool test passed - no nil pointer dereference occurred")
484}
485
486func TestKeywordToolWithLLMProvider(t *testing.T) {
487 // Create a temp directory with a test file to search
488 tempDir := t.TempDir()
489 testFile := filepath.Join(tempDir, "test.txt")
490 if err := os.WriteFile(testFile, []byte("this is a test file\n"), 0o644); err != nil {
491 t.Fatal(err)
492 }
493
494 // Create a predictable service for testing
495 predictableService := NewPredictableService()
496
497 // Create a simple LLM provider for testing
498 llmProvider := &testLLMProvider{
499 service: predictableService,
500 models: []string{"predictable"},
501 }
502
503 // Create keyword tool with provider - use temp dir instead of /
504 keywordTool := claudetool.NewKeywordToolWithWorkingDir(llmProvider, claudetool.NewMutableWorkingDir(tempDir))
505 tool := keywordTool.Tool()
506
507 // Test input
508 input := `{"query": "test search", "search_terms": ["test"]}`
509
510 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
511 defer cancel()
512 result := tool.Run(ctx, json.RawMessage(input))
513
514 // Should get a result without error (even though ripgrep will fail in test environment)
515 // The important thing is that it doesn't crash with nil pointer dereference
516 if result.Error != nil {
517 t.Logf("Expected error in test environment (no ripgrep): %v", result.Error)
518 // This is expected in test environment
519 } else {
520 t.Log("Keyword tool executed successfully")
521 if len(result.LLMContent) == 0 {
522 t.Error("Expected some content in result")
523 }
524 }
525}
526
// testLLMProvider implements LLMServiceProvider for testing.
// It maps every model ID in models to the single wrapped service.
type testLLMProvider struct {
	service llm.Service // service handed back for any recognized model ID
	models  []string    // model IDs this provider claims to support
}
532
533func (t *testLLMProvider) GetService(modelID string) (llm.Service, error) {
534 for _, model := range t.models {
535 if model == modelID {
536 return t.service, nil
537 }
538 }
539 return nil, fmt.Errorf("model %s not available", modelID)
540}
541
542func (t *testLLMProvider) GetAvailableModels() []string {
543 return t.models
544}
545
546func TestInsertMissingToolResults(t *testing.T) {
547 tests := []struct {
548 name string
549 messages []llm.Message
550 wantLen int
551 wantText string
552 }{
553 {
554 name: "no missing tool results",
555 messages: []llm.Message{
556 {
557 Role: llm.MessageRoleAssistant,
558 Content: []llm.Content{
559 {Type: llm.ContentTypeText, Text: "Let me help you"},
560 },
561 },
562 {
563 Role: llm.MessageRoleUser,
564 Content: []llm.Content{
565 {Type: llm.ContentTypeText, Text: "Thanks"},
566 },
567 },
568 },
569 wantLen: 1,
570 wantText: "", // No synthetic result expected
571 },
572 {
573 name: "missing tool result - should insert synthetic result",
574 messages: []llm.Message{
575 {
576 Role: llm.MessageRoleAssistant,
577 Content: []llm.Content{
578 {Type: llm.ContentTypeText, Text: "I'll use a tool"},
579 {Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
580 },
581 },
582 {
583 Role: llm.MessageRoleUser,
584 Content: []llm.Content{
585 {Type: llm.ContentTypeText, Text: "Error occurred"},
586 },
587 },
588 },
589 wantLen: 2, // Should have synthetic tool_result + error message
590 wantText: "not executed; retry possible",
591 },
592 {
593 name: "multiple missing tool results",
594 messages: []llm.Message{
595 {
596 Role: llm.MessageRoleAssistant,
597 Content: []llm.Content{
598 {Type: llm.ContentTypeText, Text: "I'll use multiple tools"},
599 {Type: llm.ContentTypeToolUse, ID: "tool_1", ToolName: "bash"},
600 {Type: llm.ContentTypeToolUse, ID: "tool_2", ToolName: "read"},
601 },
602 },
603 {
604 Role: llm.MessageRoleUser,
605 Content: []llm.Content{
606 {Type: llm.ContentTypeText, Text: "Error occurred"},
607 },
608 },
609 },
610 wantLen: 3, // Should have 2 synthetic tool_results + error message
611 },
612 {
613 name: "has tool results - should not insert",
614 messages: []llm.Message{
615 {
616 Role: llm.MessageRoleAssistant,
617 Content: []llm.Content{
618 {Type: llm.ContentTypeText, Text: "I'll use a tool"},
619 {Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
620 },
621 },
622 {
623 Role: llm.MessageRoleUser,
624 Content: []llm.Content{
625 {
626 Type: llm.ContentTypeToolResult,
627 ToolUseID: "tool_123",
628 ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "result"}},
629 },
630 },
631 },
632 },
633 wantLen: 1, // Should not insert anything
634 },
635 }
636
637 for _, tt := range tests {
638 t.Run(tt.name, func(t *testing.T) {
639 loop := NewLoop(Config{
640 LLM: NewPredictableService(),
641 History: []llm.Message{},
642 })
643
644 req := &llm.Request{
645 Messages: tt.messages,
646 }
647
648 loop.insertMissingToolResults(req)
649
650 got := req.Messages[len(req.Messages)-1]
651 if len(got.Content) != tt.wantLen {
652 t.Errorf("expected %d content items, got %d", tt.wantLen, len(got.Content))
653 }
654
655 if tt.wantText != "" {
656 // Find the synthetic tool result
657 found := false
658 for _, c := range got.Content {
659 if c.Type == llm.ContentTypeToolResult && len(c.ToolResult) > 0 {
660 if c.ToolResult[0].Text == tt.wantText {
661 found = true
662 if !c.ToolError {
663 t.Error("synthetic tool result should have ToolError=true")
664 }
665 break
666 }
667 }
668 }
669 if !found {
670 t.Errorf("expected to find synthetic tool result with text %q", tt.wantText)
671 }
672 }
673 })
674 }
675}
676
// TestInsertMissingToolResultsWithEdgeCases exercises insertMissingToolResults
// on histories the table test above does not cover: a tool_use buried earlier
// in the conversation (not in the second-to-last message), empty input, a
// single message, and role orderings that must be left untouched.
func TestInsertMissingToolResultsWithEdgeCases(t *testing.T) {
	// Test for the bug: when an assistant error message is recorded after a tool_use
	// but before tool execution, the tool_use is "hidden" from insertMissingToolResults
	// because it only checks the last two messages.
	t.Run("tool_use hidden by subsequent assistant message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		// Scenario:
		// 1. LLM responds with tool_use
		// 2. Something fails, error message recorded (assistant message)
		// 3. User sends new message
		// The tool_use in message 0 is never followed by a tool_result
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll run a command"},
						{Type: llm.ContentTypeToolUse, ID: "tool_hidden", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "LLM request failed: some error"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Please try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The function should have inserted a tool_result for tool_hidden
		// It should be inserted as a user message after the assistant message with tool_use
		// Since we can't insert in the middle, we need to ensure the history is valid

		// Check that there's a tool_result for tool_hidden somewhere in the messages
		// NOTE(review): the inner break only exits the content loop; the outer
		// loop keeps scanning remaining messages, which is harmless here.
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_hidden" {
					found = true
					if !c.ToolError {
						t.Error("synthetic tool result should have ToolError=true")
					}
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_hidden - the bug is that tool_use is hidden by subsequent assistant message")
		}
	})

	// Test for tool_use in earlier message (not the second-to-last)
	t.Run("tool_use in earlier message without result", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Do something"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_earlier", ToolName: "bash"},
					},
				},
				// Missing: user message with tool_result for tool_earlier
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Something went wrong"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// Should have inserted a tool_result for tool_earlier
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_earlier" {
					found = true
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_earlier")
		}
	})

	// Degenerate input: no messages at all — the function must not panic.
	t.Run("empty message list", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{},
		}

		loop.insertMissingToolResults(req)
		// Should not panic
	})

	// Degenerate input: a single user message — nothing to pair, no changes.
	t.Run("single message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not panic, should not modify
		if len(req.Messages[0].Content) != 1 {
			t.Error("should not modify single message")
		}
	})

	// A user→assistant tail has no tool_use needing a result; must be a no-op.
	t.Run("wrong role order - user then assistant", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
				{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not modify when roles are wrong order
		if len(req.Messages[1].Content) != 1 {
			t.Error("should not modify when roles are in wrong order")
		}
	})
}
848
// TestInsertMissingToolResults_EmptyAssistantContent verifies the handling of
// assistant messages with empty content: placeholder text is added when the
// empty message sits in the middle of the conversation, while a trailing
// empty assistant message (which the API permits) is left alone.
func TestInsertMissingToolResults_EmptyAssistantContent(t *testing.T) {
	// Test for the bug: when an assistant message has empty content (can happen when
	// the model ends its turn without producing any output), we need to add placeholder
	// content if it's not the last message. Otherwise the API will reject with:
	// "messages.N: all messages must have non-empty content except for the optional
	// final assistant message"

	t.Run("empty assistant content in middle of conversation", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "run git fetch"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"}},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{{
						Type:       llm.ContentTypeToolResult,
						ToolUseID:  "tool1",
						ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "success"}},
					}},
				},
				{
					// Empty assistant message - this can happen when model ends turn without output
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "next question"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message (index 3) should now have placeholder content
		if len(req.Messages[3].Content) == 0 {
			t.Error("expected placeholder content to be added to empty assistant message")
		}
		if req.Messages[3].Content[0].Type != llm.ContentTypeText {
			t.Error("expected placeholder to be text content")
		}
		if req.Messages[3].Content[0].Text != "(no response)" {
			t.Errorf("expected placeholder text '(no response)', got %q", req.Messages[3].Content[0].Text)
		}
	})

	t.Run("empty assistant content at end of conversation - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					// Empty assistant message at end is allowed by the API
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message at the end should NOT be modified
		// because the API allows empty content for the final assistant message
		if len(req.Messages[1].Content) != 0 {
			t.Error("expected final empty assistant message to remain empty")
		}
	})

	t.Run("non-empty assistant content - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi there"}},
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "goodbye"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The assistant message should not be modified
		if len(req.Messages[1].Content) != 1 {
			t.Errorf("expected assistant message to have 1 content item, got %d", len(req.Messages[1].Content))
		}
		if req.Messages[1].Content[0].Text != "hi there" {
			t.Errorf("expected assistant message text 'hi there', got %q", req.Messages[1].Content[0].Text)
		}
	})
}
971
972func TestGitStateTracking(t *testing.T) {
973 // Create a test repo
974 tmpDir := t.TempDir()
975
976 // Initialize git repo
977 runGit(t, tmpDir, "init")
978 runGit(t, tmpDir, "config", "user.email", "test@test.com")
979 runGit(t, tmpDir, "config", "user.name", "Test")
980
981 // Create initial commit
982 testFile := filepath.Join(tmpDir, "test.txt")
983 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
984 t.Fatal(err)
985 }
986 runGit(t, tmpDir, "add", ".")
987 runGit(t, tmpDir, "commit", "-m", "initial")
988
989 // Track git state changes
990 var mu sync.Mutex
991 var gitStateChanges []*gitstate.GitState
992
993 loop := NewLoop(Config{
994 LLM: NewPredictableService(),
995 History: []llm.Message{},
996 WorkingDir: tmpDir,
997 GetWorkingDir: func() string { return tmpDir },
998 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
999 mu.Lock()
1000 gitStateChanges = append(gitStateChanges, state)
1001 mu.Unlock()
1002 },
1003 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1004 return nil
1005 },
1006 })
1007
1008 // Verify initial state was captured
1009 if loop.lastGitState == nil {
1010 t.Fatal("expected initial git state to be captured")
1011 }
1012 if !loop.lastGitState.IsRepo {
1013 t.Error("expected IsRepo to be true")
1014 }
1015
1016 // Process a turn (no state change should occur)
1017 loop.QueueUserMessage(llm.Message{
1018 Role: llm.MessageRoleUser,
1019 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1020 })
1021
1022 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1023 defer cancel()
1024
1025 err := loop.ProcessOneTurn(ctx)
1026 if err != nil {
1027 t.Fatalf("ProcessOneTurn failed: %v", err)
1028 }
1029
1030 // No state change should have occurred
1031 mu.Lock()
1032 numChanges := len(gitStateChanges)
1033 mu.Unlock()
1034 if numChanges != 0 {
1035 t.Errorf("expected no git state changes, got %d", numChanges)
1036 }
1037
1038 // Now make a commit
1039 if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1040 t.Fatal(err)
1041 }
1042 runGit(t, tmpDir, "add", ".")
1043 runGit(t, tmpDir, "commit", "-m", "update")
1044
1045 // Process another turn - this should detect the commit change
1046 loop.QueueUserMessage(llm.Message{
1047 Role: llm.MessageRoleUser,
1048 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello again"}},
1049 })
1050
1051 err = loop.ProcessOneTurn(ctx)
1052 if err != nil {
1053 t.Fatalf("ProcessOneTurn failed: %v", err)
1054 }
1055
1056 // Now a state change should have been detected
1057 mu.Lock()
1058 numChanges = len(gitStateChanges)
1059 mu.Unlock()
1060 if numChanges != 1 {
1061 t.Errorf("expected 1 git state change, got %d", numChanges)
1062 }
1063}
1064
// TestGitStateTrackingWorktree verifies that the loop captures the git state
// of a linked worktree (not the main repository) at construction time, and
// that a commit made in the worktree is reported via OnGitStateChange on the
// next processed turn.
func TestGitStateTrackingWorktree(t *testing.T) {
	// Resolve symlinks up front: on some platforms t.TempDir() returns a
	// symlinked path, and the Worktree equality check below compares the
	// resolved path git reports against this one.
	tmpDir, err := filepath.EvalSymlinks(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	mainRepo := filepath.Join(tmpDir, "main")
	worktreeDir := filepath.Join(tmpDir, "worktree")

	// Create main repo
	if err := os.MkdirAll(mainRepo, 0o755); err != nil {
		t.Fatal(err)
	}
	runGit(t, mainRepo, "init")
	runGit(t, mainRepo, "config", "user.email", "test@test.com")
	runGit(t, mainRepo, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(mainRepo, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, mainRepo, "add", ".")
	runGit(t, mainRepo, "commit", "-m", "initial")

	// Create a worktree on a fresh "feature" branch so it has its own HEAD.
	runGit(t, mainRepo, "worktree", "add", "-b", "feature", worktreeDir)

	// Track git state changes in the worktree. The mutex guards the slice in
	// case the callback fires from another goroutine.
	var mu sync.Mutex
	var gitStateChanges []*gitstate.GitState

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    worktreeDir,
		GetWorkingDir: func() string { return worktreeDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			mu.Lock()
			gitStateChanges = append(gitStateChanges, state)
			mu.Unlock()
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Verify initial state: NewLoop should have snapshotted the worktree's
	// branch and path immediately.
	if loop.lastGitState == nil {
		t.Fatal("expected initial git state to be captured")
	}
	if loop.lastGitState.Branch != "feature" {
		t.Errorf("expected branch 'feature', got %q", loop.lastGitState.Branch)
	}
	if loop.lastGitState.Worktree != worktreeDir {
		t.Errorf("expected worktree %q, got %q", worktreeDir, loop.lastGitState.Worktree)
	}

	// Make a commit in the worktree
	worktreeFile := filepath.Join(worktreeDir, "feature.txt")
	if err := os.WriteFile(worktreeFile, []byte("feature content"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, worktreeDir, "add", ".")
	runGit(t, worktreeDir, "commit", "-m", "feature commit")

	// Process a turn to detect the change
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err = loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	mu.Lock()
	numChanges := len(gitStateChanges)
	mu.Unlock()

	if numChanges != 1 {
		t.Errorf("expected 1 git state change in worktree, got %d", numChanges)
	}
}
1152
// runGit runs a git command in dir, failing the test on any error.
// Commit invocations get --no-verify injected so repository hooks cannot
// interfere with the test run.
func runGit(t *testing.T, dir string, args ...string) {
	t.Helper()
	if len(args) > 0 && args[0] == "commit" {
		args = append([]string{"commit", "--no-verify"}, args[1:]...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	if output, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("git %v failed: %v\n%s", args, err, output)
	}
}
1168
1169func TestPredictableServiceTokenContextWindow(t *testing.T) {
1170 service := NewPredictableService()
1171 window := service.TokenContextWindow()
1172 if window != 200000 {
1173 t.Errorf("expected TokenContextWindow to return 200000, got %d", window)
1174 }
1175}
1176
1177func TestPredictableServiceMaxImageDimension(t *testing.T) {
1178 service := NewPredictableService()
1179 dimension := service.MaxImageDimension()
1180 if dimension != 2000 {
1181 t.Errorf("expected MaxImageDimension to return 2000, got %d", dimension)
1182 }
1183}
1184
1185func TestPredictableServiceThinkTool(t *testing.T) {
1186 service := NewPredictableService()
1187
1188 ctx := context.Background()
1189 req := &llm.Request{
1190 Messages: []llm.Message{
1191 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "think: This is a test thought"}}},
1192 },
1193 }
1194
1195 resp, err := service.Do(ctx, req)
1196 if err != nil {
1197 t.Fatalf("think tool test failed: %v", err)
1198 }
1199
1200 if resp.StopReason != llm.StopReasonToolUse {
1201 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1202 }
1203
1204 // Find the tool use content
1205 var toolUseContent *llm.Content
1206 for _, content := range resp.Content {
1207 if content.Type == llm.ContentTypeToolUse && content.ToolName == "think" {
1208 toolUseContent = &content
1209 break
1210 }
1211 }
1212
1213 if toolUseContent == nil {
1214 t.Fatal("no think tool use content found")
1215 }
1216
1217 // Check tool input contains the thoughts
1218 var toolInput map[string]interface{}
1219 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1220 t.Fatalf("failed to parse tool input: %v", err)
1221 }
1222
1223 if toolInput["thoughts"] != "This is a test thought" {
1224 t.Errorf("expected thoughts 'This is a test thought', got '%v'", toolInput["thoughts"])
1225 }
1226}
1227
1228func TestPredictableServicePatchTool(t *testing.T) {
1229 service := NewPredictableService()
1230
1231 ctx := context.Background()
1232 req := &llm.Request{
1233 Messages: []llm.Message{
1234 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch: /tmp/test.txt"}}},
1235 },
1236 }
1237
1238 resp, err := service.Do(ctx, req)
1239 if err != nil {
1240 t.Fatalf("patch tool test failed: %v", err)
1241 }
1242
1243 if resp.StopReason != llm.StopReasonToolUse {
1244 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1245 }
1246
1247 // Find the tool use content
1248 var toolUseContent *llm.Content
1249 for _, content := range resp.Content {
1250 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1251 toolUseContent = &content
1252 break
1253 }
1254 }
1255
1256 if toolUseContent == nil {
1257 t.Fatal("no patch tool use content found")
1258 }
1259
1260 // Check tool input contains the file path
1261 var toolInput map[string]interface{}
1262 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1263 t.Fatalf("failed to parse tool input: %v", err)
1264 }
1265
1266 if toolInput["path"] != "/tmp/test.txt" {
1267 t.Errorf("expected path '/tmp/test.txt', got '%v'", toolInput["path"])
1268 }
1269}
1270
1271func TestPredictableServiceMalformedPatchTool(t *testing.T) {
1272 service := NewPredictableService()
1273
1274 ctx := context.Background()
1275 req := &llm.Request{
1276 Messages: []llm.Message{
1277 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch bad json"}}},
1278 },
1279 }
1280
1281 resp, err := service.Do(ctx, req)
1282 if err != nil {
1283 t.Fatalf("malformed patch tool test failed: %v", err)
1284 }
1285
1286 if resp.StopReason != llm.StopReasonToolUse {
1287 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1288 }
1289
1290 // Find the tool use content
1291 var toolUseContent *llm.Content
1292 for _, content := range resp.Content {
1293 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1294 toolUseContent = &content
1295 break
1296 }
1297 }
1298
1299 if toolUseContent == nil {
1300 t.Fatal("no patch tool use content found")
1301 }
1302
1303 // Check that the tool input is malformed JSON (as expected)
1304 toolInputStr := string(toolUseContent.ToolInput)
1305 if !strings.Contains(toolInputStr, "parameter name") {
1306 t.Errorf("expected malformed JSON in tool input, got: %s", toolInputStr)
1307 }
1308}
1309
1310func TestPredictableServiceError(t *testing.T) {
1311 service := NewPredictableService()
1312
1313 ctx := context.Background()
1314 req := &llm.Request{
1315 Messages: []llm.Message{
1316 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "error: test error"}}},
1317 },
1318 }
1319
1320 resp, err := service.Do(ctx, req)
1321 if err == nil {
1322 t.Fatal("expected error, got nil")
1323 }
1324
1325 if !strings.Contains(err.Error(), "predictable error: test error") {
1326 t.Errorf("expected error message to contain 'predictable error: test error', got: %v", err)
1327 }
1328
1329 if resp != nil {
1330 t.Error("expected response to be nil when error occurs")
1331 }
1332}
1333
// TestPredictableServiceRequestTracking exercises the service's request log:
// empty before any call, populated by Do, cleared by ClearRequests, and
// bounded to the 10 most recent requests.
func TestPredictableServiceRequestTracking(t *testing.T) {
	service := NewPredictableService()

	// Initially no requests
	requests := service.GetRecentRequests()
	if requests != nil {
		t.Errorf("expected nil requests initially, got %v", requests)
	}

	lastReq := service.GetLastRequest()
	if lastReq != nil {
		t.Errorf("expected nil last request initially, got %v", lastReq)
	}

	// Make a request
	ctx := context.Background()
	req := &llm.Request{
		Messages: []llm.Message{
			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
		},
	}

	_, err := service.Do(ctx, req)
	if err != nil {
		t.Fatalf("Do failed: %v", err)
	}

	// Check that request was tracked
	requests = service.GetRecentRequests()
	if len(requests) != 1 {
		t.Errorf("expected 1 request, got %d", len(requests))
	}

	lastReq = service.GetLastRequest()
	if lastReq == nil {
		t.Fatal("expected last request to be non-nil")
	}

	if len(lastReq.Messages) != 1 {
		t.Errorf("expected 1 message in last request, got %d", len(lastReq.Messages))
	}

	// Test clearing requests
	service.ClearRequests()
	requests = service.GetRecentRequests()
	if requests != nil {
		t.Errorf("expected nil requests after clearing, got %v", requests)
	}

	lastReq = service.GetLastRequest()
	if lastReq != nil {
		t.Errorf("expected nil last request after clearing, got %v", lastReq)
	}

	// Test that only last 10 requests are kept: issue 15 distinguishable
	// requests so the first 5 should be evicted.
	for i := 0; i < 15; i++ {
		testReq := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: fmt.Sprintf("test %d", i)}}},
			},
		}
		_, err := service.Do(ctx, testReq)
		if err != nil {
			t.Fatalf("Do failed on iteration %d: %v", i, err)
		}
	}

	requests = service.GetRecentRequests()
	if len(requests) != 10 {
		t.Errorf("expected 10 requests (last 10), got %d", len(requests))
	}

	// Check that we have requests 5-14 (0-indexed): slot i should hold the
	// request whose text was "test {i+5}".
	for i, req := range requests {
		expectedText := fmt.Sprintf("test %d", i+5)
		if len(req.Messages) == 0 || len(req.Messages[0].Content) == 0 {
			t.Errorf("request %d has no content", i)
			continue
		}
		if req.Messages[0].Content[0].Text != expectedText {
			t.Errorf("expected request %d to have text '%s', got '%s'", i, expectedText, req.Messages[0].Content[0].Text)
		}
	}
}
1418
1419func TestPredictableServiceScreenshotTool(t *testing.T) {
1420 service := NewPredictableService()
1421
1422 ctx := context.Background()
1423 req := &llm.Request{
1424 Messages: []llm.Message{
1425 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "screenshot: .test-class"}}},
1426 },
1427 }
1428
1429 resp, err := service.Do(ctx, req)
1430 if err != nil {
1431 t.Fatalf("screenshot tool test failed: %v", err)
1432 }
1433
1434 if resp.StopReason != llm.StopReasonToolUse {
1435 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1436 }
1437
1438 // Find the tool use content
1439 var toolUseContent *llm.Content
1440 for _, content := range resp.Content {
1441 if content.Type == llm.ContentTypeToolUse && content.ToolName == "browser_take_screenshot" {
1442 toolUseContent = &content
1443 break
1444 }
1445 }
1446
1447 if toolUseContent == nil {
1448 t.Fatal("no screenshot tool use content found")
1449 }
1450
1451 // Check tool input contains the selector
1452 var toolInput map[string]interface{}
1453 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1454 t.Fatalf("failed to parse tool input: %v", err)
1455 }
1456
1457 if toolInput["selector"] != ".test-class" {
1458 t.Errorf("expected selector '.test-class', got '%v'", toolInput["selector"])
1459 }
1460}
1461
1462func TestPredictableServiceToolSmorgasbord(t *testing.T) {
1463 service := NewPredictableService()
1464
1465 ctx := context.Background()
1466 req := &llm.Request{
1467 Messages: []llm.Message{
1468 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "tool smorgasbord"}}},
1469 },
1470 }
1471
1472 resp, err := service.Do(ctx, req)
1473 if err != nil {
1474 t.Fatalf("tool smorgasbord test failed: %v", err)
1475 }
1476
1477 if resp.StopReason != llm.StopReasonToolUse {
1478 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1479 }
1480
1481 // Count the tool use contents
1482 toolUseCount := 0
1483 for _, content := range resp.Content {
1484 if content.Type == llm.ContentTypeToolUse {
1485 toolUseCount++
1486 }
1487 }
1488
1489 // Should have at least several tool uses
1490 if toolUseCount < 5 {
1491 t.Errorf("expected at least 5 tool uses, got %d", toolUseCount)
1492 }
1493}
1494
// TestProcessLLMRequestError verifies that when the LLM service fails,
// ProcessOneTurn returns an error AND records an assistant-role error message
// (with EndOfTurn set) so the UI's working state is updated.
func TestProcessLLMRequestError(t *testing.T) {
	// Test error handling when LLM service returns an error
	errorService := &errorLLMService{err: fmt.Errorf("test LLM error")}

	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	loop := NewLoop(Config{
		LLM:           errorService,
		History:       []llm.Message{},
		Tools:         []*llm.Tool{},
		RecordMessage: recordFunc,
	})

	// Queue a user message
	userMessage := llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
	}
	loop.QueueUserMessage(userMessage)

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	err := loop.ProcessOneTurn(ctx)
	if err == nil {
		t.Fatal("expected error from ProcessOneTurn, got nil")
	}

	if !strings.Contains(err.Error(), "LLM request failed") {
		t.Errorf("expected error to contain 'LLM request failed', got: %v", err)
	}

	// Check that error message was recorded
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
	}

	// The recorded error is surfaced as an assistant message with a single
	// text content item describing the failure.
	if recordedMessages[0].Role != llm.MessageRoleAssistant {
		t.Errorf("expected recorded message to be assistant role, got %s", recordedMessages[0].Role)
	}

	if len(recordedMessages[0].Content) != 1 {
		t.Fatalf("expected 1 content item in recorded message, got %d", len(recordedMessages[0].Content))
	}

	if recordedMessages[0].Content[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content, got %s", recordedMessages[0].Content[0].Type)
	}

	if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
		t.Errorf("expected error message to contain 'LLM request failed', got: %s", recordedMessages[0].Content[0].Text)
	}

	// Verify EndOfTurn is set so the agent working state is properly updated
	if !recordedMessages[0].EndOfTurn {
		t.Error("expected error message to have EndOfTurn=true so agent working state is updated")
	}
}
1557
// errorLLMService is a test LLM service that always returns an error
type errorLLMService struct {
	// err is returned verbatim from every Do call.
	err error
}
1562
// Do always fails with the configured error and returns a nil response.
func (e *errorLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
	return nil, e.err
}
1566
// TokenContextWindow reports a fixed 200k-token window, matching the
// predictable service used elsewhere in these tests.
func (e *errorLLMService) TokenContextWindow() int {
	return 200000
}
1570
// MaxImageDimension reports a fixed 2000px limit, matching the predictable
// service used elsewhere in these tests.
func (e *errorLLMService) MaxImageDimension() int {
	return 2000
}
1574
// TestCheckGitStateChange exercises checkGitStateChange directly: it must be
// safe with a nil OnGitStateChange callback, fire the callback exactly once
// after a new commit, and stay quiet when the state is unchanged.
func TestCheckGitStateChange(t *testing.T) {
	// Create a test repo
	tmpDir := t.TempDir()

	// Initialize git repo
	runGit(t, tmpDir, "init")
	runGit(t, tmpDir, "config", "user.email", "test@test.com")
	runGit(t, tmpDir, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(tmpDir, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "initial")

	// Test with nil OnGitStateChange - should not panic
	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		// OnGitStateChange is nil
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// This should not panic
	loop.checkGitStateChange(context.Background())

	// Test with actual callback. No mutex needed here: checkGitStateChange is
	// invoked synchronously from this goroutine only.
	var gitStateChanges []*gitstate.GitState
	loop = NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			gitStateChanges = append(gitStateChanges, state)
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Make a change
	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "update")

	// Check git state change
	loop.checkGitStateChange(context.Background())

	if len(gitStateChanges) != 1 {
		t.Errorf("expected 1 git state change, got %d", len(gitStateChanges))
	}

	// Call again - should not trigger another change since state is the same
	loop.checkGitStateChange(context.Background())

	if len(gitStateChanges) != 1 {
		t.Errorf("expected still 1 git state change (no new changes), got %d", len(gitStateChanges))
	}
}
1643
// TestHandleToolCallsWithMissingTool verifies that a tool_use referencing an
// unregistered tool produces a recorded user message containing an error
// tool_result (rather than a panic or a dropped call).
func TestHandleToolCallsWithMissingTool(t *testing.T) {
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		Tools:         []*llm.Tool{}, // No tools registered
		RecordMessage: recordFunc,
	})

	// Create content with a tool use for a tool that doesn't exist
	content := []llm.Content{
		{
			ID:        "test_tool_123",
			Type:      llm.ContentTypeToolUse,
			ToolName:  "nonexistent_tool",
			ToolInput: json.RawMessage(`{"test": "input"}`),
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	// handleToolCalls itself should succeed; the failure is reported to the
	// model via the tool result, not as a Go error.
	err := loop.handleToolCalls(ctx, content)
	if err != nil {
		t.Fatalf("handleToolCalls failed: %v", err)
	}

	// Should have recorded a user message with tool result
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
	}

	msg := recordedMessages[0]
	if msg.Role != llm.MessageRoleUser {
		t.Errorf("expected user role, got %s", msg.Role)
	}

	if len(msg.Content) != 1 {
		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
	}

	// The tool result must echo the originating tool_use ID and be flagged
	// as an error so the model knows the call failed.
	toolResult := msg.Content[0]
	if toolResult.Type != llm.ContentTypeToolResult {
		t.Errorf("expected tool result content, got %s", toolResult.Type)
	}

	if toolResult.ToolUseID != "test_tool_123" {
		t.Errorf("expected tool use ID 'test_tool_123', got %s", toolResult.ToolUseID)
	}

	if !toolResult.ToolError {
		t.Error("expected ToolError to be true")
	}

	if len(toolResult.ToolResult) != 1 {
		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
	}

	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
	}

	expectedText := "Tool 'nonexistent_tool' not found"
	if toolResult.ToolResult[0].Text != expectedText {
		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
	}
}
1716
// TestHandleToolCallsWithErrorTool verifies that when a registered tool's Run
// returns an error, the loop records a user message with an error-flagged
// tool_result carrying the tool's error text.
func TestHandleToolCallsWithErrorTool(t *testing.T) {
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	// Create a tool that always returns an error
	errorTool := &llm.Tool{
		Name:        "error_tool",
		Description: "A tool that always errors",
		InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
		Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
			return llm.ErrorToolOut(fmt.Errorf("intentional test error"))
		},
	}

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		Tools:         []*llm.Tool{errorTool},
		RecordMessage: recordFunc,
	})

	// Create content with a tool use that will error
	content := []llm.Content{
		{
			ID:        "error_tool_123",
			Type:      llm.ContentTypeToolUse,
			ToolName:  "error_tool",
			ToolInput: json.RawMessage(`{}`),
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	// handleToolCalls itself should succeed; the tool failure is reported to
	// the model via the tool result, not as a Go error.
	err := loop.handleToolCalls(ctx, content)
	if err != nil {
		t.Fatalf("handleToolCalls failed: %v", err)
	}

	// Should have recorded a user message with tool result
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
	}

	msg := recordedMessages[0]
	if msg.Role != llm.MessageRoleUser {
		t.Errorf("expected user role, got %s", msg.Role)
	}

	if len(msg.Content) != 1 {
		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
	}

	// The tool result must echo the originating tool_use ID and be flagged
	// as an error containing the tool's error message.
	toolResult := msg.Content[0]
	if toolResult.Type != llm.ContentTypeToolResult {
		t.Errorf("expected tool result content, got %s", toolResult.Type)
	}

	if toolResult.ToolUseID != "error_tool_123" {
		t.Errorf("expected tool use ID 'error_tool_123', got %s", toolResult.ToolUseID)
	}

	if !toolResult.ToolError {
		t.Error("expected ToolError to be true")
	}

	if len(toolResult.ToolResult) != 1 {
		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
	}

	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
	}

	expectedText := "intentional test error"
	if toolResult.ToolResult[0].Text != expectedText {
		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
	}
}
1799
// TestMaxTokensTruncation verifies the loop's handling of a max-tokens
// truncated response: the truncated message is recorded (excluded from
// context, kept for cost tracking), a separate truncation error message ends
// the turn, and only the latter lands in the conversation history.
func TestMaxTokensTruncation(t *testing.T) {
	// The mutex guards recordedMessages: Go runs the loop on another
	// goroutine while this test inspects results.
	var mu sync.Mutex
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		mu.Lock()
		recordedMessages = append(recordedMessages, message)
		mu.Unlock()
		return nil
	}

	service := NewPredictableService()
	loop := NewLoop(Config{
		LLM:           service,
		History:       []llm.Message{},
		Tools:         []*llm.Tool{},
		RecordMessage: recordFunc,
	})

	// Queue a user message that triggers max tokens truncation
	userMessage := llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
	}
	loop.QueueUserMessage(userMessage)

	// Run the loop - it should stop after handling truncation
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	// Go blocks until the context expires, so DeadlineExceeded is the
	// expected "clean" exit here.
	err := loop.Go(ctx)
	if err != context.DeadlineExceeded {
		t.Errorf("expected context deadline exceeded, got %v", err)
	}

	// Check recorded messages
	mu.Lock()
	numMessages := len(recordedMessages)
	messages := make([]llm.Message, len(recordedMessages))
	copy(messages, recordedMessages)
	mu.Unlock()

	// We should see two messages:
	// 1. The truncated message (with ExcludedFromContext=true) for cost tracking
	// 2. The truncation error message (with ErrorType=truncation)
	if numMessages != 2 {
		t.Errorf("Expected 2 recorded messages (truncated + error), got %d", numMessages)
		for i, msg := range messages {
			t.Logf("Message %d: Role=%v, EndOfTurn=%v, ExcludedFromContext=%v, ErrorType=%v",
				i, msg.Role, msg.EndOfTurn, msg.ExcludedFromContext, msg.ErrorType)
		}
		return
	}

	// First message: truncated response (for cost tracking, excluded from context)
	truncatedMsg := messages[0]
	if truncatedMsg.Role != llm.MessageRoleAssistant {
		t.Errorf("Truncated message should be assistant, got %v", truncatedMsg.Role)
	}
	if !truncatedMsg.ExcludedFromContext {
		t.Error("Truncated message should have ExcludedFromContext=true")
	}

	// Second message: truncation error
	errorMsg := messages[1]
	if errorMsg.Role != llm.MessageRoleAssistant {
		t.Errorf("Error message should be assistant, got %v", errorMsg.Role)
	}
	if !errorMsg.EndOfTurn {
		t.Error("Error message should have EndOfTurn=true")
	}
	if errorMsg.ErrorType != llm.ErrorTypeTruncation {
		t.Errorf("Error message should have ErrorType=truncation, got %v", errorMsg.ErrorType)
	}
	if errorMsg.ExcludedFromContext {
		t.Error("Error message should not be excluded from context")
	}
	if !strings.Contains(errorMsg.Content[0].Text, "SYSTEM ERROR") {
		t.Errorf("Error message should contain SYSTEM ERROR, got: %s", errorMsg.Content[0].Text)
	}

	// Verify history contains user message + error message, but NOT the truncated response
	loop.mu.Lock()
	history := loop.history
	loop.mu.Unlock()

	// History should have: user message + error message (the truncated response is NOT added to history)
	if len(history) != 2 {
		t.Errorf("History should have 2 messages (user + error), got %d", len(history))
	}
}
1890
1891//func TestInsertMissingToolResultsEdgeCases(t *testing.T) {
1892// loop := NewLoop(Config{
1893// LLM: NewPredictableService(),
1894// History: []llm.Message{},
1895// })
1896//
1897// // Test with nil request
1898// loop.insertMissingToolResults(nil) // Should not panic
1899//
1900// // Test with empty messages
1901// req := &llm.Request{Messages: []llm.Message{}}
1902// loop.insertMissingToolResults(req) // Should not panic
1903//
1904// // Test with single message
1905// req = &llm.Request{
1906// Messages: []llm.Message{
1907// {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
1908// },
1909// }
1910// loop.insertMissingToolResults(req) // Should not panic
1911// if len(req.Messages) != 1 {
1912// t.Errorf("expected 1 message, got %d", len(req.Messages))
1913// }
1914//
1915// // Test with multiple consecutive assistant messages with tool_use
1916// req = &llm.Request{
1917// Messages: []llm.Message{
1918// {
1919// Role: llm.MessageRoleAssistant,
1920// Content: []llm.Content{
1921// {Type: llm.ContentTypeText, Text: "First tool"},
1922// {Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"},
1923// },
1924// },
1925// {
1926// Role: llm.MessageRoleAssistant,
1927// Content: []llm.Content{
1928// {Type: llm.ContentTypeText, Text: "Second tool"},
1929// {Type: llm.ContentTypeToolUse, ID: "tool2", ToolName: "read"},
1930// },
1931// },
1932// {
1933// Role: llm.MessageRoleUser,
1934// Content: []llm.Content{
1935// {Type: llm.ContentTypeText, Text: "User response"},
1936// },
1937// },
1938// },
1939// }
1940//
1941// loop.insertMissingToolResults(req)
1942//
1943// // Should have inserted synthetic tool results for both tool_uses
1944// // The structure should be:
1945// // 0: First assistant message
1946// // 1: Synthetic user message with tool1 result
1947// // 2: Second assistant message
1948// // 3: Synthetic user message with tool2 result
1949// // 4: Original user message
1950// if len(req.Messages) != 5 {
1951// t.Fatalf("expected 5 messages after processing, got %d", len(req.Messages))
1952// }
1953//
1954// // Check first synthetic message
1955// if req.Messages[1].Role != llm.MessageRoleUser {
1956// t.Errorf("expected message 1 to be user role, got %s", req.Messages[1].Role)
1957// }
1958// foundTool1 := false
1959// for _, content := range req.Messages[1].Content {
1960// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool1" {
1961// foundTool1 = true
1962// break
1963// }
1964// }
1965// if !foundTool1 {
1966// t.Error("expected to find tool1 result in message 1")
1967// }
1968//
1969// // Check second synthetic message
1970// if req.Messages[3].Role != llm.MessageRoleUser {
1971// t.Errorf("expected message 3 to be user role, got %s", req.Messages[3].Role)
1972// }
1973// foundTool2 := false
1974// for _, content := range req.Messages[3].Content {
1975// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool2" {
1976// foundTool2 = true
1977// break
1978// }
1979//}
1980// if !foundTool2 {
1981// t.Error("expected to find tool2 result in message 3")
1982// }
1983//}