loop_test.go

   1package loop
   2
   3import (
   4	"context"
   5	"encoding/json"
   6	"fmt"
   7	"io"
   8	"os"
   9	"os/exec"
  10	"path/filepath"
  11	"strings"
  12	"sync"
  13	"testing"
  14	"time"
  15
  16	"shelley.exe.dev/claudetool"
  17	"shelley.exe.dev/gitstate"
  18	"shelley.exe.dev/llm"
  19)
  20
  21func TestNewLoop(t *testing.T) {
  22	history := []llm.Message{
  23		{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
  24	}
  25	tools := []*llm.Tool{}
  26	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
  27		return nil
  28	}
  29
  30	loop := NewLoop(Config{
  31		LLM:           NewPredictableService(),
  32		History:       history,
  33		Tools:         tools,
  34		RecordMessage: recordFunc,
  35	})
  36	if loop == nil {
  37		t.Fatal("NewLoop returned nil")
  38	}
  39
  40	if len(loop.history) != 1 {
  41		t.Errorf("expected history length 1, got %d", len(loop.history))
  42	}
  43
  44	if len(loop.messageQueue) != 0 {
  45		t.Errorf("expected empty message queue, got %d", len(loop.messageQueue))
  46	}
  47}
  48
  49func TestQueueUserMessage(t *testing.T) {
  50	loop := NewLoop(Config{
  51		LLM:     NewPredictableService(),
  52		History: []llm.Message{},
  53		Tools:   []*llm.Tool{},
  54	})
  55
  56	message := llm.Message{
  57		Role:    llm.MessageRoleUser,
  58		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Test message"}},
  59	}
  60
  61	loop.QueueUserMessage(message)
  62
  63	loop.mu.Lock()
  64	queueLen := len(loop.messageQueue)
  65	loop.mu.Unlock()
  66
  67	if queueLen != 1 {
  68		t.Errorf("expected message queue length 1, got %d", queueLen)
  69	}
  70}
  71
  72func TestPredictableService(t *testing.T) {
  73	service := NewPredictableService()
  74
  75	// Test simple hello response
  76	ctx := context.Background()
  77	req := &llm.Request{
  78		Messages: []llm.Message{
  79			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
  80		},
  81	}
  82
  83	resp, err := service.Do(ctx, req)
  84	if err != nil {
  85		t.Fatalf("predictable service Do failed: %v", err)
  86	}
  87
  88	if resp.Role != llm.MessageRoleAssistant {
  89		t.Errorf("expected assistant role, got %v", resp.Role)
  90	}
  91
  92	if len(resp.Content) == 0 {
  93		t.Error("expected non-empty content")
  94	}
  95
  96	if resp.Content[0].Type != llm.ContentTypeText {
  97		t.Errorf("expected text content, got %v", resp.Content[0].Type)
  98	}
  99
 100	if resp.Content[0].Text != "Well, hi there!" {
 101		t.Errorf("unexpected response text: %s", resp.Content[0].Text)
 102	}
 103}
 104
 105func TestPredictableServiceEcho(t *testing.T) {
 106	service := NewPredictableService()
 107
 108	ctx := context.Background()
 109	req := &llm.Request{
 110		Messages: []llm.Message{
 111			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "echo: foo"}}},
 112		},
 113	}
 114
 115	resp, err := service.Do(ctx, req)
 116	if err != nil {
 117		t.Fatalf("echo test failed: %v", err)
 118	}
 119
 120	if resp.Content[0].Text != "foo" {
 121		t.Errorf("expected 'foo', got '%s'", resp.Content[0].Text)
 122	}
 123
 124	// Test another echo
 125	req.Messages[0].Content[0].Text = "echo: hello world"
 126	resp, err = service.Do(ctx, req)
 127	if err != nil {
 128		t.Fatalf("echo hello world test failed: %v", err)
 129	}
 130
 131	if resp.Content[0].Text != "hello world" {
 132		t.Errorf("expected 'hello world', got '%s'", resp.Content[0].Text)
 133	}
 134}
 135
 136func TestPredictableServiceBashTool(t *testing.T) {
 137	service := NewPredictableService()
 138
 139	ctx := context.Background()
 140	req := &llm.Request{
 141		Messages: []llm.Message{
 142			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: ls -la"}}},
 143		},
 144	}
 145
 146	resp, err := service.Do(ctx, req)
 147	if err != nil {
 148		t.Fatalf("bash tool test failed: %v", err)
 149	}
 150
 151	if resp.StopReason != llm.StopReasonToolUse {
 152		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
 153	}
 154
 155	if len(resp.Content) != 2 {
 156		t.Errorf("expected 2 content items (text + tool_use), got %d", len(resp.Content))
 157	}
 158
 159	// Find the tool use content
 160	var toolUseContent *llm.Content
 161	for _, content := range resp.Content {
 162		if content.Type == llm.ContentTypeToolUse {
 163			toolUseContent = &content
 164			break
 165		}
 166	}
 167
 168	if toolUseContent == nil {
 169		t.Fatal("no tool use content found")
 170	}
 171
 172	if toolUseContent.ToolName != "bash" {
 173		t.Errorf("expected tool name 'bash', got '%s'", toolUseContent.ToolName)
 174	}
 175
 176	// Check tool input contains the command
 177	var toolInput map[string]interface{}
 178	if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
 179		t.Fatalf("failed to parse tool input: %v", err)
 180	}
 181
 182	if toolInput["command"] != "ls -la" {
 183		t.Errorf("expected command 'ls -la', got '%v'", toolInput["command"])
 184	}
 185}
 186
 187func TestPredictableServiceDefaultResponse(t *testing.T) {
 188	service := NewPredictableService()
 189
 190	ctx := context.Background()
 191	req := &llm.Request{
 192		Messages: []llm.Message{
 193			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "some unknown input"}}},
 194		},
 195	}
 196
 197	resp, err := service.Do(ctx, req)
 198	if err != nil {
 199		t.Fatalf("default response test failed: %v", err)
 200	}
 201
 202	if resp.Content[0].Text != "edit predictable.go to add a response for that one..." {
 203		t.Errorf("unexpected default response: %s", resp.Content[0].Text)
 204	}
 205}
 206
 207func TestPredictableServiceDelay(t *testing.T) {
 208	service := NewPredictableService()
 209
 210	ctx := context.Background()
 211	req := &llm.Request{
 212		Messages: []llm.Message{
 213			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "delay: 0.1"}}},
 214		},
 215	}
 216
 217	start := time.Now()
 218	resp, err := service.Do(ctx, req)
 219	elapsed := time.Since(start)
 220
 221	if err != nil {
 222		t.Fatalf("delay test failed: %v", err)
 223	}
 224
 225	if elapsed < 100*time.Millisecond {
 226		t.Errorf("expected delay of at least 100ms, got %v", elapsed)
 227	}
 228
 229	if resp.Content[0].Text != "Delayed for 0.1 seconds" {
 230		t.Errorf("unexpected response text: %s", resp.Content[0].Text)
 231	}
 232}
 233
 234func TestLoopWithPredictableService(t *testing.T) {
 235	var recordedMessages []llm.Message
 236	var recordedUsages []llm.Usage
 237
 238	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
 239		recordedMessages = append(recordedMessages, message)
 240		recordedUsages = append(recordedUsages, usage)
 241		return nil
 242	}
 243
 244	service := NewPredictableService()
 245	loop := NewLoop(Config{
 246		LLM:           service,
 247		History:       []llm.Message{},
 248		Tools:         []*llm.Tool{},
 249		RecordMessage: recordFunc,
 250	})
 251
 252	// Queue a user message that triggers a known response
 253	userMessage := llm.Message{
 254		Role:    llm.MessageRoleUser,
 255		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
 256	}
 257	loop.QueueUserMessage(userMessage)
 258
 259	// Run the loop with a short timeout
 260	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
 261	defer cancel()
 262
 263	err := loop.Go(ctx)
 264	if err != context.DeadlineExceeded {
 265		t.Errorf("expected context deadline exceeded, got %v", err)
 266	}
 267
 268	// Check that messages were recorded
 269	if len(recordedMessages) < 1 {
 270		t.Errorf("expected at least 1 recorded message, got %d", len(recordedMessages))
 271	}
 272
 273	// Check usage tracking
 274	usage := loop.GetUsage()
 275	if usage.IsZero() {
 276		t.Error("expected non-zero usage")
 277	}
 278}
 279
 280func TestLoopWithTools(t *testing.T) {
 281	var toolCalls []string
 282
 283	testTool := &llm.Tool{
 284		Name:        "bash",
 285		Description: "A test bash tool",
 286		InputSchema: llm.MustSchema(`{"type": "object", "properties": {"command": {"type": "string"}}}`),
 287		Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
 288			toolCalls = append(toolCalls, string(input))
 289			return llm.ToolOut{
 290				LLMContent: []llm.Content{
 291					{Type: llm.ContentTypeText, Text: "Command executed successfully"},
 292				},
 293			}
 294		},
 295	}
 296
 297	service := NewPredictableService()
 298	loop := NewLoop(Config{
 299		LLM:     service,
 300		History: []llm.Message{},
 301		Tools:   []*llm.Tool{testTool},
 302		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
 303			return nil
 304		},
 305	})
 306
 307	// Queue a user message that triggers the bash tool
 308	userMessage := llm.Message{
 309		Role:    llm.MessageRoleUser,
 310		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}},
 311	}
 312	loop.QueueUserMessage(userMessage)
 313
 314	// Run the loop with a short timeout
 315	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
 316	defer cancel()
 317
 318	err := loop.Go(ctx)
 319	if err != context.DeadlineExceeded {
 320		t.Errorf("expected context deadline exceeded, got %v", err)
 321	}
 322
 323	// Check that the tool was called
 324	if len(toolCalls) != 1 {
 325		t.Errorf("expected 1 tool call, got %d", len(toolCalls))
 326	}
 327
 328	if toolCalls[0] != `{"command":"echo hello"}` {
 329		t.Errorf("unexpected tool call input: %s", toolCalls[0])
 330	}
 331}
 332
 333func TestGetHistory(t *testing.T) {
 334	initialHistory := []llm.Message{
 335		{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
 336	}
 337
 338	loop := NewLoop(Config{
 339		LLM:     NewPredictableService(),
 340		History: initialHistory,
 341		Tools:   []*llm.Tool{},
 342	})
 343
 344	history := loop.GetHistory()
 345	if len(history) != 1 {
 346		t.Errorf("expected history length 1, got %d", len(history))
 347	}
 348
 349	// Modify returned slice to ensure it's a copy
 350	history[0].Content[0].Text = "Modified"
 351
 352	// Original should be unchanged
 353	original := loop.GetHistory()
 354	if original[0].Content[0].Text != "Hello" {
 355		t.Error("GetHistory should return a copy, not the original slice")
 356	}
 357}
 358
 359func TestLoopWithKeywordTool(t *testing.T) {
 360	// Test that keyword tool doesn't crash with nil pointer dereference
 361	service := NewPredictableService()
 362
 363	var messages []llm.Message
 364	recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
 365		messages = append(messages, message)
 366		return nil
 367	}
 368
 369	// Add a mock keyword tool that doesn't actually search
 370	tools := []*llm.Tool{
 371		{
 372			Name:        "keyword_search",
 373			Description: "Mock keyword search",
 374			InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
 375			Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
 376				// Simple mock implementation
 377				return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
 378			},
 379		},
 380	}
 381
 382	loop := NewLoop(Config{
 383		LLM:           service,
 384		History:       []llm.Message{},
 385		Tools:         tools,
 386		RecordMessage: recordMessage,
 387	})
 388
 389	// Send a user message that will trigger the default response
 390	userMessage := llm.Message{
 391		Role: llm.MessageRoleUser,
 392		Content: []llm.Content{
 393			{Type: llm.ContentTypeText, Text: "Please search for some files"},
 394		},
 395	}
 396
 397	loop.QueueUserMessage(userMessage)
 398
 399	// Process one turn
 400	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
 401	defer cancel()
 402
 403	err := loop.ProcessOneTurn(ctx)
 404	if err != nil {
 405		t.Fatalf("ProcessOneTurn failed: %v", err)
 406	}
 407
 408	// Verify we got expected messages
 409	// Note: User messages are recorded by ConversationManager, not by Loop,
 410	// so we only expect the assistant response to be recorded here
 411	if len(messages) < 1 {
 412		t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
 413	}
 414
 415	// Should have assistant response
 416	if messages[0].Role != llm.MessageRoleAssistant {
 417		t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
 418	}
 419}
 420
 421func TestLoopWithActualKeywordTool(t *testing.T) {
 422	// Test that actual keyword tool works with Loop
 423	service := NewPredictableService()
 424
 425	var messages []llm.Message
 426	recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
 427		messages = append(messages, message)
 428		return nil
 429	}
 430
 431	// Use the actual keyword tool from claudetool package
 432	// Note: We need to import it first
 433	tools := []*llm.Tool{
 434		// Add a simplified keyword tool to avoid file system dependencies in tests
 435		{
 436			Name:        "keyword_search",
 437			Description: "Search for files by keyword",
 438			InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
 439			Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
 440				// Simple mock implementation - no context dependencies
 441				return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
 442			},
 443		},
 444	}
 445
 446	loop := NewLoop(Config{
 447		LLM:           service,
 448		History:       []llm.Message{},
 449		Tools:         tools,
 450		RecordMessage: recordMessage,
 451	})
 452
 453	// Send a user message that will trigger the default response
 454	userMessage := llm.Message{
 455		Role: llm.MessageRoleUser,
 456		Content: []llm.Content{
 457			{Type: llm.ContentTypeText, Text: "Please search for some files"},
 458		},
 459	}
 460
 461	loop.QueueUserMessage(userMessage)
 462
 463	// Process one turn
 464	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
 465	defer cancel()
 466
 467	err := loop.ProcessOneTurn(ctx)
 468	if err != nil {
 469		t.Fatalf("ProcessOneTurn failed: %v", err)
 470	}
 471
 472	// Verify we got expected messages
 473	// Note: User messages are recorded by ConversationManager, not by Loop,
 474	// so we only expect the assistant response to be recorded here
 475	if len(messages) < 1 {
 476		t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
 477	}
 478
 479	// Should have assistant response
 480	if messages[0].Role != llm.MessageRoleAssistant {
 481		t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
 482	}
 483
 484	t.Log("Keyword tool test passed - no nil pointer dereference occurred")
 485}
 486
 487func TestKeywordToolWithLLMProvider(t *testing.T) {
 488	// Create a temp directory with a test file to search
 489	tempDir := t.TempDir()
 490	testFile := filepath.Join(tempDir, "test.txt")
 491	if err := os.WriteFile(testFile, []byte("this is a test file\n"), 0o644); err != nil {
 492		t.Fatal(err)
 493	}
 494
 495	// Create a predictable service for testing
 496	predictableService := NewPredictableService()
 497
 498	// Create a simple LLM provider for testing
 499	llmProvider := &testLLMProvider{
 500		service: predictableService,
 501		models:  []string{"predictable"},
 502	}
 503
 504	// Create keyword tool with provider - use temp dir instead of /
 505	keywordTool := claudetool.NewKeywordToolWithWorkingDir(llmProvider, claudetool.NewMutableWorkingDir(tempDir))
 506	tool := keywordTool.Tool()
 507
 508	// Test input
 509	input := `{"query": "test search", "search_terms": ["test"]}`
 510
 511	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 512	defer cancel()
 513	result := tool.Run(ctx, json.RawMessage(input))
 514
 515	// Should get a result without error (even though ripgrep will fail in test environment)
 516	// The important thing is that it doesn't crash with nil pointer dereference
 517	if result.Error != nil {
 518		t.Logf("Expected error in test environment (no ripgrep): %v", result.Error)
 519		// This is expected in test environment
 520	} else {
 521		t.Log("Keyword tool executed successfully")
 522		if len(result.LLMContent) == 0 {
 523			t.Error("Expected some content in result")
 524		}
 525	}
 526}
 527
 528// testLLMProvider implements LLMServiceProvider for testing
 529type testLLMProvider struct {
 530	service llm.Service
 531	models  []string
 532}
 533
 534func (t *testLLMProvider) GetService(modelID string) (llm.Service, error) {
 535	for _, model := range t.models {
 536		if model == modelID {
 537			return t.service, nil
 538		}
 539	}
 540	return nil, fmt.Errorf("model %s not available", modelID)
 541}
 542
 543func (t *testLLMProvider) GetAvailableModels() []string {
 544	return t.models
 545}
 546
 547func TestInsertMissingToolResults(t *testing.T) {
 548	tests := []struct {
 549		name     string
 550		messages []llm.Message
 551		wantLen  int
 552		wantText string
 553	}{
 554		{
 555			name: "no missing tool results",
 556			messages: []llm.Message{
 557				{
 558					Role: llm.MessageRoleAssistant,
 559					Content: []llm.Content{
 560						{Type: llm.ContentTypeText, Text: "Let me help you"},
 561					},
 562				},
 563				{
 564					Role: llm.MessageRoleUser,
 565					Content: []llm.Content{
 566						{Type: llm.ContentTypeText, Text: "Thanks"},
 567					},
 568				},
 569			},
 570			wantLen:  1,
 571			wantText: "", // No synthetic result expected
 572		},
 573		{
 574			name: "missing tool result - should insert synthetic result",
 575			messages: []llm.Message{
 576				{
 577					Role: llm.MessageRoleAssistant,
 578					Content: []llm.Content{
 579						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
 580						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
 581					},
 582				},
 583				{
 584					Role: llm.MessageRoleUser,
 585					Content: []llm.Content{
 586						{Type: llm.ContentTypeText, Text: "Error occurred"},
 587					},
 588				},
 589			},
 590			wantLen:  2, // Should have synthetic tool_result + error message
 591			wantText: "not executed; retry possible",
 592		},
 593		{
 594			name: "multiple missing tool results",
 595			messages: []llm.Message{
 596				{
 597					Role: llm.MessageRoleAssistant,
 598					Content: []llm.Content{
 599						{Type: llm.ContentTypeText, Text: "I'll use multiple tools"},
 600						{Type: llm.ContentTypeToolUse, ID: "tool_1", ToolName: "bash"},
 601						{Type: llm.ContentTypeToolUse, ID: "tool_2", ToolName: "read"},
 602					},
 603				},
 604				{
 605					Role: llm.MessageRoleUser,
 606					Content: []llm.Content{
 607						{Type: llm.ContentTypeText, Text: "Error occurred"},
 608					},
 609				},
 610			},
 611			wantLen: 3, // Should have 2 synthetic tool_results + error message
 612		},
 613		{
 614			name: "has tool results - should not insert",
 615			messages: []llm.Message{
 616				{
 617					Role: llm.MessageRoleAssistant,
 618					Content: []llm.Content{
 619						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
 620						{Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
 621					},
 622				},
 623				{
 624					Role: llm.MessageRoleUser,
 625					Content: []llm.Content{
 626						{
 627							Type:       llm.ContentTypeToolResult,
 628							ToolUseID:  "tool_123",
 629							ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "result"}},
 630						},
 631					},
 632				},
 633			},
 634			wantLen: 1, // Should not insert anything
 635		},
 636	}
 637
 638	for _, tt := range tests {
 639		t.Run(tt.name, func(t *testing.T) {
 640			loop := NewLoop(Config{
 641				LLM:     NewPredictableService(),
 642				History: []llm.Message{},
 643			})
 644
 645			req := &llm.Request{
 646				Messages: tt.messages,
 647			}
 648
 649			loop.insertMissingToolResults(req)
 650
 651			got := req.Messages[len(req.Messages)-1]
 652			if len(got.Content) != tt.wantLen {
 653				t.Errorf("expected %d content items, got %d", tt.wantLen, len(got.Content))
 654			}
 655
 656			if tt.wantText != "" {
 657				// Find the synthetic tool result
 658				found := false
 659				for _, c := range got.Content {
 660					if c.Type == llm.ContentTypeToolResult && len(c.ToolResult) > 0 {
 661						if c.ToolResult[0].Text == tt.wantText {
 662							found = true
 663							if !c.ToolError {
 664								t.Error("synthetic tool result should have ToolError=true")
 665							}
 666							break
 667						}
 668					}
 669				}
 670				if !found {
 671					t.Errorf("expected to find synthetic tool result with text %q", tt.wantText)
 672				}
 673			}
 674		})
 675	}
 676}
 677
 678func TestInsertMissingToolResultsWithEdgeCases(t *testing.T) {
 679	// Test for the bug: when an assistant error message is recorded after a tool_use
 680	// but before tool execution, the tool_use is "hidden" from insertMissingToolResults
 681	// because it only checks the last two messages.
 682	t.Run("tool_use hidden by subsequent assistant message", func(t *testing.T) {
 683		loop := NewLoop(Config{
 684			LLM:     NewPredictableService(),
 685			History: []llm.Message{},
 686		})
 687
 688		// Scenario:
 689		// 1. LLM responds with tool_use
 690		// 2. Something fails, error message recorded (assistant message)
 691		// 3. User sends new message
 692		// The tool_use in message 0 is never followed by a tool_result
 693		req := &llm.Request{
 694			Messages: []llm.Message{
 695				{
 696					Role: llm.MessageRoleAssistant,
 697					Content: []llm.Content{
 698						{Type: llm.ContentTypeText, Text: "I'll run a command"},
 699						{Type: llm.ContentTypeToolUse, ID: "tool_hidden", ToolName: "bash"},
 700					},
 701				},
 702				{
 703					Role: llm.MessageRoleAssistant,
 704					Content: []llm.Content{
 705						{Type: llm.ContentTypeText, Text: "LLM request failed: some error"},
 706					},
 707				},
 708				{
 709					Role: llm.MessageRoleUser,
 710					Content: []llm.Content{
 711						{Type: llm.ContentTypeText, Text: "Please try again"},
 712					},
 713				},
 714			},
 715		}
 716
 717		loop.insertMissingToolResults(req)
 718
 719		// The function should have inserted a tool_result for tool_hidden
 720		// It should be inserted as a user message after the assistant message with tool_use
 721		// Since we can't insert in the middle, we need to ensure the history is valid
 722
 723		// Check that there's a tool_result for tool_hidden somewhere in the messages
 724		found := false
 725		for _, msg := range req.Messages {
 726			for _, c := range msg.Content {
 727				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_hidden" {
 728					found = true
 729					if !c.ToolError {
 730						t.Error("synthetic tool result should have ToolError=true")
 731					}
 732					break
 733				}
 734			}
 735		}
 736		if !found {
 737			t.Error("expected to find synthetic tool result for tool_hidden - the bug is that tool_use is hidden by subsequent assistant message")
 738		}
 739	})
 740
 741	// Test for tool_use in earlier message (not the second-to-last)
 742	t.Run("tool_use in earlier message without result", func(t *testing.T) {
 743		loop := NewLoop(Config{
 744			LLM:     NewPredictableService(),
 745			History: []llm.Message{},
 746		})
 747
 748		req := &llm.Request{
 749			Messages: []llm.Message{
 750				{
 751					Role: llm.MessageRoleUser,
 752					Content: []llm.Content{
 753						{Type: llm.ContentTypeText, Text: "Do something"},
 754					},
 755				},
 756				{
 757					Role: llm.MessageRoleAssistant,
 758					Content: []llm.Content{
 759						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
 760						{Type: llm.ContentTypeToolUse, ID: "tool_earlier", ToolName: "bash"},
 761					},
 762				},
 763				// Missing: user message with tool_result for tool_earlier
 764				{
 765					Role: llm.MessageRoleAssistant,
 766					Content: []llm.Content{
 767						{Type: llm.ContentTypeText, Text: "Something went wrong"},
 768					},
 769				},
 770				{
 771					Role: llm.MessageRoleUser,
 772					Content: []llm.Content{
 773						{Type: llm.ContentTypeText, Text: "Try again"},
 774					},
 775				},
 776			},
 777		}
 778
 779		loop.insertMissingToolResults(req)
 780
 781		// Should have inserted a tool_result for tool_earlier
 782		found := false
 783		for _, msg := range req.Messages {
 784			for _, c := range msg.Content {
 785				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_earlier" {
 786					found = true
 787					break
 788				}
 789			}
 790		}
 791		if !found {
 792			t.Error("expected to find synthetic tool result for tool_earlier")
 793		}
 794	})
 795
 796	t.Run("empty message list", func(t *testing.T) {
 797		loop := NewLoop(Config{
 798			LLM:     NewPredictableService(),
 799			History: []llm.Message{},
 800		})
 801
 802		req := &llm.Request{
 803			Messages: []llm.Message{},
 804		}
 805
 806		loop.insertMissingToolResults(req)
 807		// Should not panic
 808	})
 809
 810	t.Run("single message", func(t *testing.T) {
 811		loop := NewLoop(Config{
 812			LLM:     NewPredictableService(),
 813			History: []llm.Message{},
 814		})
 815
 816		req := &llm.Request{
 817			Messages: []llm.Message{
 818				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
 819			},
 820		}
 821
 822		loop.insertMissingToolResults(req)
 823		// Should not panic, should not modify
 824		if len(req.Messages[0].Content) != 1 {
 825			t.Error("should not modify single message")
 826		}
 827	})
 828
 829	t.Run("wrong role order - user then assistant", func(t *testing.T) {
 830		loop := NewLoop(Config{
 831			LLM:     NewPredictableService(),
 832			History: []llm.Message{},
 833		})
 834
 835		req := &llm.Request{
 836			Messages: []llm.Message{
 837				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
 838				{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}}},
 839			},
 840		}
 841
 842		loop.insertMissingToolResults(req)
 843		// Should not modify when roles are wrong order
 844		if len(req.Messages[1].Content) != 1 {
 845			t.Error("should not modify when roles are in wrong order")
 846		}
 847	})
 848}
 849
 850func TestInsertMissingToolResults_EmptyAssistantContent(t *testing.T) {
 851	// Test for the bug: when an assistant message has empty content (can happen when
 852	// the model ends its turn without producing any output), we need to add placeholder
 853	// content if it's not the last message. Otherwise the API will reject with:
 854	// "messages.N: all messages must have non-empty content except for the optional
 855	// final assistant message"
 856
 857	t.Run("empty assistant content in middle of conversation", func(t *testing.T) {
 858		loop := NewLoop(Config{
 859			LLM:     NewPredictableService(),
 860			History: []llm.Message{},
 861		})
 862
 863		req := &llm.Request{
 864			Messages: []llm.Message{
 865				{
 866					Role:    llm.MessageRoleUser,
 867					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "run git fetch"}},
 868				},
 869				{
 870					Role:    llm.MessageRoleAssistant,
 871					Content: []llm.Content{{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"}},
 872				},
 873				{
 874					Role: llm.MessageRoleUser,
 875					Content: []llm.Content{{
 876						Type:       llm.ContentTypeToolResult,
 877						ToolUseID:  "tool1",
 878						ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "success"}},
 879					}},
 880				},
 881				{
 882					// Empty assistant message - this can happen when model ends turn without output
 883					Role:      llm.MessageRoleAssistant,
 884					Content:   []llm.Content{},
 885					EndOfTurn: true,
 886				},
 887				{
 888					Role:    llm.MessageRoleUser,
 889					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "next question"}},
 890				},
 891			},
 892		}
 893
 894		loop.insertMissingToolResults(req)
 895
 896		// The empty assistant message (index 3) should now have placeholder content
 897		if len(req.Messages[3].Content) == 0 {
 898			t.Error("expected placeholder content to be added to empty assistant message")
 899		}
 900		if req.Messages[3].Content[0].Type != llm.ContentTypeText {
 901			t.Error("expected placeholder to be text content")
 902		}
 903		if req.Messages[3].Content[0].Text != "(no response)" {
 904			t.Errorf("expected placeholder text '(no response)', got %q", req.Messages[3].Content[0].Text)
 905		}
 906	})
 907
 908	t.Run("empty assistant content at end of conversation - no modification needed", func(t *testing.T) {
 909		loop := NewLoop(Config{
 910			LLM:     NewPredictableService(),
 911			History: []llm.Message{},
 912		})
 913
 914		req := &llm.Request{
 915			Messages: []llm.Message{
 916				{
 917					Role:    llm.MessageRoleUser,
 918					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
 919				},
 920				{
 921					// Empty assistant message at end is allowed by the API
 922					Role:      llm.MessageRoleAssistant,
 923					Content:   []llm.Content{},
 924					EndOfTurn: true,
 925				},
 926			},
 927		}
 928
 929		loop.insertMissingToolResults(req)
 930
 931		// The empty assistant message at the end should NOT be modified
 932		// because the API allows empty content for the final assistant message
 933		if len(req.Messages[1].Content) != 0 {
 934			t.Error("expected final empty assistant message to remain empty")
 935		}
 936	})
 937
 938	t.Run("non-empty assistant content - no modification needed", func(t *testing.T) {
 939		loop := NewLoop(Config{
 940			LLM:     NewPredictableService(),
 941			History: []llm.Message{},
 942		})
 943
 944		req := &llm.Request{
 945			Messages: []llm.Message{
 946				{
 947					Role:    llm.MessageRoleUser,
 948					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
 949				},
 950				{
 951					Role:    llm.MessageRoleAssistant,
 952					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi there"}},
 953				},
 954				{
 955					Role:    llm.MessageRoleUser,
 956					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "goodbye"}},
 957				},
 958			},
 959		}
 960
 961		loop.insertMissingToolResults(req)
 962
 963		// The assistant message should not be modified
 964		if len(req.Messages[1].Content) != 1 {
 965			t.Errorf("expected assistant message to have 1 content item, got %d", len(req.Messages[1].Content))
 966		}
 967		if req.Messages[1].Content[0].Text != "hi there" {
 968			t.Errorf("expected assistant message text 'hi there', got %q", req.Messages[1].Content[0].Text)
 969		}
 970	})
 971}
 972
 973func TestGitStateTracking(t *testing.T) {
 974	// Create a test repo
 975	tmpDir := t.TempDir()
 976
 977	// Initialize git repo
 978	runGit(t, tmpDir, "init")
 979	runGit(t, tmpDir, "config", "user.email", "test@test.com")
 980	runGit(t, tmpDir, "config", "user.name", "Test")
 981
 982	// Create initial commit
 983	testFile := filepath.Join(tmpDir, "test.txt")
 984	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
 985		t.Fatal(err)
 986	}
 987	runGit(t, tmpDir, "add", ".")
 988	runGit(t, tmpDir, "commit", "-m", "initial")
 989
 990	// Track git state changes
 991	var mu sync.Mutex
 992	var gitStateChanges []*gitstate.GitState
 993
 994	loop := NewLoop(Config{
 995		LLM:           NewPredictableService(),
 996		History:       []llm.Message{},
 997		WorkingDir:    tmpDir,
 998		GetWorkingDir: func() string { return tmpDir },
 999		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1000			mu.Lock()
1001			gitStateChanges = append(gitStateChanges, state)
1002			mu.Unlock()
1003		},
1004		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1005			return nil
1006		},
1007	})
1008
1009	// Verify initial state was captured
1010	if loop.lastGitState == nil {
1011		t.Fatal("expected initial git state to be captured")
1012	}
1013	if !loop.lastGitState.IsRepo {
1014		t.Error("expected IsRepo to be true")
1015	}
1016
1017	// Process a turn (no state change should occur)
1018	loop.QueueUserMessage(llm.Message{
1019		Role:    llm.MessageRoleUser,
1020		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1021	})
1022
1023	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1024	defer cancel()
1025
1026	err := loop.ProcessOneTurn(ctx)
1027	if err != nil {
1028		t.Fatalf("ProcessOneTurn failed: %v", err)
1029	}
1030
1031	// No state change should have occurred
1032	mu.Lock()
1033	numChanges := len(gitStateChanges)
1034	mu.Unlock()
1035	if numChanges != 0 {
1036		t.Errorf("expected no git state changes, got %d", numChanges)
1037	}
1038
1039	// Now make a commit
1040	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1041		t.Fatal(err)
1042	}
1043	runGit(t, tmpDir, "add", ".")
1044	runGit(t, tmpDir, "commit", "-m", "update")
1045
1046	// Process another turn - this should detect the commit change
1047	loop.QueueUserMessage(llm.Message{
1048		Role:    llm.MessageRoleUser,
1049		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello again"}},
1050	})
1051
1052	err = loop.ProcessOneTurn(ctx)
1053	if err != nil {
1054		t.Fatalf("ProcessOneTurn failed: %v", err)
1055	}
1056
1057	// Now a state change should have been detected
1058	mu.Lock()
1059	numChanges = len(gitStateChanges)
1060	mu.Unlock()
1061	if numChanges != 1 {
1062		t.Errorf("expected 1 git state change, got %d", numChanges)
1063	}
1064}
1065
1066func TestGitStateTrackingWorktree(t *testing.T) {
1067	tmpDir, err := filepath.EvalSymlinks(t.TempDir())
1068	if err != nil {
1069		t.Fatal(err)
1070	}
1071	mainRepo := filepath.Join(tmpDir, "main")
1072	worktreeDir := filepath.Join(tmpDir, "worktree")
1073
1074	// Create main repo
1075	if err := os.MkdirAll(mainRepo, 0o755); err != nil {
1076		t.Fatal(err)
1077	}
1078	runGit(t, mainRepo, "init")
1079	runGit(t, mainRepo, "config", "user.email", "test@test.com")
1080	runGit(t, mainRepo, "config", "user.name", "Test")
1081
1082	// Create initial commit
1083	testFile := filepath.Join(mainRepo, "test.txt")
1084	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1085		t.Fatal(err)
1086	}
1087	runGit(t, mainRepo, "add", ".")
1088	runGit(t, mainRepo, "commit", "-m", "initial")
1089
1090	// Create a worktree
1091	runGit(t, mainRepo, "worktree", "add", "-b", "feature", worktreeDir)
1092
1093	// Track git state changes in the worktree
1094	var mu sync.Mutex
1095	var gitStateChanges []*gitstate.GitState
1096
1097	loop := NewLoop(Config{
1098		LLM:           NewPredictableService(),
1099		History:       []llm.Message{},
1100		WorkingDir:    worktreeDir,
1101		GetWorkingDir: func() string { return worktreeDir },
1102		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1103			mu.Lock()
1104			gitStateChanges = append(gitStateChanges, state)
1105			mu.Unlock()
1106		},
1107		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1108			return nil
1109		},
1110	})
1111
1112	// Verify initial state
1113	if loop.lastGitState == nil {
1114		t.Fatal("expected initial git state to be captured")
1115	}
1116	if loop.lastGitState.Branch != "feature" {
1117		t.Errorf("expected branch 'feature', got %q", loop.lastGitState.Branch)
1118	}
1119	if loop.lastGitState.Worktree != worktreeDir {
1120		t.Errorf("expected worktree %q, got %q", worktreeDir, loop.lastGitState.Worktree)
1121	}
1122
1123	// Make a commit in the worktree
1124	worktreeFile := filepath.Join(worktreeDir, "feature.txt")
1125	if err := os.WriteFile(worktreeFile, []byte("feature content"), 0o644); err != nil {
1126		t.Fatal(err)
1127	}
1128	runGit(t, worktreeDir, "add", ".")
1129	runGit(t, worktreeDir, "commit", "-m", "feature commit")
1130
1131	// Process a turn to detect the change
1132	loop.QueueUserMessage(llm.Message{
1133		Role:    llm.MessageRoleUser,
1134		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1135	})
1136
1137	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1138	defer cancel()
1139
1140	err = loop.ProcessOneTurn(ctx)
1141	if err != nil {
1142		t.Fatalf("ProcessOneTurn failed: %v", err)
1143	}
1144
1145	mu.Lock()
1146	numChanges := len(gitStateChanges)
1147	mu.Unlock()
1148
1149	if numChanges != 1 {
1150		t.Errorf("expected 1 git state change in worktree, got %d", numChanges)
1151	}
1152}
1153
1154func runGit(t *testing.T, dir string, args ...string) {
1155	t.Helper()
1156	// For commits, use --no-verify to skip hooks
1157	if len(args) > 0 && args[0] == "commit" {
1158		newArgs := []string{"commit", "--no-verify"}
1159		newArgs = append(newArgs, args[1:]...)
1160		args = newArgs
1161	}
1162	cmd := exec.Command("git", args...)
1163	cmd.Dir = dir
1164	output, err := cmd.CombinedOutput()
1165	if err != nil {
1166		t.Fatalf("git %v failed: %v\n%s", args, err, output)
1167	}
1168}
1169
1170func TestPredictableServiceTokenContextWindow(t *testing.T) {
1171	service := NewPredictableService()
1172	window := service.TokenContextWindow()
1173	if window != 200000 {
1174		t.Errorf("expected TokenContextWindow to return 200000, got %d", window)
1175	}
1176}
1177
1178func TestPredictableServiceMaxImageDimension(t *testing.T) {
1179	service := NewPredictableService()
1180	dimension := service.MaxImageDimension()
1181	if dimension != 2000 {
1182		t.Errorf("expected MaxImageDimension to return 2000, got %d", dimension)
1183	}
1184}
1185
1186func TestPredictableServiceThinking(t *testing.T) {
1187	service := NewPredictableService()
1188
1189	ctx := context.Background()
1190	req := &llm.Request{
1191		Messages: []llm.Message{
1192			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "think: This is a test thought"}}},
1193		},
1194	}
1195
1196	resp, err := service.Do(ctx, req)
1197	if err != nil {
1198		t.Fatalf("thinking test failed: %v", err)
1199	}
1200
1201	// Now returns EndTurn since thinking is content, not a tool
1202	if resp.StopReason != llm.StopReasonEndTurn {
1203		t.Errorf("expected end turn stop reason, got %v", resp.StopReason)
1204	}
1205
1206	// Find the thinking content
1207	var thinkingContent *llm.Content
1208	for _, content := range resp.Content {
1209		if content.Type == llm.ContentTypeThinking {
1210			thinkingContent = &content
1211			break
1212		}
1213	}
1214
1215	if thinkingContent == nil {
1216		t.Fatal("no thinking content found")
1217	}
1218
1219	// Check thinking content contains the thoughts
1220	if thinkingContent.Thinking != "This is a test thought" {
1221		t.Errorf("expected thinking 'This is a test thought', got '%v'", thinkingContent.Thinking)
1222	}
1223}
1224
1225func TestPredictableServicePatchTool(t *testing.T) {
1226	service := NewPredictableService()
1227
1228	ctx := context.Background()
1229	req := &llm.Request{
1230		Messages: []llm.Message{
1231			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch: /tmp/test.txt"}}},
1232		},
1233	}
1234
1235	resp, err := service.Do(ctx, req)
1236	if err != nil {
1237		t.Fatalf("patch tool test failed: %v", err)
1238	}
1239
1240	if resp.StopReason != llm.StopReasonToolUse {
1241		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1242	}
1243
1244	// Find the tool use content
1245	var toolUseContent *llm.Content
1246	for _, content := range resp.Content {
1247		if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1248			toolUseContent = &content
1249			break
1250		}
1251	}
1252
1253	if toolUseContent == nil {
1254		t.Fatal("no patch tool use content found")
1255	}
1256
1257	// Check tool input contains the file path
1258	var toolInput map[string]interface{}
1259	if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1260		t.Fatalf("failed to parse tool input: %v", err)
1261	}
1262
1263	if toolInput["path"] != "/tmp/test.txt" {
1264		t.Errorf("expected path '/tmp/test.txt', got '%v'", toolInput["path"])
1265	}
1266}
1267
1268func TestPredictableServiceMalformedPatchTool(t *testing.T) {
1269	service := NewPredictableService()
1270
1271	ctx := context.Background()
1272	req := &llm.Request{
1273		Messages: []llm.Message{
1274			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch bad json"}}},
1275		},
1276	}
1277
1278	resp, err := service.Do(ctx, req)
1279	if err != nil {
1280		t.Fatalf("malformed patch tool test failed: %v", err)
1281	}
1282
1283	if resp.StopReason != llm.StopReasonToolUse {
1284		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1285	}
1286
1287	// Find the tool use content
1288	var toolUseContent *llm.Content
1289	for _, content := range resp.Content {
1290		if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1291			toolUseContent = &content
1292			break
1293		}
1294	}
1295
1296	if toolUseContent == nil {
1297		t.Fatal("no patch tool use content found")
1298	}
1299
1300	// Check that the tool input is malformed JSON (as expected)
1301	toolInputStr := string(toolUseContent.ToolInput)
1302	if !strings.Contains(toolInputStr, "parameter name") {
1303		t.Errorf("expected malformed JSON in tool input, got: %s", toolInputStr)
1304	}
1305}
1306
1307func TestPredictableServiceError(t *testing.T) {
1308	service := NewPredictableService()
1309
1310	ctx := context.Background()
1311	req := &llm.Request{
1312		Messages: []llm.Message{
1313			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "error: test error"}}},
1314		},
1315	}
1316
1317	resp, err := service.Do(ctx, req)
1318	if err == nil {
1319		t.Fatal("expected error, got nil")
1320	}
1321
1322	if !strings.Contains(err.Error(), "predictable error: test error") {
1323		t.Errorf("expected error message to contain 'predictable error: test error', got: %v", err)
1324	}
1325
1326	if resp != nil {
1327		t.Error("expected response to be nil when error occurs")
1328	}
1329}
1330
1331func TestPredictableServiceRequestTracking(t *testing.T) {
1332	service := NewPredictableService()
1333
1334	// Initially no requests
1335	requests := service.GetRecentRequests()
1336	if requests != nil {
1337		t.Errorf("expected nil requests initially, got %v", requests)
1338	}
1339
1340	lastReq := service.GetLastRequest()
1341	if lastReq != nil {
1342		t.Errorf("expected nil last request initially, got %v", lastReq)
1343	}
1344
1345	// Make a request
1346	ctx := context.Background()
1347	req := &llm.Request{
1348		Messages: []llm.Message{
1349			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
1350		},
1351	}
1352
1353	_, err := service.Do(ctx, req)
1354	if err != nil {
1355		t.Fatalf("Do failed: %v", err)
1356	}
1357
1358	// Check that request was tracked
1359	requests = service.GetRecentRequests()
1360	if len(requests) != 1 {
1361		t.Errorf("expected 1 request, got %d", len(requests))
1362	}
1363
1364	lastReq = service.GetLastRequest()
1365	if lastReq == nil {
1366		t.Fatal("expected last request to be non-nil")
1367	}
1368
1369	if len(lastReq.Messages) != 1 {
1370		t.Errorf("expected 1 message in last request, got %d", len(lastReq.Messages))
1371	}
1372
1373	// Test clearing requests
1374	service.ClearRequests()
1375	requests = service.GetRecentRequests()
1376	if requests != nil {
1377		t.Errorf("expected nil requests after clearing, got %v", requests)
1378	}
1379
1380	lastReq = service.GetLastRequest()
1381	if lastReq != nil {
1382		t.Errorf("expected nil last request after clearing, got %v", lastReq)
1383	}
1384
1385	// Test that only last 10 requests are kept
1386	for i := 0; i < 15; i++ {
1387		testReq := &llm.Request{
1388			Messages: []llm.Message{
1389				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: fmt.Sprintf("test %d", i)}}},
1390			},
1391		}
1392		_, err := service.Do(ctx, testReq)
1393		if err != nil {
1394			t.Fatalf("Do failed on iteration %d: %v", i, err)
1395		}
1396	}
1397
1398	requests = service.GetRecentRequests()
1399	if len(requests) != 10 {
1400		t.Errorf("expected 10 requests (last 10), got %d", len(requests))
1401	}
1402
1403	// Check that we have requests 5-14 (0-indexed)
1404	for i, req := range requests {
1405		expectedText := fmt.Sprintf("test %d", i+5)
1406		if len(req.Messages) == 0 || len(req.Messages[0].Content) == 0 {
1407			t.Errorf("request %d has no content", i)
1408			continue
1409		}
1410		if req.Messages[0].Content[0].Text != expectedText {
1411			t.Errorf("expected request %d to have text '%s', got '%s'", i, expectedText, req.Messages[0].Content[0].Text)
1412		}
1413	}
1414}
1415
1416func TestPredictableServiceScreenshotTool(t *testing.T) {
1417	service := NewPredictableService()
1418
1419	ctx := context.Background()
1420	req := &llm.Request{
1421		Messages: []llm.Message{
1422			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "screenshot: .test-class"}}},
1423		},
1424	}
1425
1426	resp, err := service.Do(ctx, req)
1427	if err != nil {
1428		t.Fatalf("screenshot tool test failed: %v", err)
1429	}
1430
1431	if resp.StopReason != llm.StopReasonToolUse {
1432		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1433	}
1434
1435	// Find the tool use content
1436	var toolUseContent *llm.Content
1437	for _, content := range resp.Content {
1438		if content.Type == llm.ContentTypeToolUse && content.ToolName == "browser_take_screenshot" {
1439			toolUseContent = &content
1440			break
1441		}
1442	}
1443
1444	if toolUseContent == nil {
1445		t.Fatal("no screenshot tool use content found")
1446	}
1447
1448	// Check tool input contains the selector
1449	var toolInput map[string]interface{}
1450	if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1451		t.Fatalf("failed to parse tool input: %v", err)
1452	}
1453
1454	if toolInput["selector"] != ".test-class" {
1455		t.Errorf("expected selector '.test-class', got '%v'", toolInput["selector"])
1456	}
1457}
1458
1459func TestPredictableServiceToolSmorgasbord(t *testing.T) {
1460	service := NewPredictableService()
1461
1462	ctx := context.Background()
1463	req := &llm.Request{
1464		Messages: []llm.Message{
1465			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "tool smorgasbord"}}},
1466		},
1467	}
1468
1469	resp, err := service.Do(ctx, req)
1470	if err != nil {
1471		t.Fatalf("tool smorgasbord test failed: %v", err)
1472	}
1473
1474	if resp.StopReason != llm.StopReasonToolUse {
1475		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1476	}
1477
1478	// Count the tool use contents
1479	toolUseCount := 0
1480	for _, content := range resp.Content {
1481		if content.Type == llm.ContentTypeToolUse {
1482			toolUseCount++
1483		}
1484	}
1485
1486	// Should have at least several tool uses
1487	if toolUseCount < 5 {
1488		t.Errorf("expected at least 5 tool uses, got %d", toolUseCount)
1489	}
1490}
1491
1492func TestProcessLLMRequestError(t *testing.T) {
1493	// Test error handling when LLM service returns an error
1494	errorService := &errorLLMService{err: fmt.Errorf("test LLM error")}
1495
1496	var recordedMessages []llm.Message
1497	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1498		recordedMessages = append(recordedMessages, message)
1499		return nil
1500	}
1501
1502	loop := NewLoop(Config{
1503		LLM:           errorService,
1504		History:       []llm.Message{},
1505		Tools:         []*llm.Tool{},
1506		RecordMessage: recordFunc,
1507	})
1508
1509	// Queue a user message
1510	userMessage := llm.Message{
1511		Role:    llm.MessageRoleUser,
1512		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1513	}
1514	loop.QueueUserMessage(userMessage)
1515
1516	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1517	defer cancel()
1518
1519	err := loop.ProcessOneTurn(ctx)
1520	if err == nil {
1521		t.Fatal("expected error from ProcessOneTurn, got nil")
1522	}
1523
1524	if !strings.Contains(err.Error(), "LLM request failed") {
1525		t.Errorf("expected error to contain 'LLM request failed', got: %v", err)
1526	}
1527
1528	// Check that error message was recorded
1529	if len(recordedMessages) < 1 {
1530		t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1531	}
1532
1533	if recordedMessages[0].Role != llm.MessageRoleAssistant {
1534		t.Errorf("expected recorded message to be assistant role, got %s", recordedMessages[0].Role)
1535	}
1536
1537	if len(recordedMessages[0].Content) != 1 {
1538		t.Fatalf("expected 1 content item in recorded message, got %d", len(recordedMessages[0].Content))
1539	}
1540
1541	if recordedMessages[0].Content[0].Type != llm.ContentTypeText {
1542		t.Errorf("expected text content, got %s", recordedMessages[0].Content[0].Type)
1543	}
1544
1545	if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1546		t.Errorf("expected error message to contain 'LLM request failed', got: %s", recordedMessages[0].Content[0].Text)
1547	}
1548
1549	// Verify EndOfTurn is set so the agent working state is properly updated
1550	if !recordedMessages[0].EndOfTurn {
1551		t.Error("expected error message to have EndOfTurn=true so agent working state is updated")
1552	}
1553}
1554
1555// errorLLMService is a test LLM service that always returns an error
1556type errorLLMService struct {
1557	err error
1558}
1559
1560func (e *errorLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1561	return nil, e.err
1562}
1563
1564func (e *errorLLMService) TokenContextWindow() int {
1565	return 200000
1566}
1567
1568func (e *errorLLMService) MaxImageDimension() int {
1569	return 2000
1570}
1571
1572// retryableLLMService fails with a retryable error a specified number of times, then succeeds
1573type retryableLLMService struct {
1574	failuresRemaining int
1575	callCount         int
1576	mu                sync.Mutex
1577}
1578
1579func (r *retryableLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
1580	r.mu.Lock()
1581	r.callCount++
1582	if r.failuresRemaining > 0 {
1583		r.failuresRemaining--
1584		r.mu.Unlock()
1585		return nil, fmt.Errorf("connection error: EOF")
1586	}
1587	r.mu.Unlock()
1588	return &llm.Response{
1589		Content: []llm.Content{
1590			{Type: llm.ContentTypeText, Text: "Success after retry"},
1591		},
1592		StopReason: llm.StopReasonEndTurn,
1593	}, nil
1594}
1595
1596func (r *retryableLLMService) TokenContextWindow() int {
1597	return 200000
1598}
1599
1600func (r *retryableLLMService) MaxImageDimension() int {
1601	return 2000
1602}
1603
1604func (r *retryableLLMService) getCallCount() int {
1605	r.mu.Lock()
1606	defer r.mu.Unlock()
1607	return r.callCount
1608}
1609
1610func TestLLMRequestRetryOnEOF(t *testing.T) {
1611	// Test that LLM requests are retried on EOF errors
1612	retryService := &retryableLLMService{failuresRemaining: 1}
1613
1614	var recordedMessages []llm.Message
1615	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1616		recordedMessages = append(recordedMessages, message)
1617		return nil
1618	}
1619
1620	loop := NewLoop(Config{
1621		LLM:           retryService,
1622		History:       []llm.Message{},
1623		Tools:         []*llm.Tool{},
1624		RecordMessage: recordFunc,
1625	})
1626
1627	// Queue a user message
1628	userMessage := llm.Message{
1629		Role:    llm.MessageRoleUser,
1630		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1631	}
1632	loop.QueueUserMessage(userMessage)
1633
1634	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1635	defer cancel()
1636
1637	err := loop.ProcessOneTurn(ctx)
1638	if err != nil {
1639		t.Fatalf("expected no error after retry, got: %v", err)
1640	}
1641
1642	// Should have been called twice (1 failure + 1 success)
1643	if retryService.getCallCount() != 2 {
1644		t.Errorf("expected 2 LLM calls (retry), got %d", retryService.getCallCount())
1645	}
1646
1647	// Check that success message was recorded
1648	if len(recordedMessages) != 1 {
1649		t.Fatalf("expected 1 recorded message (success), got %d", len(recordedMessages))
1650	}
1651
1652	if !strings.Contains(recordedMessages[0].Content[0].Text, "Success after retry") {
1653		t.Errorf("expected success message, got: %s", recordedMessages[0].Content[0].Text)
1654	}
1655}
1656
1657func TestLLMRequestRetryExhausted(t *testing.T) {
1658	// Test that after max retries, error is returned
1659	retryService := &retryableLLMService{failuresRemaining: 10} // More than maxRetries
1660
1661	var recordedMessages []llm.Message
1662	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1663		recordedMessages = append(recordedMessages, message)
1664		return nil
1665	}
1666
1667	loop := NewLoop(Config{
1668		LLM:           retryService,
1669		History:       []llm.Message{},
1670		Tools:         []*llm.Tool{},
1671		RecordMessage: recordFunc,
1672	})
1673
1674	userMessage := llm.Message{
1675		Role:    llm.MessageRoleUser,
1676		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
1677	}
1678	loop.QueueUserMessage(userMessage)
1679
1680	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1681	defer cancel()
1682
1683	err := loop.ProcessOneTurn(ctx)
1684	if err == nil {
1685		t.Fatal("expected error after exhausting retries")
1686	}
1687
1688	// Should have been called maxRetries times (2)
1689	if retryService.getCallCount() != 2 {
1690		t.Errorf("expected 2 LLM calls (maxRetries), got %d", retryService.getCallCount())
1691	}
1692
1693	// Check error message was recorded
1694	if len(recordedMessages) != 1 {
1695		t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
1696	}
1697
1698	if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
1699		t.Errorf("expected error message, got: %s", recordedMessages[0].Content[0].Text)
1700	}
1701}
1702
1703func TestIsRetryableError(t *testing.T) {
1704	tests := []struct {
1705		name      string
1706		err       error
1707		retryable bool
1708	}{
1709		{"nil error", nil, false},
1710		{"io.EOF", io.EOF, true},
1711		{"io.ErrUnexpectedEOF", io.ErrUnexpectedEOF, true},
1712		{"EOF error string", fmt.Errorf("EOF"), true},
1713		{"wrapped EOF", fmt.Errorf("connection error: EOF"), true},
1714		{"connection reset", fmt.Errorf("connection reset by peer"), true},
1715		{"connection refused", fmt.Errorf("connection refused"), true},
1716		{"timeout", fmt.Errorf("i/o timeout"), true},
1717		{"api error", fmt.Errorf("rate limit exceeded"), false},
1718		{"generic error", fmt.Errorf("something went wrong"), false},
1719	}
1720
1721	for _, tt := range tests {
1722		t.Run(tt.name, func(t *testing.T) {
1723			if got := isRetryableError(tt.err); got != tt.retryable {
1724				t.Errorf("isRetryableError(%v) = %v, want %v", tt.err, got, tt.retryable)
1725			}
1726		})
1727	}
1728}
1729
1730func TestCheckGitStateChange(t *testing.T) {
1731	// Create a test repo
1732	tmpDir := t.TempDir()
1733
1734	// Initialize git repo
1735	runGit(t, tmpDir, "init")
1736	runGit(t, tmpDir, "config", "user.email", "test@test.com")
1737	runGit(t, tmpDir, "config", "user.name", "Test")
1738
1739	// Create initial commit
1740	testFile := filepath.Join(tmpDir, "test.txt")
1741	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
1742		t.Fatal(err)
1743	}
1744	runGit(t, tmpDir, "add", ".")
1745	runGit(t, tmpDir, "commit", "-m", "initial")
1746
1747	// Test with nil OnGitStateChange - should not panic
1748	loop := NewLoop(Config{
1749		LLM:           NewPredictableService(),
1750		History:       []llm.Message{},
1751		WorkingDir:    tmpDir,
1752		GetWorkingDir: func() string { return tmpDir },
1753		// OnGitStateChange is nil
1754		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1755			return nil
1756		},
1757	})
1758
1759	// This should not panic
1760	loop.checkGitStateChange(context.Background())
1761
1762	// Test with actual callback
1763	var gitStateChanges []*gitstate.GitState
1764	loop = NewLoop(Config{
1765		LLM:           NewPredictableService(),
1766		History:       []llm.Message{},
1767		WorkingDir:    tmpDir,
1768		GetWorkingDir: func() string { return tmpDir },
1769		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
1770			gitStateChanges = append(gitStateChanges, state)
1771		},
1772		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1773			return nil
1774		},
1775	})
1776
1777	// Make a change
1778	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1779		t.Fatal(err)
1780	}
1781	runGit(t, tmpDir, "add", ".")
1782	runGit(t, tmpDir, "commit", "-m", "update")
1783
1784	// Check git state change
1785	loop.checkGitStateChange(context.Background())
1786
1787	if len(gitStateChanges) != 1 {
1788		t.Errorf("expected 1 git state change, got %d", len(gitStateChanges))
1789	}
1790
1791	// Call again - should not trigger another change since state is the same
1792	loop.checkGitStateChange(context.Background())
1793
1794	if len(gitStateChanges) != 1 {
1795		t.Errorf("expected still 1 git state change (no new changes), got %d", len(gitStateChanges))
1796	}
1797}
1798
1799func TestHandleToolCallsWithMissingTool(t *testing.T) {
1800	var recordedMessages []llm.Message
1801	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1802		recordedMessages = append(recordedMessages, message)
1803		return nil
1804	}
1805
1806	loop := NewLoop(Config{
1807		LLM:           NewPredictableService(),
1808		History:       []llm.Message{},
1809		Tools:         []*llm.Tool{}, // No tools registered
1810		RecordMessage: recordFunc,
1811	})
1812
1813	// Create content with a tool use for a tool that doesn't exist
1814	content := []llm.Content{
1815		{
1816			ID:        "test_tool_123",
1817			Type:      llm.ContentTypeToolUse,
1818			ToolName:  "nonexistent_tool",
1819			ToolInput: json.RawMessage(`{"test": "input"}`),
1820		},
1821	}
1822
1823	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1824	defer cancel()
1825
1826	err := loop.handleToolCalls(ctx, content)
1827	if err != nil {
1828		t.Fatalf("handleToolCalls failed: %v", err)
1829	}
1830
1831	// Should have recorded a user message with tool result
1832	if len(recordedMessages) < 1 {
1833		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1834	}
1835
1836	msg := recordedMessages[0]
1837	if msg.Role != llm.MessageRoleUser {
1838		t.Errorf("expected user role, got %s", msg.Role)
1839	}
1840
1841	if len(msg.Content) != 1 {
1842		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1843	}
1844
1845	toolResult := msg.Content[0]
1846	if toolResult.Type != llm.ContentTypeToolResult {
1847		t.Errorf("expected tool result content, got %s", toolResult.Type)
1848	}
1849
1850	if toolResult.ToolUseID != "test_tool_123" {
1851		t.Errorf("expected tool use ID 'test_tool_123', got %s", toolResult.ToolUseID)
1852	}
1853
1854	if !toolResult.ToolError {
1855		t.Error("expected ToolError to be true")
1856	}
1857
1858	if len(toolResult.ToolResult) != 1 {
1859		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1860	}
1861
1862	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1863		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1864	}
1865
1866	expectedText := "Tool 'nonexistent_tool' not found"
1867	if toolResult.ToolResult[0].Text != expectedText {
1868		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1869	}
1870}
1871
1872func TestHandleToolCallsWithErrorTool(t *testing.T) {
1873	var recordedMessages []llm.Message
1874	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1875		recordedMessages = append(recordedMessages, message)
1876		return nil
1877	}
1878
1879	// Create a tool that always returns an error
1880	errorTool := &llm.Tool{
1881		Name:        "error_tool",
1882		Description: "A tool that always errors",
1883		InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
1884		Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
1885			return llm.ErrorToolOut(fmt.Errorf("intentional test error"))
1886		},
1887	}
1888
1889	loop := NewLoop(Config{
1890		LLM:           NewPredictableService(),
1891		History:       []llm.Message{},
1892		Tools:         []*llm.Tool{errorTool},
1893		RecordMessage: recordFunc,
1894	})
1895
1896	// Create content with a tool use that will error
1897	content := []llm.Content{
1898		{
1899			ID:        "error_tool_123",
1900			Type:      llm.ContentTypeToolUse,
1901			ToolName:  "error_tool",
1902			ToolInput: json.RawMessage(`{}`),
1903		},
1904	}
1905
1906	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
1907	defer cancel()
1908
1909	err := loop.handleToolCalls(ctx, content)
1910	if err != nil {
1911		t.Fatalf("handleToolCalls failed: %v", err)
1912	}
1913
1914	// Should have recorded a user message with tool result
1915	if len(recordedMessages) < 1 {
1916		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
1917	}
1918
1919	msg := recordedMessages[0]
1920	if msg.Role != llm.MessageRoleUser {
1921		t.Errorf("expected user role, got %s", msg.Role)
1922	}
1923
1924	if len(msg.Content) != 1 {
1925		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
1926	}
1927
1928	toolResult := msg.Content[0]
1929	if toolResult.Type != llm.ContentTypeToolResult {
1930		t.Errorf("expected tool result content, got %s", toolResult.Type)
1931	}
1932
1933	if toolResult.ToolUseID != "error_tool_123" {
1934		t.Errorf("expected tool use ID 'error_tool_123', got %s", toolResult.ToolUseID)
1935	}
1936
1937	if !toolResult.ToolError {
1938		t.Error("expected ToolError to be true")
1939	}
1940
1941	if len(toolResult.ToolResult) != 1 {
1942		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
1943	}
1944
1945	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
1946		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
1947	}
1948
1949	expectedText := "intentional test error"
1950	if toolResult.ToolResult[0].Text != expectedText {
1951		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
1952	}
1953}
1954
1955func TestMaxTokensTruncation(t *testing.T) {
1956	var mu sync.Mutex
1957	var recordedMessages []llm.Message
1958	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1959		mu.Lock()
1960		recordedMessages = append(recordedMessages, message)
1961		mu.Unlock()
1962		return nil
1963	}
1964
1965	service := NewPredictableService()
1966	loop := NewLoop(Config{
1967		LLM:           service,
1968		History:       []llm.Message{},
1969		Tools:         []*llm.Tool{},
1970		RecordMessage: recordFunc,
1971	})
1972
1973	// Queue a user message that triggers max tokens truncation
1974	userMessage := llm.Message{
1975		Role:    llm.MessageRoleUser,
1976		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
1977	}
1978	loop.QueueUserMessage(userMessage)
1979
1980	// Run the loop - it should stop after handling truncation
1981	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
1982	defer cancel()
1983
1984	err := loop.Go(ctx)
1985	if err != context.DeadlineExceeded {
1986		t.Errorf("expected context deadline exceeded, got %v", err)
1987	}
1988
1989	// Check recorded messages
1990	mu.Lock()
1991	numMessages := len(recordedMessages)
1992	messages := make([]llm.Message, len(recordedMessages))
1993	copy(messages, recordedMessages)
1994	mu.Unlock()
1995
1996	// We should see two messages:
1997	// 1. The truncated message (with ExcludedFromContext=true) for cost tracking
1998	// 2. The truncation error message (with ErrorType=truncation)
1999	if numMessages != 2 {
2000		t.Errorf("Expected 2 recorded messages (truncated + error), got %d", numMessages)
2001		for i, msg := range messages {
2002			t.Logf("Message %d: Role=%v, EndOfTurn=%v, ExcludedFromContext=%v, ErrorType=%v",
2003				i, msg.Role, msg.EndOfTurn, msg.ExcludedFromContext, msg.ErrorType)
2004		}
2005		return
2006	}
2007
2008	// First message: truncated response (for cost tracking, excluded from context)
2009	truncatedMsg := messages[0]
2010	if truncatedMsg.Role != llm.MessageRoleAssistant {
2011		t.Errorf("Truncated message should be assistant, got %v", truncatedMsg.Role)
2012	}
2013	if !truncatedMsg.ExcludedFromContext {
2014		t.Error("Truncated message should have ExcludedFromContext=true")
2015	}
2016
2017	// Second message: truncation error
2018	errorMsg := messages[1]
2019	if errorMsg.Role != llm.MessageRoleAssistant {
2020		t.Errorf("Error message should be assistant, got %v", errorMsg.Role)
2021	}
2022	if !errorMsg.EndOfTurn {
2023		t.Error("Error message should have EndOfTurn=true")
2024	}
2025	if errorMsg.ErrorType != llm.ErrorTypeTruncation {
2026		t.Errorf("Error message should have ErrorType=truncation, got %v", errorMsg.ErrorType)
2027	}
2028	if errorMsg.ExcludedFromContext {
2029		t.Error("Error message should not be excluded from context")
2030	}
2031	if !strings.Contains(errorMsg.Content[0].Text, "SYSTEM ERROR") {
2032		t.Errorf("Error message should contain SYSTEM ERROR, got: %s", errorMsg.Content[0].Text)
2033	}
2034
2035	// Verify history contains user message + error message, but NOT the truncated response
2036	loop.mu.Lock()
2037	history := loop.history
2038	loop.mu.Unlock()
2039
2040	// History should have: user message + error message (the truncated response is NOT added to history)
2041	if len(history) != 2 {
2042		t.Errorf("History should have 2 messages (user + error), got %d", len(history))
2043	}
2044}
2045
2046//func TestInsertMissingToolResultsEdgeCases(t *testing.T) {
2047//	loop := NewLoop(Config{
2048//		LLM:     NewPredictableService(),
2049//		History: []llm.Message{},
2050//	})
2051//
2052//	// Test with nil request
2053//	loop.insertMissingToolResults(nil) // Should not panic
2054//
2055//	// Test with empty messages
2056//	req := &llm.Request{Messages: []llm.Message{}}
2057//	loop.insertMissingToolResults(req) // Should not panic
2058//
2059//	// Test with single message
2060//	req = &llm.Request{
2061//		Messages: []llm.Message{
2062//			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
2063//		},
2064//	}
2065//	loop.insertMissingToolResults(req) // Should not panic
2066//	if len(req.Messages) != 1 {
2067//		t.Errorf("expected 1 message, got %d", len(req.Messages))
2068//	}
2069//
2070//	// Test with multiple consecutive assistant messages with tool_use
2071//	req = &llm.Request{
2072//		Messages: []llm.Message{
2073//			{
2074//				Role: llm.MessageRoleAssistant,
2075//				Content: []llm.Content{
2076//					{Type: llm.ContentTypeText, Text: "First tool"},
2077//					{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"},
2078//				},
2079//			},
2080//			{
2081//				Role: llm.MessageRoleAssistant,
2082//				Content: []llm.Content{
2083//					{Type: llm.ContentTypeText, Text: "Second tool"},
2084//					{Type: llm.ContentTypeToolUse, ID: "tool2", ToolName: "read"},
2085//				},
2086//			},
2087//			{
2088//				Role: llm.MessageRoleUser,
2089//				Content: []llm.Content{
2090//					{Type: llm.ContentTypeText, Text: "User response"},
2091//				},
2092//			},
2093//		},
2094//	}
2095//
2096//	loop.insertMissingToolResults(req)
2097//
2098//	// Should have inserted synthetic tool results for both tool_uses
2099//	// The structure should be:
2100//	// 0: First assistant message
2101//	// 1: Synthetic user message with tool1 result
2102//	// 2: Second assistant message
2103//	// 3: Synthetic user message with tool2 result
2104//	// 4: Original user message
2105//	if len(req.Messages) != 5 {
2106//		t.Fatalf("expected 5 messages after processing, got %d", len(req.Messages))
2107//	}
2108//
2109//	// Check first synthetic message
2110//	if req.Messages[1].Role != llm.MessageRoleUser {
2111//		t.Errorf("expected message 1 to be user role, got %s", req.Messages[1].Role)
2112//	}
2113//	foundTool1 := false
2114//	for _, content := range req.Messages[1].Content {
2115//		if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool1" {
2116//			foundTool1 = true
2117//			break
2118//		}
2119//	}
2120//	if !foundTool1 {
2121//		t.Error("expected to find tool1 result in message 1")
2122//	}
2123//
2124//	// Check second synthetic message
2125//	if req.Messages[3].Role != llm.MessageRoleUser {
2126//		t.Errorf("expected message 3 to be user role, got %s", req.Messages[3].Role)
2127//	}
2128//	foundTool2 := false
2129//	for _, content := range req.Messages[3].Content {
2130//		if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool2" {
2131//			foundTool2 = true
2132//			break
2133//		}
2134//}
2135//	if !foundTool2 {
2136//		t.Error("expected to find tool2 result in message 3")
2137//	}
2138//}