1package loop
2
import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"shelley.exe.dev/claudetool"
	"shelley.exe.dev/gitstate"
	"shelley.exe.dev/llm"
)
19
20func TestNewLoop(t *testing.T) {
21 history := []llm.Message{
22 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
23 }
24 tools := []*llm.Tool{}
25 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
26 return nil
27 }
28
29 loop := NewLoop(Config{
30 LLM: NewPredictableService(),
31 History: history,
32 Tools: tools,
33 RecordMessage: recordFunc,
34 })
35 if loop == nil {
36 t.Fatal("NewLoop returned nil")
37 }
38
39 if len(loop.history) != 1 {
40 t.Errorf("expected history length 1, got %d", len(loop.history))
41 }
42
43 if len(loop.messageQueue) != 0 {
44 t.Errorf("expected empty message queue, got %d", len(loop.messageQueue))
45 }
46}
47
48func TestQueueUserMessage(t *testing.T) {
49 loop := NewLoop(Config{
50 LLM: NewPredictableService(),
51 History: []llm.Message{},
52 Tools: []*llm.Tool{},
53 })
54
55 message := llm.Message{
56 Role: llm.MessageRoleUser,
57 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Test message"}},
58 }
59
60 loop.QueueUserMessage(message)
61
62 loop.mu.Lock()
63 queueLen := len(loop.messageQueue)
64 loop.mu.Unlock()
65
66 if queueLen != 1 {
67 t.Errorf("expected message queue length 1, got %d", queueLen)
68 }
69}
70
71func TestPredictableService(t *testing.T) {
72 service := NewPredictableService()
73
74 // Test simple hello response
75 ctx := context.Background()
76 req := &llm.Request{
77 Messages: []llm.Message{
78 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
79 },
80 }
81
82 resp, err := service.Do(ctx, req)
83 if err != nil {
84 t.Fatalf("predictable service Do failed: %v", err)
85 }
86
87 if resp.Role != llm.MessageRoleAssistant {
88 t.Errorf("expected assistant role, got %v", resp.Role)
89 }
90
91 if len(resp.Content) == 0 {
92 t.Error("expected non-empty content")
93 }
94
95 if resp.Content[0].Type != llm.ContentTypeText {
96 t.Errorf("expected text content, got %v", resp.Content[0].Type)
97 }
98
99 if resp.Content[0].Text != "Well, hi there!" {
100 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
101 }
102}
103
104func TestPredictableServiceEcho(t *testing.T) {
105 service := NewPredictableService()
106
107 ctx := context.Background()
108 req := &llm.Request{
109 Messages: []llm.Message{
110 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "echo: foo"}}},
111 },
112 }
113
114 resp, err := service.Do(ctx, req)
115 if err != nil {
116 t.Fatalf("echo test failed: %v", err)
117 }
118
119 if resp.Content[0].Text != "foo" {
120 t.Errorf("expected 'foo', got '%s'", resp.Content[0].Text)
121 }
122
123 // Test another echo
124 req.Messages[0].Content[0].Text = "echo: hello world"
125 resp, err = service.Do(ctx, req)
126 if err != nil {
127 t.Fatalf("echo hello world test failed: %v", err)
128 }
129
130 if resp.Content[0].Text != "hello world" {
131 t.Errorf("expected 'hello world', got '%s'", resp.Content[0].Text)
132 }
133}
134
135func TestPredictableServiceBashTool(t *testing.T) {
136 service := NewPredictableService()
137
138 ctx := context.Background()
139 req := &llm.Request{
140 Messages: []llm.Message{
141 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: ls -la"}}},
142 },
143 }
144
145 resp, err := service.Do(ctx, req)
146 if err != nil {
147 t.Fatalf("bash tool test failed: %v", err)
148 }
149
150 if resp.StopReason != llm.StopReasonToolUse {
151 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
152 }
153
154 if len(resp.Content) != 2 {
155 t.Errorf("expected 2 content items (text + tool_use), got %d", len(resp.Content))
156 }
157
158 // Find the tool use content
159 var toolUseContent *llm.Content
160 for _, content := range resp.Content {
161 if content.Type == llm.ContentTypeToolUse {
162 toolUseContent = &content
163 break
164 }
165 }
166
167 if toolUseContent == nil {
168 t.Fatal("no tool use content found")
169 }
170
171 if toolUseContent.ToolName != "bash" {
172 t.Errorf("expected tool name 'bash', got '%s'", toolUseContent.ToolName)
173 }
174
175 // Check tool input contains the command
176 var toolInput map[string]interface{}
177 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
178 t.Fatalf("failed to parse tool input: %v", err)
179 }
180
181 if toolInput["command"] != "ls -la" {
182 t.Errorf("expected command 'ls -la', got '%v'", toolInput["command"])
183 }
184}
185
186func TestPredictableServiceDefaultResponse(t *testing.T) {
187 service := NewPredictableService()
188
189 ctx := context.Background()
190 req := &llm.Request{
191 Messages: []llm.Message{
192 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "some unknown input"}}},
193 },
194 }
195
196 resp, err := service.Do(ctx, req)
197 if err != nil {
198 t.Fatalf("default response test failed: %v", err)
199 }
200
201 if resp.Content[0].Text != "edit predictable.go to add a response for that one..." {
202 t.Errorf("unexpected default response: %s", resp.Content[0].Text)
203 }
204}
205
206func TestPredictableServiceDelay(t *testing.T) {
207 service := NewPredictableService()
208
209 ctx := context.Background()
210 req := &llm.Request{
211 Messages: []llm.Message{
212 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "delay: 0.1"}}},
213 },
214 }
215
216 start := time.Now()
217 resp, err := service.Do(ctx, req)
218 elapsed := time.Since(start)
219
220 if err != nil {
221 t.Fatalf("delay test failed: %v", err)
222 }
223
224 if elapsed < 100*time.Millisecond {
225 t.Errorf("expected delay of at least 100ms, got %v", elapsed)
226 }
227
228 if resp.Content[0].Text != "Delayed for 0.1 seconds" {
229 t.Errorf("unexpected response text: %s", resp.Content[0].Text)
230 }
231}
232
233func TestLoopWithPredictableService(t *testing.T) {
234 var recordedMessages []llm.Message
235 var recordedUsages []llm.Usage
236
237 recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
238 recordedMessages = append(recordedMessages, message)
239 recordedUsages = append(recordedUsages, usage)
240 return nil
241 }
242
243 service := NewPredictableService()
244 loop := NewLoop(Config{
245 LLM: service,
246 History: []llm.Message{},
247 Tools: []*llm.Tool{},
248 RecordMessage: recordFunc,
249 })
250
251 // Queue a user message that triggers a known response
252 userMessage := llm.Message{
253 Role: llm.MessageRoleUser,
254 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
255 }
256 loop.QueueUserMessage(userMessage)
257
258 // Run the loop with a short timeout
259 ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
260 defer cancel()
261
262 err := loop.Go(ctx)
263 if err != context.DeadlineExceeded {
264 t.Errorf("expected context deadline exceeded, got %v", err)
265 }
266
267 // Check that messages were recorded
268 if len(recordedMessages) < 1 {
269 t.Errorf("expected at least 1 recorded message, got %d", len(recordedMessages))
270 }
271
272 // Check usage tracking
273 usage := loop.GetUsage()
274 if usage.IsZero() {
275 t.Error("expected non-zero usage")
276 }
277}
278
279func TestLoopWithTools(t *testing.T) {
280 var toolCalls []string
281
282 testTool := &llm.Tool{
283 Name: "bash",
284 Description: "A test bash tool",
285 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"command": {"type": "string"}}}`),
286 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
287 toolCalls = append(toolCalls, string(input))
288 return llm.ToolOut{
289 LLMContent: []llm.Content{
290 {Type: llm.ContentTypeText, Text: "Command executed successfully"},
291 },
292 }
293 },
294 }
295
296 service := NewPredictableService()
297 loop := NewLoop(Config{
298 LLM: service,
299 History: []llm.Message{},
300 Tools: []*llm.Tool{testTool},
301 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
302 return nil
303 },
304 })
305
306 // Queue a user message that triggers the bash tool
307 userMessage := llm.Message{
308 Role: llm.MessageRoleUser,
309 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}},
310 }
311 loop.QueueUserMessage(userMessage)
312
313 // Run the loop with a short timeout
314 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
315 defer cancel()
316
317 err := loop.Go(ctx)
318 if err != context.DeadlineExceeded {
319 t.Errorf("expected context deadline exceeded, got %v", err)
320 }
321
322 // Check that the tool was called
323 if len(toolCalls) != 1 {
324 t.Errorf("expected 1 tool call, got %d", len(toolCalls))
325 }
326
327 if toolCalls[0] != `{"command":"echo hello"}` {
328 t.Errorf("unexpected tool call input: %s", toolCalls[0])
329 }
330}
331
332func TestGetHistory(t *testing.T) {
333 initialHistory := []llm.Message{
334 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
335 }
336
337 loop := NewLoop(Config{
338 LLM: NewPredictableService(),
339 History: initialHistory,
340 Tools: []*llm.Tool{},
341 })
342
343 history := loop.GetHistory()
344 if len(history) != 1 {
345 t.Errorf("expected history length 1, got %d", len(history))
346 }
347
348 // Modify returned slice to ensure it's a copy
349 history[0].Content[0].Text = "Modified"
350
351 // Original should be unchanged
352 original := loop.GetHistory()
353 if original[0].Content[0].Text != "Hello" {
354 t.Error("GetHistory should return a copy, not the original slice")
355 }
356}
357
358func TestLoopWithKeywordTool(t *testing.T) {
359 // Test that keyword tool doesn't crash with nil pointer dereference
360 service := NewPredictableService()
361
362 var messages []llm.Message
363 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
364 messages = append(messages, message)
365 return nil
366 }
367
368 // Add a mock keyword tool that doesn't actually search
369 tools := []*llm.Tool{
370 {
371 Name: "keyword_search",
372 Description: "Mock keyword search",
373 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
374 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
375 // Simple mock implementation
376 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
377 },
378 },
379 }
380
381 loop := NewLoop(Config{
382 LLM: service,
383 History: []llm.Message{},
384 Tools: tools,
385 RecordMessage: recordMessage,
386 })
387
388 // Send a user message that will trigger the default response
389 userMessage := llm.Message{
390 Role: llm.MessageRoleUser,
391 Content: []llm.Content{
392 {Type: llm.ContentTypeText, Text: "Please search for some files"},
393 },
394 }
395
396 loop.QueueUserMessage(userMessage)
397
398 // Process one turn
399 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
400 defer cancel()
401
402 err := loop.ProcessOneTurn(ctx)
403 if err != nil {
404 t.Fatalf("ProcessOneTurn failed: %v", err)
405 }
406
407 // Verify we got expected messages
408 // Note: User messages are recorded by ConversationManager, not by Loop,
409 // so we only expect the assistant response to be recorded here
410 if len(messages) < 1 {
411 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
412 }
413
414 // Should have assistant response
415 if messages[0].Role != llm.MessageRoleAssistant {
416 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
417 }
418}
419
420func TestLoopWithActualKeywordTool(t *testing.T) {
421 // Test that actual keyword tool works with Loop
422 service := NewPredictableService()
423
424 var messages []llm.Message
425 recordMessage := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
426 messages = append(messages, message)
427 return nil
428 }
429
430 // Use the actual keyword tool from claudetool package
431 // Note: We need to import it first
432 tools := []*llm.Tool{
433 // Add a simplified keyword tool to avoid file system dependencies in tests
434 {
435 Name: "keyword_search",
436 Description: "Search for files by keyword",
437 InputSchema: llm.MustSchema(`{"type": "object", "properties": {"query": {"type": "string"}, "search_terms": {"type": "array", "items": {"type": "string"}}}, "required": ["query", "search_terms"]}`),
438 Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
439 // Simple mock implementation - no context dependencies
440 return llm.ToolOut{LLMContent: []llm.Content{{Type: llm.ContentTypeText, Text: "mock keyword search result"}}}
441 },
442 },
443 }
444
445 loop := NewLoop(Config{
446 LLM: service,
447 History: []llm.Message{},
448 Tools: tools,
449 RecordMessage: recordMessage,
450 })
451
452 // Send a user message that will trigger the default response
453 userMessage := llm.Message{
454 Role: llm.MessageRoleUser,
455 Content: []llm.Content{
456 {Type: llm.ContentTypeText, Text: "Please search for some files"},
457 },
458 }
459
460 loop.QueueUserMessage(userMessage)
461
462 // Process one turn
463 ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
464 defer cancel()
465
466 err := loop.ProcessOneTurn(ctx)
467 if err != nil {
468 t.Fatalf("ProcessOneTurn failed: %v", err)
469 }
470
471 // Verify we got expected messages
472 // Note: User messages are recorded by ConversationManager, not by Loop,
473 // so we only expect the assistant response to be recorded here
474 if len(messages) < 1 {
475 t.Fatalf("Expected at least 1 message (assistant), got %d", len(messages))
476 }
477
478 // Should have assistant response
479 if messages[0].Role != llm.MessageRoleAssistant {
480 t.Errorf("Expected first recorded message to be assistant, got %s", messages[0].Role)
481 }
482
483 t.Log("Keyword tool test passed - no nil pointer dereference occurred")
484}
485
486func TestKeywordToolWithLLMProvider(t *testing.T) {
487 // Create a temp directory with a test file to search
488 tempDir := t.TempDir()
489 testFile := filepath.Join(tempDir, "test.txt")
490 if err := os.WriteFile(testFile, []byte("this is a test file\n"), 0o644); err != nil {
491 t.Fatal(err)
492 }
493
494 // Create a predictable service for testing
495 predictableService := NewPredictableService()
496
497 // Create a simple LLM provider for testing
498 llmProvider := &testLLMProvider{
499 service: predictableService,
500 models: []string{"predictable"},
501 }
502
503 // Create keyword tool with provider - use temp dir instead of /
504 keywordTool := claudetool.NewKeywordToolWithWorkingDir(llmProvider, claudetool.NewMutableWorkingDir(tempDir))
505 tool := keywordTool.Tool()
506
507 // Test input
508 input := `{"query": "test search", "search_terms": ["test"]}`
509
510 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
511 defer cancel()
512 result := tool.Run(ctx, json.RawMessage(input))
513
514 // Should get a result without error (even though ripgrep will fail in test environment)
515 // The important thing is that it doesn't crash with nil pointer dereference
516 if result.Error != nil {
517 t.Logf("Expected error in test environment (no ripgrep): %v", result.Error)
518 // This is expected in test environment
519 } else {
520 t.Log("Keyword tool executed successfully")
521 if len(result.LLMContent) == 0 {
522 t.Error("Expected some content in result")
523 }
524 }
525}
526
// testLLMProvider implements LLMServiceProvider for testing.
// It maps every model ID in models to the single wrapped service.
type testLLMProvider struct {
	service llm.Service // service handed back for any recognized model ID
	models  []string    // model IDs this provider claims to support
}
532
533func (t *testLLMProvider) GetService(modelID string) (llm.Service, error) {
534 for _, model := range t.models {
535 if model == modelID {
536 return t.service, nil
537 }
538 }
539 return nil, fmt.Errorf("model %s not available", modelID)
540}
541
542func (t *testLLMProvider) GetAvailableModels() []string {
543 return t.models
544}
545
546func TestInsertMissingToolResults(t *testing.T) {
547 tests := []struct {
548 name string
549 messages []llm.Message
550 wantLen int
551 wantText string
552 }{
553 {
554 name: "no missing tool results",
555 messages: []llm.Message{
556 {
557 Role: llm.MessageRoleAssistant,
558 Content: []llm.Content{
559 {Type: llm.ContentTypeText, Text: "Let me help you"},
560 },
561 },
562 {
563 Role: llm.MessageRoleUser,
564 Content: []llm.Content{
565 {Type: llm.ContentTypeText, Text: "Thanks"},
566 },
567 },
568 },
569 wantLen: 1,
570 wantText: "", // No synthetic result expected
571 },
572 {
573 name: "missing tool result - should insert synthetic result",
574 messages: []llm.Message{
575 {
576 Role: llm.MessageRoleAssistant,
577 Content: []llm.Content{
578 {Type: llm.ContentTypeText, Text: "I'll use a tool"},
579 {Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
580 },
581 },
582 {
583 Role: llm.MessageRoleUser,
584 Content: []llm.Content{
585 {Type: llm.ContentTypeText, Text: "Error occurred"},
586 },
587 },
588 },
589 wantLen: 2, // Should have synthetic tool_result + error message
590 wantText: "not executed; retry possible",
591 },
592 {
593 name: "multiple missing tool results",
594 messages: []llm.Message{
595 {
596 Role: llm.MessageRoleAssistant,
597 Content: []llm.Content{
598 {Type: llm.ContentTypeText, Text: "I'll use multiple tools"},
599 {Type: llm.ContentTypeToolUse, ID: "tool_1", ToolName: "bash"},
600 {Type: llm.ContentTypeToolUse, ID: "tool_2", ToolName: "read"},
601 },
602 },
603 {
604 Role: llm.MessageRoleUser,
605 Content: []llm.Content{
606 {Type: llm.ContentTypeText, Text: "Error occurred"},
607 },
608 },
609 },
610 wantLen: 3, // Should have 2 synthetic tool_results + error message
611 },
612 {
613 name: "has tool results - should not insert",
614 messages: []llm.Message{
615 {
616 Role: llm.MessageRoleAssistant,
617 Content: []llm.Content{
618 {Type: llm.ContentTypeText, Text: "I'll use a tool"},
619 {Type: llm.ContentTypeToolUse, ID: "tool_123", ToolName: "bash"},
620 },
621 },
622 {
623 Role: llm.MessageRoleUser,
624 Content: []llm.Content{
625 {
626 Type: llm.ContentTypeToolResult,
627 ToolUseID: "tool_123",
628 ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "result"}},
629 },
630 },
631 },
632 },
633 wantLen: 1, // Should not insert anything
634 },
635 }
636
637 for _, tt := range tests {
638 t.Run(tt.name, func(t *testing.T) {
639 loop := NewLoop(Config{
640 LLM: NewPredictableService(),
641 History: []llm.Message{},
642 })
643
644 req := &llm.Request{
645 Messages: tt.messages,
646 }
647
648 loop.insertMissingToolResults(req)
649
650 got := req.Messages[len(req.Messages)-1]
651 if len(got.Content) != tt.wantLen {
652 t.Errorf("expected %d content items, got %d", tt.wantLen, len(got.Content))
653 }
654
655 if tt.wantText != "" {
656 // Find the synthetic tool result
657 found := false
658 for _, c := range got.Content {
659 if c.Type == llm.ContentTypeToolResult && len(c.ToolResult) > 0 {
660 if c.ToolResult[0].Text == tt.wantText {
661 found = true
662 if !c.ToolError {
663 t.Error("synthetic tool result should have ToolError=true")
664 }
665 break
666 }
667 }
668 }
669 if !found {
670 t.Errorf("expected to find synthetic tool result with text %q", tt.wantText)
671 }
672 }
673 })
674 }
675}
676
// TestInsertMissingToolResultsWithEdgeCases exercises insertMissingToolResults
// on histories the table test above does not cover: a tool_use buried earlier
// in the conversation (not in the second-to-last message), empty input, a
// single message, and role orderings that must be left untouched.
func TestInsertMissingToolResultsWithEdgeCases(t *testing.T) {
	// Test for the bug: when an assistant error message is recorded after a tool_use
	// but before tool execution, the tool_use is "hidden" from insertMissingToolResults
	// because it only checks the last two messages.
	t.Run("tool_use hidden by subsequent assistant message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		// Scenario:
		// 1. LLM responds with tool_use
		// 2. Something fails, error message recorded (assistant message)
		// 3. User sends new message
		// The tool_use in message 0 is never followed by a tool_result
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll run a command"},
						{Type: llm.ContentTypeToolUse, ID: "tool_hidden", ToolName: "bash"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "LLM request failed: some error"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Please try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The function should have inserted a tool_result for tool_hidden
		// It should be inserted as a user message after the assistant message with tool_use
		// Since we can't insert in the middle, we need to ensure the history is valid

		// Check that there's a tool_result for tool_hidden somewhere in the messages
		// NOTE(review): the inner break only exits the content loop; the outer
		// loop keeps scanning remaining messages, which is harmless here.
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_hidden" {
					found = true
					if !c.ToolError {
						t.Error("synthetic tool result should have ToolError=true")
					}
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_hidden - the bug is that tool_use is hidden by subsequent assistant message")
		}
	})

	// Test for tool_use in earlier message (not the second-to-last)
	t.Run("tool_use in earlier message without result", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Do something"},
					},
				},
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "I'll use a tool"},
						{Type: llm.ContentTypeToolUse, ID: "tool_earlier", ToolName: "bash"},
					},
				},
				// Missing: user message with tool_result for tool_earlier
				{
					Role: llm.MessageRoleAssistant,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Something went wrong"},
					},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Try again"},
					},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// Should have inserted a tool_result for tool_earlier
		found := false
		for _, msg := range req.Messages {
			for _, c := range msg.Content {
				if c.Type == llm.ContentTypeToolResult && c.ToolUseID == "tool_earlier" {
					found = true
					break
				}
			}
		}
		if !found {
			t.Error("expected to find synthetic tool result for tool_earlier")
		}
	})

	// Degenerate input: no messages at all — the function must not panic.
	t.Run("empty message list", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{},
		}

		loop.insertMissingToolResults(req)
		// Should not panic
	})

	// Degenerate input: a single user message — nothing to pair, no changes.
	t.Run("single message", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not panic, should not modify
		if len(req.Messages[0].Content) != 1 {
			t.Error("should not modify single message")
		}
	})

	// A user→assistant tail has no tool_use needing a result; must be a no-op.
	t.Run("wrong role order - user then assistant", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
				{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi"}}},
			},
		}

		loop.insertMissingToolResults(req)
		// Should not modify when roles are wrong order
		if len(req.Messages[1].Content) != 1 {
			t.Error("should not modify when roles are in wrong order")
		}
	})
}
848
// TestInsertMissingToolResults_EmptyAssistantContent verifies the handling of
// assistant messages with empty content: placeholder text is added when the
// empty message sits in the middle of the conversation, while a trailing
// empty assistant message (which the API permits) is left alone.
func TestInsertMissingToolResults_EmptyAssistantContent(t *testing.T) {
	// Test for the bug: when an assistant message has empty content (can happen when
	// the model ends its turn without producing any output), we need to add placeholder
	// content if it's not the last message. Otherwise the API will reject with:
	// "messages.N: all messages must have non-empty content except for the optional
	// final assistant message"

	t.Run("empty assistant content in middle of conversation", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "run git fetch"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"}},
				},
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{{
						Type:       llm.ContentTypeToolResult,
						ToolUseID:  "tool1",
						ToolResult: []llm.Content{{Type: llm.ContentTypeText, Text: "success"}},
					}},
				},
				{
					// Empty assistant message - this can happen when model ends turn without output
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "next question"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message (index 3) should now have placeholder content
		if len(req.Messages[3].Content) == 0 {
			t.Error("expected placeholder content to be added to empty assistant message")
		}
		if req.Messages[3].Content[0].Type != llm.ContentTypeText {
			t.Error("expected placeholder to be text content")
		}
		if req.Messages[3].Content[0].Text != "(no response)" {
			t.Errorf("expected placeholder text '(no response)', got %q", req.Messages[3].Content[0].Text)
		}
	})

	t.Run("empty assistant content at end of conversation - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					// Empty assistant message at end is allowed by the API
					Role:      llm.MessageRoleAssistant,
					Content:   []llm.Content{},
					EndOfTurn: true,
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The empty assistant message at the end should NOT be modified
		// because the API allows empty content for the final assistant message
		if len(req.Messages[1].Content) != 0 {
			t.Error("expected final empty assistant message to remain empty")
		}
	})

	t.Run("non-empty assistant content - no modification needed", func(t *testing.T) {
		loop := NewLoop(Config{
			LLM:     NewPredictableService(),
			History: []llm.Message{},
		})

		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
				},
				{
					Role:    llm.MessageRoleAssistant,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hi there"}},
				},
				{
					Role:    llm.MessageRoleUser,
					Content: []llm.Content{{Type: llm.ContentTypeText, Text: "goodbye"}},
				},
			},
		}

		loop.insertMissingToolResults(req)

		// The assistant message should not be modified
		if len(req.Messages[1].Content) != 1 {
			t.Errorf("expected assistant message to have 1 content item, got %d", len(req.Messages[1].Content))
		}
		if req.Messages[1].Content[0].Text != "hi there" {
			t.Errorf("expected assistant message text 'hi there', got %q", req.Messages[1].Content[0].Text)
		}
	})
}
971
972func TestGitStateTracking(t *testing.T) {
973 // Create a test repo
974 tmpDir := t.TempDir()
975
976 // Initialize git repo
977 runGit(t, tmpDir, "init")
978 runGit(t, tmpDir, "config", "user.email", "test@test.com")
979 runGit(t, tmpDir, "config", "user.name", "Test")
980
981 // Create initial commit
982 testFile := filepath.Join(tmpDir, "test.txt")
983 if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
984 t.Fatal(err)
985 }
986 runGit(t, tmpDir, "add", ".")
987 runGit(t, tmpDir, "commit", "-m", "initial")
988
989 // Track git state changes
990 var mu sync.Mutex
991 var gitStateChanges []*gitstate.GitState
992
993 loop := NewLoop(Config{
994 LLM: NewPredictableService(),
995 History: []llm.Message{},
996 WorkingDir: tmpDir,
997 GetWorkingDir: func() string { return tmpDir },
998 OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
999 mu.Lock()
1000 gitStateChanges = append(gitStateChanges, state)
1001 mu.Unlock()
1002 },
1003 RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
1004 return nil
1005 },
1006 })
1007
1008 // Verify initial state was captured
1009 if loop.lastGitState == nil {
1010 t.Fatal("expected initial git state to be captured")
1011 }
1012 if !loop.lastGitState.IsRepo {
1013 t.Error("expected IsRepo to be true")
1014 }
1015
1016 // Process a turn (no state change should occur)
1017 loop.QueueUserMessage(llm.Message{
1018 Role: llm.MessageRoleUser,
1019 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
1020 })
1021
1022 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
1023 defer cancel()
1024
1025 err := loop.ProcessOneTurn(ctx)
1026 if err != nil {
1027 t.Fatalf("ProcessOneTurn failed: %v", err)
1028 }
1029
1030 // No state change should have occurred
1031 mu.Lock()
1032 numChanges := len(gitStateChanges)
1033 mu.Unlock()
1034 if numChanges != 0 {
1035 t.Errorf("expected no git state changes, got %d", numChanges)
1036 }
1037
1038 // Now make a commit
1039 if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
1040 t.Fatal(err)
1041 }
1042 runGit(t, tmpDir, "add", ".")
1043 runGit(t, tmpDir, "commit", "-m", "update")
1044
1045 // Process another turn - this should detect the commit change
1046 loop.QueueUserMessage(llm.Message{
1047 Role: llm.MessageRoleUser,
1048 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello again"}},
1049 })
1050
1051 err = loop.ProcessOneTurn(ctx)
1052 if err != nil {
1053 t.Fatalf("ProcessOneTurn failed: %v", err)
1054 }
1055
1056 // Now a state change should have been detected
1057 mu.Lock()
1058 numChanges = len(gitStateChanges)
1059 mu.Unlock()
1060 if numChanges != 1 {
1061 t.Errorf("expected 1 git state change, got %d", numChanges)
1062 }
1063}
1064
// TestGitStateTrackingWorktree verifies that the loop captures the git state
// of a linked worktree (not the main repository) at construction time, and
// that a commit made in the worktree is reported via OnGitStateChange on the
// next processed turn.
func TestGitStateTrackingWorktree(t *testing.T) {
	// Resolve symlinks up front: on some platforms t.TempDir() returns a
	// symlinked path, and the Worktree equality check below compares the
	// resolved path git reports against this one.
	tmpDir, err := filepath.EvalSymlinks(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	mainRepo := filepath.Join(tmpDir, "main")
	worktreeDir := filepath.Join(tmpDir, "worktree")

	// Create main repo
	if err := os.MkdirAll(mainRepo, 0o755); err != nil {
		t.Fatal(err)
	}
	runGit(t, mainRepo, "init")
	runGit(t, mainRepo, "config", "user.email", "test@test.com")
	runGit(t, mainRepo, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(mainRepo, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, mainRepo, "add", ".")
	runGit(t, mainRepo, "commit", "-m", "initial")

	// Create a worktree on a fresh "feature" branch so it has its own HEAD.
	runGit(t, mainRepo, "worktree", "add", "-b", "feature", worktreeDir)

	// Track git state changes in the worktree. The mutex guards the slice in
	// case the callback fires from another goroutine.
	var mu sync.Mutex
	var gitStateChanges []*gitstate.GitState

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    worktreeDir,
		GetWorkingDir: func() string { return worktreeDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			mu.Lock()
			gitStateChanges = append(gitStateChanges, state)
			mu.Unlock()
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Verify initial state: NewLoop should have snapshotted the worktree's
	// branch and path immediately.
	if loop.lastGitState == nil {
		t.Fatal("expected initial git state to be captured")
	}
	if loop.lastGitState.Branch != "feature" {
		t.Errorf("expected branch 'feature', got %q", loop.lastGitState.Branch)
	}
	if loop.lastGitState.Worktree != worktreeDir {
		t.Errorf("expected worktree %q, got %q", worktreeDir, loop.lastGitState.Worktree)
	}

	// Make a commit in the worktree
	worktreeFile := filepath.Join(worktreeDir, "feature.txt")
	if err := os.WriteFile(worktreeFile, []byte("feature content"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, worktreeDir, "add", ".")
	runGit(t, worktreeDir, "commit", "-m", "feature commit")

	// Process a turn to detect the change
	loop.QueueUserMessage(llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
	})

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err = loop.ProcessOneTurn(ctx)
	if err != nil {
		t.Fatalf("ProcessOneTurn failed: %v", err)
	}

	mu.Lock()
	numChanges := len(gitStateChanges)
	mu.Unlock()

	if numChanges != 1 {
		t.Errorf("expected 1 git state change in worktree, got %d", numChanges)
	}
}
1152
// runGit runs a git command in dir, failing the test on any error.
// Commit invocations get --no-verify injected so repository hooks cannot
// interfere with the test run.
func runGit(t *testing.T, dir string, args ...string) {
	t.Helper()
	if len(args) > 0 && args[0] == "commit" {
		args = append([]string{"commit", "--no-verify"}, args[1:]...)
	}
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	if output, err := cmd.CombinedOutput(); err != nil {
		t.Fatalf("git %v failed: %v\n%s", args, err, output)
	}
}
1168
1169func TestPredictableServiceTokenContextWindow(t *testing.T) {
1170 service := NewPredictableService()
1171 window := service.TokenContextWindow()
1172 if window != 200000 {
1173 t.Errorf("expected TokenContextWindow to return 200000, got %d", window)
1174 }
1175}
1176
1177func TestPredictableServiceMaxImageDimension(t *testing.T) {
1178 service := NewPredictableService()
1179 dimension := service.MaxImageDimension()
1180 if dimension != 2000 {
1181 t.Errorf("expected MaxImageDimension to return 2000, got %d", dimension)
1182 }
1183}
1184
1185func TestPredictableServiceThinkTool(t *testing.T) {
1186 service := NewPredictableService()
1187
1188 ctx := context.Background()
1189 req := &llm.Request{
1190 Messages: []llm.Message{
1191 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "think: This is a test thought"}}},
1192 },
1193 }
1194
1195 resp, err := service.Do(ctx, req)
1196 if err != nil {
1197 t.Fatalf("think tool test failed: %v", err)
1198 }
1199
1200 if resp.StopReason != llm.StopReasonToolUse {
1201 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1202 }
1203
1204 // Find the tool use content
1205 var toolUseContent *llm.Content
1206 for _, content := range resp.Content {
1207 if content.Type == llm.ContentTypeToolUse && content.ToolName == "think" {
1208 toolUseContent = &content
1209 break
1210 }
1211 }
1212
1213 if toolUseContent == nil {
1214 t.Fatal("no think tool use content found")
1215 }
1216
1217 // Check tool input contains the thoughts
1218 var toolInput map[string]interface{}
1219 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1220 t.Fatalf("failed to parse tool input: %v", err)
1221 }
1222
1223 if toolInput["thoughts"] != "This is a test thought" {
1224 t.Errorf("expected thoughts 'This is a test thought', got '%v'", toolInput["thoughts"])
1225 }
1226}
1227
1228func TestPredictableServicePatchTool(t *testing.T) {
1229 service := NewPredictableService()
1230
1231 ctx := context.Background()
1232 req := &llm.Request{
1233 Messages: []llm.Message{
1234 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch: /tmp/test.txt"}}},
1235 },
1236 }
1237
1238 resp, err := service.Do(ctx, req)
1239 if err != nil {
1240 t.Fatalf("patch tool test failed: %v", err)
1241 }
1242
1243 if resp.StopReason != llm.StopReasonToolUse {
1244 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1245 }
1246
1247 // Find the tool use content
1248 var toolUseContent *llm.Content
1249 for _, content := range resp.Content {
1250 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1251 toolUseContent = &content
1252 break
1253 }
1254 }
1255
1256 if toolUseContent == nil {
1257 t.Fatal("no patch tool use content found")
1258 }
1259
1260 // Check tool input contains the file path
1261 var toolInput map[string]interface{}
1262 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1263 t.Fatalf("failed to parse tool input: %v", err)
1264 }
1265
1266 if toolInput["path"] != "/tmp/test.txt" {
1267 t.Errorf("expected path '/tmp/test.txt', got '%v'", toolInput["path"])
1268 }
1269}
1270
1271func TestPredictableServiceMalformedPatchTool(t *testing.T) {
1272 service := NewPredictableService()
1273
1274 ctx := context.Background()
1275 req := &llm.Request{
1276 Messages: []llm.Message{
1277 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "patch bad json"}}},
1278 },
1279 }
1280
1281 resp, err := service.Do(ctx, req)
1282 if err != nil {
1283 t.Fatalf("malformed patch tool test failed: %v", err)
1284 }
1285
1286 if resp.StopReason != llm.StopReasonToolUse {
1287 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1288 }
1289
1290 // Find the tool use content
1291 var toolUseContent *llm.Content
1292 for _, content := range resp.Content {
1293 if content.Type == llm.ContentTypeToolUse && content.ToolName == "patch" {
1294 toolUseContent = &content
1295 break
1296 }
1297 }
1298
1299 if toolUseContent == nil {
1300 t.Fatal("no patch tool use content found")
1301 }
1302
1303 // Check that the tool input is malformed JSON (as expected)
1304 toolInputStr := string(toolUseContent.ToolInput)
1305 if !strings.Contains(toolInputStr, "parameter name") {
1306 t.Errorf("expected malformed JSON in tool input, got: %s", toolInputStr)
1307 }
1308}
1309
1310func TestPredictableServiceError(t *testing.T) {
1311 service := NewPredictableService()
1312
1313 ctx := context.Background()
1314 req := &llm.Request{
1315 Messages: []llm.Message{
1316 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "error: test error"}}},
1317 },
1318 }
1319
1320 resp, err := service.Do(ctx, req)
1321 if err == nil {
1322 t.Fatal("expected error, got nil")
1323 }
1324
1325 if !strings.Contains(err.Error(), "predictable error: test error") {
1326 t.Errorf("expected error message to contain 'predictable error: test error', got: %v", err)
1327 }
1328
1329 if resp != nil {
1330 t.Error("expected response to be nil when error occurs")
1331 }
1332}
1333
// TestPredictableServiceRequestTracking exercises the service's request log:
// empty before any call, populated by Do, cleared by ClearRequests, and
// bounded to the 10 most recent requests.
func TestPredictableServiceRequestTracking(t *testing.T) {
	service := NewPredictableService()

	// Initially no requests
	requests := service.GetRecentRequests()
	if requests != nil {
		t.Errorf("expected nil requests initially, got %v", requests)
	}

	lastReq := service.GetLastRequest()
	if lastReq != nil {
		t.Errorf("expected nil last request initially, got %v", lastReq)
	}

	// Make a request
	ctx := context.Background()
	req := &llm.Request{
		Messages: []llm.Message{
			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
		},
	}

	_, err := service.Do(ctx, req)
	if err != nil {
		t.Fatalf("Do failed: %v", err)
	}

	// Check that request was tracked
	requests = service.GetRecentRequests()
	if len(requests) != 1 {
		t.Errorf("expected 1 request, got %d", len(requests))
	}

	lastReq = service.GetLastRequest()
	if lastReq == nil {
		t.Fatal("expected last request to be non-nil")
	}

	if len(lastReq.Messages) != 1 {
		t.Errorf("expected 1 message in last request, got %d", len(lastReq.Messages))
	}

	// Test clearing requests
	service.ClearRequests()
	requests = service.GetRecentRequests()
	if requests != nil {
		t.Errorf("expected nil requests after clearing, got %v", requests)
	}

	lastReq = service.GetLastRequest()
	if lastReq != nil {
		t.Errorf("expected nil last request after clearing, got %v", lastReq)
	}

	// Test that only last 10 requests are kept: issue 15 distinguishable
	// requests so the first 5 should be evicted.
	for i := 0; i < 15; i++ {
		testReq := &llm.Request{
			Messages: []llm.Message{
				{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: fmt.Sprintf("test %d", i)}}},
			},
		}
		_, err := service.Do(ctx, testReq)
		if err != nil {
			t.Fatalf("Do failed on iteration %d: %v", i, err)
		}
	}

	requests = service.GetRecentRequests()
	if len(requests) != 10 {
		t.Errorf("expected 10 requests (last 10), got %d", len(requests))
	}

	// Check that we have requests 5-14 (0-indexed): slot i should hold the
	// request whose text was "test {i+5}".
	for i, req := range requests {
		expectedText := fmt.Sprintf("test %d", i+5)
		if len(req.Messages) == 0 || len(req.Messages[0].Content) == 0 {
			t.Errorf("request %d has no content", i)
			continue
		}
		if req.Messages[0].Content[0].Text != expectedText {
			t.Errorf("expected request %d to have text '%s', got '%s'", i, expectedText, req.Messages[0].Content[0].Text)
		}
	}
}
1418
1419func TestPredictableServiceScreenshotTool(t *testing.T) {
1420 service := NewPredictableService()
1421
1422 ctx := context.Background()
1423 req := &llm.Request{
1424 Messages: []llm.Message{
1425 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "screenshot: .test-class"}}},
1426 },
1427 }
1428
1429 resp, err := service.Do(ctx, req)
1430 if err != nil {
1431 t.Fatalf("screenshot tool test failed: %v", err)
1432 }
1433
1434 if resp.StopReason != llm.StopReasonToolUse {
1435 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1436 }
1437
1438 // Find the tool use content
1439 var toolUseContent *llm.Content
1440 for _, content := range resp.Content {
1441 if content.Type == llm.ContentTypeToolUse && content.ToolName == "browser_take_screenshot" {
1442 toolUseContent = &content
1443 break
1444 }
1445 }
1446
1447 if toolUseContent == nil {
1448 t.Fatal("no screenshot tool use content found")
1449 }
1450
1451 // Check tool input contains the selector
1452 var toolInput map[string]interface{}
1453 if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
1454 t.Fatalf("failed to parse tool input: %v", err)
1455 }
1456
1457 if toolInput["selector"] != ".test-class" {
1458 t.Errorf("expected selector '.test-class', got '%v'", toolInput["selector"])
1459 }
1460}
1461
1462func TestPredictableServiceToolSmorgasbord(t *testing.T) {
1463 service := NewPredictableService()
1464
1465 ctx := context.Background()
1466 req := &llm.Request{
1467 Messages: []llm.Message{
1468 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "tool smorgasbord"}}},
1469 },
1470 }
1471
1472 resp, err := service.Do(ctx, req)
1473 if err != nil {
1474 t.Fatalf("tool smorgasbord test failed: %v", err)
1475 }
1476
1477 if resp.StopReason != llm.StopReasonToolUse {
1478 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
1479 }
1480
1481 // Count the tool use contents
1482 toolUseCount := 0
1483 for _, content := range resp.Content {
1484 if content.Type == llm.ContentTypeToolUse {
1485 toolUseCount++
1486 }
1487 }
1488
1489 // Should have at least several tool uses
1490 if toolUseCount < 5 {
1491 t.Errorf("expected at least 5 tool uses, got %d", toolUseCount)
1492 }
1493}
1494
// TestProcessLLMRequestError verifies that when the LLM service fails,
// ProcessOneTurn returns an error AND records an assistant-role error message
// (with EndOfTurn set) so the UI's working state is updated.
func TestProcessLLMRequestError(t *testing.T) {
	// Test error handling when LLM service returns an error
	errorService := &errorLLMService{err: fmt.Errorf("test LLM error")}

	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	loop := NewLoop(Config{
		LLM:           errorService,
		History:       []llm.Message{},
		Tools:         []*llm.Tool{},
		RecordMessage: recordFunc,
	})

	// Queue a user message
	userMessage := llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "test message"}},
	}
	loop.QueueUserMessage(userMessage)

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	err := loop.ProcessOneTurn(ctx)
	if err == nil {
		t.Fatal("expected error from ProcessOneTurn, got nil")
	}

	if !strings.Contains(err.Error(), "LLM request failed") {
		t.Errorf("expected error to contain 'LLM request failed', got: %v", err)
	}

	// Check that error message was recorded
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message (error), got %d", len(recordedMessages))
	}

	// The recorded error is surfaced as an assistant message with a single
	// text content item describing the failure.
	if recordedMessages[0].Role != llm.MessageRoleAssistant {
		t.Errorf("expected recorded message to be assistant role, got %s", recordedMessages[0].Role)
	}

	if len(recordedMessages[0].Content) != 1 {
		t.Fatalf("expected 1 content item in recorded message, got %d", len(recordedMessages[0].Content))
	}

	if recordedMessages[0].Content[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content, got %s", recordedMessages[0].Content[0].Type)
	}

	if !strings.Contains(recordedMessages[0].Content[0].Text, "LLM request failed") {
		t.Errorf("expected error message to contain 'LLM request failed', got: %s", recordedMessages[0].Content[0].Text)
	}

	// Verify EndOfTurn is set so the agent working state is properly updated
	if !recordedMessages[0].EndOfTurn {
		t.Error("expected error message to have EndOfTurn=true so agent working state is updated")
	}
}
1557
// errorLLMService is a test LLM service that always returns an error
type errorLLMService struct {
	// err is returned verbatim from every Do call.
	err error
}
1562
// Do always fails with the configured error and returns a nil response.
func (e *errorLLMService) Do(ctx context.Context, req *llm.Request) (*llm.Response, error) {
	return nil, e.err
}
1566
// TokenContextWindow reports a fixed 200k-token window, matching the
// predictable service used elsewhere in these tests.
func (e *errorLLMService) TokenContextWindow() int {
	return 200000
}
1570
// MaxImageDimension reports a fixed 2000px limit, matching the predictable
// service used elsewhere in these tests.
func (e *errorLLMService) MaxImageDimension() int {
	return 2000
}
1574
// TestCheckGitStateChange exercises checkGitStateChange directly: it must be
// safe with a nil OnGitStateChange callback, fire the callback exactly once
// after a new commit, and stay quiet when the state is unchanged.
func TestCheckGitStateChange(t *testing.T) {
	// Create a test repo
	tmpDir := t.TempDir()

	// Initialize git repo
	runGit(t, tmpDir, "init")
	runGit(t, tmpDir, "config", "user.email", "test@test.com")
	runGit(t, tmpDir, "config", "user.name", "Test")

	// Create initial commit
	testFile := filepath.Join(tmpDir, "test.txt")
	if err := os.WriteFile(testFile, []byte("hello"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "initial")

	// Test with nil OnGitStateChange - should not panic
	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		// OnGitStateChange is nil
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// This should not panic
	loop.checkGitStateChange(context.Background())

	// Test with actual callback. No mutex needed here: checkGitStateChange is
	// invoked synchronously from this goroutine only.
	var gitStateChanges []*gitstate.GitState
	loop = NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		WorkingDir:    tmpDir,
		GetWorkingDir: func() string { return tmpDir },
		OnGitStateChange: func(ctx context.Context, state *gitstate.GitState) {
			gitStateChanges = append(gitStateChanges, state)
		},
		RecordMessage: func(ctx context.Context, message llm.Message, usage llm.Usage) error {
			return nil
		},
	})

	// Make a change
	if err := os.WriteFile(testFile, []byte("updated"), 0o644); err != nil {
		t.Fatal(err)
	}
	runGit(t, tmpDir, "add", ".")
	runGit(t, tmpDir, "commit", "-m", "update")

	// Check git state change
	loop.checkGitStateChange(context.Background())

	if len(gitStateChanges) != 1 {
		t.Errorf("expected 1 git state change, got %d", len(gitStateChanges))
	}

	// Call again - should not trigger another change since state is the same
	loop.checkGitStateChange(context.Background())

	if len(gitStateChanges) != 1 {
		t.Errorf("expected still 1 git state change (no new changes), got %d", len(gitStateChanges))
	}
}
1643
// TestHandleToolCallsWithMissingTool verifies that a tool_use referencing an
// unregistered tool produces a recorded user message containing an error
// tool_result (rather than a panic or a dropped call).
func TestHandleToolCallsWithMissingTool(t *testing.T) {
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		Tools:         []*llm.Tool{}, // No tools registered
		RecordMessage: recordFunc,
	})

	// Create content with a tool use for a tool that doesn't exist
	content := []llm.Content{
		{
			ID:        "test_tool_123",
			Type:      llm.ContentTypeToolUse,
			ToolName:  "nonexistent_tool",
			ToolInput: json.RawMessage(`{"test": "input"}`),
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	// handleToolCalls itself should succeed; the failure is reported to the
	// model via the tool result, not as a Go error.
	err := loop.handleToolCalls(ctx, content)
	if err != nil {
		t.Fatalf("handleToolCalls failed: %v", err)
	}

	// Should have recorded a user message with tool result
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
	}

	msg := recordedMessages[0]
	if msg.Role != llm.MessageRoleUser {
		t.Errorf("expected user role, got %s", msg.Role)
	}

	if len(msg.Content) != 1 {
		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
	}

	// The tool result must echo the originating tool_use ID and be flagged
	// as an error so the model knows the call failed.
	toolResult := msg.Content[0]
	if toolResult.Type != llm.ContentTypeToolResult {
		t.Errorf("expected tool result content, got %s", toolResult.Type)
	}

	if toolResult.ToolUseID != "test_tool_123" {
		t.Errorf("expected tool use ID 'test_tool_123', got %s", toolResult.ToolUseID)
	}

	if !toolResult.ToolError {
		t.Error("expected ToolError to be true")
	}

	if len(toolResult.ToolResult) != 1 {
		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
	}

	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
	}

	expectedText := "Tool 'nonexistent_tool' not found"
	if toolResult.ToolResult[0].Text != expectedText {
		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
	}
}
1716
// TestHandleToolCallsWithErrorTool verifies that when a registered tool's Run
// returns an error, the loop records a user message with an error-flagged
// tool_result carrying the tool's error text.
func TestHandleToolCallsWithErrorTool(t *testing.T) {
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		recordedMessages = append(recordedMessages, message)
		return nil
	}

	// Create a tool that always returns an error
	errorTool := &llm.Tool{
		Name:        "error_tool",
		Description: "A tool that always errors",
		InputSchema: llm.MustSchema(`{"type": "object", "properties": {}}`),
		Run: func(ctx context.Context, input json.RawMessage) llm.ToolOut {
			return llm.ErrorToolOut(fmt.Errorf("intentional test error"))
		},
	}

	loop := NewLoop(Config{
		LLM:           NewPredictableService(),
		History:       []llm.Message{},
		Tools:         []*llm.Tool{errorTool},
		RecordMessage: recordFunc,
	})

	// Create content with a tool use that will error
	content := []llm.Content{
		{
			ID:        "error_tool_123",
			Type:      llm.ContentTypeToolUse,
			ToolName:  "error_tool",
			ToolInput: json.RawMessage(`{}`),
		},
	}

	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
	defer cancel()

	// handleToolCalls itself should succeed; the tool failure is reported to
	// the model via the tool result, not as a Go error.
	err := loop.handleToolCalls(ctx, content)
	if err != nil {
		t.Fatalf("handleToolCalls failed: %v", err)
	}

	// Should have recorded a user message with tool result
	if len(recordedMessages) < 1 {
		t.Fatalf("expected 1 recorded message, got %d", len(recordedMessages))
	}

	msg := recordedMessages[0]
	if msg.Role != llm.MessageRoleUser {
		t.Errorf("expected user role, got %s", msg.Role)
	}

	if len(msg.Content) != 1 {
		t.Fatalf("expected 1 content item, got %d", len(msg.Content))
	}

	// The tool result must echo the originating tool_use ID and be flagged
	// as an error containing the tool's error message.
	toolResult := msg.Content[0]
	if toolResult.Type != llm.ContentTypeToolResult {
		t.Errorf("expected tool result content, got %s", toolResult.Type)
	}

	if toolResult.ToolUseID != "error_tool_123" {
		t.Errorf("expected tool use ID 'error_tool_123', got %s", toolResult.ToolUseID)
	}

	if !toolResult.ToolError {
		t.Error("expected ToolError to be true")
	}

	if len(toolResult.ToolResult) != 1 {
		t.Fatalf("expected 1 tool result content item, got %d", len(toolResult.ToolResult))
	}

	if toolResult.ToolResult[0].Type != llm.ContentTypeText {
		t.Errorf("expected text content in tool result, got %s", toolResult.ToolResult[0].Type)
	}

	expectedText := "intentional test error"
	if toolResult.ToolResult[0].Text != expectedText {
		t.Errorf("expected tool result text '%s', got '%s'", expectedText, toolResult.ToolResult[0].Text)
	}
}
1799
// TestMaxTokensTruncation verifies the loop's handling of a max-tokens
// truncated response: the truncated message is recorded (excluded from
// context, kept for cost tracking), a separate truncation error message ends
// the turn, and only the latter lands in the conversation history.
func TestMaxTokensTruncation(t *testing.T) {
	// The mutex guards recordedMessages: Go runs the loop on another
	// goroutine while this test inspects results.
	var mu sync.Mutex
	var recordedMessages []llm.Message
	recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
		mu.Lock()
		recordedMessages = append(recordedMessages, message)
		mu.Unlock()
		return nil
	}

	service := NewPredictableService()
	loop := NewLoop(Config{
		LLM:           service,
		History:       []llm.Message{},
		Tools:         []*llm.Tool{},
		RecordMessage: recordFunc,
	})

	// Queue a user message that triggers max tokens truncation
	userMessage := llm.Message{
		Role:    llm.MessageRoleUser,
		Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
	}
	loop.QueueUserMessage(userMessage)

	// Run the loop - it should stop after handling truncation
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	// Go blocks until the context expires, so DeadlineExceeded is the
	// expected "clean" exit here.
	err := loop.Go(ctx)
	if err != context.DeadlineExceeded {
		t.Errorf("expected context deadline exceeded, got %v", err)
	}

	// Check recorded messages
	mu.Lock()
	numMessages := len(recordedMessages)
	messages := make([]llm.Message, len(recordedMessages))
	copy(messages, recordedMessages)
	mu.Unlock()

	// We should see two messages:
	// 1. The truncated message (with ExcludedFromContext=true) for cost tracking
	// 2. The truncation error message (with ErrorType=truncation)
	if numMessages != 2 {
		t.Errorf("Expected 2 recorded messages (truncated + error), got %d", numMessages)
		for i, msg := range messages {
			t.Logf("Message %d: Role=%v, EndOfTurn=%v, ExcludedFromContext=%v, ErrorType=%v",
				i, msg.Role, msg.EndOfTurn, msg.ExcludedFromContext, msg.ErrorType)
		}
		return
	}

	// First message: truncated response (for cost tracking, excluded from context)
	truncatedMsg := messages[0]
	if truncatedMsg.Role != llm.MessageRoleAssistant {
		t.Errorf("Truncated message should be assistant, got %v", truncatedMsg.Role)
	}
	if !truncatedMsg.ExcludedFromContext {
		t.Error("Truncated message should have ExcludedFromContext=true")
	}

	// Second message: truncation error
	errorMsg := messages[1]
	if errorMsg.Role != llm.MessageRoleAssistant {
		t.Errorf("Error message should be assistant, got %v", errorMsg.Role)
	}
	if !errorMsg.EndOfTurn {
		t.Error("Error message should have EndOfTurn=true")
	}
	if errorMsg.ErrorType != llm.ErrorTypeTruncation {
		t.Errorf("Error message should have ErrorType=truncation, got %v", errorMsg.ErrorType)
	}
	if errorMsg.ExcludedFromContext {
		t.Error("Error message should not be excluded from context")
	}
	if !strings.Contains(errorMsg.Content[0].Text, "SYSTEM ERROR") {
		t.Errorf("Error message should contain SYSTEM ERROR, got: %s", errorMsg.Content[0].Text)
	}

	// Verify history contains user message + error message, but NOT the truncated response
	loop.mu.Lock()
	history := loop.history
	loop.mu.Unlock()

	// History should have: user message + error message (the truncated response is NOT added to history)
	if len(history) != 2 {
		t.Errorf("History should have 2 messages (user + error), got %d", len(history))
	}
}
1890
1891//func TestInsertMissingToolResultsEdgeCases(t *testing.T) {
1892// loop := NewLoop(Config{
1893// LLM: NewPredictableService(),
1894// History: []llm.Message{},
1895// })
1896//
1897// // Test with nil request
1898// loop.insertMissingToolResults(nil) // Should not panic
1899//
1900// // Test with empty messages
1901// req := &llm.Request{Messages: []llm.Message{}}
1902// loop.insertMissingToolResults(req) // Should not panic
1903//
1904// // Test with single message
1905// req = &llm.Request{
1906// Messages: []llm.Message{
1907// {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}}},
1908// },
1909// }
1910// loop.insertMissingToolResults(req) // Should not panic
1911// if len(req.Messages) != 1 {
1912// t.Errorf("expected 1 message, got %d", len(req.Messages))
1913// }
1914//
1915// // Test with multiple consecutive assistant messages with tool_use
1916// req = &llm.Request{
1917// Messages: []llm.Message{
1918// {
1919// Role: llm.MessageRoleAssistant,
1920// Content: []llm.Content{
1921// {Type: llm.ContentTypeText, Text: "First tool"},
1922// {Type: llm.ContentTypeToolUse, ID: "tool1", ToolName: "bash"},
1923// },
1924// },
1925// {
1926// Role: llm.MessageRoleAssistant,
1927// Content: []llm.Content{
1928// {Type: llm.ContentTypeText, Text: "Second tool"},
1929// {Type: llm.ContentTypeToolUse, ID: "tool2", ToolName: "read"},
1930// },
1931// },
1932// {
1933// Role: llm.MessageRoleUser,
1934// Content: []llm.Content{
1935// {Type: llm.ContentTypeText, Text: "User response"},
1936// },
1937// },
1938// },
1939// }
1940//
1941// loop.insertMissingToolResults(req)
1942//
1943// // Should have inserted synthetic tool results for both tool_uses
1944// // The structure should be:
1945// // 0: First assistant message
1946// // 1: Synthetic user message with tool1 result
1947// // 2: Second assistant message
1948// // 3: Synthetic user message with tool2 result
1949// // 4: Original user message
1950// if len(req.Messages) != 5 {
1951// t.Fatalf("expected 5 messages after processing, got %d", len(req.Messages))
1952// }
1953//
1954// // Check first synthetic message
1955// if req.Messages[1].Role != llm.MessageRoleUser {
1956// t.Errorf("expected message 1 to be user role, got %s", req.Messages[1].Role)
1957// }
1958// foundTool1 := false
1959// for _, content := range req.Messages[1].Content {
1960// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool1" {
1961// foundTool1 = true
1962// break
1963// }
1964// }
1965// if !foundTool1 {
1966// t.Error("expected to find tool1 result in message 1")
1967// }
1968//
1969// // Check second synthetic message
1970// if req.Messages[3].Role != llm.MessageRoleUser {
1971// t.Errorf("expected message 3 to be user role, got %s", req.Messages[3].Role)
1972// }
1973// foundTool2 := false
1974// for _, content := range req.Messages[3].Content {
1975// if content.Type == llm.ContentTypeToolResult && content.ToolUseID == "tool2" {
1976// foundTool2 = true
1977// break
1978// }
1979//}
1980// if !foundTool2 {
1981// t.Error("expected to find tool2 result in message 3")
1982// }
1983//}