1package test
2
3import (
4 "bytes"
5 "context"
6 "encoding/json"
7 "fmt"
8 "io"
9 "log/slog"
10 "net/http"
11 "net/http/httptest"
12 "net/url"
13 "os"
14 "os/exec"
15 "path/filepath"
16 "strings"
17 "testing"
18 "time"
19
20 "shelley.exe.dev/claudetool"
21 "shelley.exe.dev/claudetool/browse"
22 "shelley.exe.dev/db"
23 "shelley.exe.dev/db/generated"
24 "shelley.exe.dev/llm"
25 "shelley.exe.dev/loop"
26 "shelley.exe.dev/models"
27 "shelley.exe.dev/server"
28 "shelley.exe.dev/slug"
29)
30
31func TestServerEndToEnd(t *testing.T) {
32 // Create temporary database
33 tempDB := t.TempDir() + "/test.db"
34 database, err := db.New(db.Config{DSN: tempDB})
35 if err != nil {
36 t.Fatalf("Failed to create test database: %v", err)
37 }
38 defer database.Close()
39
40 // Run migrations
41 if err := database.Migrate(context.Background()); err != nil {
42 t.Fatalf("Failed to migrate database: %v", err)
43 }
44
45 // Create logger first
46 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
47 Level: slog.LevelDebug,
48 }))
49
50 // Create LLM service manager with predictable service
51 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
52 predictableService := loop.NewPredictableService()
53 // For testing, we'll override the manager's service selection
54 _ = predictableService // will need to mock this properly
55
56 // Set up tools
57 // Set up tools config
58 toolSetConfig := claudetool.ToolSetConfig{
59 WorkingDir: t.TempDir(),
60 EnableBrowser: false,
61 }
62
63 // Create server
64 svr := server.NewServer(database, llmManager, toolSetConfig, logger, false, "", "", "", nil)
65
66 // Set up HTTP server
67 mux := http.NewServeMux()
68 svr.RegisterRoutes(mux)
69 testServer := httptest.NewServer(mux)
70 defer testServer.Close()
71
72 t.Run("CreateAndListConversations", func(t *testing.T) {
73 // Create a conversation
74 // Using database directly instead of service
75 slug := "test-conversation"
76 conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
77 if err != nil {
78 t.Fatalf("Failed to create conversation: %v", err)
79 }
80
81 // List conversations
82 resp, err := http.Get(testServer.URL + "/api/conversations")
83 if err != nil {
84 t.Fatalf("Failed to get conversations: %v", err)
85 }
86 defer resp.Body.Close()
87
88 if resp.StatusCode != http.StatusOK {
89 t.Fatalf("Expected status 200, got %d", resp.StatusCode)
90 }
91
92 var conversations []generated.Conversation
93 if err := json.NewDecoder(resp.Body).Decode(&conversations); err != nil {
94 t.Fatalf("Failed to decode response: %v", err)
95 }
96
97 if len(conversations) != 1 {
98 t.Fatalf("Expected 1 conversation, got %d", len(conversations))
99 }
100
101 if conversations[0].ConversationID != conv.ConversationID {
102 t.Fatalf("Conversation ID mismatch")
103 }
104 })
105
106 t.Run("ChatEndToEnd", func(t *testing.T) {
107 // Create a conversation
108 // Using database directly instead of service
109 slug := "chat-test"
110 conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
111 if err != nil {
112 t.Fatalf("Failed to create conversation: %v", err)
113 }
114
115 // Send a chat message using predictable model
116 chatReq := map[string]interface{}{"message": "Hello, can you help me?", "model": "predictable"}
117 reqBody, _ := json.Marshal(chatReq)
118
119 resp, err := http.Post(
120 testServer.URL+"/api/conversation/"+conv.ConversationID+"/chat",
121 "application/json",
122 bytes.NewReader(reqBody),
123 )
124 if err != nil {
125 t.Fatalf("Failed to send chat message: %v", err)
126 }
127 defer resp.Body.Close()
128
129 if resp.StatusCode != http.StatusAccepted {
130 t.Fatalf("Expected status 202, got %d", resp.StatusCode)
131 }
132
133 // Wait a bit for processing
134 time.Sleep(500 * time.Millisecond)
135
136 // Check messages
137 msgResp, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID)
138 if err != nil {
139 t.Fatalf("Failed to get conversation: %v", err)
140 }
141 defer msgResp.Body.Close()
142
143 if msgResp.StatusCode != http.StatusOK {
144 t.Fatalf("Expected status 200, got %d", msgResp.StatusCode)
145 }
146
147 var payload server.StreamResponse
148 if err := json.NewDecoder(msgResp.Body).Decode(&payload); err != nil {
149 t.Fatalf("Failed to decode messages: %v", err)
150 }
151
152 // Should have at least system and user messages
153 if len(payload.Messages) < 2 {
154 t.Fatalf("Expected at least 2 messages (system + user), got %d", len(payload.Messages))
155 }
156
157 // First message should be system prompt
158 if payload.Messages[0].Type != "system" {
159 t.Fatalf("Expected first message to be system, got %s", payload.Messages[0].Type)
160 }
161
162 // Second message should be from user
163 if payload.Messages[1].Type != "user" {
164 t.Fatalf("Expected second message to be user, got %s", payload.Messages[1].Type)
165 }
166 })
167
168 t.Run("StreamEndpoint", func(t *testing.T) {
169 // Create a conversation with some messages
170 // Using database directly instead of service
171 // Using database directly instead of service
172 slug := "stream-test"
173 conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
174 if err != nil {
175 t.Fatalf("Failed to create conversation: %v", err)
176 }
177
178 // Add a test message
179 testMsg := llm.Message{
180 Role: llm.MessageRoleUser,
181 Content: []llm.Content{
182 {Type: llm.ContentTypeText, Text: "Test message"},
183 },
184 }
185 _, err = database.CreateMessage(context.Background(), db.CreateMessageParams{
186 ConversationID: conv.ConversationID,
187 Type: db.MessageTypeUser,
188 LLMData: testMsg,
189 })
190 if err != nil {
191 t.Fatalf("Failed to create message: %v", err)
192 }
193
194 // Test stream endpoint
195 resp, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID + "/stream")
196 if err != nil {
197 t.Fatalf("Failed to get stream: %v", err)
198 }
199 defer resp.Body.Close()
200
201 if resp.StatusCode != http.StatusOK {
202 t.Fatalf("Expected status 200, got %d", resp.StatusCode)
203 }
204
205 // Check headers
206 if resp.Header.Get("Content-Type") != "text/event-stream" {
207 t.Fatal("Expected text/event-stream content type")
208 }
209
210 // Read first event (should be current messages)
211 buf := make([]byte, 1024)
212 n, err := resp.Body.Read(buf)
213 if err != nil && err != io.EOF {
214 t.Fatalf("Failed to read stream: %v", err)
215 }
216
217 data := string(buf[:n])
218 if !strings.Contains(data, "data: ") {
219 t.Fatal("Expected SSE data format")
220 }
221 })
222
223 // Test that slug updates are reflected in the stream
224 t.Run("SlugUpdateStream", func(t *testing.T) {
225 // Create a context that won't be canceled unexpectedly
226 ctx := context.Background()
227
228 // Create a conversation without a slug
229 conv, err := database.CreateConversation(ctx, nil, true, nil, nil)
230 if err != nil {
231 t.Fatalf("Failed to create conversation: %v", err)
232 }
233
234 // Verify initially no slug
235 if conv.Slug != nil {
236 t.Fatalf("Expected no initial slug, got: %v", *conv.Slug)
237 }
238
239 // Send a message which should trigger slug generation
240 chatRequest := server.ChatRequest{
241 Message: "Write a Python script to calculate fibonacci numbers",
242 Model: "predictable",
243 }
244
245 chatBody, _ := json.Marshal(chatRequest)
246 chatResp, err := http.Post(
247 testServer.URL+"/api/conversation/"+conv.ConversationID+"/chat",
248 "application/json",
249 strings.NewReader(string(chatBody)),
250 )
251 if err != nil {
252 t.Fatalf("Failed to send chat message: %v", err)
253 }
254 defer chatResp.Body.Close()
255
256 // Check response status before continuing
257 if chatResp.StatusCode != http.StatusAccepted {
258 t.Fatalf("Expected status 202, got %d", chatResp.StatusCode)
259 }
260
261 // Wait longer for slug generation (it happens asynchronously)
262 // Poll every 100ms instead of 500ms for faster feedback
263 for i := 0; i < 100; i++ {
264 time.Sleep(100 * time.Millisecond)
265
266 // Check if slug was generated
267 updatedConv, err := database.GetConversationByID(ctx, conv.ConversationID)
268 if err != nil {
269 // Don't fail immediately on error - the conversation might be temporarily locked
270 // Only fail if we've exhausted all retries
271 if i == 99 {
272 t.Fatalf("Failed to get updated conversation after all retries: %v", err)
273 }
274 continue
275 }
276
277 if updatedConv.Slug != nil {
278 t.Logf("Slug generated successfully: %s", *updatedConv.Slug)
279 return
280 }
281 }
282
283 t.Fatal("Slug was not generated within timeout period")
284 })
285
286 t.Run("ErrorHandling", func(t *testing.T) {
287 // Test non-existent conversation
288 resp, err := http.Get(testServer.URL + "/api/conversation/nonexistent")
289 if err != nil {
290 t.Fatalf("Failed to make request: %v", err)
291 }
292 defer resp.Body.Close()
293
294 // Should handle gracefully (might be empty list or error depending on implementation)
295 if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNotFound {
296 t.Fatalf("Unexpected status code: %d", resp.StatusCode)
297 }
298
299 // Test invalid chat request
300 invalidReq := map[string]string{"not_message": "test"}
301 reqBody, _ := json.Marshal(invalidReq)
302 chatResp, err := http.Post(
303 testServer.URL+"/api/conversation/test/chat",
304 "application/json",
305 bytes.NewReader(reqBody),
306 )
307 if err != nil {
308 t.Fatalf("Failed to send invalid chat: %v", err)
309 }
310 defer chatResp.Body.Close()
311
312 if chatResp.StatusCode != http.StatusBadRequest {
313 t.Fatalf("Expected status 400 for invalid request, got %d", chatResp.StatusCode)
314 }
315 })
316}
317
318func TestPredictableServiceWithTools(t *testing.T) {
319 // Test that the predictable service correctly handles tool calls
320 service := loop.NewPredictableService()
321
322 // First call should return greeting
323 resp1, err := service.Do(context.Background(), &llm.Request{
324 Messages: []llm.Message{
325 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
326 },
327 })
328 if err != nil {
329 t.Fatalf("First call failed: %v", err)
330 }
331
332 if !strings.Contains(resp1.Content[0].Text, "Shelley") {
333 t.Fatal("Expected greeting to mention Shelley")
334 }
335
336 // Second call should return tool use (bash command)
337 resp2, err := service.Do(context.Background(), &llm.Request{
338 Messages: []llm.Message{
339 {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}}},
340 },
341 })
342 if err != nil {
343 t.Fatalf("Second call failed: %v", err)
344 }
345
346 if resp2.StopReason != llm.StopReasonToolUse {
347 t.Fatal("Expected tool use stop reason")
348 }
349
350 if len(resp2.Content) < 2 {
351 t.Fatal("Expected both text and tool use content")
352 }
353
354 // Find tool use content
355 var toolUse *llm.Content
356 for i := range resp2.Content {
357 if resp2.Content[i].Type == llm.ContentTypeToolUse {
358 toolUse = &resp2.Content[i]
359 break
360 }
361 }
362
363 if toolUse == nil {
364 t.Fatal("Expected tool use content")
365 }
366
367 if toolUse.ToolName != "bash" {
368 t.Fatalf("Expected bash tool, got %s", toolUse.ToolName)
369 }
370}
371
372func TestConversationCleanup(t *testing.T) {
373 // Create temporary database
374 tempDB := t.TempDir() + "/cleanup_test.db"
375 database, err := db.New(db.Config{DSN: tempDB})
376 if err != nil {
377 t.Fatalf("Failed to create test database: %v", err)
378 }
379 defer database.Close()
380
381 // Run migrations
382 if err := database.Migrate(context.Background()); err != nil {
383 t.Fatalf("Failed to migrate database: %v", err)
384 }
385
386 // Create server with predictable service
387 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
388 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
389 svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
390
391 // Create a conversation
392 // Using database directly instead of service
393 conv, err := database.CreateConversation(context.Background(), nil, true, nil, nil)
394 if err != nil {
395 t.Fatalf("Failed to create conversation: %v", err)
396 }
397
398 // Test cleanup indirectly by calling cleanup
399 svr.Cleanup()
400
401 // Test passes if no panic occurs
402 t.Log("Cleanup completed successfully for conversation:", conv.ConversationID)
403}
404
405func TestSlugGeneration(t *testing.T) {
406 // This test verifies that the slug generation logic is properly integrated
407 // but uses the direct API to avoid timing issues with background goroutines
408
409 // Create temporary database
410 tempDB := t.TempDir() + "/test.db"
411 database, err := db.New(db.Config{DSN: tempDB})
412 if err != nil {
413 t.Fatalf("Failed to create test database: %v", err)
414 }
415 defer database.Close()
416
417 // Run migrations
418 if err := database.Migrate(context.Background()); err != nil {
419 t.Fatalf("Failed to migrate database: %v", err)
420 }
421
422 // Create server
423 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
424 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
425 _ = server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
426
427 // Test slug generation directly to avoid timing issues
428 // ctx := context.Background()
429 // testMessage := "help me create a Python web server"
430
431 // TODO: Fix slug generation test - method moved to slug package
432 // Generate slug directly
433 // slugResult, err := svr.GenerateSlugForConversation(ctx, testMessage)
434 // if err != nil {
435 // t.Fatalf("Slug generation failed: %v", err)
436 // }
437 // if slugResult == "" {
438 // t.Error("Generated slug is empty")
439 // } else {
440 // t.Logf("Generated slug: %s", slugResult)
441 // }
442
443 // TODO: Fix slug tests
444 // Test that the slug is properly sanitized
445 // if !strings.Contains(slugResult, "python") || !strings.Contains(slugResult, "web") {
446 // t.Logf("Note: Generated slug '%s' may not contain expected keywords, but this is acceptable for AI-generated content", slugResult)
447 // }
448
449 // // Verify slug uniqueness handling
450 // conv, err := database.CreateConversation(ctx, &slugResult, true)
451 // if err != nil {
452 // t.Fatalf("Failed to create conversation with slug: %v", err)
453 // }
454
455 // TODO: Fix slug generation test
456 // Try to generate the same slug again - should get a unique variant
457 // slugResult2, err := svr.GenerateSlugForConversation(ctx, testMessage)
458 // if err != nil {
459 // t.Fatalf("Second slug generation failed: %v", err)
460 // }
461
462 // // The second slug should be different (with -1, -2, etc.)
463 // if slugResult == slugResult2 {
464 // t.Errorf("Expected different slugs for uniqueness, but got same: %s", slugResult)
465 // } else {
466 // t.Logf("Unique slug generated: %s", slugResult2)
467 // }
468
469 // _ = conv // avoid unused variable warning
470}
471
472func TestSanitizeSlug(t *testing.T) {
473 tests := []struct {
474 name string
475 input string
476 expected string
477 }{
478 {"basic text", "Hello World", "hello-world"},
479 {"with numbers", "Python3 Tutorial", "python3-tutorial"},
480 {"with special chars", "C++ Programming!", "c-programming"},
481 {"multiple spaces", "Very Long Title", "very-long-title"},
482 {"underscores", "test_function_name", "test-function-name"},
483 {"mixed case", "CamelCaseExample", "camelcaseexample"},
484 {"with hyphens", "pre-existing-hyphens", "pre-existing-hyphens"},
485 {"leading/trailing spaces", " trimmed ", "trimmed"},
486 {"leading/trailing hyphens", "-start-end-", "start-end"},
487 {"multiple consecutive hyphens", "test---slug", "test-slug"},
488 {"empty after sanitization", "!@#$%^&*()", ""},
489 {"very long", "this-is-a-very-long-slug-that-should-be-truncated-because-it-exceeds-the-maximum-length", "this-is-a-very-long-slug-that-should-be-truncated-because-it"},
490 }
491
492 for _, tt := range tests {
493 t.Run(tt.name, func(t *testing.T) {
494 result := slug.Sanitize(tt.input)
495 if result != tt.expected {
496 t.Errorf("SanitizeSlug(%q) = %q, want %q", tt.input, result, tt.expected)
497 }
498 })
499 }
500}
501
502func TestSlugGenerationWithPredictableService(t *testing.T) {
503 // Create server with predictable service only
504 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
505 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
506
507 // Create a temporary database
508 tempDB := t.TempDir() + "/test.db"
509 database, err := db.New(db.Config{DSN: tempDB})
510 if err != nil {
511 t.Fatalf("Failed to create test database: %v", err)
512 }
513 defer database.Close()
514
515 if err := database.Migrate(context.Background()); err != nil {
516 t.Fatalf("Failed to migrate database: %v", err)
517 }
518
519 _ = server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
520
521 // Test slug generation directly
522 // ctx := context.Background()
523 // testMessage := "help me write a python function"
524
525 // TODO: Fix slug generation test
526 // This should work with the predictable service falling back
527 // slugResult, err := svr.GenerateSlugForConversation(ctx, testMessage)
528 // if err != nil {
529 // t.Fatalf("Slug generation failed: %v", err)
530 // }
531 // if slugResult == "" {
532 // t.Error("Generated slug is empty")
533 // }
534 // t.Logf("Generated slug: %s", slugResult)
535
536 // TODO: Fix slug sanitization test
537 // Test slug sanitization which should always work
538 // slug := slug.Sanitize(testMessage)
539 // if slug != "help-me-write-a-python-function" {
540 // t.Errorf("Expected 'help-me-write-a-python-function', got '%s'", slug)
541 // }
542}
543
544func TestSlugEndToEnd(t *testing.T) {
545 // Create temporary database
546 tempDB := t.TempDir() + "/test.db"
547 database, err := db.New(db.Config{DSN: tempDB})
548 if err != nil {
549 t.Fatalf("Failed to create test database: %v", err)
550 }
551 defer database.Close()
552
553 // Run migrations
554 if err := database.Migrate(context.Background()); err != nil {
555 t.Fatalf("Failed to migrate database: %v", err)
556 }
557
558 // Create a conversation with a specific slug
559 ctx := context.Background()
560 testSlug := "test-conversation-slug"
561 conv, err := database.CreateConversation(ctx, &testSlug, true, nil, nil)
562 if err != nil {
563 t.Fatalf("Failed to create conversation: %v", err)
564 }
565
566 // Test retrieving by slug
567 retrievedBySlug, err := database.GetConversationBySlug(ctx, testSlug)
568 if err != nil {
569 t.Fatalf("Failed to retrieve conversation by slug: %v", err)
570 }
571
572 if retrievedBySlug.ConversationID != conv.ConversationID {
573 t.Errorf("Expected conversation ID %s, got %s", conv.ConversationID, retrievedBySlug.ConversationID)
574 }
575
576 if retrievedBySlug.Slug == nil || *retrievedBySlug.Slug != testSlug {
577 t.Errorf("Expected slug %s, got %v", testSlug, retrievedBySlug.Slug)
578 }
579
580 // Test retrieving by ID still works
581 retrievedByID, err := database.GetConversationByID(ctx, conv.ConversationID)
582 if err != nil {
583 t.Fatalf("Failed to retrieve conversation by ID: %v", err)
584 }
585
586 if retrievedByID.ConversationID != conv.ConversationID {
587 t.Errorf("Expected conversation ID %s, got %s", conv.ConversationID, retrievedByID.ConversationID)
588 }
589
590 t.Logf("Successfully tested slug-based conversation retrieval: %s -> %s", testSlug, conv.ConversationID)
591}
592
// Slug updates after a chat message are covered by the SlugUpdateStream
// subtest of TestServerEndToEnd.
594
595// Test that SSE only sends incremental message updates
596func TestSSEIncrementalUpdates(t *testing.T) {
597 // Create temporary database
598 tempDB := t.TempDir() + "/test.db"
599 database, err := db.New(db.Config{DSN: tempDB})
600 if err != nil {
601 t.Fatalf("Failed to create test database: %v", err)
602 }
603 defer database.Close()
604
605 // Run migrations
606 if err := database.Migrate(context.Background()); err != nil {
607 t.Fatalf("Failed to migrate database: %v", err)
608 }
609
610 // Create logger and LLM manager
611 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelWarn}))
612 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
613
614 // Create server
615 serviceInstance := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, false, "", "", "", nil)
616 mux := http.NewServeMux()
617 serviceInstance.RegisterRoutes(mux)
618 testServer := httptest.NewServer(mux)
619 defer testServer.Close()
620
621 // Create a conversation with initial message
622 slug := "test-sse"
623 conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
624 if err != nil {
625 t.Fatalf("Failed to create conversation: %v", err)
626 }
627
628 // Add initial message
629 _, err = database.CreateMessage(context.Background(), db.CreateMessageParams{
630 ConversationID: conv.ConversationID,
631 Type: db.MessageTypeUser,
632 LLMData: &llm.Message{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello"}}},
633 UserData: map[string]string{"content": "Hello"},
634 UsageData: llm.Usage{},
635 })
636 if err != nil {
637 t.Fatalf("Failed to create initial message: %v", err)
638 }
639
640 // Create first SSE client
641 client1, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID + "/stream")
642 if err != nil {
643 t.Fatalf("Failed to connect client1: %v", err)
644 }
645 defer client1.Body.Close()
646
647 // Read initial response from client1 (should contain the first message)
648 // Buffer must be large enough to hold the full response including system prompt
649 buf1 := make([]byte, 32768)
650 n1, err := client1.Body.Read(buf1)
651 if err != nil && err != io.EOF {
652 t.Fatalf("Failed to read from client1: %v", err)
653 }
654
655 response1 := string(buf1[:n1])
656 t.Logf("Client1 initial response: %s", response1)
657
658 // Verify client1 received the initial message
659 if !strings.Contains(response1, "Hello") {
660 t.Fatal("Client1 should have received initial message")
661 }
662
663 // Add a second message
664 _, err = database.CreateMessage(context.Background(), db.CreateMessageParams{
665 ConversationID: conv.ConversationID,
666 Type: db.MessageTypeAgent,
667 LLMData: &llm.Message{Role: llm.MessageRoleAssistant, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hi there!"}}},
668 UserData: map[string]string{"content": "Hi there!"},
669 UsageData: llm.Usage{},
670 })
671 if err != nil {
672 t.Fatalf("Failed to create second message: %v", err)
673 }
674
675 // Create second SSE client after the new message is added
676 client2, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID + "/stream")
677 if err != nil {
678 t.Fatalf("Failed to connect client2: %v", err)
679 }
680 defer client2.Body.Close()
681
682 // Read response from client2 (should contain both messages since it's a new client)
683 buf2 := make([]byte, 32768)
684 n2, err := client2.Body.Read(buf2)
685 if err != nil && err != io.EOF {
686 t.Fatalf("Failed to read from client2: %v", err)
687 }
688
689 response2 := string(buf2[:n2])
690 t.Logf("Client2 initial response: %s", response2)
691
692 // Verify client2 received both messages (new client gets full state)
693 if !strings.Contains(response2, "Hello") {
694 t.Fatal("Client2 should have received first message")
695 }
696 if !strings.Contains(response2, "Hi there!") {
697 t.Fatal("Client2 should have received second message")
698 }
699
700 t.Log("SSE incremental updates test completed successfully")
701}
702
703// TestSystemPromptSentToLLM verifies that the system prompt is included in LLM requests
704func TestSystemPromptSentToLLM(t *testing.T) {
705 ctx := context.Background()
706
707 // Create database and server with predictable service
708 // Note: :memory: is not supported by our DB wrapper since it requires multiple connections.
709 // Use a temp file-backed database for tests.
710 tempDB := t.TempDir() + "/system_prompt_test.db"
711 database, err := db.New(db.Config{DSN: tempDB})
712 if err != nil {
713 t.Fatalf("Failed to create database: %v", err)
714 }
715 defer database.Close()
716
717 if err := database.Migrate(ctx); err != nil {
718 t.Fatalf("Failed to migrate database: %v", err)
719 }
720
721 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
722
723 // Create a predictable service we can inspect
724 predictableService := loop.NewPredictableService()
725
726 // Create a custom LLM manager that returns our inspectable predictable service
727 customLLMManager := &inspectableLLMManager{
728 predictableService: predictableService,
729 logger: logger,
730 }
731
732 tools := claudetool.ToolSetConfig{}
733 svr := server.NewServer(database, customLLMManager, tools, logger, false, "", "", "", nil)
734
735 // Start server
736 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
737 mux := http.NewServeMux()
738 svr.RegisterRoutes(mux)
739 mux.ServeHTTP(w, r)
740 }))
741 defer ts.Close()
742
743 // Test 1: Create new conversation and send first message
744 t.Run("FirstMessage", func(t *testing.T) {
745 predictableService.ClearRequests()
746
747 // Send first message using /api/conversations/new
748 chatReq := map[string]interface{}{
749 "message": "Hello",
750 "model": "predictable",
751 }
752 body, _ := json.Marshal(chatReq)
753 resp, err := http.Post(ts.URL+"/api/conversations/new", "application/json", bytes.NewBuffer(body))
754 if err != nil {
755 t.Fatalf("Failed to send message: %v", err)
756 }
757 defer resp.Body.Close()
758
759 if resp.StatusCode != http.StatusCreated {
760 body, _ := io.ReadAll(resp.Body)
761 t.Fatalf("Expected status 201, got %d: %s", resp.StatusCode, body)
762 }
763
764 // Poll for async processing completion
765 // We need to wait for a request WITH a system prompt, not just any request
766 var lastReq *llm.Request
767 for i := 0; i < 50; i++ {
768 lastReq = predictableService.GetLastRequest()
769 if lastReq != nil && len(lastReq.System) > 0 {
770 break
771 }
772 time.Sleep(100 * time.Millisecond)
773 }
774 if lastReq == nil {
775 t.Fatal("No request was sent to the LLM service after 5 seconds")
776 }
777
778 if len(lastReq.System) == 0 {
779 t.Fatal("System prompt was not included in the LLM request")
780 }
781
782 // Verify system prompt contains expected content
783 systemText := ""
784 for _, sys := range lastReq.System {
785 systemText += sys.Text
786 }
787 if !strings.Contains(systemText, "Shelley") {
788 t.Errorf("System prompt doesn't contain 'Shelley': %s", systemText)
789 }
790 if !strings.Contains(systemText, "coding agent") {
791 t.Errorf("System prompt doesn't contain 'coding agent': %s", systemText)
792 }
793
794 t.Logf("System prompt successfully sent (length: %d chars)", len(systemText))
795 })
796
797 // Test 2: Send second message in existing conversation
798 t.Run("SubsequentMessage", func(t *testing.T) {
799 predictableService.ClearRequests()
800
801 // Create conversation first
802 chatReq := map[string]interface{}{
803 "message": "Hello",
804 "model": "predictable",
805 }
806 body, _ := json.Marshal(chatReq)
807 resp, err := http.Post(ts.URL+"/api/conversations/new", "application/json", bytes.NewBuffer(body))
808 if err != nil {
809 t.Fatalf("Failed to send first message: %v", err)
810 }
811 defer resp.Body.Close()
812
813 var createResp struct {
814 ConversationID string `json:"conversation_id"`
815 }
816 if resp.StatusCode != http.StatusCreated {
817 body, _ := io.ReadAll(resp.Body)
818 t.Fatalf("Expected status 201, got %d: %s", resp.StatusCode, body)
819 }
820 if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil {
821 body, _ := io.ReadAll(resp.Body)
822 t.Fatalf("Failed to decode response (status %d): %v, body: %s", resp.StatusCode, err, body)
823 }
824
825 conversationID := createResp.ConversationID
826
827 // Wait for first message to be processed
828 var firstReq *llm.Request
829 for i := 0; i < 50; i++ {
830 firstReq = predictableService.GetLastRequest()
831 if firstReq != nil {
832 break
833 }
834 time.Sleep(100 * time.Millisecond)
835 }
836 if firstReq == nil {
837 t.Fatal("First request was not sent to the LLM service after 5 seconds")
838 }
839
840 // Clear requests and send second message
841 predictableService.ClearRequests()
842
843 chatReq2 := map[string]interface{}{
844 "message": "what is the date",
845 "model": "predictable",
846 }
847 body2, _ := json.Marshal(chatReq2)
848 resp2, err := http.Post(ts.URL+"/api/conversation/"+conversationID+"/chat", "application/json", bytes.NewBuffer(body2))
849 if err != nil {
850 t.Fatalf("Failed to send second message: %v", err)
851 }
852 defer resp2.Body.Close()
853
854 if resp2.StatusCode != http.StatusAccepted {
855 body, _ := io.ReadAll(resp2.Body)
856 t.Fatalf("Expected status 202, got %d: %s", resp2.StatusCode, body)
857 }
858
859 // Poll for second message to be processed
860 // We need to wait for a request WITH a system prompt, not just any request
861 var lastReq *llm.Request
862 for i := 0; i < 50; i++ {
863 lastReq = predictableService.GetLastRequest()
864 if lastReq != nil && len(lastReq.System) > 0 {
865 break
866 }
867 time.Sleep(100 * time.Millisecond)
868 }
869 if lastReq == nil {
870 t.Fatal("No request was sent to the LLM service after 5 seconds")
871 }
872
873 if len(lastReq.System) == 0 {
874 t.Fatal("System prompt was not included in subsequent LLM request")
875 }
876
877 // Verify system prompt contains expected content
878 systemText := ""
879 for _, sys := range lastReq.System {
880 systemText += sys.Text
881 }
882 if !strings.Contains(systemText, "Shelley") {
883 t.Errorf("System prompt doesn't contain 'Shelley' in subsequent request: %s", systemText)
884 }
885
886 t.Logf("System prompt successfully sent in subsequent message (length: %d chars)", len(systemText))
887 })
888}
889
890// inspectableLLMManager is a test helper that always returns the same predictable service
891type inspectableLLMManager struct {
892 predictableService *loop.PredictableService
893 logger *slog.Logger
894}
895
896func (m *inspectableLLMManager) GetService(modelID string) (llm.Service, error) {
897 if modelID != "predictable" {
898 return nil, fmt.Errorf("unsupported model: %s", modelID)
899 }
900 return m.predictableService, nil
901}
902
903func (m *inspectableLLMManager) GetAvailableModels() []string {
904 return []string{"predictable"}
905}
906
907func (m *inspectableLLMManager) HasModel(modelID string) bool {
908 return modelID == "predictable"
909}
910
911func (m *inspectableLLMManager) GetModelInfo(modelID string) *models.ModelInfo {
912 return nil
913}
914
915func (m *inspectableLLMManager) RefreshCustomModels() error {
916 return nil
917}
918
919func TestVersionEndpoint(t *testing.T) {
920 // Create temp DB-backed server
921 ctx := context.Background()
922 tempDB := t.TempDir() + "/version_test.db"
923 database, err := db.New(db.Config{DSN: tempDB})
924 if err != nil {
925 t.Fatalf("Failed to create database: %v", err)
926 }
927 defer database.Close()
928 if err := database.Migrate(ctx); err != nil {
929 t.Fatalf("Failed to migrate database: %v", err)
930 }
931
932 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
933 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
934 svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, true, "", "", "", nil)
935
936 mux := http.NewServeMux()
937 svr.RegisterRoutes(mux)
938 ts := httptest.NewServer(mux)
939 defer ts.Close()
940
941 // Request /version endpoint
942 resp, err := http.Get(ts.URL + "/version")
943 if err != nil {
944 t.Fatalf("GET /version failed: %v", err)
945 }
946 defer resp.Body.Close()
947
948 if resp.StatusCode != http.StatusOK {
949 b, _ := io.ReadAll(resp.Body)
950 t.Fatalf("expected 200, got %d: %s", resp.StatusCode, string(b))
951 }
952
953 if ct := resp.Header.Get("Content-Type"); ct != "application/json" {
954 t.Fatalf("expected application/json, got %q", ct)
955 }
956
957 // Parse the response
958 var versionInfo struct {
959 Commit string `json:"commit"`
960 CommitTime string `json:"commit_time"`
961 Modified bool `json:"modified"`
962 }
963 if err := json.NewDecoder(resp.Body).Decode(&versionInfo); err != nil {
964 t.Fatalf("Failed to decode version info: %v", err)
965 }
966
967 t.Logf("Version info: %+v", versionInfo)
968}
969
970func TestScreenshotRouteServesImage(t *testing.T) {
971 // Create temp DB-backed server
972 ctx := context.Background()
973 tempDB := t.TempDir() + "/route_test.db"
974 database, err := db.New(db.Config{DSN: tempDB})
975 if err != nil {
976 t.Fatalf("Failed to create database: %v", err)
977 }
978 defer database.Close()
979 if err := database.Migrate(ctx); err != nil {
980 t.Fatalf("Failed to migrate database: %v", err)
981 }
982
983 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
984 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
985 svr := server.NewServer(database, llmManager, claudetool.ToolSetConfig{}, logger, true, "", "", "", nil)
986
987 mux := http.NewServeMux()
988 svr.RegisterRoutes(mux)
989 ts := httptest.NewServer(mux)
990 defer ts.Close()
991
992 // Create a fake screenshot file in the expected location
993 id := "testshot"
994 path := browse.GetScreenshotPath(id)
995 if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
996 t.Fatalf("Failed to create screenshot dir: %v", err)
997 }
998 pngData := []byte{0x89, 0x50, 0x4E, 0x47} // PNG magic, minimal content
999 if err := os.WriteFile(path, pngData, 0o644); err != nil {
1000 t.Fatalf("Failed to write screenshot: %v", err)
1001 }
1002 t.Cleanup(func() { _ = os.Remove(path) })
1003
1004 // Request the screenshot
1005 resp, err := http.Get(ts.URL + "/api/read?path=" + url.QueryEscape(path))
1006 if err != nil {
1007 t.Fatalf("GET screenshot failed: %v", err)
1008 }
1009 defer resp.Body.Close()
1010
1011 if resp.StatusCode != http.StatusOK {
1012 b, _ := io.ReadAll(resp.Body)
1013 t.Fatalf("expected 200, got %d: %s", resp.StatusCode, string(b))
1014 }
1015 if ct := resp.Header.Get("Content-Type"); ct != "image/png" {
1016 t.Fatalf("expected image/png, got %q", ct)
1017 }
1018 // Cache-Control should be set
1019 if cc := resp.Header.Get("Cache-Control"); cc == "" {
1020 t.Fatalf("expected Cache-Control header to be set")
1021 }
1022}
1023
1024// TestGitStateChangeCreatesGitInfoMessage verifies that when the agent makes a git commit,
1025// a gitinfo message is created in the database.
1026func TestGitStateChangeCreatesGitInfoMessage(t *testing.T) {
1027 ctx := context.Background()
1028
1029 // Create a temp directory with a git repo
1030 workDir := t.TempDir()
1031
1032 // Initialize git repo
1033 runCmd := func(name string, args ...string) {
1034 // For git commits, use --no-verify to skip hooks
1035 if name == "git" && len(args) > 0 && args[0] == "commit" {
1036 newArgs := []string{"commit", "--no-verify"}
1037 newArgs = append(newArgs, args[1:]...)
1038 args = newArgs
1039 }
1040 cmd := exec.Command(name, args...)
1041 cmd.Dir = workDir
1042 out, err := cmd.CombinedOutput()
1043 if err != nil {
1044 t.Fatalf("Command %s %v failed: %v\n%s", name, args, err, out)
1045 }
1046 }
1047 runCmd("git", "init")
1048 runCmd("git", "config", "user.email", "test@example.com")
1049 runCmd("git", "config", "user.name", "Test User")
1050
1051 // Create initial commit
1052 initialFile := filepath.Join(workDir, "initial.txt")
1053 if err := os.WriteFile(initialFile, []byte("initial content"), 0o644); err != nil {
1054 t.Fatalf("Failed to write initial file: %v", err)
1055 }
1056 runCmd("git", "add", ".")
1057 runCmd("git", "commit", "-m", "Initial commit")
1058
1059 // Create database
1060 tempDB := t.TempDir() + "/gitstate_test.db"
1061 database, err := db.New(db.Config{DSN: tempDB})
1062 if err != nil {
1063 t.Fatalf("Failed to create database: %v", err)
1064 }
1065 defer database.Close()
1066 if err := database.Migrate(ctx); err != nil {
1067 t.Fatalf("Failed to migrate database: %v", err)
1068 }
1069
1070 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelDebug}))
1071
1072 // Create LLM manager that returns predictable service
1073 predictableService := loop.NewPredictableService()
1074 customLLMManager := &inspectableLLMManager{
1075 predictableService: predictableService,
1076 logger: logger,
1077 }
1078
1079 // Create server with git repo as working directory
1080 toolConfig := claudetool.ToolSetConfig{
1081 WorkingDir: workDir,
1082 EnableBrowser: false,
1083 }
1084 svr := server.NewServer(database, customLLMManager, toolConfig, logger, false, "", "", "", nil)
1085
1086 mux := http.NewServeMux()
1087 svr.RegisterRoutes(mux)
1088 ts := httptest.NewServer(mux)
1089 defer ts.Close()
1090
1091 // The test command creates a file and commits it. We use explicit paths to avoid bash safety checks.
1092 // NOTE: We must set cwd when creating the conversation so the tools run in our git repo.
1093 // Use --no-verify to skip commit hooks that may interfere with tests.
1094 chatReq := map[string]interface{}{
1095 "message": "bash: echo 'new content' > newfile.txt && git add newfile.txt && git commit --no-verify -m 'Add new file'",
1096 "model": "predictable",
1097 "cwd": workDir,
1098 }
1099 body, _ := json.Marshal(chatReq)
1100 resp, err := http.Post(ts.URL+"/api/conversations/new", "application/json", bytes.NewBuffer(body))
1101 if err != nil {
1102 t.Fatalf("Failed to send message: %v", err)
1103 }
1104 defer resp.Body.Close()
1105
1106 if resp.StatusCode != http.StatusCreated {
1107 body, _ := io.ReadAll(resp.Body)
1108 t.Fatalf("Expected status 201, got %d: %s", resp.StatusCode, body)
1109 }
1110
1111 var createResp struct {
1112 ConversationID string `json:"conversation_id"`
1113 }
1114 if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil {
1115 t.Fatalf("Failed to decode response: %v", err)
1116 }
1117
1118 // Poll for the gitinfo message to appear
1119 var foundGitInfo bool
1120 for i := 0; i < 50; i++ {
1121 time.Sleep(100 * time.Millisecond)
1122
1123 messages, err := database.ListMessagesByConversationPaginated(ctx, createResp.ConversationID, 100, 0)
1124 if err != nil {
1125 continue
1126 }
1127
1128 for _, msg := range messages {
1129 if msg.Type == string(db.MessageTypeGitInfo) {
1130 foundGitInfo = true
1131 t.Logf("Found gitinfo message: %v", msg.UserData)
1132 break
1133 }
1134 }
1135 if foundGitInfo {
1136 break
1137 }
1138 }
1139
1140 if !foundGitInfo {
1141 t.Fatal("Expected a gitinfo message to be created after git commit, but none was found")
1142 }
1143}
1144
1145func TestSubagentEndToEnd(t *testing.T) {
1146 // Create temporary database
1147 tempDB := t.TempDir() + "/test.db"
1148 database, err := db.New(db.Config{DSN: tempDB})
1149 if err != nil {
1150 t.Fatalf("Failed to create test database: %v", err)
1151 }
1152 defer database.Close()
1153
1154 // Run migrations
1155 if err := database.Migrate(context.Background()); err != nil {
1156 t.Fatalf("Failed to migrate database: %v", err)
1157 }
1158
1159 // Create logger
1160 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
1161 Level: slog.LevelDebug,
1162 }))
1163
1164 // Create LLM service manager with predictable service
1165 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger})
1166
1167 // Set up tools config
1168 toolSetConfig := claudetool.ToolSetConfig{
1169 WorkingDir: t.TempDir(),
1170 EnableBrowser: false,
1171 }
1172
1173 // Create server (predictable-only mode)
1174 svr := server.NewServer(database, llmManager, toolSetConfig, logger, true, "", "", "", nil)
1175
1176 // Set up HTTP server
1177 mux := http.NewServeMux()
1178 svr.RegisterRoutes(mux)
1179 ts := httptest.NewServer(mux)
1180 defer ts.Close()
1181
1182 client := &http.Client{Timeout: 60 * time.Second}
1183
1184 // Create a new conversation that will spawn a subagent
1185 // The predictable service will respond with a subagent tool call for "subagent: test-worker echo hello"
1186 chatReq := map[string]interface{}{
1187 "message": "subagent: test-worker echo hello",
1188 "model": "predictable",
1189 }
1190 reqBody, _ := json.Marshal(chatReq)
1191
1192 resp, err := client.Post(ts.URL+"/api/conversations/new", "application/json", bytes.NewBuffer(reqBody))
1193 if err != nil {
1194 t.Fatalf("Failed to create conversation: %v", err)
1195 }
1196 defer resp.Body.Close()
1197
1198 if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated {
1199 body, _ := io.ReadAll(resp.Body)
1200 t.Fatalf("Expected 200/201, got %d: %s", resp.StatusCode, string(body))
1201 }
1202
1203 var createResp struct {
1204 ConversationID string `json:"conversation_id"`
1205 }
1206 if err := json.NewDecoder(resp.Body).Decode(&createResp); err != nil {
1207 t.Fatalf("Failed to decode response: %v", err)
1208 }
1209
1210 parentConvID := createResp.ConversationID
1211 t.Logf("Created parent conversation: %s", parentConvID)
1212
1213 // Wait for the conversation to complete (subagent should be created and executed)
1214 time.Sleep(3 * time.Second)
1215
1216 // Check that subagents were created
1217 subagentsResp, err := client.Get(ts.URL + "/api/conversation/" + parentConvID + "/subagents")
1218 if err != nil {
1219 t.Fatalf("Failed to get subagents: %v", err)
1220 }
1221 defer subagentsResp.Body.Close()
1222
1223 var subagents []generated.Conversation
1224 if err := json.NewDecoder(subagentsResp.Body).Decode(&subagents); err != nil {
1225 t.Fatalf("Failed to decode subagents: %v", err)
1226 }
1227
1228 if len(subagents) == 0 {
1229 t.Fatal("Expected at least one subagent to be created")
1230 }
1231
1232 t.Logf("Created %d subagent(s)", len(subagents))
1233 for _, sub := range subagents {
1234 t.Logf(" - Subagent: %s (slug: %v)", sub.ConversationID, sub.Slug)
1235 }
1236
1237 // Verify the subagent has the expected slug (or a suffixed version)
1238 foundExpectedSlug := false
1239 for _, sub := range subagents {
1240 if sub.Slug != nil && (strings.HasPrefix(*sub.Slug, "test-worker")) {
1241 foundExpectedSlug = true
1242 break
1243 }
1244 }
1245 if !foundExpectedSlug {
1246 t.Errorf("Expected to find subagent with slug starting with 'test-worker'")
1247 }
1248
1249 // Verify the subagent has a parent_conversation_id set
1250 for _, sub := range subagents {
1251 if sub.ParentConversationID == nil || *sub.ParentConversationID != parentConvID {
1252 t.Errorf("Subagent %s has wrong parent_conversation_id: %v", sub.ConversationID, sub.ParentConversationID)
1253 }
1254 }
1255
1256 // Verify the subagent conversation has messages
1257 subConvResp, err := client.Get(ts.URL + "/api/conversation/" + subagents[0].ConversationID)
1258 if err != nil {
1259 t.Fatalf("Failed to get subagent conversation: %v", err)
1260 }
1261 defer subConvResp.Body.Close()
1262
1263 var subConvData struct {
1264 Messages []json.RawMessage `json:"messages"`
1265 }
1266 if err := json.NewDecoder(subConvResp.Body).Decode(&subConvData); err != nil {
1267 t.Fatalf("Failed to decode subagent conversation: %v", err)
1268 }
1269
1270 if len(subConvData.Messages) == 0 {
1271 t.Error("Expected subagent conversation to have messages")
1272 }
1273 t.Logf("Subagent conversation has %d messages", len(subConvData.Messages))
1274}
1275
1276func TestContinueConversation(t *testing.T) {
1277 // Create temporary database
1278 tempDB := t.TempDir() + "/test.db"
1279 database, err := db.New(db.Config{DSN: tempDB})
1280 if err != nil {
1281 t.Fatalf("Failed to create test database: %v", err)
1282 }
1283 defer database.Close()
1284
1285 // Run migrations
1286 if err := database.Migrate(context.Background()); err != nil {
1287 t.Fatalf("Failed to migrate database: %v", err)
1288 }
1289
1290 // Create logger
1291 logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
1292 Level: slog.LevelDebug,
1293 }))
1294
1295 // Create LLM service manager
1296 llmManager := server.NewLLMServiceManager(&server.LLMConfig{Logger: logger, DB: database})
1297
1298 // Set up tools config
1299 toolSetConfig := claudetool.ToolSetConfig{
1300 WorkingDir: t.TempDir(),
1301 EnableBrowser: false,
1302 }
1303
1304 // Create server
1305 svr := server.NewServer(database, llmManager, toolSetConfig, logger, false, "", "", "", nil)
1306
1307 // Set up HTTP server
1308 mux := http.NewServeMux()
1309 svr.RegisterRoutes(mux)
1310 testServer := httptest.NewServer(mux)
1311 defer testServer.Close()
1312
1313 ctx := context.Background()
1314
1315 // Create source conversation with a slug and some messages
1316 sourceSlug := "source-conversation"
1317 cwd := "/tmp/testdir"
1318 model := "predictable"
1319 sourceConv, err := database.CreateConversation(ctx, &sourceSlug, true, &cwd, &model)
1320 if err != nil {
1321 t.Fatalf("Failed to create source conversation: %v", err)
1322 }
1323
1324 // Add some messages to the source conversation
1325 userMessage := llm.Message{
1326 Role: llm.MessageRoleUser,
1327 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello, this is a test message"}},
1328 }
1329 _, err = database.CreateMessage(ctx, db.CreateMessageParams{
1330 ConversationID: sourceConv.ConversationID,
1331 Type: db.MessageTypeUser,
1332 LLMData: userMessage,
1333 })
1334 if err != nil {
1335 t.Fatalf("Failed to create user message: %v", err)
1336 }
1337
1338 agentMessage := llm.Message{
1339 Role: llm.MessageRoleAssistant,
1340 Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Hello! How can I help you?"}},
1341 }
1342 _, err = database.CreateMessage(ctx, db.CreateMessageParams{
1343 ConversationID: sourceConv.ConversationID,
1344 Type: db.MessageTypeAgent,
1345 LLMData: agentMessage,
1346 })
1347 if err != nil {
1348 t.Fatalf("Failed to create agent message: %v", err)
1349 }
1350
1351 // Create a tool use message
1352 toolMessage := llm.Message{
1353 Role: llm.MessageRoleAssistant,
1354 Content: []llm.Content{{
1355 Type: llm.ContentTypeToolUse,
1356 ToolName: "bash",
1357 ToolInput: json.RawMessage(`{"command": "echo hello world this is a long command that should be truncated if it exceeds the limit"}`),
1358 }},
1359 }
1360 _, err = database.CreateMessage(ctx, db.CreateMessageParams{
1361 ConversationID: sourceConv.ConversationID,
1362 Type: db.MessageTypeAgent,
1363 LLMData: toolMessage,
1364 })
1365 if err != nil {
1366 t.Fatalf("Failed to create tool message: %v", err)
1367 }
1368
1369 // Test the continue conversation endpoint
1370 reqBody := map[string]string{
1371 "source_conversation_id": sourceConv.ConversationID,
1372 "model": "predictable",
1373 }
1374 body, _ := json.Marshal(reqBody)
1375
1376 resp, err := http.Post(testServer.URL+"/api/conversations/continue", "application/json", bytes.NewBuffer(body))
1377 if err != nil {
1378 t.Fatalf("Failed to continue conversation: %v", err)
1379 }
1380 defer resp.Body.Close()
1381
1382 if resp.StatusCode != http.StatusCreated {
1383 bodyBytes, _ := io.ReadAll(resp.Body)
1384 t.Fatalf("Expected status 201, got %d: %s", resp.StatusCode, string(bodyBytes))
1385 }
1386
1387 var result map[string]interface{}
1388 if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
1389 t.Fatalf("Failed to decode response: %v", err)
1390 }
1391
1392 newConversationID, ok := result["conversation_id"].(string)
1393 if !ok || newConversationID == "" {
1394 t.Fatal("Response should contain conversation_id")
1395 }
1396
1397 // Verify new conversation was created
1398 newConv, err := database.GetConversationByID(ctx, newConversationID)
1399 if err != nil {
1400 t.Fatalf("Failed to get new conversation: %v", err)
1401 }
1402
1403 // Verify the new conversation inherited the cwd
1404 if newConv.Cwd == nil || *newConv.Cwd != cwd {
1405 t.Errorf("Expected cwd %s, got %v", cwd, newConv.Cwd)
1406 }
1407
1408 // Verify the new conversation has a user message with the summary
1409 messages, err := database.ListMessages(ctx, newConversationID)
1410 if err != nil {
1411 t.Fatalf("Failed to list messages: %v", err)
1412 }
1413
1414 if len(messages) < 1 {
1415 t.Fatal("Expected at least 1 message in new conversation")
1416 }
1417
1418 // Find the user message with the summary (may be after system prompt)
1419 var summaryText string
1420 for _, msg := range messages {
1421 if msg.Type != string(db.MessageTypeUser) {
1422 continue
1423 }
1424 if msg.LlmData == nil {
1425 continue
1426 }
1427 var llmMsg llm.Message
1428 if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
1429 continue
1430 }
1431 for _, content := range llmMsg.Content {
1432 if content.Type == llm.ContentTypeText && strings.Contains(content.Text, "Continue the conversation") {
1433 summaryText = content.Text
1434 break
1435 }
1436 }
1437 if summaryText != "" {
1438 break
1439 }
1440 }
1441
1442 if summaryText == "" {
1443 t.Fatal("Could not find summary message in new conversation")
1444 }
1445
1446 if !strings.Contains(summaryText, sourceSlug) {
1447 t.Errorf("Summary should reference source conversation slug %q, got: %s", sourceSlug, summaryText)
1448 }
1449
1450 if !strings.Contains(summaryText, "Hello, this is a test message") {
1451 t.Error("Summary should contain user message text")
1452 }
1453
1454 if !strings.Contains(summaryText, "Hello! How can I help you?") {
1455 t.Error("Summary should contain agent message text")
1456 }
1457
1458 if !strings.Contains(summaryText, "Tool: bash") {
1459 t.Error("Summary should contain tool name")
1460 }
1461
1462 // Verify that the agent was NOT started - there should only be a user message,
1463 // no agent response. The user should be able to add instructions before sending.
1464 var hasAgentMessage bool
1465 for _, msg := range messages {
1466 if msg.Type == string(db.MessageTypeAgent) {
1467 hasAgentMessage = true
1468 break
1469 }
1470 }
1471 if hasAgentMessage {
1472 t.Error("Expected no agent message - the agent should NOT be started automatically")
1473 }
1474
1475 // Verify the status is "created" not "accepted" (since agent wasn't started)
1476 if status, ok := result["status"].(string); !ok || status != "created" {
1477 t.Errorf("Expected status 'created', got %v", result["status"])
1478 }
1479
1480 t.Logf("Successfully continued conversation from %s to %s", sourceConv.ConversationID, newConversationID)
1481
1482 // Now test that sending a follow-up message works correctly.
1483 // The agent should receive both the summary message AND the new message.
1484 followUpReq := map[string]string{
1485 "message": "Please focus on the bash commands.",
1486 "model": "predictable",
1487 }
1488 followUpBody, _ := json.Marshal(followUpReq)
1489
1490 followUpResp, err := http.Post(testServer.URL+"/api/conversation/"+newConversationID+"/chat", "application/json", bytes.NewBuffer(followUpBody))
1491 if err != nil {
1492 t.Fatalf("Failed to send follow-up message: %v", err)
1493 }
1494 defer followUpResp.Body.Close()
1495
1496 if followUpResp.StatusCode != http.StatusAccepted {
1497 bodyBytes, _ := io.ReadAll(followUpResp.Body)
1498 t.Fatalf("Expected status 202 for follow-up, got %d: %s", followUpResp.StatusCode, string(bodyBytes))
1499 }
1500
1501 // Wait briefly for the agent to process
1502 time.Sleep(200 * time.Millisecond)
1503
1504 // Verify we now have messages: summary (user), follow-up (user), and agent response
1505 updatedMessages, err := database.ListMessages(ctx, newConversationID)
1506 if err != nil {
1507 t.Fatalf("Failed to list updated messages: %v", err)
1508 }
1509
1510 // Count message types
1511 userCount := 0
1512 agentCount := 0
1513 for _, msg := range updatedMessages {
1514 switch msg.Type {
1515 case string(db.MessageTypeUser):
1516 userCount++
1517 case string(db.MessageTypeAgent):
1518 agentCount++
1519 }
1520 }
1521
1522 // Should have 2 user messages (summary + follow-up) and at least 1 agent response
1523 if userCount != 2 {
1524 t.Errorf("Expected 2 user messages, got %d", userCount)
1525 }
1526 if agentCount < 1 {
1527 t.Errorf("Expected at least 1 agent message after follow-up, got %d", agentCount)
1528 }
1529
1530 t.Logf("Follow-up message processed: %d user messages, %d agent messages", userCount, agentCount)
1531}