anthropic_test.go

  1package test
  2
  3import (
  4	"bytes"
  5	"context"
  6	"encoding/json"
  7	"io"
  8	"log/slog"
  9	"net/http"
 10	"net/http/httptest"
 11	"os"
 12	"strings"
 13	"testing"
 14	"time"
 15
 16	"shelley.exe.dev/claudetool"
 17	"shelley.exe.dev/db"
 18	"shelley.exe.dev/llm"
 19	"shelley.exe.dev/server"
 20)
 21
 22func TestWithAnthropicAPI(t *testing.T) {
 23	// Skip if no API key
 24	apiKey := os.Getenv("ANTHROPIC_API_KEY")
 25	if apiKey == "" {
 26		t.Skip("ANTHROPIC_API_KEY not set, skipping Anthropic API test")
 27	}
 28
 29	// Create temporary database
 30	tempDB := t.TempDir() + "/anthropic_test.db"
 31	database, err := db.New(db.Config{DSN: tempDB})
 32	if err != nil {
 33		t.Fatalf("Failed to create test database: %v", err)
 34	}
 35	defer database.Close()
 36
 37	// Run migrations
 38	if err := database.Migrate(context.Background()); err != nil {
 39		t.Fatalf("Failed to migrate database: %v", err)
 40	}
 41
 42	// Create LLM service manager
 43	logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
 44		Level: slog.LevelInfo, // Less verbose for real API test
 45	}))
 46	llmConfig := &server.LLMConfig{
 47		AnthropicAPIKey: os.Getenv("ANTHROPIC_API_KEY"),
 48		OpenAIAPIKey:    os.Getenv("OPENAI_API_KEY"),
 49		GeminiAPIKey:    os.Getenv("GEMINI_API_KEY"),
 50		FireworksAPIKey: os.Getenv("FIREWORKS_API_KEY"),
 51		Logger:          logger,
 52	}
 53	llmManager := server.NewLLMServiceManager(llmConfig)
 54
 55	// Set up tools config
 56	toolSetConfig := claudetool.ToolSetConfig{
 57		WorkingDir:    t.TempDir(),
 58		LLMProvider:   llmManager,
 59		EnableBrowser: false,
 60	}
 61
 62	// Create server
 63	svr := server.NewServer(database, llmManager, toolSetConfig, logger, false, "", "", "", nil)
 64
 65	// Set up HTTP server
 66	mux := http.NewServeMux()
 67	svr.RegisterRoutes(mux)
 68	testServer := httptest.NewServer(mux)
 69	defer testServer.Close()
 70
 71	t.Run("SimpleConversationWithClaude", func(t *testing.T) {
 72		// Create a conversation
 73		// Using database directly instead of service
 74		slug := "claude-test"
 75		conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
 76		if err != nil {
 77			t.Fatalf("Failed to create conversation: %v", err)
 78		}
 79
 80		// Send a simple message
 81		chatReq := map[string]interface{}{
 82			"message": "Hello! Please introduce yourself briefly and tell me what you can help me with. Keep your response under 50 words.",
 83			"model":   "claude-haiku-4.5",
 84		}
 85		reqBody, _ := json.Marshal(chatReq)
 86
 87		resp, err := http.Post(
 88			testServer.URL+"/api/conversation/"+conv.ConversationID+"/chat",
 89			"application/json",
 90			bytes.NewReader(reqBody),
 91		)
 92		if err != nil {
 93			t.Fatalf("Failed to send chat message: %v", err)
 94		}
 95		defer resp.Body.Close()
 96
 97		if resp.StatusCode != http.StatusAccepted {
 98			t.Fatalf("Expected status 202, got %d", resp.StatusCode)
 99		}
100
101		// Wait for processing (Claude API can be slow)
102		time.Sleep(5 * time.Second)
103
104		// Check messages
105		msgResp, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID)
106		if err != nil {
107			t.Fatalf("Failed to get conversation: %v", err)
108		}
109		defer msgResp.Body.Close()
110
111		if msgResp.StatusCode != http.StatusOK {
112			t.Fatalf("Expected status 200, got %d", msgResp.StatusCode)
113		}
114
115		var payload server.StreamResponse
116		if err := json.NewDecoder(msgResp.Body).Decode(&payload); err != nil {
117			t.Fatalf("Failed to decode messages: %v", err)
118		}
119
120		// Should have system message, user message and assistant response
121		if len(payload.Messages) < 3 {
122			msgTypes := make([]string, len(payload.Messages))
123			for i, msg := range payload.Messages {
124				msgTypes[i] = msg.Type
125			}
126			t.Fatalf("Expected at least 3 messages (system + user + assistant), got %d: %v", len(payload.Messages), msgTypes)
127		}
128
129		// Check first message is system prompt
130		if payload.Messages[0].Type != "system" {
131			t.Fatalf("Expected first message to be system, got %s", payload.Messages[0].Type)
132		}
133
134		// Check user message is second
135		if payload.Messages[1].Type != "user" {
136			t.Fatalf("Expected second message to be user, got %s", payload.Messages[1].Type)
137		}
138
139		// Check assistant response
140		assistantFound := false
141		for _, msg := range payload.Messages {
142			if msg.Type == "agent" {
143				assistantFound = true
144				if msg.LlmData == nil {
145					t.Fatal("Assistant message has no LLM data")
146				}
147
148				// Parse and check the response content
149				var llmMsg llm.Message
150				if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err != nil {
151					t.Fatalf("Failed to unmarshal LLM data: %v", err)
152				}
153
154				if len(llmMsg.Content) == 0 {
155					t.Fatal("Assistant response has no content")
156				}
157
158				responseText := llmMsg.Content[0].Text
159				if responseText == "" {
160					t.Fatal("Assistant response text is empty")
161				}
162
163				// Claude should mention being Claude or an AI assistant
164				lowerResponse := strings.ToLower(responseText)
165				if !strings.Contains(lowerResponse, "claude") && !strings.Contains(lowerResponse, "assistant") {
166					t.Logf("Response: %s", responseText)
167					// This is not a hard failure - Claude might respond differently
168				}
169
170				t.Logf("Claude responded: %s", responseText)
171				break
172			}
173		}
174
175		if !assistantFound {
176			t.Fatal("No assistant response found")
177		}
178	})
179
180	t.Run("ConversationWithToolUse", func(t *testing.T) {
181		// Create a conversation
182		// Using database directly instead of service
183		slug := "tool-test"
184		conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
185		if err != nil {
186			t.Fatalf("Failed to create conversation: %v", err)
187		}
188
189		// Ask Claude to think about something
190		chatReq := map[string]interface{}{
191			"message": "Please use the think tool to plan how you would help someone learn to code. Keep it brief.",
192			"model":   "claude-haiku-4.5",
193		}
194		reqBody, _ := json.Marshal(chatReq)
195
196		resp, err := http.Post(
197			testServer.URL+"/api/conversation/"+conv.ConversationID+"/chat",
198			"application/json",
199			bytes.NewReader(reqBody),
200		)
201		if err != nil {
202			t.Fatalf("Failed to send chat message: %v", err)
203		}
204		defer resp.Body.Close()
205
206		if resp.StatusCode != http.StatusAccepted {
207			t.Fatalf("Expected status 202, got %d", resp.StatusCode)
208		}
209
210		// Wait for processing (tool use might take longer)
211		time.Sleep(8 * time.Second)
212
213		// Check messages
214		msgResp, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID)
215		if err != nil {
216			t.Fatalf("Failed to get conversation: %v", err)
217		}
218		defer msgResp.Body.Close()
219
220		var payload server.StreamResponse
221		if err := json.NewDecoder(msgResp.Body).Decode(&payload); err != nil {
222			t.Fatalf("Failed to decode messages: %v", err)
223		}
224
225		// Should have multiple messages due to tool use
226		if len(payload.Messages) < 3 {
227			t.Logf("Got %d messages, expected at least 3 for tool use interaction", len(payload.Messages))
228			// This might not always be the case depending on Claude's response
229		}
230
231		// Log all messages for debugging
232		for i, msg := range payload.Messages {
233			t.Logf("Message %d: Type=%s", i, msg.Type)
234			if msg.LlmData != nil {
235				var llmMsg llm.Message
236				if err := json.Unmarshal([]byte(*msg.LlmData), &llmMsg); err == nil {
237					if len(llmMsg.Content) > 0 && llmMsg.Content[0].Text != "" {
238						t.Logf("  Content: %s", llmMsg.Content[0].Text[:min(100, len(llmMsg.Content[0].Text))])
239					}
240				}
241			}
242		}
243	})
244
245	t.Run("StreamingEndpoint", func(t *testing.T) {
246		// Create a conversation with a message
247		// Using database directly instead of service
248		// Using database directly instead of service
249		slug := "stream-test"
250		conv, err := database.CreateConversation(context.Background(), &slug, true, nil, nil)
251		if err != nil {
252			t.Fatalf("Failed to create conversation: %v", err)
253		}
254
255		// Add a test message
256		testMsg := llm.Message{
257			Role: llm.MessageRoleUser,
258			Content: []llm.Content{
259				{Type: llm.ContentTypeText, Text: "Hello streaming test"},
260			},
261		}
262		_, err = database.CreateMessage(context.Background(), db.CreateMessageParams{
263			ConversationID: conv.ConversationID,
264			Type:           db.MessageTypeUser,
265			LLMData:        testMsg,
266		})
267		if err != nil {
268			t.Fatalf("Failed to create message: %v", err)
269		}
270
271		// Test stream endpoint
272		resp, err := http.Get(testServer.URL + "/api/conversation/" + conv.ConversationID + "/stream")
273		if err != nil {
274			t.Fatalf("Failed to get stream: %v", err)
275		}
276		defer resp.Body.Close()
277
278		if resp.StatusCode != http.StatusOK {
279			t.Fatalf("Expected status 200, got %d", resp.StatusCode)
280		}
281
282		// Check headers
283		if resp.Header.Get("Content-Type") != "text/event-stream" {
284			t.Fatal("Expected text/event-stream content type")
285		}
286
287		// Read first chunk (should contain current messages)
288		buf := make([]byte, 2048)
289		n, err := resp.Body.Read(buf)
290		if err != nil && err != io.EOF {
291			t.Fatalf("Failed to read stream: %v", err)
292		}
293
294		data := string(buf[:n])
295		if !strings.Contains(data, "data: ") {
296			t.Fatal("Expected SSE data format")
297		}
298
299		t.Logf("Received stream data: %s", data[:min(200, len(data))])
300	})
301}
302
// min returns the smaller of a and b; when the two are equal, that shared
// value is returned.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}