anthropic_test.go

  1package anthropic
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"net/http"
  9	"net/http/httptest"
 10	"testing"
 11	"time"
 12
 13	"charm.land/fantasy"
 14	"github.com/stretchr/testify/require"
 15)
 16
 17func TestToPrompt_DropsEmptyMessages(t *testing.T) {
 18	t.Parallel()
 19
 20	t.Run("should drop assistant messages with only reasoning content", func(t *testing.T) {
 21		t.Parallel()
 22
 23		prompt := fantasy.Prompt{
 24			{
 25				Role: fantasy.MessageRoleUser,
 26				Content: []fantasy.MessagePart{
 27					fantasy.TextPart{Text: "Hello"},
 28				},
 29			},
 30			{
 31				Role: fantasy.MessageRoleAssistant,
 32				Content: []fantasy.MessagePart{
 33					fantasy.ReasoningPart{
 34						Text: "Let me think about this...",
 35						ProviderOptions: fantasy.ProviderOptions{
 36							Name: &ReasoningOptionMetadata{
 37								Signature: "abc123",
 38							},
 39						},
 40					},
 41				},
 42			},
 43		}
 44
 45		systemBlocks, messages, warnings := toPrompt(prompt, true)
 46
 47		require.Empty(t, systemBlocks)
 48		require.Len(t, messages, 1, "should only have user message, assistant message should be dropped")
 49		require.Len(t, warnings, 1)
 50		require.Equal(t, fantasy.CallWarningTypeOther, warnings[0].Type)
 51		require.Contains(t, warnings[0].Message, "dropping empty assistant message")
 52		require.Contains(t, warnings[0].Message, "neither user-facing content nor tool calls")
 53	})
 54
 55	t.Run("should drop assistant reasoning when sendReasoning disabled", func(t *testing.T) {
 56		t.Parallel()
 57
 58		prompt := fantasy.Prompt{
 59			{
 60				Role: fantasy.MessageRoleUser,
 61				Content: []fantasy.MessagePart{
 62					fantasy.TextPart{Text: "Hello"},
 63				},
 64			},
 65			{
 66				Role: fantasy.MessageRoleAssistant,
 67				Content: []fantasy.MessagePart{
 68					fantasy.ReasoningPart{
 69						Text: "Let me think about this...",
 70						ProviderOptions: fantasy.ProviderOptions{
 71							Name: &ReasoningOptionMetadata{
 72								Signature: "def456",
 73							},
 74						},
 75					},
 76				},
 77			},
 78		}
 79
 80		systemBlocks, messages, warnings := toPrompt(prompt, false)
 81
 82		require.Empty(t, systemBlocks)
 83		require.Len(t, messages, 1, "should only have user message, assistant message should be dropped")
 84		require.Len(t, warnings, 2)
 85		require.Equal(t, fantasy.CallWarningTypeOther, warnings[0].Type)
 86		require.Contains(t, warnings[0].Message, "sending reasoning content is disabled")
 87		require.Equal(t, fantasy.CallWarningTypeOther, warnings[1].Type)
 88		require.Contains(t, warnings[1].Message, "dropping empty assistant message")
 89	})
 90
 91	t.Run("should drop truly empty assistant messages", func(t *testing.T) {
 92		t.Parallel()
 93
 94		prompt := fantasy.Prompt{
 95			{
 96				Role: fantasy.MessageRoleUser,
 97				Content: []fantasy.MessagePart{
 98					fantasy.TextPart{Text: "Hello"},
 99				},
100			},
101			{
102				Role:    fantasy.MessageRoleAssistant,
103				Content: []fantasy.MessagePart{},
104			},
105		}
106
107		systemBlocks, messages, warnings := toPrompt(prompt, true)
108
109		require.Empty(t, systemBlocks)
110		require.Len(t, messages, 1, "should only have user message")
111		require.Len(t, warnings, 1)
112		require.Equal(t, fantasy.CallWarningTypeOther, warnings[0].Type)
113		require.Contains(t, warnings[0].Message, "dropping empty assistant message")
114	})
115
116	t.Run("should keep assistant messages with text content", func(t *testing.T) {
117		t.Parallel()
118
119		prompt := fantasy.Prompt{
120			{
121				Role: fantasy.MessageRoleUser,
122				Content: []fantasy.MessagePart{
123					fantasy.TextPart{Text: "Hello"},
124				},
125			},
126			{
127				Role: fantasy.MessageRoleAssistant,
128				Content: []fantasy.MessagePart{
129					fantasy.TextPart{Text: "Hi there!"},
130				},
131			},
132		}
133
134		systemBlocks, messages, warnings := toPrompt(prompt, true)
135
136		require.Empty(t, systemBlocks)
137		require.Len(t, messages, 2, "should have both user and assistant messages")
138		require.Empty(t, warnings)
139	})
140
141	t.Run("should keep assistant messages with tool calls", func(t *testing.T) {
142		t.Parallel()
143
144		prompt := fantasy.Prompt{
145			{
146				Role: fantasy.MessageRoleUser,
147				Content: []fantasy.MessagePart{
148					fantasy.TextPart{Text: "What's the weather?"},
149				},
150			},
151			{
152				Role: fantasy.MessageRoleAssistant,
153				Content: []fantasy.MessagePart{
154					fantasy.ToolCallPart{
155						ToolCallID: "call_123",
156						ToolName:   "get_weather",
157						Input:      `{"location":"NYC"}`,
158					},
159				},
160			},
161		}
162
163		systemBlocks, messages, warnings := toPrompt(prompt, true)
164
165		require.Empty(t, systemBlocks)
166		require.Len(t, messages, 2, "should have both user and assistant messages")
167		require.Empty(t, warnings)
168	})
169
170	t.Run("should drop assistant messages with invalid tool input", func(t *testing.T) {
171		t.Parallel()
172
173		prompt := fantasy.Prompt{
174			{
175				Role: fantasy.MessageRoleUser,
176				Content: []fantasy.MessagePart{
177					fantasy.TextPart{Text: "Hi"},
178				},
179			},
180			{
181				Role: fantasy.MessageRoleAssistant,
182				Content: []fantasy.MessagePart{
183					fantasy.ToolCallPart{
184						ToolCallID: "call_123",
185						ToolName:   "get_weather",
186						Input:      "{not-json",
187					},
188				},
189			},
190		}
191
192		systemBlocks, messages, warnings := toPrompt(prompt, true)
193
194		require.Empty(t, systemBlocks)
195		require.Len(t, messages, 1, "should only have user message")
196		require.Len(t, warnings, 1)
197		require.Equal(t, fantasy.CallWarningTypeOther, warnings[0].Type)
198		require.Contains(t, warnings[0].Message, "dropping empty assistant message")
199	})
200
201	t.Run("should keep assistant messages with reasoning and text", func(t *testing.T) {
202		t.Parallel()
203
204		prompt := fantasy.Prompt{
205			{
206				Role: fantasy.MessageRoleUser,
207				Content: []fantasy.MessagePart{
208					fantasy.TextPart{Text: "Hello"},
209				},
210			},
211			{
212				Role: fantasy.MessageRoleAssistant,
213				Content: []fantasy.MessagePart{
214					fantasy.ReasoningPart{
215						Text: "Let me think...",
216						ProviderOptions: fantasy.ProviderOptions{
217							Name: &ReasoningOptionMetadata{
218								Signature: "abc123",
219							},
220						},
221					},
222					fantasy.TextPart{Text: "Hi there!"},
223				},
224			},
225		}
226
227		systemBlocks, messages, warnings := toPrompt(prompt, true)
228
229		require.Empty(t, systemBlocks)
230		require.Len(t, messages, 2, "should have both user and assistant messages")
231		require.Empty(t, warnings)
232	})
233
234	t.Run("should keep user messages with image content", func(t *testing.T) {
235		t.Parallel()
236
237		prompt := fantasy.Prompt{
238			{
239				Role: fantasy.MessageRoleUser,
240				Content: []fantasy.MessagePart{
241					fantasy.FilePart{
242						Data:      []byte{0x01, 0x02, 0x03},
243						MediaType: "image/png",
244					},
245				},
246			},
247		}
248
249		systemBlocks, messages, warnings := toPrompt(prompt, true)
250
251		require.Empty(t, systemBlocks)
252		require.Len(t, messages, 1)
253		require.Empty(t, warnings)
254	})
255
256	t.Run("should drop user messages without visible content", func(t *testing.T) {
257		t.Parallel()
258
259		prompt := fantasy.Prompt{
260			{
261				Role: fantasy.MessageRoleUser,
262				Content: []fantasy.MessagePart{
263					fantasy.FilePart{
264						Data:      []byte("not supported"),
265						MediaType: "application/pdf",
266					},
267				},
268			},
269		}
270
271		systemBlocks, messages, warnings := toPrompt(prompt, true)
272
273		require.Empty(t, systemBlocks)
274		require.Empty(t, messages)
275		require.Len(t, warnings, 1)
276		require.Equal(t, fantasy.CallWarningTypeOther, warnings[0].Type)
277		require.Contains(t, warnings[0].Message, "dropping empty user message")
278		require.Contains(t, warnings[0].Message, "neither user-facing content nor tool results")
279	})
280
281	t.Run("should keep user messages with tool results", func(t *testing.T) {
282		t.Parallel()
283
284		prompt := fantasy.Prompt{
285			{
286				Role: fantasy.MessageRoleTool,
287				Content: []fantasy.MessagePart{
288					fantasy.ToolResultPart{
289						ToolCallID: "call_123",
290						Output:     fantasy.ToolResultOutputContentText{Text: "done"},
291					},
292				},
293			},
294		}
295
296		systemBlocks, messages, warnings := toPrompt(prompt, true)
297
298		require.Empty(t, systemBlocks)
299		require.Len(t, messages, 1)
300		require.Empty(t, warnings)
301	})
302
303	t.Run("should keep user messages with tool error results", func(t *testing.T) {
304		t.Parallel()
305
306		prompt := fantasy.Prompt{
307			{
308				Role: fantasy.MessageRoleTool,
309				Content: []fantasy.MessagePart{
310					fantasy.ToolResultPart{
311						ToolCallID: "call_456",
312						Output:     fantasy.ToolResultOutputContentError{Error: errors.New("boom")},
313					},
314				},
315			},
316		}
317
318		systemBlocks, messages, warnings := toPrompt(prompt, true)
319
320		require.Empty(t, systemBlocks)
321		require.Len(t, messages, 1)
322		require.Empty(t, warnings)
323	})
324
325	t.Run("should keep user messages with tool media results", func(t *testing.T) {
326		t.Parallel()
327
328		prompt := fantasy.Prompt{
329			{
330				Role: fantasy.MessageRoleTool,
331				Content: []fantasy.MessagePart{
332					fantasy.ToolResultPart{
333						ToolCallID: "call_789",
334						Output: fantasy.ToolResultOutputContentMedia{
335							Data:      "AQID",
336							MediaType: "image/png",
337						},
338					},
339				},
340			},
341		}
342
343		systemBlocks, messages, warnings := toPrompt(prompt, true)
344
345		require.Empty(t, systemBlocks)
346		require.Len(t, messages, 1)
347		require.Empty(t, warnings)
348	})
349}
350
351func TestParseContextTooLargeError(t *testing.T) {
352	t.Parallel()
353
354	tests := []struct {
355		name     string
356		message  string
357		wantErr  bool
358		wantUsed int
359		wantMax  int
360	}{
361		{
362			name:     "matches anthropic format",
363			message:  "prompt is too long: 202630 tokens > 200000 maximum",
364			wantErr:  true,
365			wantUsed: 202630,
366			wantMax:  200000,
367		},
368		{
369			name:     "matches with different numbers",
370			message:  "prompt is too long: 150000 tokens > 128000 maximum",
371			wantErr:  true,
372			wantUsed: 150000,
373			wantMax:  128000,
374		},
375		{
376			name:     "matches with extra whitespace",
377			message:  "prompt is too long:  202630  tokens  >  200000  maximum",
378			wantErr:  true,
379			wantUsed: 202630,
380			wantMax:  200000,
381		},
382		{
383			name:    "does not match unrelated error",
384			message: "invalid api key",
385			wantErr: false,
386		},
387		{
388			name:    "does not match rate limit error",
389			message: "rate limit exceeded",
390			wantErr: false,
391		},
392	}
393
394	for _, tt := range tests {
395		t.Run(tt.name, func(t *testing.T) {
396			t.Parallel()
397			providerErr := &fantasy.ProviderError{Message: tt.message}
398			parseContextTooLargeError(tt.message, providerErr)
399
400			if tt.wantErr {
401				require.True(t, providerErr.IsContextTooLarge())
402				require.Equal(t, tt.wantUsed, providerErr.ContextUsedTokens)
403				require.Equal(t, tt.wantMax, providerErr.ContextMaxTokens)
404			} else {
405				require.False(t, providerErr.IsContextTooLarge())
406			}
407		})
408	}
409}
410
411func TestParseOptions_Effort(t *testing.T) {
412	t.Parallel()
413
414	options, err := ParseOptions(map[string]any{
415		"send_reasoning":            true,
416		"thinking":                  map[string]any{"budget_tokens": int64(2048)},
417		"effort":                    "medium",
418		"disable_parallel_tool_use": true,
419	})
420	require.NoError(t, err)
421	require.NotNil(t, options.SendReasoning)
422	require.True(t, *options.SendReasoning)
423	require.NotNil(t, options.Thinking)
424	require.Equal(t, int64(2048), options.Thinking.BudgetTokens)
425	require.NotNil(t, options.Effort)
426	require.Equal(t, EffortMedium, *options.Effort)
427	require.NotNil(t, options.DisableParallelToolUse)
428	require.True(t, *options.DisableParallelToolUse)
429}
430
431func TestGenerate_SendsOutputConfigEffort(t *testing.T) {
432	t.Parallel()
433
434	server, calls := newAnthropicJSONServer(mockAnthropicGenerateResponse())
435	defer server.Close()
436
437	provider, err := New(
438		WithAPIKey("test-api-key"),
439		WithBaseURL(server.URL),
440	)
441	require.NoError(t, err)
442
443	model, err := provider.LanguageModel(context.Background(), "claude-sonnet-4-20250514")
444	require.NoError(t, err)
445
446	effort := EffortMedium
447	_, err = model.Generate(context.Background(), fantasy.Call{
448		Prompt: testPrompt(),
449		ProviderOptions: NewProviderOptions(&ProviderOptions{
450			Effort: &effort,
451		}),
452	})
453	require.NoError(t, err)
454
455	call := awaitAnthropicCall(t, calls)
456	require.Equal(t, "POST", call.method)
457	require.Equal(t, "/v1/messages", call.path)
458	requireAnthropicEffort(t, call.body, EffortMedium)
459}
460
461func TestStream_SendsOutputConfigEffort(t *testing.T) {
462	t.Parallel()
463
464	server, calls := newAnthropicStreamingServer([]string{
465		"event: message_start\n",
466		"data: {\"type\":\"message_start\",\"message\":{}}\n\n",
467		"event: message_stop\n",
468		"data: {\"type\":\"message_stop\"}\n\n",
469	})
470	defer server.Close()
471
472	provider, err := New(
473		WithAPIKey("test-api-key"),
474		WithBaseURL(server.URL),
475	)
476	require.NoError(t, err)
477
478	model, err := provider.LanguageModel(context.Background(), "claude-sonnet-4-20250514")
479	require.NoError(t, err)
480
481	effort := EffortHigh
482	stream, err := model.Stream(context.Background(), fantasy.Call{
483		Prompt: testPrompt(),
484		ProviderOptions: NewProviderOptions(&ProviderOptions{
485			Effort: &effort,
486		}),
487	})
488	require.NoError(t, err)
489
490	stream(func(fantasy.StreamPart) bool { return true })
491
492	call := awaitAnthropicCall(t, calls)
493	require.Equal(t, "POST", call.method)
494	require.Equal(t, "/v1/messages", call.path)
495	requireAnthropicEffort(t, call.body, EffortHigh)
496}
497
// anthropicCall is a snapshot of one HTTP request received by a stub server
// in these tests.
type anthropicCall struct {
	// method and path are the request's HTTP method and URL path.
	method, path string
	// body is the request payload decoded from JSON; it stays nil when
	// decoding did not produce a value.
	body map[string]any
}
503
504func newAnthropicJSONServer(response map[string]any) (*httptest.Server, <-chan anthropicCall) {
505	calls := make(chan anthropicCall, 4)
506
507	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
508		var body map[string]any
509		if r.Body != nil {
510			_ = json.NewDecoder(r.Body).Decode(&body)
511		}
512
513		calls <- anthropicCall{
514			method: r.Method,
515			path:   r.URL.Path,
516			body:   body,
517		}
518
519		w.Header().Set("Content-Type", "application/json")
520		_ = json.NewEncoder(w).Encode(response)
521	}))
522
523	return server, calls
524}
525
526func newAnthropicStreamingServer(chunks []string) (*httptest.Server, <-chan anthropicCall) {
527	calls := make(chan anthropicCall, 4)
528
529	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
530		var body map[string]any
531		if r.Body != nil {
532			_ = json.NewDecoder(r.Body).Decode(&body)
533		}
534
535		calls <- anthropicCall{
536			method: r.Method,
537			path:   r.URL.Path,
538			body:   body,
539		}
540
541		w.Header().Set("Content-Type", "text/event-stream")
542		w.Header().Set("Cache-Control", "no-cache")
543		w.Header().Set("Connection", "keep-alive")
544		w.WriteHeader(http.StatusOK)
545
546		for _, chunk := range chunks {
547			_, _ = fmt.Fprint(w, chunk)
548			if flusher, ok := w.(http.Flusher); ok {
549				flusher.Flush()
550			}
551		}
552	}))
553
554	return server, calls
555}
556
557func awaitAnthropicCall(t *testing.T, calls <-chan anthropicCall) anthropicCall {
558	t.Helper()
559
560	select {
561	case call := <-calls:
562		return call
563	case <-time.After(2 * time.Second):
564		t.Fatal("timed out waiting for Anthropic request")
565		return anthropicCall{}
566	}
567}
568
569func assertNoAnthropicCall(t *testing.T, calls <-chan anthropicCall) {
570	t.Helper()
571
572	select {
573	case call := <-calls:
574		t.Fatalf("expected no Anthropic API call, but got %s %s", call.method, call.path)
575	case <-time.After(200 * time.Millisecond):
576	}
577}
578
579func requireAnthropicEffort(t *testing.T, body map[string]any, expected Effort) {
580	t.Helper()
581
582	outputConfig, ok := body["output_config"].(map[string]any)
583	thinking, ok := body["thinking"].(map[string]any)
584	require.True(t, ok)
585	require.Equal(t, string(expected), outputConfig["effort"])
586	require.Equal(t, "adaptive", thinking["type"])
587}
588
589func testPrompt() fantasy.Prompt {
590	return fantasy.Prompt{
591		{
592			Role: fantasy.MessageRoleUser,
593			Content: []fantasy.MessagePart{
594				fantasy.TextPart{Text: "Hello"},
595			},
596		},
597	}
598}
599
// mockAnthropicGenerateResponse returns a fixed response payload used as the
// stub server's reply in the generate test: an assistant message with one
// text block ("Hi there"), stop reason "end_turn", and a usage section
// reporting 5 input tokens and 2 output tokens.
func mockAnthropicGenerateResponse() map[string]any {
	textBlock := map[string]any{
		"type": "text",
		"text": "Hi there",
	}

	cacheCreation := map[string]any{
		"ephemeral_1h_input_tokens": 0,
		"ephemeral_5m_input_tokens": 0,
	}

	serverToolUse := map[string]any{
		"web_search_requests": 0,
	}

	usage := map[string]any{
		"cache_creation":              cacheCreation,
		"cache_creation_input_tokens": 0,
		"cache_read_input_tokens":     0,
		"input_tokens":                5,
		"output_tokens":               2,
		"server_tool_use":             serverToolUse,
		"service_tier":                "standard",
	}

	return map[string]any{
		"id":            "msg_01Test",
		"type":          "message",
		"role":          "assistant",
		"model":         "claude-sonnet-4-20250514",
		"content":       []any{textBlock},
		"stop_reason":   "end_turn",
		"stop_sequence": "",
		"usage":         usage,
	}
}