anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"regexp"
 11	"strconv"
 12	"strings"
 13	"time"
 14
 15	"github.com/anthropics/anthropic-sdk-go"
 16	"github.com/anthropics/anthropic-sdk-go/bedrock"
 17	"github.com/anthropics/anthropic-sdk-go/option"
 18	"github.com/charmbracelet/catwalk/pkg/catwalk"
 19	"github.com/charmbracelet/crush/internal/llm/tools"
 20	"github.com/charmbracelet/crush/internal/message"
 21)
 22
 23type anthropicProvider struct {
 24	*baseProvider
 25	useBedrock        bool
 26	client            anthropic.Client
 27	adjustedMaxTokens int // Used when context limit is hit
 28}
 29
 30func NewAnthropicProvider(base *baseProvider, useBedrock bool) Provider {
 31	return &anthropicProvider{
 32		baseProvider: base,
 33		client:       createAnthropicClient(base, useBedrock),
 34	}
 35}
 36
 37func createAnthropicClient(opts *baseProvider, useBedrock bool) anthropic.Client {
 38	anthropicClientOptions := []option.RequestOption{}
 39
 40	// Check if Authorization header is provided in extra headers
 41	hasBearerAuth := false
 42	if opts.extraHeaders != nil {
 43		for key := range opts.extraHeaders {
 44			if strings.ToLower(key) == "authorization" {
 45				hasBearerAuth = true
 46				break
 47			}
 48		}
 49	}
 50
 51	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 52
 53	if opts.apiKey != "" && !hasBearerAuth {
 54		if isBearerToken {
 55			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 56			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 57		} else {
 58			// Use standard X-Api-Key header
 59			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 60		}
 61	} else if hasBearerAuth {
 62		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 63	}
 64	if useBedrock {
 65		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 66	}
 67	for _, header := range opts.extraHeaders {
 68		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(header, opts.extraHeaders[header]))
 69	}
 70	for key, value := range opts.extraBody {
 71		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
 72	}
 73	return anthropic.NewClient(anthropicClientOptions...)
 74}
 75
 76func (a *anthropicProvider) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
 77	for i, msg := range messages {
 78		cache := false
 79		if i > len(messages)-3 {
 80			cache = true
 81		}
 82		switch msg.Role {
 83		case message.User:
 84			content := anthropic.NewTextBlock(msg.Content().String())
 85			if cache && !a.disableCache {
 86				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
 87					Type: "ephemeral",
 88				}
 89			}
 90			var contentBlocks []anthropic.ContentBlockParamUnion
 91			contentBlocks = append(contentBlocks, content)
 92			for _, binaryContent := range msg.BinaryContent() {
 93				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
 94				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
 95				contentBlocks = append(contentBlocks, imageBlock)
 96			}
 97			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
 98
 99		case message.Assistant:
100			blocks := []anthropic.ContentBlockParamUnion{}
101
102			// Add thinking blocks first if present (required when thinking is enabled with tool use)
103			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
104				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
105				blocks = append(blocks, thinkingBlock)
106			}
107
108			if msg.Content().String() != "" {
109				content := anthropic.NewTextBlock(msg.Content().String())
110				if cache && !a.disableCache {
111					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
112						Type: "ephemeral",
113					}
114				}
115				blocks = append(blocks, content)
116			}
117
118			for _, toolCall := range msg.ToolCalls() {
119				var inputMap map[string]any
120				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
121				if err != nil {
122					continue
123				}
124				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
125			}
126
127			if len(blocks) == 0 {
128				slog.Warn("There is a message without content, investigate, this should not happen")
129				continue
130			}
131			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
132
133		case message.Tool:
134			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
135			for i, toolResult := range msg.ToolResults() {
136				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
137			}
138			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
139		}
140	}
141	return
142}
143
144func (a *anthropicProvider) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
145	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
146
147	for i, tool := range tools {
148		info := tool.Info()
149		toolParam := anthropic.ToolParam{
150			Name:        info.Name,
151			Description: anthropic.String(info.Description),
152			InputSchema: anthropic.ToolInputSchemaParam{
153				Properties: info.Parameters,
154				Required:   info.Required,
155			},
156		}
157
158		if i == len(tools)-1 && !a.disableCache {
159			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
160				Type: "ephemeral",
161			}
162		}
163
164		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
165	}
166
167	return anthropicTools
168}
169
170func (a *anthropicProvider) finishReason(reason string) message.FinishReason {
171	switch reason {
172	case "end_turn":
173		return message.FinishReasonEndTurn
174	case "max_tokens":
175		return message.FinishReasonMaxTokens
176	case "tool_use":
177		return message.FinishReasonToolUse
178	case "stop_sequence":
179		return message.FinishReasonEndTurn
180	default:
181		return message.FinishReasonUnknown
182	}
183}
184
185func (a *anthropicProvider) isThinkingEnabled(model string) bool {
186	return a.Model(model).CanReason && a.think
187}
188
189func (a *anthropicProvider) preparedMessages(modelID string, messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
190	model := a.Model(modelID)
191	var thinkingParam anthropic.ThinkingConfigParamUnion
192	temperature := anthropic.Float(0)
193
194	maxTokens := model.DefaultMaxTokens
195	if a.maxTokens > 0 {
196		maxTokens = a.maxTokens
197	}
198	if a.isThinkingEnabled(modelID) {
199		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
200		temperature = anthropic.Float(1)
201	}
202
203	// Use adjusted max tokens if context limit was hit
204	if a.adjustedMaxTokens > 0 {
205		maxTokens = int64(a.adjustedMaxTokens)
206	}
207
208	systemBlocks := []anthropic.TextBlockParam{}
209
210	// Add custom system prompt prefix if configured
211	if a.systemPromptPrefix != "" {
212		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
213			Text: a.systemPromptPrefix,
214			CacheControl: anthropic.CacheControlEphemeralParam{
215				Type: "ephemeral",
216			},
217		})
218	}
219
220	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
221		Text: a.systemMessage,
222		CacheControl: anthropic.CacheControlEphemeralParam{
223			Type: "ephemeral",
224		},
225	})
226
227	return anthropic.MessageNewParams{
228		Model:       anthropic.Model(model.ID),
229		MaxTokens:   maxTokens,
230		Temperature: temperature,
231		Messages:    messages,
232		Tools:       tools,
233		Thinking:    thinkingParam,
234		System:      systemBlocks,
235	}
236}
237
238func (a *anthropicProvider) Send(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) {
239	messages = a.cleanMessages(messages)
240	return a.send(ctx, model, messages, tools)
241}
242
243func (a *anthropicProvider) send(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
244	attempts := 0
245	for {
246		attempts++
247		// Prepare messages on each attempt in case max_tokens was adjusted
248		preparedMessages := a.preparedMessages(model, a.convertMessages(messages), a.convertTools(tools))
249		if a.debug {
250			jsonData, _ := json.Marshal(preparedMessages)
251			slog.Debug("Prepared messages", "messages", string(jsonData))
252		}
253
254		var opts []option.RequestOption
255		if a.isThinkingEnabled(model) {
256			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
257		}
258		anthropicResponse, err := a.client.Messages.New(
259			ctx,
260			preparedMessages,
261			opts...,
262		)
263		// If there is an error we are going to see if we can retry the call
264		if err != nil {
265			slog.Error("Error in Anthropic API call", "error", err)
266			retry, after, retryErr := a.shouldRetry(attempts, err)
267			if retryErr != nil {
268				return nil, retryErr
269			}
270			if retry {
271				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
272				select {
273				case <-ctx.Done():
274					return nil, ctx.Err()
275				case <-time.After(time.Duration(after) * time.Millisecond):
276					continue
277				}
278			}
279			return nil, retryErr
280		}
281
282		content := ""
283		for _, block := range anthropicResponse.Content {
284			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
285				content += text.Text
286			}
287		}
288
289		return &ProviderResponse{
290			Content:   content,
291			ToolCalls: a.toolCalls(*anthropicResponse),
292			Usage:     a.usage(*anthropicResponse),
293		}, nil
294	}
295}
296
297func (a *anthropicProvider) Stream(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
298	messages = a.cleanMessages(messages)
299	return a.stream(ctx, model, messages, tools)
300}
301
302func (a *anthropicProvider) stream(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
303	attempts := 0
304	eventChan := make(chan ProviderEvent)
305	go func() {
306		for {
307			attempts++
308			// Prepare messages on each attempt in case max_tokens was adjusted
309			preparedMessages := a.preparedMessages(model, a.convertMessages(messages), a.convertTools(tools))
310			if a.debug {
311				jsonData, _ := json.Marshal(preparedMessages)
312				slog.Debug("Prepared messages", "messages", string(jsonData))
313			}
314
315			var opts []option.RequestOption
316			if a.isThinkingEnabled(model) {
317				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
318			}
319
320			anthropicStream := a.client.Messages.NewStreaming(
321				ctx,
322				preparedMessages,
323				opts...,
324			)
325			accumulatedMessage := anthropic.Message{}
326
327			currentToolCallID := ""
328			for anthropicStream.Next() {
329				event := anthropicStream.Current()
330				err := accumulatedMessage.Accumulate(event)
331				if err != nil {
332					slog.Warn("Error accumulating message", "error", err)
333					continue
334				}
335
336				switch event := event.AsAny().(type) {
337				case anthropic.ContentBlockStartEvent:
338					switch event.ContentBlock.Type {
339					case "text":
340						eventChan <- ProviderEvent{Type: EventContentStart}
341					case "tool_use":
342						currentToolCallID = event.ContentBlock.ID
343						eventChan <- ProviderEvent{
344							Type: EventToolUseStart,
345							ToolCall: &message.ToolCall{
346								ID:       event.ContentBlock.ID,
347								Name:     event.ContentBlock.Name,
348								Finished: false,
349							},
350						}
351					}
352
353				case anthropic.ContentBlockDeltaEvent:
354					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
355						eventChan <- ProviderEvent{
356							Type:     EventThinkingDelta,
357							Thinking: event.Delta.Thinking,
358						}
359					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
360						eventChan <- ProviderEvent{
361							Type:      EventSignatureDelta,
362							Signature: event.Delta.Signature,
363						}
364					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
365						eventChan <- ProviderEvent{
366							Type:    EventContentDelta,
367							Content: event.Delta.Text,
368						}
369					} else if event.Delta.Type == "input_json_delta" {
370						if currentToolCallID != "" {
371							eventChan <- ProviderEvent{
372								Type: EventToolUseDelta,
373								ToolCall: &message.ToolCall{
374									ID:       currentToolCallID,
375									Finished: false,
376									Input:    event.Delta.PartialJSON,
377								},
378							}
379						}
380					}
381				case anthropic.ContentBlockStopEvent:
382					if currentToolCallID != "" {
383						eventChan <- ProviderEvent{
384							Type: EventToolUseStop,
385							ToolCall: &message.ToolCall{
386								ID: currentToolCallID,
387							},
388						}
389						currentToolCallID = ""
390					} else {
391						eventChan <- ProviderEvent{Type: EventContentStop}
392					}
393
394				case anthropic.MessageStopEvent:
395					content := ""
396					for _, block := range accumulatedMessage.Content {
397						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
398							content += text.Text
399						}
400					}
401
402					eventChan <- ProviderEvent{
403						Type: EventComplete,
404						Response: &ProviderResponse{
405							Content:      content,
406							ToolCalls:    a.toolCalls(accumulatedMessage),
407							Usage:        a.usage(accumulatedMessage),
408							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
409						},
410						Content: content,
411					}
412				}
413			}
414
415			err := anthropicStream.Err()
416			if err == nil || errors.Is(err, io.EOF) {
417				close(eventChan)
418				return
419			}
420
421			// If there is an error we are going to see if we can retry the call
422			retry, after, retryErr := a.shouldRetry(attempts, err)
423			if retryErr != nil {
424				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
425				close(eventChan)
426				return
427			}
428			if retry {
429				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
430				select {
431				case <-ctx.Done():
432					// context cancelled
433					if ctx.Err() != nil {
434						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
435					}
436					close(eventChan)
437					return
438				case <-time.After(time.Duration(after) * time.Millisecond):
439					continue
440				}
441			}
442			if ctx.Err() != nil {
443				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
444			}
445
446			close(eventChan)
447			return
448		}
449	}()
450	return eventChan
451}
452
453func (a *anthropicProvider) shouldRetry(attempts int, err error) (bool, int64, error) {
454	var apiErr *anthropic.Error
455	if !errors.As(err, &apiErr) {
456		return false, 0, err
457	}
458
459	if attempts > maxRetries {
460		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
461	}
462
463	if apiErr.StatusCode == 401 {
464		a.apiKey, err = a.resolver.ResolveValue(a.config.APIKey)
465		if err != nil {
466			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
467		}
468
469		a.client = createAnthropicClient(a.baseProvider, a.useBedrock)
470		return true, 0, nil
471	}
472
473	// Handle context limit exceeded error (400 Bad Request)
474	if apiErr.StatusCode == 400 {
475		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
476			a.adjustedMaxTokens = adjusted
477			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
478			return true, 0, nil
479		}
480	}
481
482	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
483	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
484		return false, 0, err
485	}
486
487	retryMs := 0
488	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
489
490	backoffMs := 2000 * (1 << (attempts - 1))
491	jitterMs := int(float64(backoffMs) * 0.2)
492	retryMs = backoffMs + jitterMs
493	if len(retryAfterValues) > 0 {
494		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
495			retryMs = retryMs * 1000
496		}
497	}
498	return true, int64(retryMs), nil
499}
500
501// handleContextLimitError parses context limit error and returns adjusted max_tokens
502func (a *anthropicProvider) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
503	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
504	errorMsg := apiErr.Error()
505
506	re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
507	matches := re.FindStringSubmatch(errorMsg)
508
509	if len(matches) != 4 {
510		return 0, false
511	}
512
513	inputTokens, err1 := strconv.Atoi(matches[1])
514	contextLimit, err2 := strconv.Atoi(matches[3])
515
516	if err1 != nil || err2 != nil {
517		return 0, false
518	}
519
520	// Calculate safe max_tokens with a buffer of 1000 tokens
521	safeMaxTokens := contextLimit - inputTokens - 1000
522
523	// Ensure we don't go below a minimum threshold
524	safeMaxTokens = max(safeMaxTokens, 1000)
525
526	return safeMaxTokens, true
527}
528
529func (a *anthropicProvider) toolCalls(msg anthropic.Message) []message.ToolCall {
530	var toolCalls []message.ToolCall
531
532	for _, block := range msg.Content {
533		switch variant := block.AsAny().(type) {
534		case anthropic.ToolUseBlock:
535			toolCall := message.ToolCall{
536				ID:       variant.ID,
537				Name:     variant.Name,
538				Input:    string(variant.Input),
539				Type:     string(variant.Type),
540				Finished: true,
541			}
542			toolCalls = append(toolCalls, toolCall)
543		}
544	}
545
546	return toolCalls
547}
548
549func (a *anthropicProvider) usage(msg anthropic.Message) TokenUsage {
550	return TokenUsage{
551		InputTokens:         msg.Usage.InputTokens,
552		OutputTokens:        msg.Usage.OutputTokens,
553		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
554		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
555	}
556}