anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"regexp"
 11	"strconv"
 12	"strings"
 13	"time"
 14
 15	"github.com/anthropics/anthropic-sdk-go"
 16	"github.com/anthropics/anthropic-sdk-go/bedrock"
 17	"github.com/anthropics/anthropic-sdk-go/option"
 18	"github.com/charmbracelet/catwalk/pkg/catwalk"
 19	"github.com/charmbracelet/crush/internal/config"
 20	"github.com/charmbracelet/crush/internal/llm/tools"
 21	"github.com/charmbracelet/crush/internal/message"
 22)
 23
// anthropicClient is the provider client implementation backed by the
// Anthropic SDK.
type anthropicClient struct {
	providerOptions   providerClientOptions // options the client was built with (API key, headers, model selection, prompts, ...)
	useBedrock        bool                  // forwarded to createAnthropicClient when the SDK client is rebuilt after a 401
	client            anthropic.Client      // SDK client used for all API calls; replaced when the API key is re-resolved
	adjustedMaxTokens int                   // Used when context limit is hit; a value > 0 overrides max_tokens on subsequent requests
}
 30
// AnthropicClient is a named type whose underlying type is ProviderClient; it
// is the type returned by newAnthropicClient.
type AnthropicClient ProviderClient
 32
 33func newAnthropicClient(opts providerClientOptions, useBedrock bool) AnthropicClient {
 34	return &anthropicClient{
 35		providerOptions: opts,
 36		client:          createAnthropicClient(opts, useBedrock),
 37	}
 38}
 39
 40func createAnthropicClient(opts providerClientOptions, useBedrock bool) anthropic.Client {
 41	anthropicClientOptions := []option.RequestOption{}
 42
 43	// Check if Authorization header is provided in extra headers
 44	hasBearerAuth := false
 45	if opts.extraHeaders != nil {
 46		for key := range opts.extraHeaders {
 47			if strings.ToLower(key) == "authorization" {
 48				hasBearerAuth = true
 49				break
 50			}
 51		}
 52	}
 53
 54	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 55
 56	if opts.apiKey != "" && !hasBearerAuth {
 57		if isBearerToken {
 58			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 59			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 60		} else {
 61			// Use standard X-Api-Key header
 62			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 63		}
 64	} else if hasBearerAuth {
 65		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 66	}
 67	if useBedrock {
 68		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 69	}
 70	for key, header := range opts.extraHeaders {
 71		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
 72	}
 73	for key, value := range opts.extraBody {
 74		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
 75	}
 76	return anthropic.NewClient(anthropicClientOptions...)
 77}
 78
 79func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
 80	for i, msg := range messages {
 81		cache := false
 82		if i > len(messages)-3 {
 83			cache = true
 84		}
 85		switch msg.Role {
 86		case message.User:
 87			content := anthropic.NewTextBlock(msg.Content().String())
 88			if cache && !a.providerOptions.disableCache {
 89				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
 90					Type: "ephemeral",
 91				}
 92			}
 93			var contentBlocks []anthropic.ContentBlockParamUnion
 94			contentBlocks = append(contentBlocks, content)
 95			for _, binaryContent := range msg.BinaryContent() {
 96				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
 97				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
 98				contentBlocks = append(contentBlocks, imageBlock)
 99			}
100			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
101
102		case message.Assistant:
103			blocks := []anthropic.ContentBlockParamUnion{}
104
105			// Add thinking blocks first if present (required when thinking is enabled with tool use)
106			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
107				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
108				blocks = append(blocks, thinkingBlock)
109			}
110
111			if msg.Content().String() != "" {
112				content := anthropic.NewTextBlock(msg.Content().String())
113				if cache && !a.providerOptions.disableCache {
114					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
115						Type: "ephemeral",
116					}
117				}
118				blocks = append(blocks, content)
119			}
120
121			for _, toolCall := range msg.ToolCalls() {
122				var inputMap map[string]any
123				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
124				if err != nil {
125					continue
126				}
127				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
128			}
129
130			if len(blocks) == 0 {
131				slog.Warn("There is a message without content, investigate, this should not happen")
132				continue
133			}
134			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
135
136		case message.Tool:
137			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
138			for i, toolResult := range msg.ToolResults() {
139				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
140			}
141			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
142		}
143	}
144	return
145}
146
147func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
148	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
149
150	for i, tool := range tools {
151		info := tool.Info()
152		toolParam := anthropic.ToolParam{
153			Name:        info.Name,
154			Description: anthropic.String(info.Description),
155			InputSchema: anthropic.ToolInputSchemaParam{
156				Properties: info.Parameters,
157				// TODO: figure out how we can tell claude the required fields?
158			},
159		}
160
161		if i == len(tools)-1 && !a.providerOptions.disableCache {
162			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
163				Type: "ephemeral",
164			}
165		}
166
167		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
168	}
169
170	return anthropicTools
171}
172
173func (a *anthropicClient) finishReason(reason string) message.FinishReason {
174	switch reason {
175	case "end_turn":
176		return message.FinishReasonEndTurn
177	case "max_tokens":
178		return message.FinishReasonMaxTokens
179	case "tool_use":
180		return message.FinishReasonToolUse
181	case "stop_sequence":
182		return message.FinishReasonEndTurn
183	default:
184		return message.FinishReasonUnknown
185	}
186}
187
188func (a *anthropicClient) isThinkingEnabled() bool {
189	cfg := config.Get()
190	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
191	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
192		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
193	}
194	return a.Model().CanReason && modelConfig.Think
195}
196
197func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
198	model := a.providerOptions.model(a.providerOptions.modelType)
199	var thinkingParam anthropic.ThinkingConfigParamUnion
200	cfg := config.Get()
201	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
202	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
203		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
204	}
205	temperature := anthropic.Float(0)
206
207	maxTokens := model.DefaultMaxTokens
208	if modelConfig.MaxTokens > 0 {
209		maxTokens = modelConfig.MaxTokens
210	}
211	if a.isThinkingEnabled() {
212		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
213		temperature = anthropic.Float(1)
214	}
215	// Override max tokens if set in provider options
216	if a.providerOptions.maxTokens > 0 {
217		maxTokens = a.providerOptions.maxTokens
218	}
219
220	// Use adjusted max tokens if context limit was hit
221	if a.adjustedMaxTokens > 0 {
222		maxTokens = int64(a.adjustedMaxTokens)
223	}
224
225	systemBlocks := []anthropic.TextBlockParam{}
226	slog.Info("Testing", "prefix", a.providerOptions.systemPromptPrefix)
227
228	// Add custom system prompt prefix if configured
229	if a.providerOptions.systemPromptPrefix != "" {
230		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
231			Text: a.providerOptions.systemPromptPrefix,
232			CacheControl: anthropic.CacheControlEphemeralParam{
233				Type: "ephemeral",
234			},
235		})
236	}
237
238	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
239		Text: a.providerOptions.systemMessage,
240		CacheControl: anthropic.CacheControlEphemeralParam{
241			Type: "ephemeral",
242		},
243	})
244
245	return anthropic.MessageNewParams{
246		Model:       anthropic.Model(model.ID),
247		MaxTokens:   maxTokens,
248		Temperature: temperature,
249		Messages:    messages,
250		Tools:       tools,
251		Thinking:    thinkingParam,
252		System:      systemBlocks,
253	}
254}
255
256func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
257	cfg := config.Get()
258
259	attempts := 0
260	for {
261		attempts++
262		// Prepare messages on each attempt in case max_tokens was adjusted
263		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
264		if cfg.Options.Debug {
265			jsonData, _ := json.Marshal(preparedMessages)
266			slog.Debug("Prepared messages", "messages", string(jsonData))
267		}
268
269		var opts []option.RequestOption
270		if a.isThinkingEnabled() {
271			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
272		}
273		anthropicResponse, err := a.client.Messages.New(
274			ctx,
275			preparedMessages,
276			opts...,
277		)
278		// If there is an error we are going to see if we can retry the call
279		if err != nil {
280			slog.Error("Error in Anthropic API call", "error", err)
281			retry, after, retryErr := a.shouldRetry(attempts, err)
282			if retryErr != nil {
283				return nil, retryErr
284			}
285			if retry {
286				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
287				select {
288				case <-ctx.Done():
289					return nil, ctx.Err()
290				case <-time.After(time.Duration(after) * time.Millisecond):
291					continue
292				}
293			}
294			return nil, retryErr
295		}
296
297		content := ""
298		for _, block := range anthropicResponse.Content {
299			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
300				content += text.Text
301			}
302		}
303
304		return &ProviderResponse{
305			Content:   content,
306			ToolCalls: a.toolCalls(*anthropicResponse),
307			Usage:     a.usage(*anthropicResponse),
308		}, nil
309	}
310}
311
// stream performs a streaming Messages API call and returns a channel of
// provider events. A goroutine started here produces the events and closes the
// channel on every exit path.
//
// Events emitted while the stream is read: content start/delta/stop, thinking
// and signature deltas, tool-use start/delta/stop, and a final EventComplete
// carrying the aggregated response. If the stream ends with an error,
// shouldRetry decides whether the whole request is rebuilt and retried after a
// delay; otherwise an EventError is emitted before the channel is closed.
//
// NOTE(review): the channel is unbuffered and sends are not guarded by ctx, so
// the goroutine blocks if the consumer stops receiving — confirm that callers
// always drain the channel until it is closed.
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	cfg := config.Get()
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
			if cfg.Options.Debug {
				jsonData, _ := json.Marshal(preparedMessages)
				slog.Debug("Prepared messages", "messages", string(jsonData))
			}

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			// accumulatedMessage is rebuilt from the stream events and used to
			// assemble the final response on message stop.
			accumulatedMessage := anthropic.Message{}

			// currentToolCallID is non-empty while a tool_use content block is
			// being streamed; it tags the deltas and selects which stop event
			// is emitted when the block ends.
			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					// The event is neither accumulated nor forwarded.
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					// Only text and tool_use blocks produce a start event.
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					// Thinking, signature and text deltas with empty payloads are dropped.
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						// Partial tool input is only forwarded while a tool_use block is open.
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}
				case anthropic.ContentBlockStopEvent:
					// A stop while a tool call is open ends that tool call; any
					// other block stop is reported as a content stop.
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					// Concatenate every accumulated text block into the final content.
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			// The stream is exhausted: a nil error or io.EOF is a normal end.
			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}

			// If there is an error we are going to see if we can retry the call
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
				select {
				case <-ctx.Done():
					// context cancelled
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					// Wait elapsed: start over with a freshly prepared request.
					continue
				}
			}
			// Reached only when shouldRetry returns neither a retry nor an
			// error; report a cancelled context if there is one.
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}
463
464func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
465	var apiErr *anthropic.Error
466	if !errors.As(err, &apiErr) {
467		return false, 0, err
468	}
469
470	if attempts > maxRetries {
471		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
472	}
473
474	if apiErr.StatusCode == 401 {
475		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
476		if err != nil {
477			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
478		}
479		a.client = createAnthropicClient(a.providerOptions, a.useBedrock)
480		return true, 0, nil
481	}
482
483	// Handle context limit exceeded error (400 Bad Request)
484	if apiErr.StatusCode == 400 {
485		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
486			a.adjustedMaxTokens = adjusted
487			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
488			return true, 0, nil
489		}
490	}
491
492	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
493	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
494		return false, 0, err
495	}
496
497	retryMs := 0
498	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
499
500	backoffMs := 2000 * (1 << (attempts - 1))
501	jitterMs := int(float64(backoffMs) * 0.2)
502	retryMs = backoffMs + jitterMs
503	if len(retryAfterValues) > 0 {
504		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
505			retryMs = retryMs * 1000
506		}
507	}
508	return true, int64(retryMs), nil
509}
510
511// handleContextLimitError parses context limit error and returns adjusted max_tokens
512func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
513	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
514	errorMsg := apiErr.Error()
515
516	re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
517	matches := re.FindStringSubmatch(errorMsg)
518
519	if len(matches) != 4 {
520		return 0, false
521	}
522
523	inputTokens, err1 := strconv.Atoi(matches[1])
524	contextLimit, err2 := strconv.Atoi(matches[3])
525
526	if err1 != nil || err2 != nil {
527		return 0, false
528	}
529
530	// Calculate safe max_tokens with a buffer of 1000 tokens
531	safeMaxTokens := contextLimit - inputTokens - 1000
532
533	// Ensure we don't go below a minimum threshold
534	safeMaxTokens = max(safeMaxTokens, 1000)
535
536	return safeMaxTokens, true
537}
538
539func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
540	var toolCalls []message.ToolCall
541
542	for _, block := range msg.Content {
543		switch variant := block.AsAny().(type) {
544		case anthropic.ToolUseBlock:
545			toolCall := message.ToolCall{
546				ID:       variant.ID,
547				Name:     variant.Name,
548				Input:    string(variant.Input),
549				Type:     string(variant.Type),
550				Finished: true,
551			}
552			toolCalls = append(toolCalls, toolCall)
553		}
554	}
555
556	return toolCalls
557}
558
559func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
560	return TokenUsage{
561		InputTokens:         msg.Usage.InputTokens,
562		OutputTokens:        msg.Usage.OutputTokens,
563		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
564		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
565	}
566}
567
568func (a *anthropicClient) Model() catwalk.Model {
569	return a.providerOptions.model(a.providerOptions.modelType)
570}