anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"regexp"
 11	"strconv"
 12	"strings"
 13	"time"
 14
 15	"github.com/anthropics/anthropic-sdk-go"
 16	"github.com/anthropics/anthropic-sdk-go/bedrock"
 17	"github.com/anthropics/anthropic-sdk-go/option"
 18	"github.com/anthropics/anthropic-sdk-go/vertex"
 19	"github.com/charmbracelet/catwalk/pkg/catwalk"
 20	"github.com/charmbracelet/crush/internal/config"
 21	"github.com/charmbracelet/crush/internal/llm/tools"
 22	"github.com/charmbracelet/crush/internal/log"
 23	"github.com/charmbracelet/crush/internal/message"
 24)
 25
 26// Pre-compiled regex for parsing context limit errors.
 27var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
 28
 29type anthropicClient struct {
 30	providerOptions   providerClientOptions
 31	tp                AnthropicClientType
 32	client            anthropic.Client
 33	adjustedMaxTokens int // Used when context limit is hit
 34}
 35
 36type AnthropicClient ProviderClient
 37
 38type AnthropicClientType string
 39
 40const (
 41	AnthropicClientTypeNormal  AnthropicClientType = "normal"
 42	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
 43	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
 44)
 45
 46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
 47	return &anthropicClient{
 48		providerOptions: opts,
 49		tp:              tp,
 50		client:          createAnthropicClient(opts, tp),
 51	}
 52}
 53
 54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
 55	anthropicClientOptions := []option.RequestOption{}
 56
 57	// Check if Authorization header is provided in extra headers
 58	hasBearerAuth := false
 59	if opts.extraHeaders != nil {
 60		for key := range opts.extraHeaders {
 61			if strings.ToLower(key) == "authorization" {
 62				hasBearerAuth = true
 63				break
 64			}
 65		}
 66	}
 67
 68	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 69
 70	if opts.apiKey != "" && !hasBearerAuth {
 71		if isBearerToken {
 72			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 73			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 74		} else {
 75			// Use standard X-Api-Key header
 76			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 77		}
 78	} else if hasBearerAuth {
 79		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 80	}
 81
 82	if config.Get().Options.Debug {
 83		httpClient := log.NewHTTPClient()
 84		anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
 85	}
 86
 87	switch tp {
 88	case AnthropicClientTypeBedrock:
 89		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 90	case AnthropicClientTypeVertex:
 91		project := opts.extraParams["project"]
 92		location := opts.extraParams["location"]
 93		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
 94	}
 95	for key, header := range opts.extraHeaders {
 96		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
 97	}
 98	for key, value := range opts.extraBody {
 99		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
100	}
101	return anthropic.NewClient(anthropicClientOptions...)
102}
103
104func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
105	for i, msg := range messages {
106		cache := false
107		if i > len(messages)-3 {
108			cache = true
109		}
110		switch msg.Role {
111		case message.User:
112			content := anthropic.NewTextBlock(msg.Content().String())
113			if cache && !a.providerOptions.disableCache {
114				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
115					Type: "ephemeral",
116				}
117			}
118			var contentBlocks []anthropic.ContentBlockParamUnion
119			contentBlocks = append(contentBlocks, content)
120			for _, binaryContent := range msg.BinaryContent() {
121				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
122				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
123				contentBlocks = append(contentBlocks, imageBlock)
124			}
125			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
126
127		case message.Assistant:
128			blocks := []anthropic.ContentBlockParamUnion{}
129
130			// Add thinking blocks first if present (required when thinking is enabled with tool use)
131			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
132				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
133				blocks = append(blocks, thinkingBlock)
134			}
135
136			if msg.Content().String() != "" {
137				content := anthropic.NewTextBlock(msg.Content().String())
138				if cache && !a.providerOptions.disableCache {
139					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
140						Type: "ephemeral",
141					}
142				}
143				blocks = append(blocks, content)
144			}
145
146			for _, toolCall := range msg.ToolCalls() {
147				var inputMap map[string]any
148				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
149				if err != nil {
150					continue
151				}
152				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
153			}
154
155			if len(blocks) == 0 {
156				slog.Warn("There is a message without content, investigate, this should not happen")
157				continue
158			}
159			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
160
161		case message.Tool:
162			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
163			for i, toolResult := range msg.ToolResults() {
164				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
165			}
166			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
167		}
168	}
169	return
170}
171
172func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
173	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
174
175	for i, tool := range tools {
176		info := tool.Info()
177		toolParam := anthropic.ToolParam{
178			Name:        info.Name,
179			Description: anthropic.String(info.Description),
180			InputSchema: anthropic.ToolInputSchemaParam{
181				Properties: info.Parameters,
182				// TODO: figure out how we can tell claude the required fields?
183			},
184		}
185
186		if i == len(tools)-1 && !a.providerOptions.disableCache {
187			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
188				Type: "ephemeral",
189			}
190		}
191
192		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
193	}
194
195	return anthropicTools
196}
197
198func (a *anthropicClient) finishReason(reason string) message.FinishReason {
199	switch reason {
200	case "end_turn":
201		return message.FinishReasonEndTurn
202	case "max_tokens":
203		return message.FinishReasonMaxTokens
204	case "tool_use":
205		return message.FinishReasonToolUse
206	case "stop_sequence":
207		return message.FinishReasonEndTurn
208	default:
209		return message.FinishReasonUnknown
210	}
211}
212
213func (a *anthropicClient) isThinkingEnabled() bool {
214	cfg := config.Get()
215	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
216	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
217		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
218	}
219	return a.Model().CanReason && modelConfig.Think
220}
221
222func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
223	model := a.providerOptions.model(a.providerOptions.modelType)
224	var thinkingParam anthropic.ThinkingConfigParamUnion
225	cfg := config.Get()
226	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
227	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
228		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
229	}
230	temperature := anthropic.Float(0)
231
232	maxTokens := model.DefaultMaxTokens
233	if modelConfig.MaxTokens > 0 {
234		maxTokens = modelConfig.MaxTokens
235	}
236	if a.isThinkingEnabled() {
237		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
238		temperature = anthropic.Float(1)
239	}
240	// Override max tokens if set in provider options
241	if a.providerOptions.maxTokens > 0 {
242		maxTokens = a.providerOptions.maxTokens
243	}
244
245	// Use adjusted max tokens if context limit was hit
246	if a.adjustedMaxTokens > 0 {
247		maxTokens = int64(a.adjustedMaxTokens)
248	}
249
250	systemBlocks := []anthropic.TextBlockParam{}
251
252	// Add custom system prompt prefix if configured
253	if a.providerOptions.systemPromptPrefix != "" {
254		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
255			Text: a.providerOptions.systemPromptPrefix,
256			CacheControl: anthropic.CacheControlEphemeralParam{
257				Type: "ephemeral",
258			},
259		})
260	}
261
262	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
263		Text: a.providerOptions.systemMessage,
264		CacheControl: anthropic.CacheControlEphemeralParam{
265			Type: "ephemeral",
266		},
267	})
268
269	return anthropic.MessageNewParams{
270		Model:       anthropic.Model(model.ID),
271		MaxTokens:   maxTokens,
272		Temperature: temperature,
273		Messages:    messages,
274		Tools:       tools,
275		Thinking:    thinkingParam,
276		System:      systemBlocks,
277	}
278}
279
280func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
281	attempts := 0
282	for {
283		attempts++
284		// Prepare messages on each attempt in case max_tokens was adjusted
285		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
286
287		var opts []option.RequestOption
288		if a.isThinkingEnabled() {
289			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
290		}
291		anthropicResponse, err := a.client.Messages.New(
292			ctx,
293			preparedMessages,
294			opts...,
295		)
296		// If there is an error we are going to see if we can retry the call
297		if err != nil {
298			slog.Error("Anthropic API error", "error", err.Error(), "attempt", attempts, "max_retries", maxRetries)
299			retry, after, retryErr := a.shouldRetry(attempts, err)
300			if retryErr != nil {
301				return nil, retryErr
302			}
303			if retry {
304				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
305				select {
306				case <-ctx.Done():
307					return nil, ctx.Err()
308				case <-time.After(time.Duration(after) * time.Millisecond):
309					continue
310				}
311			}
312			return nil, retryErr
313		}
314
315		content := ""
316		for _, block := range anthropicResponse.Content {
317			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
318				content += text.Text
319			}
320		}
321
322		return &ProviderResponse{
323			Content:   content,
324			ToolCalls: a.toolCalls(*anthropicResponse),
325			Usage:     a.usage(*anthropicResponse),
326		}, nil
327	}
328}
329
330func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
331	attempts := 0
332	eventChan := make(chan ProviderEvent)
333	go func() {
334		for {
335			attempts++
336			// Prepare messages on each attempt in case max_tokens was adjusted
337			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
338
339			var opts []option.RequestOption
340			if a.isThinkingEnabled() {
341				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
342			}
343
344			anthropicStream := a.client.Messages.NewStreaming(
345				ctx,
346				preparedMessages,
347				opts...,
348			)
349			accumulatedMessage := anthropic.Message{}
350
351			currentToolCallID := ""
352			for anthropicStream.Next() {
353				event := anthropicStream.Current()
354				err := accumulatedMessage.Accumulate(event)
355				if err != nil {
356					slog.Warn("Error accumulating message", "error", err)
357					continue
358				}
359
360				switch event := event.AsAny().(type) {
361				case anthropic.ContentBlockStartEvent:
362					switch event.ContentBlock.Type {
363					case "text":
364						eventChan <- ProviderEvent{Type: EventContentStart}
365					case "tool_use":
366						currentToolCallID = event.ContentBlock.ID
367						eventChan <- ProviderEvent{
368							Type: EventToolUseStart,
369							ToolCall: &message.ToolCall{
370								ID:       event.ContentBlock.ID,
371								Name:     event.ContentBlock.Name,
372								Finished: false,
373							},
374						}
375					}
376
377				case anthropic.ContentBlockDeltaEvent:
378					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
379						eventChan <- ProviderEvent{
380							Type:     EventThinkingDelta,
381							Thinking: event.Delta.Thinking,
382						}
383					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
384						eventChan <- ProviderEvent{
385							Type:      EventSignatureDelta,
386							Signature: event.Delta.Signature,
387						}
388					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
389						eventChan <- ProviderEvent{
390							Type:    EventContentDelta,
391							Content: event.Delta.Text,
392						}
393					} else if event.Delta.Type == "input_json_delta" {
394						if currentToolCallID != "" {
395							eventChan <- ProviderEvent{
396								Type: EventToolUseDelta,
397								ToolCall: &message.ToolCall{
398									ID:       currentToolCallID,
399									Finished: false,
400									Input:    event.Delta.PartialJSON,
401								},
402							}
403						}
404					}
405				case anthropic.ContentBlockStopEvent:
406					if currentToolCallID != "" {
407						eventChan <- ProviderEvent{
408							Type: EventToolUseStop,
409							ToolCall: &message.ToolCall{
410								ID: currentToolCallID,
411							},
412						}
413						currentToolCallID = ""
414					} else {
415						eventChan <- ProviderEvent{Type: EventContentStop}
416					}
417
418				case anthropic.MessageStopEvent:
419					content := ""
420					for _, block := range accumulatedMessage.Content {
421						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
422							content += text.Text
423						}
424					}
425
426					eventChan <- ProviderEvent{
427						Type: EventComplete,
428						Response: &ProviderResponse{
429							Content:      content,
430							ToolCalls:    a.toolCalls(accumulatedMessage),
431							Usage:        a.usage(accumulatedMessage),
432							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
433						},
434						Content: content,
435					}
436				}
437			}
438
439			err := anthropicStream.Err()
440			if err == nil || errors.Is(err, io.EOF) {
441				close(eventChan)
442				return
443			}
444
445			// If there is an error we are going to see if we can retry the call
446			retry, after, retryErr := a.shouldRetry(attempts, err)
447			if retryErr != nil {
448				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
449				close(eventChan)
450				return
451			}
452			if retry {
453				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
454				select {
455				case <-ctx.Done():
456					// context cancelled
457					if ctx.Err() != nil {
458						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
459					}
460					close(eventChan)
461					return
462				case <-time.After(time.Duration(after) * time.Millisecond):
463					continue
464				}
465			}
466			if ctx.Err() != nil {
467				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
468			}
469
470			close(eventChan)
471			return
472		}
473	}()
474	return eventChan
475}
476
477func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
478	var apiErr *anthropic.Error
479	if !errors.As(err, &apiErr) {
480		return false, 0, err
481	}
482
483	if attempts > maxRetries {
484		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
485	}
486
487	if apiErr.StatusCode == 401 {
488		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
489		if err != nil {
490			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
491		}
492		a.client = createAnthropicClient(a.providerOptions, a.tp)
493		return true, 0, nil
494	}
495
496	// Handle context limit exceeded error (400 Bad Request)
497	if apiErr.StatusCode == 400 {
498		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
499			a.adjustedMaxTokens = adjusted
500			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
501			return true, 0, nil
502		}
503	}
504
505	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
506	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
507		return false, 0, err
508	}
509
510	retryMs := 0
511	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
512
513	backoffMs := 2000 * (1 << (attempts - 1))
514	jitterMs := int(float64(backoffMs) * 0.2)
515	retryMs = backoffMs + jitterMs
516	if len(retryAfterValues) > 0 {
517		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
518			retryMs = retryMs * 1000
519		}
520	}
521	return true, int64(retryMs), nil
522}
523
524// handleContextLimitError parses context limit error and returns adjusted max_tokens
525func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
526	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
527	errorMsg := apiErr.Error()
528
529	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
530
531	if len(matches) != 4 {
532		return 0, false
533	}
534
535	inputTokens, err1 := strconv.Atoi(matches[1])
536	contextLimit, err2 := strconv.Atoi(matches[3])
537
538	if err1 != nil || err2 != nil {
539		return 0, false
540	}
541
542	// Calculate safe max_tokens with a buffer of 1000 tokens
543	safeMaxTokens := contextLimit - inputTokens - 1000
544
545	// Ensure we don't go below a minimum threshold
546	safeMaxTokens = max(safeMaxTokens, 1000)
547
548	return safeMaxTokens, true
549}
550
551func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
552	var toolCalls []message.ToolCall
553
554	for _, block := range msg.Content {
555		switch variant := block.AsAny().(type) {
556		case anthropic.ToolUseBlock:
557			toolCall := message.ToolCall{
558				ID:       variant.ID,
559				Name:     variant.Name,
560				Input:    string(variant.Input),
561				Type:     string(variant.Type),
562				Finished: true,
563			}
564			toolCalls = append(toolCalls, toolCall)
565		}
566	}
567
568	return toolCalls
569}
570
571func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
572	return TokenUsage{
573		InputTokens:         msg.Usage.InputTokens,
574		OutputTokens:        msg.Usage.OutputTokens,
575		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
576		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
577	}
578}
579
580func (a *anthropicClient) Model() catwalk.Model {
581	return a.providerOptions.model(a.providerOptions.modelType)
582}