anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"net/http"
 11	"regexp"
 12	"strconv"
 13	"strings"
 14	"time"
 15
 16	"github.com/anthropics/anthropic-sdk-go"
 17	"github.com/anthropics/anthropic-sdk-go/bedrock"
 18	"github.com/anthropics/anthropic-sdk-go/option"
 19	"github.com/anthropics/anthropic-sdk-go/vertex"
 20	"github.com/charmbracelet/catwalk/pkg/catwalk"
 21	"github.com/charmbracelet/crush/internal/config"
 22	"github.com/charmbracelet/crush/internal/llm/tools"
 23	"github.com/charmbracelet/crush/internal/log"
 24	"github.com/charmbracelet/crush/internal/message"
 25)
 26
 27// Pre-compiled regex for parsing context limit errors.
 28var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
 29
 30type anthropicClient struct {
 31	providerOptions   providerClientOptions
 32	tp                AnthropicClientType
 33	client            anthropic.Client
 34	adjustedMaxTokens int // Used when context limit is hit
 35}
 36
 37type AnthropicClient ProviderClient
 38
 39type AnthropicClientType string
 40
 41const (
 42	AnthropicClientTypeNormal  AnthropicClientType = "normal"
 43	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
 44	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
 45)
 46
 47func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
 48	return &anthropicClient{
 49		providerOptions: opts,
 50		tp:              tp,
 51		client:          createAnthropicClient(opts, tp),
 52	}
 53}
 54
 55func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
 56	anthropicClientOptions := []option.RequestOption{}
 57
 58	// Check if Authorization header is provided in extra headers
 59	hasBearerAuth := false
 60	if opts.extraHeaders != nil {
 61		for key := range opts.extraHeaders {
 62			if strings.ToLower(key) == "authorization" {
 63				hasBearerAuth = true
 64				break
 65			}
 66		}
 67	}
 68
 69	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 70
 71	if opts.apiKey != "" && !hasBearerAuth {
 72		if isBearerToken {
 73			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 74			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 75		} else {
 76			// Use standard X-Api-Key header
 77			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 78		}
 79	} else if hasBearerAuth {
 80		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 81	}
 82
 83	if opts.baseURL != "" {
 84		resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
 85		if err == nil && resolvedBaseURL != "" {
 86			anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
 87		}
 88	}
 89
 90	if config.Get().Options.Debug {
 91		httpClient := log.NewHTTPClient()
 92		anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
 93	}
 94
 95	switch tp {
 96	case AnthropicClientTypeBedrock:
 97		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 98	case AnthropicClientTypeVertex:
 99		project := opts.extraParams["project"]
100		location := opts.extraParams["location"]
101		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
102	}
103	for key, header := range opts.extraHeaders {
104		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
105	}
106	for key, value := range opts.extraBody {
107		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
108	}
109	return anthropic.NewClient(anthropicClientOptions...)
110}
111
112func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
113	for i, msg := range messages {
114		cache := false
115		if i > len(messages)-3 {
116			cache = true
117		}
118		switch msg.Role {
119		case message.User:
120			content := anthropic.NewTextBlock(msg.Content().String())
121			if cache && !a.providerOptions.disableCache {
122				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
123					Type: "ephemeral",
124				}
125			}
126			var contentBlocks []anthropic.ContentBlockParamUnion
127			contentBlocks = append(contentBlocks, content)
128			for _, binaryContent := range msg.BinaryContent() {
129				if strings.HasPrefix(binaryContent.MIMEType, "image/") {
130					base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
131					imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
132					contentBlocks = append(contentBlocks, imageBlock)
133					continue
134				}
135				blk := anthropic.NewDocumentBlock(anthropic.PlainTextSourceParam{
136					Data: string(binaryContent.Data),
137				})
138				contentBlocks = append(contentBlocks, blk)
139			}
140			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
141
142		case message.Assistant:
143			blocks := []anthropic.ContentBlockParamUnion{}
144
145			// Add thinking blocks first if present (required when thinking is enabled with tool use)
146			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
147				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
148				blocks = append(blocks, thinkingBlock)
149			}
150
151			if msg.Content().String() != "" {
152				content := anthropic.NewTextBlock(msg.Content().String())
153				if cache && !a.providerOptions.disableCache {
154					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
155						Type: "ephemeral",
156					}
157				}
158				blocks = append(blocks, content)
159			}
160
161			for _, toolCall := range msg.ToolCalls() {
162				if !toolCall.Finished {
163					continue
164				}
165				var inputMap map[string]any
166				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
167				if err != nil {
168					continue
169				}
170				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
171			}
172
173			if len(blocks) == 0 {
174				continue
175			}
176			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
177
178		case message.Tool:
179			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
180			for i, toolResult := range msg.ToolResults() {
181				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
182			}
183			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
184		}
185	}
186	return anthropicMessages
187}
188
189func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
190	if len(tools) == 0 {
191		return nil
192	}
193	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
194
195	for i, tool := range tools {
196		info := tool.Info()
197		toolParam := anthropic.ToolParam{
198			Name:        info.Name,
199			Description: anthropic.String(info.Description),
200			InputSchema: anthropic.ToolInputSchemaParam{
201				Properties: info.Parameters,
202				Required:   info.Required,
203			},
204		}
205
206		if i == len(tools)-1 && !a.providerOptions.disableCache {
207			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
208				Type: "ephemeral",
209			}
210		}
211
212		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
213	}
214
215	return anthropicTools
216}
217
218func (a *anthropicClient) finishReason(reason string) message.FinishReason {
219	switch reason {
220	case "end_turn":
221		return message.FinishReasonEndTurn
222	case "max_tokens":
223		return message.FinishReasonMaxTokens
224	case "tool_use":
225		return message.FinishReasonToolUse
226	case "stop_sequence":
227		return message.FinishReasonEndTurn
228	default:
229		return message.FinishReasonUnknown
230	}
231}
232
233func (a *anthropicClient) isThinkingEnabled() bool {
234	cfg := config.Get()
235	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
236	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
237		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
238	}
239	return a.Model().CanReason && modelConfig.Think
240}
241
242func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
243	model := a.providerOptions.model(a.providerOptions.modelType)
244	var thinkingParam anthropic.ThinkingConfigParamUnion
245	cfg := config.Get()
246	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
247	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
248		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
249	}
250	temperature := anthropic.Float(0)
251
252	maxTokens := model.DefaultMaxTokens
253	if modelConfig.MaxTokens > 0 {
254		maxTokens = modelConfig.MaxTokens
255	}
256	if a.isThinkingEnabled() {
257		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
258		temperature = anthropic.Float(1)
259	}
260	// Override max tokens if set in provider options
261	if a.providerOptions.maxTokens > 0 {
262		maxTokens = a.providerOptions.maxTokens
263	}
264
265	// Use adjusted max tokens if context limit was hit
266	if a.adjustedMaxTokens > 0 {
267		maxTokens = int64(a.adjustedMaxTokens)
268	}
269
270	systemBlocks := []anthropic.TextBlockParam{}
271
272	// Add custom system prompt prefix if configured
273	if a.providerOptions.systemPromptPrefix != "" {
274		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
275			Text: a.providerOptions.systemPromptPrefix,
276		})
277	}
278
279	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
280		Text: a.providerOptions.systemMessage,
281		CacheControl: anthropic.CacheControlEphemeralParam{
282			Type: "ephemeral",
283		},
284	})
285
286	return anthropic.MessageNewParams{
287		Model:       anthropic.Model(model.ID),
288		MaxTokens:   maxTokens,
289		Temperature: temperature,
290		Messages:    messages,
291		Tools:       tools,
292		Thinking:    thinkingParam,
293		System:      systemBlocks,
294	}
295}
296
297func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
298	attempts := 0
299	for {
300		attempts++
301		// Prepare messages on each attempt in case max_tokens was adjusted
302		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
303
304		var opts []option.RequestOption
305		if a.isThinkingEnabled() {
306			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
307		}
308		anthropicResponse, err := a.client.Messages.New(
309			ctx,
310			preparedMessages,
311			opts...,
312		)
313		// If there is an error we are going to see if we can retry the call
314		if err != nil {
315			retry, after, retryErr := a.shouldRetry(attempts, err)
316			if retryErr != nil {
317				return nil, retryErr
318			}
319			if retry {
320				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
321				select {
322				case <-ctx.Done():
323					return nil, ctx.Err()
324				case <-time.After(time.Duration(after) * time.Millisecond):
325					continue
326				}
327			}
328			return nil, retryErr
329		}
330
331		content := ""
332		for _, block := range anthropicResponse.Content {
333			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
334				content += text.Text
335			}
336		}
337
338		return &ProviderResponse{
339			Content:   content,
340			ToolCalls: a.toolCalls(*anthropicResponse),
341			Usage:     a.usage(*anthropicResponse),
342		}, nil
343	}
344}
345
346func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
347	attempts := 0
348	eventChan := make(chan ProviderEvent)
349	go func() {
350		for {
351			attempts++
352			// Prepare messages on each attempt in case max_tokens was adjusted
353			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
354
355			var opts []option.RequestOption
356			if a.isThinkingEnabled() {
357				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
358			}
359
360			anthropicStream := a.client.Messages.NewStreaming(
361				ctx,
362				preparedMessages,
363				opts...,
364			)
365			accumulatedMessage := anthropic.Message{}
366
367			currentToolCallID := ""
368			for anthropicStream.Next() {
369				event := anthropicStream.Current()
370				err := accumulatedMessage.Accumulate(event)
371				if err != nil {
372					slog.Warn("Error accumulating message", "error", err)
373					continue
374				}
375
376				switch event := event.AsAny().(type) {
377				case anthropic.ContentBlockStartEvent:
378					switch event.ContentBlock.Type {
379					case "text":
380						eventChan <- ProviderEvent{Type: EventContentStart}
381					case "tool_use":
382						currentToolCallID = event.ContentBlock.ID
383						eventChan <- ProviderEvent{
384							Type: EventToolUseStart,
385							ToolCall: &message.ToolCall{
386								ID:       event.ContentBlock.ID,
387								Name:     event.ContentBlock.Name,
388								Finished: false,
389							},
390						}
391					}
392
393				case anthropic.ContentBlockDeltaEvent:
394					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
395						eventChan <- ProviderEvent{
396							Type:     EventThinkingDelta,
397							Thinking: event.Delta.Thinking,
398						}
399					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
400						eventChan <- ProviderEvent{
401							Type:      EventSignatureDelta,
402							Signature: event.Delta.Signature,
403						}
404					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
405						eventChan <- ProviderEvent{
406							Type:    EventContentDelta,
407							Content: event.Delta.Text,
408						}
409					} else if event.Delta.Type == "input_json_delta" {
410						if currentToolCallID != "" {
411							eventChan <- ProviderEvent{
412								Type: EventToolUseDelta,
413								ToolCall: &message.ToolCall{
414									ID:       currentToolCallID,
415									Finished: false,
416									Input:    event.Delta.PartialJSON,
417								},
418							}
419						}
420					}
421				case anthropic.ContentBlockStopEvent:
422					if currentToolCallID != "" {
423						eventChan <- ProviderEvent{
424							Type: EventToolUseStop,
425							ToolCall: &message.ToolCall{
426								ID: currentToolCallID,
427							},
428						}
429						currentToolCallID = ""
430					} else {
431						eventChan <- ProviderEvent{Type: EventContentStop}
432					}
433
434				case anthropic.MessageStopEvent:
435					content := ""
436					for _, block := range accumulatedMessage.Content {
437						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
438							content += text.Text
439						}
440					}
441
442					eventChan <- ProviderEvent{
443						Type: EventComplete,
444						Response: &ProviderResponse{
445							Content:      content,
446							ToolCalls:    a.toolCalls(accumulatedMessage),
447							Usage:        a.usage(accumulatedMessage),
448							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
449						},
450						Content: content,
451					}
452				}
453			}
454
455			err := anthropicStream.Err()
456			if err == nil || errors.Is(err, io.EOF) {
457				close(eventChan)
458				return
459			}
460
461			// If there is an error we are going to see if we can retry the call
462			retry, after, retryErr := a.shouldRetry(attempts, err)
463			if retryErr != nil {
464				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
465				close(eventChan)
466				return
467			}
468			if retry {
469				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
470				select {
471				case <-ctx.Done():
472					// context cancelled
473					if ctx.Err() != nil {
474						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
475					}
476					close(eventChan)
477					return
478				case <-time.After(time.Duration(after) * time.Millisecond):
479					continue
480				}
481			}
482			if ctx.Err() != nil {
483				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
484			}
485
486			close(eventChan)
487			return
488		}
489	}()
490	return eventChan
491}
492
493func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
494	var apiErr *anthropic.Error
495	if !errors.As(err, &apiErr) {
496		return false, 0, err
497	}
498
499	if attempts > maxRetries {
500		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
501	}
502
503	if apiErr.StatusCode == http.StatusUnauthorized {
504		prev := a.providerOptions.apiKey
505		// in case the key comes from a script, we try to re-evaluate it.
506		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
507		if err != nil {
508			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
509		}
510		// if it didn't change, do not retry.
511		if prev == a.providerOptions.apiKey {
512			return false, 0, err
513		}
514		a.client = createAnthropicClient(a.providerOptions, a.tp)
515		return true, 0, nil
516	}
517
518	// Handle context limit exceeded error (400 Bad Request)
519	if apiErr.StatusCode == http.StatusBadRequest {
520		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
521			a.adjustedMaxTokens = adjusted
522			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
523			return true, 0, nil
524		}
525	}
526
527	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
528	// 529 (unofficial): The service is overloaded
529	if apiErr.StatusCode != http.StatusTooManyRequests && apiErr.StatusCode != 529 && !isOverloaded {
530		return false, 0, err
531	}
532
533	retryMs := 0
534	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
535
536	backoffMs := 2000 * (1 << (attempts - 1))
537	jitterMs := int(float64(backoffMs) * 0.2)
538	retryMs = backoffMs + jitterMs
539	if len(retryAfterValues) > 0 {
540		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
541			retryMs = retryMs * 1000
542		}
543	}
544	return true, int64(retryMs), nil
545}
546
547// handleContextLimitError parses context limit error and returns adjusted max_tokens
548func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
549	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
550	errorMsg := apiErr.Error()
551
552	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
553
554	if len(matches) != 4 {
555		return 0, false
556	}
557
558	inputTokens, err1 := strconv.Atoi(matches[1])
559	contextLimit, err2 := strconv.Atoi(matches[3])
560
561	if err1 != nil || err2 != nil {
562		return 0, false
563	}
564
565	// Calculate safe max_tokens with a buffer of 1000 tokens
566	safeMaxTokens := contextLimit - inputTokens - 1000
567
568	// Ensure we don't go below a minimum threshold
569	safeMaxTokens = max(safeMaxTokens, 1000)
570
571	return safeMaxTokens, true
572}
573
574func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
575	var toolCalls []message.ToolCall
576
577	for _, block := range msg.Content {
578		switch variant := block.AsAny().(type) {
579		case anthropic.ToolUseBlock:
580			toolCall := message.ToolCall{
581				ID:       variant.ID,
582				Name:     variant.Name,
583				Input:    string(variant.Input),
584				Type:     string(variant.Type),
585				Finished: true,
586			}
587			toolCalls = append(toolCalls, toolCall)
588		}
589	}
590
591	return toolCalls
592}
593
594func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
595	return TokenUsage{
596		InputTokens:         msg.Usage.InputTokens,
597		OutputTokens:        msg.Usage.OutputTokens,
598		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
599		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
600	}
601}
602
603func (a *anthropicClient) Model() catwalk.Model {
604	return a.providerOptions.model(a.providerOptions.modelType)
605}