anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"regexp"
 11	"strconv"
 12	"strings"
 13	"time"
 14
 15	"github.com/anthropics/anthropic-sdk-go"
 16	"github.com/anthropics/anthropic-sdk-go/bedrock"
 17	"github.com/anthropics/anthropic-sdk-go/option"
 18	"github.com/anthropics/anthropic-sdk-go/vertex"
 19	"github.com/charmbracelet/catwalk/pkg/catwalk"
 20	"github.com/charmbracelet/crush/internal/config"
 21	"github.com/charmbracelet/crush/internal/llm/tools"
 22	"github.com/charmbracelet/crush/internal/log"
 23	"github.com/charmbracelet/crush/internal/message"
 24)
 25
 26// Pre-compiled regex for parsing context limit errors.
 27var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
 28
 29type anthropicClient struct {
 30	providerOptions   providerClientOptions
 31	tp                AnthropicClientType
 32	client            anthropic.Client
 33	adjustedMaxTokens int // Used when context limit is hit
 34}
 35
 36type AnthropicClient ProviderClient
 37
 38type AnthropicClientType string
 39
 40const (
 41	AnthropicClientTypeNormal  AnthropicClientType = "normal"
 42	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
 43	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
 44)
 45
 46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
 47	return &anthropicClient{
 48		providerOptions: opts,
 49		tp:              tp,
 50		client:          createAnthropicClient(opts, tp),
 51	}
 52}
 53
 54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
 55	anthropicClientOptions := []option.RequestOption{}
 56
 57	// Check if Authorization header is provided in extra headers
 58	hasBearerAuth := false
 59	if opts.extraHeaders != nil {
 60		for key := range opts.extraHeaders {
 61			if strings.ToLower(key) == "authorization" {
 62				hasBearerAuth = true
 63				break
 64			}
 65		}
 66	}
 67
 68	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 69
 70	if opts.apiKey != "" && !hasBearerAuth {
 71		if isBearerToken {
 72			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 73			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 74		} else {
 75			// Use standard X-Api-Key header
 76			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 77		}
 78	} else if hasBearerAuth {
 79		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 80	}
 81
 82	if opts.baseURL != "" {
 83		resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
 84		if err == nil && resolvedBaseURL != "" {
 85			anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
 86		}
 87	}
 88
 89	if config.Get().Options.Debug {
 90		httpClient := log.NewHTTPClient()
 91		anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
 92	}
 93
 94	switch tp {
 95	case AnthropicClientTypeBedrock:
 96		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 97	case AnthropicClientTypeVertex:
 98		project := opts.extraParams["project"]
 99		location := opts.extraParams["location"]
100		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
101	}
102	for key, header := range opts.extraHeaders {
103		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
104	}
105	for key, value := range opts.extraBody {
106		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
107	}
108	return anthropic.NewClient(anthropicClientOptions...)
109}
110
111func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
112	for i, msg := range messages {
113		cache := false
114		if i > len(messages)-3 {
115			cache = true
116		}
117		switch msg.Role {
118		case message.User:
119			content := anthropic.NewTextBlock(msg.Content().String())
120			if cache && !a.providerOptions.disableCache {
121				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
122					Type: "ephemeral",
123				}
124			}
125			var contentBlocks []anthropic.ContentBlockParamUnion
126			contentBlocks = append(contentBlocks, content)
127			for _, binaryContent := range msg.BinaryContent() {
128				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
129				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
130				contentBlocks = append(contentBlocks, imageBlock)
131			}
132			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
133
134		case message.Assistant:
135			blocks := []anthropic.ContentBlockParamUnion{}
136
137			// Add thinking blocks first if present (required when thinking is enabled with tool use)
138			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
139				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
140				blocks = append(blocks, thinkingBlock)
141			}
142
143			if msg.Content().String() != "" {
144				content := anthropic.NewTextBlock(msg.Content().String())
145				if cache && !a.providerOptions.disableCache {
146					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
147						Type: "ephemeral",
148					}
149				}
150				blocks = append(blocks, content)
151			}
152
153			for _, toolCall := range msg.ToolCalls() {
154				var inputMap map[string]any
155				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
156				if err != nil {
157					continue
158				}
159				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
160			}
161
162			if len(blocks) == 0 {
163				continue
164			}
165			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
166
167		case message.Tool:
168			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
169			for i, toolResult := range msg.ToolResults() {
170				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
171			}
172			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
173		}
174	}
175	return
176}
177
178func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
179	if len(tools) == 0 {
180		return nil
181	}
182	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
183
184	for i, tool := range tools {
185		info := tool.Info()
186		toolParam := anthropic.ToolParam{
187			Name:        info.Name,
188			Description: anthropic.String(info.Description),
189			InputSchema: anthropic.ToolInputSchemaParam{
190				Properties: info.Parameters,
191				Required:   info.Required,
192			},
193		}
194
195		if i == len(tools)-1 && !a.providerOptions.disableCache {
196			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
197				Type: "ephemeral",
198			}
199		}
200
201		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
202	}
203
204	return anthropicTools
205}
206
207func (a *anthropicClient) finishReason(reason string) message.FinishReason {
208	switch reason {
209	case "end_turn":
210		return message.FinishReasonEndTurn
211	case "max_tokens":
212		return message.FinishReasonMaxTokens
213	case "tool_use":
214		return message.FinishReasonToolUse
215	case "stop_sequence":
216		return message.FinishReasonEndTurn
217	default:
218		return message.FinishReasonUnknown
219	}
220}
221
222func (a *anthropicClient) isThinkingEnabled() bool {
223	cfg := config.Get()
224	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
225	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
226		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
227	}
228	return a.Model().CanReason && modelConfig.Think
229}
230
231func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
232	model := a.providerOptions.model(a.providerOptions.modelType)
233	var thinkingParam anthropic.ThinkingConfigParamUnion
234	cfg := config.Get()
235	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
236	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
237		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
238	}
239	temperature := anthropic.Float(0)
240
241	maxTokens := model.DefaultMaxTokens
242	if modelConfig.MaxTokens > 0 {
243		maxTokens = modelConfig.MaxTokens
244	}
245	if a.isThinkingEnabled() {
246		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
247		temperature = anthropic.Float(1)
248	}
249	// Override max tokens if set in provider options
250	if a.providerOptions.maxTokens > 0 {
251		maxTokens = a.providerOptions.maxTokens
252	}
253
254	// Use adjusted max tokens if context limit was hit
255	if a.adjustedMaxTokens > 0 {
256		maxTokens = int64(a.adjustedMaxTokens)
257	}
258
259	systemBlocks := []anthropic.TextBlockParam{}
260
261	// Add custom system prompt prefix if configured
262	if a.providerOptions.systemPromptPrefix != "" {
263		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
264			Text: a.providerOptions.systemPromptPrefix,
265		})
266	}
267
268	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
269		Text: a.providerOptions.systemMessage,
270		CacheControl: anthropic.CacheControlEphemeralParam{
271			Type: "ephemeral",
272		},
273	})
274
275	return anthropic.MessageNewParams{
276		Model:       anthropic.Model(model.ID),
277		MaxTokens:   maxTokens,
278		Temperature: temperature,
279		Messages:    messages,
280		Tools:       tools,
281		Thinking:    thinkingParam,
282		System:      systemBlocks,
283	}
284}
285
286func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
287	attempts := 0
288	for {
289		attempts++
290		// Prepare messages on each attempt in case max_tokens was adjusted
291		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
292
293		var opts []option.RequestOption
294		if a.isThinkingEnabled() {
295			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
296		}
297		anthropicResponse, err := a.client.Messages.New(
298			ctx,
299			preparedMessages,
300			opts...,
301		)
302		// If there is an error we are going to see if we can retry the call
303		if err != nil {
304			retry, after, retryErr := a.shouldRetry(attempts, err)
305			if retryErr != nil {
306				return nil, retryErr
307			}
308			if retry {
309				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
310				select {
311				case <-ctx.Done():
312					return nil, ctx.Err()
313				case <-time.After(time.Duration(after) * time.Millisecond):
314					continue
315				}
316			}
317			return nil, retryErr
318		}
319
320		content := ""
321		for _, block := range anthropicResponse.Content {
322			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
323				content += text.Text
324			}
325		}
326
327		return &ProviderResponse{
328			Content:   content,
329			ToolCalls: a.toolCalls(*anthropicResponse),
330			Usage:     a.usage(*anthropicResponse),
331		}, nil
332	}
333}
334
335func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
336	attempts := 0
337	eventChan := make(chan ProviderEvent)
338	go func() {
339		for {
340			attempts++
341			// Prepare messages on each attempt in case max_tokens was adjusted
342			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
343
344			var opts []option.RequestOption
345			if a.isThinkingEnabled() {
346				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
347			}
348
349			anthropicStream := a.client.Messages.NewStreaming(
350				ctx,
351				preparedMessages,
352				opts...,
353			)
354			accumulatedMessage := anthropic.Message{}
355
356			currentToolCallID := ""
357			for anthropicStream.Next() {
358				event := anthropicStream.Current()
359				err := accumulatedMessage.Accumulate(event)
360				if err != nil {
361					slog.Warn("Error accumulating message", "error", err)
362					continue
363				}
364
365				switch event := event.AsAny().(type) {
366				case anthropic.ContentBlockStartEvent:
367					switch event.ContentBlock.Type {
368					case "text":
369						eventChan <- ProviderEvent{Type: EventContentStart}
370					case "tool_use":
371						currentToolCallID = event.ContentBlock.ID
372						eventChan <- ProviderEvent{
373							Type: EventToolUseStart,
374							ToolCall: &message.ToolCall{
375								ID:       event.ContentBlock.ID,
376								Name:     event.ContentBlock.Name,
377								Finished: false,
378							},
379						}
380					}
381
382				case anthropic.ContentBlockDeltaEvent:
383					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
384						eventChan <- ProviderEvent{
385							Type:     EventThinkingDelta,
386							Thinking: event.Delta.Thinking,
387						}
388					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
389						eventChan <- ProviderEvent{
390							Type:      EventSignatureDelta,
391							Signature: event.Delta.Signature,
392						}
393					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
394						eventChan <- ProviderEvent{
395							Type:    EventContentDelta,
396							Content: event.Delta.Text,
397						}
398					} else if event.Delta.Type == "input_json_delta" {
399						if currentToolCallID != "" {
400							eventChan <- ProviderEvent{
401								Type: EventToolUseDelta,
402								ToolCall: &message.ToolCall{
403									ID:       currentToolCallID,
404									Finished: false,
405									Input:    event.Delta.PartialJSON,
406								},
407							}
408						}
409					}
410				case anthropic.ContentBlockStopEvent:
411					if currentToolCallID != "" {
412						eventChan <- ProviderEvent{
413							Type: EventToolUseStop,
414							ToolCall: &message.ToolCall{
415								ID: currentToolCallID,
416							},
417						}
418						currentToolCallID = ""
419					} else {
420						eventChan <- ProviderEvent{Type: EventContentStop}
421					}
422
423				case anthropic.MessageStopEvent:
424					content := ""
425					for _, block := range accumulatedMessage.Content {
426						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
427							content += text.Text
428						}
429					}
430
431					eventChan <- ProviderEvent{
432						Type: EventComplete,
433						Response: &ProviderResponse{
434							Content:      content,
435							ToolCalls:    a.toolCalls(accumulatedMessage),
436							Usage:        a.usage(accumulatedMessage),
437							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
438						},
439						Content: content,
440					}
441				}
442			}
443
444			err := anthropicStream.Err()
445			if err == nil || errors.Is(err, io.EOF) {
446				close(eventChan)
447				return
448			}
449
450			// If there is an error we are going to see if we can retry the call
451			retry, after, retryErr := a.shouldRetry(attempts, err)
452			if retryErr != nil {
453				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
454				close(eventChan)
455				return
456			}
457			if retry {
458				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
459				select {
460				case <-ctx.Done():
461					// context cancelled
462					if ctx.Err() != nil {
463						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
464					}
465					close(eventChan)
466					return
467				case <-time.After(time.Duration(after) * time.Millisecond):
468					continue
469				}
470			}
471			if ctx.Err() != nil {
472				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
473			}
474
475			close(eventChan)
476			return
477		}
478	}()
479	return eventChan
480}
481
482func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
483	var apiErr *anthropic.Error
484	if !errors.As(err, &apiErr) {
485		return false, 0, err
486	}
487
488	if attempts > maxRetries {
489		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
490	}
491
492	if apiErr.StatusCode == 401 {
493		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
494		if err != nil {
495			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
496		}
497		a.client = createAnthropicClient(a.providerOptions, a.tp)
498		return true, 0, nil
499	}
500
501	// Handle context limit exceeded error (400 Bad Request)
502	if apiErr.StatusCode == 400 {
503		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
504			a.adjustedMaxTokens = adjusted
505			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
506			return true, 0, nil
507		}
508	}
509
510	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
511	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
512		return false, 0, err
513	}
514
515	retryMs := 0
516	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
517
518	backoffMs := 2000 * (1 << (attempts - 1))
519	jitterMs := int(float64(backoffMs) * 0.2)
520	retryMs = backoffMs + jitterMs
521	if len(retryAfterValues) > 0 {
522		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
523			retryMs = retryMs * 1000
524		}
525	}
526	return true, int64(retryMs), nil
527}
528
529// handleContextLimitError parses context limit error and returns adjusted max_tokens
530func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
531	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
532	errorMsg := apiErr.Error()
533
534	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
535
536	if len(matches) != 4 {
537		return 0, false
538	}
539
540	inputTokens, err1 := strconv.Atoi(matches[1])
541	contextLimit, err2 := strconv.Atoi(matches[3])
542
543	if err1 != nil || err2 != nil {
544		return 0, false
545	}
546
547	// Calculate safe max_tokens with a buffer of 1000 tokens
548	safeMaxTokens := contextLimit - inputTokens - 1000
549
550	// Ensure we don't go below a minimum threshold
551	safeMaxTokens = max(safeMaxTokens, 1000)
552
553	return safeMaxTokens, true
554}
555
556func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
557	var toolCalls []message.ToolCall
558
559	for _, block := range msg.Content {
560		switch variant := block.AsAny().(type) {
561		case anthropic.ToolUseBlock:
562			toolCall := message.ToolCall{
563				ID:       variant.ID,
564				Name:     variant.Name,
565				Input:    string(variant.Input),
566				Type:     string(variant.Type),
567				Finished: true,
568			}
569			toolCalls = append(toolCalls, toolCall)
570		}
571	}
572
573	return toolCalls
574}
575
576func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
577	return TokenUsage{
578		InputTokens:         msg.Usage.InputTokens,
579		OutputTokens:        msg.Usage.OutputTokens,
580		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
581		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
582	}
583}
584
585func (a *anthropicClient) Model() catwalk.Model {
586	return a.providerOptions.model(a.providerOptions.modelType)
587}