anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"regexp"
 11	"strconv"
 12	"strings"
 13	"time"
 14
 15	"github.com/anthropics/anthropic-sdk-go"
 16	"github.com/anthropics/anthropic-sdk-go/bedrock"
 17	"github.com/anthropics/anthropic-sdk-go/option"
 18	"github.com/anthropics/anthropic-sdk-go/vertex"
 19	"github.com/charmbracelet/catwalk/pkg/catwalk"
 20	"github.com/charmbracelet/crush/internal/config"
 21	"github.com/charmbracelet/crush/internal/llm/tools"
 22	"github.com/charmbracelet/crush/internal/log"
 23	"github.com/charmbracelet/crush/internal/message"
 24)
 25
 26// Pre-compiled regex for parsing context limit errors.
 27var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
 28
 29type anthropicClient struct {
 30	providerOptions   providerClientOptions
 31	tp                AnthropicClientType
 32	client            anthropic.Client
 33	adjustedMaxTokens int // Used when context limit is hit
 34}
 35
 36type AnthropicClient ProviderClient
 37
 38type AnthropicClientType string
 39
 40const (
 41	AnthropicClientTypeNormal  AnthropicClientType = "normal"
 42	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
 43	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
 44)
 45
 46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
 47	return &anthropicClient{
 48		providerOptions: opts,
 49		tp:              tp,
 50		client:          createAnthropicClient(opts, tp),
 51	}
 52}
 53
 54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
 55	anthropicClientOptions := []option.RequestOption{}
 56
 57	// Check if Authorization header is provided in extra headers
 58	hasBearerAuth := false
 59	if opts.extraHeaders != nil {
 60		for key := range opts.extraHeaders {
 61			if strings.ToLower(key) == "authorization" {
 62				hasBearerAuth = true
 63				break
 64			}
 65		}
 66	}
 67
 68	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 69
 70	if opts.apiKey != "" && !hasBearerAuth {
 71		if isBearerToken {
 72			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 73			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 74		} else {
 75			// Use standard X-Api-Key header
 76			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 77		}
 78	} else if hasBearerAuth {
 79		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 80	}
 81
 82	if opts.baseURL != "" {
 83		anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(opts.baseURL))
 84	}
 85
 86	if config.Get().Options.Debug {
 87		httpClient := log.NewHTTPClient()
 88		anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
 89	}
 90
 91	switch tp {
 92	case AnthropicClientTypeBedrock:
 93		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 94	case AnthropicClientTypeVertex:
 95		project := opts.extraParams["project"]
 96		location := opts.extraParams["location"]
 97		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
 98	}
 99	for key, header := range opts.extraHeaders {
100		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
101	}
102	for key, value := range opts.extraBody {
103		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
104	}
105	return anthropic.NewClient(anthropicClientOptions...)
106}
107
108func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
109	for i, msg := range messages {
110		cache := false
111		if i > len(messages)-3 {
112			cache = true
113		}
114		switch msg.Role {
115		case message.User:
116			content := anthropic.NewTextBlock(msg.Content().String())
117			if cache && !a.providerOptions.disableCache {
118				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
119					Type: "ephemeral",
120				}
121			}
122			var contentBlocks []anthropic.ContentBlockParamUnion
123			contentBlocks = append(contentBlocks, content)
124			for _, binaryContent := range msg.BinaryContent() {
125				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
126				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
127				contentBlocks = append(contentBlocks, imageBlock)
128			}
129			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
130
131		case message.Assistant:
132			blocks := []anthropic.ContentBlockParamUnion{}
133
134			// Add thinking blocks first if present (required when thinking is enabled with tool use)
135			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
136				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
137				blocks = append(blocks, thinkingBlock)
138			}
139
140			if msg.Content().String() != "" {
141				content := anthropic.NewTextBlock(msg.Content().String())
142				if cache && !a.providerOptions.disableCache {
143					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
144						Type: "ephemeral",
145					}
146				}
147				blocks = append(blocks, content)
148			}
149
150			for _, toolCall := range msg.ToolCalls() {
151				var inputMap map[string]any
152				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
153				if err != nil {
154					continue
155				}
156				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
157			}
158
159			if len(blocks) == 0 {
160				continue
161			}
162			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
163
164		case message.Tool:
165			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
166			for i, toolResult := range msg.ToolResults() {
167				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
168			}
169			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
170		}
171	}
172	return
173}
174
175func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
176	if len(tools) == 0 {
177		return nil
178	}
179	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
180
181	for i, tool := range tools {
182		info := tool.Info()
183		toolParam := anthropic.ToolParam{
184			Name:        info.Name,
185			Description: anthropic.String(info.Description),
186			InputSchema: anthropic.ToolInputSchemaParam{
187				Properties: info.Parameters,
188				Required:   info.Required,
189			},
190		}
191
192		if i == len(tools)-1 && !a.providerOptions.disableCache {
193			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
194				Type: "ephemeral",
195			}
196		}
197
198		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
199	}
200
201	return anthropicTools
202}
203
204func (a *anthropicClient) finishReason(reason string) message.FinishReason {
205	switch reason {
206	case "end_turn":
207		return message.FinishReasonEndTurn
208	case "max_tokens":
209		return message.FinishReasonMaxTokens
210	case "tool_use":
211		return message.FinishReasonToolUse
212	case "stop_sequence":
213		return message.FinishReasonEndTurn
214	default:
215		return message.FinishReasonUnknown
216	}
217}
218
219func (a *anthropicClient) isThinkingEnabled() bool {
220	cfg := config.Get()
221	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
222	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
223		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
224	}
225	return a.Model().CanReason && modelConfig.Think
226}
227
228func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
229	model := a.providerOptions.model(a.providerOptions.modelType)
230	var thinkingParam anthropic.ThinkingConfigParamUnion
231	cfg := config.Get()
232	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
233	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
234		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
235	}
236	temperature := anthropic.Float(0)
237
238	maxTokens := model.DefaultMaxTokens
239	if modelConfig.MaxTokens > 0 {
240		maxTokens = modelConfig.MaxTokens
241	}
242	if a.isThinkingEnabled() {
243		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
244		temperature = anthropic.Float(1)
245	}
246	// Override max tokens if set in provider options
247	if a.providerOptions.maxTokens > 0 {
248		maxTokens = a.providerOptions.maxTokens
249	}
250
251	// Use adjusted max tokens if context limit was hit
252	if a.adjustedMaxTokens > 0 {
253		maxTokens = int64(a.adjustedMaxTokens)
254	}
255
256	systemBlocks := []anthropic.TextBlockParam{}
257
258	// Add custom system prompt prefix if configured
259	if a.providerOptions.systemPromptPrefix != "" {
260		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
261			Text: a.providerOptions.systemPromptPrefix,
262		})
263	}
264
265	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
266		Text: a.providerOptions.systemMessage,
267		CacheControl: anthropic.CacheControlEphemeralParam{
268			Type: "ephemeral",
269		},
270	})
271
272	return anthropic.MessageNewParams{
273		Model:       anthropic.Model(model.ID),
274		MaxTokens:   maxTokens,
275		Temperature: temperature,
276		Messages:    messages,
277		Tools:       tools,
278		Thinking:    thinkingParam,
279		System:      systemBlocks,
280	}
281}
282
283func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
284	attempts := 0
285	for {
286		attempts++
287		// Prepare messages on each attempt in case max_tokens was adjusted
288		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
289
290		var opts []option.RequestOption
291		if a.isThinkingEnabled() {
292			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
293		}
294		anthropicResponse, err := a.client.Messages.New(
295			ctx,
296			preparedMessages,
297			opts...,
298		)
299		// If there is an error we are going to see if we can retry the call
300		if err != nil {
301			retry, after, retryErr := a.shouldRetry(attempts, err)
302			if retryErr != nil {
303				return nil, retryErr
304			}
305			if retry {
306				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
307				select {
308				case <-ctx.Done():
309					return nil, ctx.Err()
310				case <-time.After(time.Duration(after) * time.Millisecond):
311					continue
312				}
313			}
314			return nil, retryErr
315		}
316
317		content := ""
318		for _, block := range anthropicResponse.Content {
319			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
320				content += text.Text
321			}
322		}
323
324		return &ProviderResponse{
325			Content:   content,
326			ToolCalls: a.toolCalls(*anthropicResponse),
327			Usage:     a.usage(*anthropicResponse),
328		}, nil
329	}
330}
331
332func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
333	attempts := 0
334	eventChan := make(chan ProviderEvent)
335	go func() {
336		for {
337			attempts++
338			// Prepare messages on each attempt in case max_tokens was adjusted
339			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
340
341			var opts []option.RequestOption
342			if a.isThinkingEnabled() {
343				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
344			}
345
346			anthropicStream := a.client.Messages.NewStreaming(
347				ctx,
348				preparedMessages,
349				opts...,
350			)
351			accumulatedMessage := anthropic.Message{}
352
353			currentToolCallID := ""
354			for anthropicStream.Next() {
355				event := anthropicStream.Current()
356				err := accumulatedMessage.Accumulate(event)
357				if err != nil {
358					slog.Warn("Error accumulating message", "error", err)
359					continue
360				}
361
362				switch event := event.AsAny().(type) {
363				case anthropic.ContentBlockStartEvent:
364					switch event.ContentBlock.Type {
365					case "text":
366						eventChan <- ProviderEvent{Type: EventContentStart}
367					case "tool_use":
368						currentToolCallID = event.ContentBlock.ID
369						eventChan <- ProviderEvent{
370							Type: EventToolUseStart,
371							ToolCall: &message.ToolCall{
372								ID:       event.ContentBlock.ID,
373								Name:     event.ContentBlock.Name,
374								Finished: false,
375							},
376						}
377					}
378
379				case anthropic.ContentBlockDeltaEvent:
380					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
381						eventChan <- ProviderEvent{
382							Type:     EventThinkingDelta,
383							Thinking: event.Delta.Thinking,
384						}
385					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
386						eventChan <- ProviderEvent{
387							Type:      EventSignatureDelta,
388							Signature: event.Delta.Signature,
389						}
390					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
391						eventChan <- ProviderEvent{
392							Type:    EventContentDelta,
393							Content: event.Delta.Text,
394						}
395					} else if event.Delta.Type == "input_json_delta" {
396						if currentToolCallID != "" {
397							eventChan <- ProviderEvent{
398								Type: EventToolUseDelta,
399								ToolCall: &message.ToolCall{
400									ID:       currentToolCallID,
401									Finished: false,
402									Input:    event.Delta.PartialJSON,
403								},
404							}
405						}
406					}
407				case anthropic.ContentBlockStopEvent:
408					if currentToolCallID != "" {
409						eventChan <- ProviderEvent{
410							Type: EventToolUseStop,
411							ToolCall: &message.ToolCall{
412								ID: currentToolCallID,
413							},
414						}
415						currentToolCallID = ""
416					} else {
417						eventChan <- ProviderEvent{Type: EventContentStop}
418					}
419
420				case anthropic.MessageStopEvent:
421					content := ""
422					for _, block := range accumulatedMessage.Content {
423						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
424							content += text.Text
425						}
426					}
427
428					eventChan <- ProviderEvent{
429						Type: EventComplete,
430						Response: &ProviderResponse{
431							Content:      content,
432							ToolCalls:    a.toolCalls(accumulatedMessage),
433							Usage:        a.usage(accumulatedMessage),
434							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
435						},
436						Content: content,
437					}
438				}
439			}
440
441			err := anthropicStream.Err()
442			if err == nil || errors.Is(err, io.EOF) {
443				close(eventChan)
444				return
445			}
446
447			// If there is an error we are going to see if we can retry the call
448			retry, after, retryErr := a.shouldRetry(attempts, err)
449			if retryErr != nil {
450				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
451				close(eventChan)
452				return
453			}
454			if retry {
455				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
456				select {
457				case <-ctx.Done():
458					// context cancelled
459					if ctx.Err() != nil {
460						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
461					}
462					close(eventChan)
463					return
464				case <-time.After(time.Duration(after) * time.Millisecond):
465					continue
466				}
467			}
468			if ctx.Err() != nil {
469				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
470			}
471
472			close(eventChan)
473			return
474		}
475	}()
476	return eventChan
477}
478
479func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
480	var apiErr *anthropic.Error
481	if !errors.As(err, &apiErr) {
482		return false, 0, err
483	}
484
485	if attempts > maxRetries {
486		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
487	}
488
489	if apiErr.StatusCode == 401 {
490		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
491		if err != nil {
492			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
493		}
494		a.client = createAnthropicClient(a.providerOptions, a.tp)
495		return true, 0, nil
496	}
497
498	// Handle context limit exceeded error (400 Bad Request)
499	if apiErr.StatusCode == 400 {
500		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
501			a.adjustedMaxTokens = adjusted
502			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
503			return true, 0, nil
504		}
505	}
506
507	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
508	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
509		return false, 0, err
510	}
511
512	retryMs := 0
513	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
514
515	backoffMs := 2000 * (1 << (attempts - 1))
516	jitterMs := int(float64(backoffMs) * 0.2)
517	retryMs = backoffMs + jitterMs
518	if len(retryAfterValues) > 0 {
519		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
520			retryMs = retryMs * 1000
521		}
522	}
523	return true, int64(retryMs), nil
524}
525
526// handleContextLimitError parses context limit error and returns adjusted max_tokens
527func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
528	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
529	errorMsg := apiErr.Error()
530
531	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
532
533	if len(matches) != 4 {
534		return 0, false
535	}
536
537	inputTokens, err1 := strconv.Atoi(matches[1])
538	contextLimit, err2 := strconv.Atoi(matches[3])
539
540	if err1 != nil || err2 != nil {
541		return 0, false
542	}
543
544	// Calculate safe max_tokens with a buffer of 1000 tokens
545	safeMaxTokens := contextLimit - inputTokens - 1000
546
547	// Ensure we don't go below a minimum threshold
548	safeMaxTokens = max(safeMaxTokens, 1000)
549
550	return safeMaxTokens, true
551}
552
553func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
554	var toolCalls []message.ToolCall
555
556	for _, block := range msg.Content {
557		switch variant := block.AsAny().(type) {
558		case anthropic.ToolUseBlock:
559			toolCall := message.ToolCall{
560				ID:       variant.ID,
561				Name:     variant.Name,
562				Input:    string(variant.Input),
563				Type:     string(variant.Type),
564				Finished: true,
565			}
566			toolCalls = append(toolCalls, toolCall)
567		}
568	}
569
570	return toolCalls
571}
572
573func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
574	return TokenUsage{
575		InputTokens:         msg.Usage.InputTokens,
576		OutputTokens:        msg.Usage.OutputTokens,
577		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
578		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
579	}
580}
581
582func (a *anthropicClient) Model() catwalk.Model {
583	return a.providerOptions.model(a.providerOptions.modelType)
584}