anthropic.go

  1package provider
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"log/slog"
 10	"net/http"
 11	"regexp"
 12	"strconv"
 13	"strings"
 14	"time"
 15
 16	"github.com/anthropics/anthropic-sdk-go"
 17	"github.com/anthropics/anthropic-sdk-go/bedrock"
 18	"github.com/anthropics/anthropic-sdk-go/option"
 19	"github.com/anthropics/anthropic-sdk-go/vertex"
 20	"github.com/charmbracelet/catwalk/pkg/catwalk"
 21	"github.com/charmbracelet/crush/internal/config"
 22	"github.com/charmbracelet/crush/internal/llm/tools"
 23	"github.com/charmbracelet/crush/internal/log"
 24	"github.com/charmbracelet/crush/internal/message"
 25)
 26
 27// Pre-compiled regex for parsing context limit errors.
 28var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
 29
 30type anthropicClient struct {
 31	providerOptions   providerClientOptions
 32	tp                AnthropicClientType
 33	client            anthropic.Client
 34	adjustedMaxTokens int // Used when context limit is hit
 35}
 36
 37type AnthropicClient ProviderClient
 38
 39type AnthropicClientType string
 40
 41const (
 42	AnthropicClientTypeNormal  AnthropicClientType = "normal"
 43	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
 44	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
 45)
 46
 47func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
 48	return &anthropicClient{
 49		providerOptions: opts,
 50		tp:              tp,
 51		client:          createAnthropicClient(opts, tp),
 52	}
 53}
 54
 55func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
 56	anthropicClientOptions := []option.RequestOption{}
 57
 58	// Check if Authorization header is provided in extra headers
 59	hasBearerAuth := false
 60	if opts.extraHeaders != nil {
 61		for key := range opts.extraHeaders {
 62			if strings.ToLower(key) == "authorization" {
 63				hasBearerAuth = true
 64				break
 65			}
 66		}
 67	}
 68
 69	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
 70
 71	if opts.apiKey != "" && !hasBearerAuth {
 72		if isBearerToken {
 73			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
 74			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
 75		} else {
 76			// Use standard X-Api-Key header
 77			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
 78		}
 79	} else if hasBearerAuth {
 80		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
 81	}
 82
 83	if opts.baseURL != "" {
 84		resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
 85		if err == nil && resolvedBaseURL != "" {
 86			anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
 87		}
 88	}
 89
 90	if config.Get().Options.Debug {
 91		httpClient := log.NewHTTPClient()
 92		anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
 93	}
 94
 95	switch tp {
 96	case AnthropicClientTypeBedrock:
 97		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
 98	case AnthropicClientTypeVertex:
 99		project := opts.extraParams["project"]
100		location := opts.extraParams["location"]
101		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
102	}
103	for key, header := range opts.extraHeaders {
104		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
105	}
106	for key, value := range opts.extraBody {
107		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
108	}
109	return anthropic.NewClient(anthropicClientOptions...)
110}
111
112func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
113	for i, msg := range messages {
114		cache := false
115		if i > len(messages)-3 {
116			cache = true
117		}
118		switch msg.Role {
119		case message.User:
120			content := anthropic.NewTextBlock(msg.Content().String())
121			if cache && !a.providerOptions.disableCache {
122				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
123					Type: "ephemeral",
124				}
125			}
126			var contentBlocks []anthropic.ContentBlockParamUnion
127			contentBlocks = append(contentBlocks, content)
128			for _, binaryContent := range msg.BinaryContent() {
129				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
130				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
131				contentBlocks = append(contentBlocks, imageBlock)
132			}
133			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
134
135		case message.Assistant:
136			blocks := []anthropic.ContentBlockParamUnion{}
137
138			// Add thinking blocks first if present (required when thinking is enabled with tool use)
139			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
140				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
141				blocks = append(blocks, thinkingBlock)
142			}
143
144			if msg.Content().String() != "" {
145				content := anthropic.NewTextBlock(msg.Content().String())
146				if cache && !a.providerOptions.disableCache {
147					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
148						Type: "ephemeral",
149					}
150				}
151				blocks = append(blocks, content)
152			}
153
154			for _, toolCall := range msg.ToolCalls() {
155				if !toolCall.Finished {
156					continue
157				}
158				var inputMap map[string]any
159				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
160				if err != nil {
161					continue
162				}
163				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
164			}
165
166			if len(blocks) == 0 {
167				continue
168			}
169			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
170
171		case message.Tool:
172			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
173			for i, toolResult := range msg.ToolResults() {
174				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
175			}
176			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
177		}
178	}
179	return anthropicMessages
180}
181
182func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
183	if len(tools) == 0 {
184		return nil
185	}
186	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
187
188	for i, tool := range tools {
189		info := tool.Info()
190		toolParam := anthropic.ToolParam{
191			Name:        info.Name,
192			Description: anthropic.String(info.Description),
193			InputSchema: anthropic.ToolInputSchemaParam{
194				Properties: info.Parameters,
195				Required:   info.Required,
196			},
197		}
198
199		if i == len(tools)-1 && !a.providerOptions.disableCache {
200			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
201				Type: "ephemeral",
202			}
203		}
204
205		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
206	}
207
208	return anthropicTools
209}
210
211func (a *anthropicClient) finishReason(reason string) message.FinishReason {
212	switch reason {
213	case "end_turn":
214		return message.FinishReasonEndTurn
215	case "max_tokens":
216		return message.FinishReasonMaxTokens
217	case "tool_use":
218		return message.FinishReasonToolUse
219	case "stop_sequence":
220		return message.FinishReasonEndTurn
221	default:
222		return message.FinishReasonUnknown
223	}
224}
225
226func (a *anthropicClient) isThinkingEnabled() bool {
227	cfg := config.Get()
228	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
229	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
230		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
231	}
232	return a.Model().CanReason && modelConfig.Think
233}
234
235func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
236	model := a.providerOptions.model(a.providerOptions.modelType)
237	var thinkingParam anthropic.ThinkingConfigParamUnion
238	cfg := config.Get()
239	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
240	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
241		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
242	}
243	temperature := anthropic.Float(0)
244
245	maxTokens := model.DefaultMaxTokens
246	if modelConfig.MaxTokens > 0 {
247		maxTokens = modelConfig.MaxTokens
248	}
249	if a.isThinkingEnabled() {
250		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
251		temperature = anthropic.Float(1)
252	}
253	// Override max tokens if set in provider options
254	if a.providerOptions.maxTokens > 0 {
255		maxTokens = a.providerOptions.maxTokens
256	}
257
258	// Use adjusted max tokens if context limit was hit
259	if a.adjustedMaxTokens > 0 {
260		maxTokens = int64(a.adjustedMaxTokens)
261	}
262
263	systemBlocks := []anthropic.TextBlockParam{}
264
265	// Add custom system prompt prefix if configured
266	if a.providerOptions.systemPromptPrefix != "" {
267		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
268			Text: a.providerOptions.systemPromptPrefix,
269		})
270	}
271
272	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
273		Text: a.providerOptions.systemMessage,
274		CacheControl: anthropic.CacheControlEphemeralParam{
275			Type: "ephemeral",
276		},
277	})
278
279	return anthropic.MessageNewParams{
280		Model:       anthropic.Model(model.ID),
281		MaxTokens:   maxTokens,
282		Temperature: temperature,
283		Messages:    messages,
284		Tools:       tools,
285		Thinking:    thinkingParam,
286		System:      systemBlocks,
287	}
288}
289
290func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
291	attempts := 0
292	for {
293		attempts++
294		// Prepare messages on each attempt in case max_tokens was adjusted
295		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
296
297		var opts []option.RequestOption
298		if a.isThinkingEnabled() {
299			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
300		}
301		anthropicResponse, err := a.client.Messages.New(
302			ctx,
303			preparedMessages,
304			opts...,
305		)
306		// If there is an error we are going to see if we can retry the call
307		if err != nil {
308			retry, after, retryErr := a.shouldRetry(attempts, err)
309			if retryErr != nil {
310				return nil, retryErr
311			}
312			if retry {
313				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
314				select {
315				case <-ctx.Done():
316					return nil, ctx.Err()
317				case <-time.After(time.Duration(after) * time.Millisecond):
318					continue
319				}
320			}
321			return nil, retryErr
322		}
323
324		content := ""
325		for _, block := range anthropicResponse.Content {
326			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
327				content += text.Text
328			}
329		}
330
331		return &ProviderResponse{
332			Content:   content,
333			ToolCalls: a.toolCalls(*anthropicResponse),
334			Usage:     a.usage(*anthropicResponse),
335		}, nil
336	}
337}
338
339func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
340	attempts := 0
341	eventChan := make(chan ProviderEvent)
342	go func() {
343		for {
344			attempts++
345			// Prepare messages on each attempt in case max_tokens was adjusted
346			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
347
348			var opts []option.RequestOption
349			if a.isThinkingEnabled() {
350				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
351			}
352
353			anthropicStream := a.client.Messages.NewStreaming(
354				ctx,
355				preparedMessages,
356				opts...,
357			)
358			accumulatedMessage := anthropic.Message{}
359
360			currentToolCallID := ""
361			for anthropicStream.Next() {
362				event := anthropicStream.Current()
363				err := accumulatedMessage.Accumulate(event)
364				if err != nil {
365					slog.Warn("Error accumulating message", "error", err)
366					continue
367				}
368
369				switch event := event.AsAny().(type) {
370				case anthropic.ContentBlockStartEvent:
371					switch event.ContentBlock.Type {
372					case "text":
373						eventChan <- ProviderEvent{Type: EventContentStart}
374					case "tool_use":
375						currentToolCallID = event.ContentBlock.ID
376						eventChan <- ProviderEvent{
377							Type: EventToolUseStart,
378							ToolCall: &message.ToolCall{
379								ID:       event.ContentBlock.ID,
380								Name:     event.ContentBlock.Name,
381								Finished: false,
382							},
383						}
384					}
385
386				case anthropic.ContentBlockDeltaEvent:
387					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
388						eventChan <- ProviderEvent{
389							Type:     EventThinkingDelta,
390							Thinking: event.Delta.Thinking,
391						}
392					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
393						eventChan <- ProviderEvent{
394							Type:      EventSignatureDelta,
395							Signature: event.Delta.Signature,
396						}
397					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
398						eventChan <- ProviderEvent{
399							Type:    EventContentDelta,
400							Content: event.Delta.Text,
401						}
402					} else if event.Delta.Type == "input_json_delta" {
403						if currentToolCallID != "" {
404							eventChan <- ProviderEvent{
405								Type: EventToolUseDelta,
406								ToolCall: &message.ToolCall{
407									ID:       currentToolCallID,
408									Finished: false,
409									Input:    event.Delta.PartialJSON,
410								},
411							}
412						}
413					}
414				case anthropic.ContentBlockStopEvent:
415					if currentToolCallID != "" {
416						eventChan <- ProviderEvent{
417							Type: EventToolUseStop,
418							ToolCall: &message.ToolCall{
419								ID: currentToolCallID,
420							},
421						}
422						currentToolCallID = ""
423					} else {
424						eventChan <- ProviderEvent{Type: EventContentStop}
425					}
426
427				case anthropic.MessageStopEvent:
428					content := ""
429					for _, block := range accumulatedMessage.Content {
430						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
431							content += text.Text
432						}
433					}
434
435					eventChan <- ProviderEvent{
436						Type: EventComplete,
437						Response: &ProviderResponse{
438							Content:      content,
439							ToolCalls:    a.toolCalls(accumulatedMessage),
440							Usage:        a.usage(accumulatedMessage),
441							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
442						},
443						Content: content,
444					}
445				}
446			}
447
448			err := anthropicStream.Err()
449			if err == nil || errors.Is(err, io.EOF) {
450				close(eventChan)
451				return
452			}
453
454			// If there is an error we are going to see if we can retry the call
455			retry, after, retryErr := a.shouldRetry(attempts, err)
456			if retryErr != nil {
457				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
458				close(eventChan)
459				return
460			}
461			if retry {
462				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
463				select {
464				case <-ctx.Done():
465					// context cancelled
466					if ctx.Err() != nil {
467						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
468					}
469					close(eventChan)
470					return
471				case <-time.After(time.Duration(after) * time.Millisecond):
472					continue
473				}
474			}
475			if ctx.Err() != nil {
476				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
477			}
478
479			close(eventChan)
480			return
481		}
482	}()
483	return eventChan
484}
485
486func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
487	var apiErr *anthropic.Error
488	if !errors.As(err, &apiErr) {
489		return false, 0, err
490	}
491
492	if attempts > maxRetries {
493		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
494	}
495
496	if apiErr.StatusCode == http.StatusUnauthorized {
497		return false, 0, err
498	}
499
500	// Handle context limit exceeded error (400 Bad Request)
501	if apiErr.StatusCode == http.StatusBadRequest {
502		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
503			a.adjustedMaxTokens = adjusted
504			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
505			return true, 0, nil
506		}
507	}
508
509	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
510	if apiErr.StatusCode != http.StatusTooManyRequests && apiErr.StatusCode != 529 && !isOverloaded {
511		return false, 0, err
512	}
513
514	retryMs := 0
515	retryAfterValues := apiErr.Response.Header.Values("Retry-After")
516
517	backoffMs := 2000 * (1 << (attempts - 1))
518	jitterMs := int(float64(backoffMs) * 0.2)
519	retryMs = backoffMs + jitterMs
520	if len(retryAfterValues) > 0 {
521		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
522			retryMs = retryMs * 1000
523		}
524	}
525	return true, int64(retryMs), nil
526}
527
528// handleContextLimitError parses context limit error and returns adjusted max_tokens
529func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
530	// Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
531	errorMsg := apiErr.Error()
532
533	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
534
535	if len(matches) != 4 {
536		return 0, false
537	}
538
539	inputTokens, err1 := strconv.Atoi(matches[1])
540	contextLimit, err2 := strconv.Atoi(matches[3])
541
542	if err1 != nil || err2 != nil {
543		return 0, false
544	}
545
546	// Calculate safe max_tokens with a buffer of 1000 tokens
547	safeMaxTokens := contextLimit - inputTokens - 1000
548
549	// Ensure we don't go below a minimum threshold
550	safeMaxTokens = max(safeMaxTokens, 1000)
551
552	return safeMaxTokens, true
553}
554
555func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
556	var toolCalls []message.ToolCall
557
558	for _, block := range msg.Content {
559		switch variant := block.AsAny().(type) {
560		case anthropic.ToolUseBlock:
561			toolCall := message.ToolCall{
562				ID:       variant.ID,
563				Name:     variant.Name,
564				Input:    string(variant.Input),
565				Type:     string(variant.Type),
566				Finished: true,
567			}
568			toolCalls = append(toolCalls, toolCall)
569		}
570	}
571
572	return toolCalls
573}
574
575func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
576	return TokenUsage{
577		InputTokens:         msg.Usage.InputTokens,
578		OutputTokens:        msg.Usage.OutputTokens,
579		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
580		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
581	}
582}
583
584func (a *anthropicClient) Model() catwalk.Model {
585	return a.providerOptions.model(a.providerOptions.modelType)
586}