package provider

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/anthropics/anthropic-sdk-go/bedrock"
	"github.com/anthropics/anthropic-sdk-go/option"
	"github.com/charmbracelet/crush/internal/config"
	"github.com/charmbracelet/crush/internal/fur/provider"
	"github.com/charmbracelet/crush/internal/llm/tools"
	"github.com/charmbracelet/crush/internal/message"
)

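// anthropicClient implements the provider client for Anthropic's Messages
// API, optionally routed through AWS Bedrock.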
type anthropicClient struct {
	providerOptions   providerClientOptions
	useBedrock        bool
	client            anthropic.Client
	adjustedMaxTokens int // used when the context limit is hit
}

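// AnthropicClient is the ProviderClient type returned by newAnthropicClient.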
type AnthropicClient ProviderClient

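// newAnthropicClient builds an Anthropic-backed client; when useBedrock is
// true, requests are routed through AWS Bedrock using the default AWS config.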
func newAnthropicClient(opts providerClientOptions, useBedrock bool) AnthropicClient {
	return &anthropicClient{
		providerOptions: opts,
		useBedrock:      useBedrock, // retained so the client can be rebuilt on auth retry
		client:          createAnthropicClient(opts, useBedrock),
	}
}

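// createAnthropicClient assembles the SDK request options (API key, Bedrock
// config, extra headers, extra body fields) and builds the client. A minimal
// sketch of direct-API usage (the literal values are placeholders):
//
//	opts := providerClientOptions{
//		apiKey:       "sk-ant-...", // placeholder key
//		extraHeaders: map[string]string{"anthropic-version": "2023-06-01"},
//	}
//	client := createAnthropicClient(opts, false) // false: direct API, not Bedrock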
func createAnthropicClient(opts providerClientOptions, useBedrock bool) anthropic.Client {
	anthropicClientOptions := []option.RequestOption{}
	if opts.apiKey != "" {
		anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
	}
	if useBedrock {
		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
	}
	for key, value := range opts.extraHeaders {
		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, value))
	}
	for key, value := range opts.extraBody {
		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
	}
	return anthropic.NewClient(anthropicClientOptions...)
}

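// convertMessages maps internal messages onto Anthropic message params. The
// last two messages get ephemeral cache control (unless caching is disabled)
// so the stable conversation prefix can be served from the prompt cache.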
func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
	for i, msg := range messages {
		// Only the last two messages are cache candidates.
		cache := i > len(messages)-3
		switch msg.Role {
		case message.User:
			content := anthropic.NewTextBlock(msg.Content().String())
			if cache && !a.providerOptions.disableCache {
				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				}
			}
			var contentBlocks []anthropic.ContentBlockParamUnion
			contentBlocks = append(contentBlocks, content)
			for _, binaryContent := range msg.BinaryContent() {
				base64Image := binaryContent.String(provider.InferenceProviderAnthropic)
				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
				contentBlocks = append(contentBlocks, imageBlock)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))

		case message.Assistant:
			blocks := []anthropic.ContentBlockParamUnion{}

			// Add thinking blocks first if present (required when thinking is enabled with tool use).
			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
				blocks = append(blocks, thinkingBlock)
			}

			if msg.Content().String() != "" {
				content := anthropic.NewTextBlock(msg.Content().String())
				if cache && !a.providerOptions.disableCache {
					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
						Type: "ephemeral",
					}
				}
				blocks = append(blocks, content)
			}

			for _, toolCall := range msg.ToolCalls() {
				var inputMap map[string]any
				if err := json.Unmarshal([]byte(toolCall.Input), &inputMap); err != nil {
					continue
				}
				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
			}

			if len(blocks) == 0 {
				slog.Warn("Skipping assistant message with no content blocks; this should not happen")
				continue
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))

		case message.Tool:
			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
			for i, toolResult := range msg.ToolResults() {
				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
		}
	}
	return
}

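// convertTools maps tool definitions onto Anthropic tool params, marking the
// last tool with ephemeral cache control so the whole tool list is cacheable.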
func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))

	for i, tool := range tools {
		info := tool.Info()
		toolParam := anthropic.ToolParam{
			Name:        info.Name,
			Description: anthropic.String(info.Description),
			InputSchema: anthropic.ToolInputSchemaParam{
				Properties: info.Parameters,
				// TODO: figure out how we can tell claude the required fields?
			},
		}

		if i == len(tools)-1 && !a.providerOptions.disableCache {
			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
				Type: "ephemeral",
			}
		}

		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
	}

	return anthropicTools
}

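// finishReason maps Anthropic stop reasons onto internal finish reasons;
// "stop_sequence" is treated like "end_turn".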
func (a *anthropicClient) finishReason(reason string) message.FinishReason {
	switch reason {
	case "end_turn":
		return message.FinishReasonEndTurn
	case "max_tokens":
		return message.FinishReasonMaxTokens
	case "tool_use":
		return message.FinishReasonToolUse
	case "stop_sequence":
		return message.FinishReasonEndTurn
	default:
		return message.FinishReasonUnknown
	}
}

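// isThinkingEnabled reports whether extended thinking should be requested:
// the selected model must support reasoning and thinking must be enabled in
// the config for the active model type.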
func (a *anthropicClient) isThinkingEnabled() bool {
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	return a.Model().CanReason && modelConfig.Think
}

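// preparedMessages assembles the request params: model, max_tokens (config
// value, then provider override, then any context-limit adjustment), an
// optional thinking budget, and a system prompt marked for caching.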
func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
	model := a.providerOptions.model(a.providerOptions.modelType)
	var thinkingParam anthropic.ThinkingConfigParamUnion
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	temperature := anthropic.Float(0)

	maxTokens := model.DefaultMaxTokens
	if modelConfig.MaxTokens > 0 {
		maxTokens = modelConfig.MaxTokens
	}
	if a.isThinkingEnabled() {
		// Give thinking 80% of the max_tokens budget; the API expects the
		// default temperature (1) when extended thinking is enabled.
		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
		temperature = anthropic.Float(1)
	}
	// Override max tokens if set in provider options.
	if a.providerOptions.maxTokens > 0 {
		maxTokens = a.providerOptions.maxTokens
	}

	// Use adjusted max tokens if the context limit was hit on a previous attempt.
	if a.adjustedMaxTokens > 0 {
		maxTokens = int64(a.adjustedMaxTokens)
	}

	return anthropic.MessageNewParams{
		Model:       anthropic.Model(model.ID),
		MaxTokens:   maxTokens,
		Temperature: temperature,
		Messages:    messages,
		Tools:       tools,
		Thinking:    thinkingParam,
		System: []anthropic.TextBlockParam{
			{
				Text: a.providerOptions.systemMessage,
				CacheControl: anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				},
			},
		},
	}
}

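// send performs a blocking Messages call, retrying with backoff (and with
// adjusted max_tokens after a context-limit error) until it succeeds, the
// context is cancelled, or retries are exhausted.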
func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
	cfg := config.Get()

	attempts := 0
	for {
		attempts++
		// Prepare messages on each attempt in case max_tokens was adjusted.
		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
		if cfg.Options.Debug {
			jsonData, _ := json.Marshal(preparedMessages)
			slog.Debug("Prepared messages", "messages", string(jsonData))
		}

		var opts []option.RequestOption
		if a.isThinkingEnabled() {
			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
		}
		anthropicResponse, err := a.client.Messages.New(
			ctx,
			preparedMessages,
			opts...,
		)
		// If the request failed, check whether the call can be retried.
		if err != nil {
			slog.Error("Error in Anthropic API call", "error", err)
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				return nil, retryErr
			}
			if retry {
				slog.Warn("Retrying Anthropic call", "attempt", attempts, "max_retries", maxRetries)
				select {
				case <-ctx.Done():
					return nil, ctx.Err()
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			return nil, err
		}

		content := ""
		for _, block := range anthropicResponse.Content {
			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
				content += text.Text
			}
		}

		return &ProviderResponse{
			Content:   content,
			ToolCalls: a.toolCalls(*anthropicResponse),
			Usage:     a.usage(*anthropicResponse),
		}, nil
	}
}

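// stream performs a streaming Messages call and translates SDK events into
// ProviderEvents on the returned channel; on retryable errors the whole
// stream is restarted with backoff. The channel is closed when done.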
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	cfg := config.Get()
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted.
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
			if cfg.Options.Debug {
				jsonData, _ := json.Marshal(preparedMessages)
				slog.Debug("Prepared messages", "messages", string(jsonData))
			}

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			accumulatedMessage := anthropic.Message{}

			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}
				case anthropic.ContentBlockStopEvent:
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}
			// If the stream failed, check whether the call can be retried.
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn("Retrying Anthropic stream", "attempt", attempts, "max_retries", maxRetries)
				select {
				case <-ctx.Done():
					// Context was cancelled while waiting to retry.
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}

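// shouldRetry decides whether a failed call should be retried and how long to
// wait, in milliseconds. 401s trigger an API-key re-resolve and client
// rebuild; 400s are checked for the context-limit pattern; 429/529 and
// overload errors back off exponentially with a fixed 20% margin unless the
// server sent a Retry-After header. The schedule without Retry-After:
//
//	attempt 1: 2000ms * 1.2 = 2400ms
//	attempt 2: 4000ms * 1.2 = 4800ms
//	attempt 3: 8000ms * 1.2 = 9600ms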
func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
	var apiErr *anthropic.Error
	if !errors.As(err, &apiErr) {
		return false, 0, err
	}

	if attempts > maxRetries {
		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
	}

	if apiErr.StatusCode == 401 {
		// Re-resolve the API key (it may have been rotated) and rebuild the client.
		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
		if err != nil {
			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
		}
		a.client = createAnthropicClient(a.providerOptions, a.useBedrock)
		return true, 0, nil
	}

	// Handle context limit exceeded errors (400 Bad Request).
	if apiErr.StatusCode == 400 {
		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
			a.adjustedMaxTokens = adjusted
			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
			return true, 0, nil
		}
	}

	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
		return false, 0, err
	}

	// Exponential backoff with a fixed 20% margin; a Retry-After header
	// (in seconds) takes precedence when present.
	backoffMs := 2000 * (1 << (attempts - 1))
	jitterMs := int(float64(backoffMs) * 0.2)
	retryMs := backoffMs + jitterMs
	if retryAfterValues := apiErr.Response.Header.Values("Retry-After"); len(retryAfterValues) > 0 {
		var seconds int
		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &seconds); err == nil {
			retryMs = seconds * 1000
		}
	}
	return true, int64(retryMs), nil
}

// handleContextLimitError parses a context-limit error and returns an adjusted max_tokens.
func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
	// Parse error messages like: "input length and `max_tokens` exceed context limit: 154978 + 50000 > 200000"
	errorMsg := apiErr.Error()

	re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
	matches := re.FindStringSubmatch(errorMsg)

	if len(matches) != 4 {
		return 0, false
	}

	inputTokens, err1 := strconv.Atoi(matches[1])
	contextLimit, err2 := strconv.Atoi(matches[3])

	if err1 != nil || err2 != nil {
		return 0, false
	}

	// Calculate safe max_tokens with a buffer of 1000 tokens.
	safeMaxTokens := contextLimit - inputTokens - 1000

	// Ensure we don't go below a minimum threshold.
	safeMaxTokens = max(safeMaxTokens, 1000)

	return safeMaxTokens, true
}
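
// Example: for the error quoted above ("154978 + 50000 > 200000"),
// handleContextLimitError returns 200000 - 154978 - 1000 = 44022.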

// toolCalls extracts tool-use blocks from a completed message.
func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
	var toolCalls []message.ToolCall

	for _, block := range msg.Content {
		switch variant := block.AsAny().(type) {
		case anthropic.ToolUseBlock:
			toolCall := message.ToolCall{
				ID:       variant.ID,
				Name:     variant.Name,
				Input:    string(variant.Input),
				Type:     string(variant.Type),
				Finished: true,
			}
			toolCalls = append(toolCalls, toolCall)
		}
	}

	return toolCalls
}

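// usage maps the SDK usage counters, including cache creation and cache read
// tokens, onto the internal TokenUsage type.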
func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
	return TokenUsage{
		InputTokens:         msg.Usage.InputTokens,
		OutputTokens:        msg.Usage.OutputTokens,
		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
	}
}

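// Model returns the provider model selected for the configured model type.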
func (a *anthropicClient) Model() provider.Model {
	return a.providerOptions.model(a.providerOptions.modelType)
}