1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/charmbracelet/catwalk/pkg/catwalk"
19 "github.com/charmbracelet/crush/internal/llm/tools"
20 "github.com/charmbracelet/crush/internal/message"
21)
22
23type anthropicProvider struct {
24 *baseProvider
25 useBedrock bool
26 client anthropic.Client
27 adjustedMaxTokens int // Used when context limit is hit
28}
29
30func NewAnthropicProvider(base *baseProvider, useBedrock bool) Provider {
31 return &anthropicProvider{
32 baseProvider: base,
33 client: createAnthropicClient(base, useBedrock),
34 }
35}
36
37func createAnthropicClient(opts *baseProvider, useBedrock bool) anthropic.Client {
38 anthropicClientOptions := []option.RequestOption{}
39
40 // Check if Authorization header is provided in extra headers
41 hasBearerAuth := false
42 if opts.extraHeaders != nil {
43 for key := range opts.extraHeaders {
44 if strings.ToLower(key) == "authorization" {
45 hasBearerAuth = true
46 break
47 }
48 }
49 }
50
51 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
52
53 if opts.apiKey != "" && !hasBearerAuth {
54 if isBearerToken {
55 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
56 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
57 } else {
58 // Use standard X-Api-Key header
59 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
60 }
61 } else if hasBearerAuth {
62 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
63 }
64 if useBedrock {
65 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
66 }
67 for _, header := range opts.extraHeaders {
68 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(header, opts.extraHeaders[header]))
69 }
70 for key, value := range opts.extraBody {
71 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
72 }
73 return anthropic.NewClient(anthropicClientOptions...)
74}
75
76func (a *anthropicProvider) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
77 for i, msg := range messages {
78 cache := false
79 if i > len(messages)-3 {
80 cache = true
81 }
82 switch msg.Role {
83 case message.User:
84 content := anthropic.NewTextBlock(msg.Content().String())
85 if cache && !a.disableCache {
86 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
87 Type: "ephemeral",
88 }
89 }
90 var contentBlocks []anthropic.ContentBlockParamUnion
91 contentBlocks = append(contentBlocks, content)
92 for _, binaryContent := range msg.BinaryContent() {
93 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
94 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
95 contentBlocks = append(contentBlocks, imageBlock)
96 }
97 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
98
99 case message.Assistant:
100 blocks := []anthropic.ContentBlockParamUnion{}
101
102 // Add thinking blocks first if present (required when thinking is enabled with tool use)
103 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
104 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
105 blocks = append(blocks, thinkingBlock)
106 }
107
108 if msg.Content().String() != "" {
109 content := anthropic.NewTextBlock(msg.Content().String())
110 if cache && !a.disableCache {
111 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
112 Type: "ephemeral",
113 }
114 }
115 blocks = append(blocks, content)
116 }
117
118 for _, toolCall := range msg.ToolCalls() {
119 var inputMap map[string]any
120 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
121 if err != nil {
122 continue
123 }
124 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
125 }
126
127 if len(blocks) == 0 {
128 slog.Warn("There is a message without content, investigate, this should not happen")
129 continue
130 }
131 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
132
133 case message.Tool:
134 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
135 for i, toolResult := range msg.ToolResults() {
136 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
137 }
138 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
139 }
140 }
141 return
142}
143
144func (a *anthropicProvider) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
145 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
146
147 for i, tool := range tools {
148 info := tool.Info()
149 toolParam := anthropic.ToolParam{
150 Name: info.Name,
151 Description: anthropic.String(info.Description),
152 InputSchema: anthropic.ToolInputSchemaParam{
153 Properties: info.Parameters,
154 Required: info.Required,
155 },
156 }
157
158 if i == len(tools)-1 && !a.disableCache {
159 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
160 Type: "ephemeral",
161 }
162 }
163
164 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
165 }
166
167 return anthropicTools
168}
169
170func (a *anthropicProvider) finishReason(reason string) message.FinishReason {
171 switch reason {
172 case "end_turn":
173 return message.FinishReasonEndTurn
174 case "max_tokens":
175 return message.FinishReasonMaxTokens
176 case "tool_use":
177 return message.FinishReasonToolUse
178 case "stop_sequence":
179 return message.FinishReasonEndTurn
180 default:
181 return message.FinishReasonUnknown
182 }
183}
184
185func (a *anthropicProvider) isThinkingEnabled(model string) bool {
186 return a.Model(model).CanReason && a.think
187}
188
189func (a *anthropicProvider) preparedMessages(modelID string, messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
190 model := a.Model(modelID)
191 var thinkingParam anthropic.ThinkingConfigParamUnion
192 temperature := anthropic.Float(0)
193
194 maxTokens := model.DefaultMaxTokens
195 if a.maxTokens > 0 {
196 maxTokens = a.maxTokens
197 }
198 if a.isThinkingEnabled(modelID) {
199 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
200 temperature = anthropic.Float(1)
201 }
202
203 // Use adjusted max tokens if context limit was hit
204 if a.adjustedMaxTokens > 0 {
205 maxTokens = int64(a.adjustedMaxTokens)
206 }
207
208 systemBlocks := []anthropic.TextBlockParam{}
209
210 // Add custom system prompt prefix if configured
211 if a.systemPromptPrefix != "" {
212 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
213 Text: a.systemPromptPrefix,
214 CacheControl: anthropic.CacheControlEphemeralParam{
215 Type: "ephemeral",
216 },
217 })
218 }
219
220 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
221 Text: a.systemMessage,
222 CacheControl: anthropic.CacheControlEphemeralParam{
223 Type: "ephemeral",
224 },
225 })
226
227 return anthropic.MessageNewParams{
228 Model: anthropic.Model(model.ID),
229 MaxTokens: maxTokens,
230 Temperature: temperature,
231 Messages: messages,
232 Tools: tools,
233 Thinking: thinkingParam,
234 System: systemBlocks,
235 }
236}
237
238func (a *anthropicProvider) Send(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) (*ProviderResponse, error) {
239 messages = a.cleanMessages(messages)
240 return a.send(ctx, model, messages, tools)
241}
242
243func (a *anthropicProvider) send(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
244 attempts := 0
245 for {
246 attempts++
247 // Prepare messages on each attempt in case max_tokens was adjusted
248 preparedMessages := a.preparedMessages(model, a.convertMessages(messages), a.convertTools(tools))
249 if a.debug {
250 jsonData, _ := json.Marshal(preparedMessages)
251 slog.Debug("Prepared messages", "messages", string(jsonData))
252 }
253
254 var opts []option.RequestOption
255 if a.isThinkingEnabled(model) {
256 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
257 }
258 anthropicResponse, err := a.client.Messages.New(
259 ctx,
260 preparedMessages,
261 opts...,
262 )
263 // If there is an error we are going to see if we can retry the call
264 if err != nil {
265 slog.Error("Error in Anthropic API call", "error", err)
266 retry, after, retryErr := a.shouldRetry(attempts, err)
267 if retryErr != nil {
268 return nil, retryErr
269 }
270 if retry {
271 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
272 select {
273 case <-ctx.Done():
274 return nil, ctx.Err()
275 case <-time.After(time.Duration(after) * time.Millisecond):
276 continue
277 }
278 }
279 return nil, retryErr
280 }
281
282 content := ""
283 for _, block := range anthropicResponse.Content {
284 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
285 content += text.Text
286 }
287 }
288
289 return &ProviderResponse{
290 Content: content,
291 ToolCalls: a.toolCalls(*anthropicResponse),
292 Usage: a.usage(*anthropicResponse),
293 }, nil
294 }
295}
296
297func (a *anthropicProvider) Stream(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
298 messages = a.cleanMessages(messages)
299 return a.stream(ctx, model, messages, tools)
300}
301
302func (a *anthropicProvider) stream(ctx context.Context, model string, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
303 attempts := 0
304 eventChan := make(chan ProviderEvent)
305 go func() {
306 for {
307 attempts++
308 // Prepare messages on each attempt in case max_tokens was adjusted
309 preparedMessages := a.preparedMessages(model, a.convertMessages(messages), a.convertTools(tools))
310 if a.debug {
311 jsonData, _ := json.Marshal(preparedMessages)
312 slog.Debug("Prepared messages", "messages", string(jsonData))
313 }
314
315 var opts []option.RequestOption
316 if a.isThinkingEnabled(model) {
317 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
318 }
319
320 anthropicStream := a.client.Messages.NewStreaming(
321 ctx,
322 preparedMessages,
323 opts...,
324 )
325 accumulatedMessage := anthropic.Message{}
326
327 currentToolCallID := ""
328 for anthropicStream.Next() {
329 event := anthropicStream.Current()
330 err := accumulatedMessage.Accumulate(event)
331 if err != nil {
332 slog.Warn("Error accumulating message", "error", err)
333 continue
334 }
335
336 switch event := event.AsAny().(type) {
337 case anthropic.ContentBlockStartEvent:
338 switch event.ContentBlock.Type {
339 case "text":
340 eventChan <- ProviderEvent{Type: EventContentStart}
341 case "tool_use":
342 currentToolCallID = event.ContentBlock.ID
343 eventChan <- ProviderEvent{
344 Type: EventToolUseStart,
345 ToolCall: &message.ToolCall{
346 ID: event.ContentBlock.ID,
347 Name: event.ContentBlock.Name,
348 Finished: false,
349 },
350 }
351 }
352
353 case anthropic.ContentBlockDeltaEvent:
354 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
355 eventChan <- ProviderEvent{
356 Type: EventThinkingDelta,
357 Thinking: event.Delta.Thinking,
358 }
359 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
360 eventChan <- ProviderEvent{
361 Type: EventSignatureDelta,
362 Signature: event.Delta.Signature,
363 }
364 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
365 eventChan <- ProviderEvent{
366 Type: EventContentDelta,
367 Content: event.Delta.Text,
368 }
369 } else if event.Delta.Type == "input_json_delta" {
370 if currentToolCallID != "" {
371 eventChan <- ProviderEvent{
372 Type: EventToolUseDelta,
373 ToolCall: &message.ToolCall{
374 ID: currentToolCallID,
375 Finished: false,
376 Input: event.Delta.PartialJSON,
377 },
378 }
379 }
380 }
381 case anthropic.ContentBlockStopEvent:
382 if currentToolCallID != "" {
383 eventChan <- ProviderEvent{
384 Type: EventToolUseStop,
385 ToolCall: &message.ToolCall{
386 ID: currentToolCallID,
387 },
388 }
389 currentToolCallID = ""
390 } else {
391 eventChan <- ProviderEvent{Type: EventContentStop}
392 }
393
394 case anthropic.MessageStopEvent:
395 content := ""
396 for _, block := range accumulatedMessage.Content {
397 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
398 content += text.Text
399 }
400 }
401
402 eventChan <- ProviderEvent{
403 Type: EventComplete,
404 Response: &ProviderResponse{
405 Content: content,
406 ToolCalls: a.toolCalls(accumulatedMessage),
407 Usage: a.usage(accumulatedMessage),
408 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
409 },
410 Content: content,
411 }
412 }
413 }
414
415 err := anthropicStream.Err()
416 if err == nil || errors.Is(err, io.EOF) {
417 close(eventChan)
418 return
419 }
420
421 // If there is an error we are going to see if we can retry the call
422 retry, after, retryErr := a.shouldRetry(attempts, err)
423 if retryErr != nil {
424 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
425 close(eventChan)
426 return
427 }
428 if retry {
429 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
430 select {
431 case <-ctx.Done():
432 // context cancelled
433 if ctx.Err() != nil {
434 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
435 }
436 close(eventChan)
437 return
438 case <-time.After(time.Duration(after) * time.Millisecond):
439 continue
440 }
441 }
442 if ctx.Err() != nil {
443 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
444 }
445
446 close(eventChan)
447 return
448 }
449 }()
450 return eventChan
451}
452
453func (a *anthropicProvider) shouldRetry(attempts int, err error) (bool, int64, error) {
454 var apiErr *anthropic.Error
455 if !errors.As(err, &apiErr) {
456 return false, 0, err
457 }
458
459 if attempts > maxRetries {
460 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
461 }
462
463 if apiErr.StatusCode == 401 {
464 a.apiKey, err = a.resolver.ResolveValue(a.config.APIKey)
465 if err != nil {
466 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
467 }
468
469 a.client = createAnthropicClient(a.baseProvider, a.useBedrock)
470 return true, 0, nil
471 }
472
473 // Handle context limit exceeded error (400 Bad Request)
474 if apiErr.StatusCode == 400 {
475 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
476 a.adjustedMaxTokens = adjusted
477 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
478 return true, 0, nil
479 }
480 }
481
482 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
483 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
484 return false, 0, err
485 }
486
487 retryMs := 0
488 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
489
490 backoffMs := 2000 * (1 << (attempts - 1))
491 jitterMs := int(float64(backoffMs) * 0.2)
492 retryMs = backoffMs + jitterMs
493 if len(retryAfterValues) > 0 {
494 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
495 retryMs = retryMs * 1000
496 }
497 }
498 return true, int64(retryMs), nil
499}
500
501// handleContextLimitError parses context limit error and returns adjusted max_tokens
502func (a *anthropicProvider) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
503 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
504 errorMsg := apiErr.Error()
505
506 re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
507 matches := re.FindStringSubmatch(errorMsg)
508
509 if len(matches) != 4 {
510 return 0, false
511 }
512
513 inputTokens, err1 := strconv.Atoi(matches[1])
514 contextLimit, err2 := strconv.Atoi(matches[3])
515
516 if err1 != nil || err2 != nil {
517 return 0, false
518 }
519
520 // Calculate safe max_tokens with a buffer of 1000 tokens
521 safeMaxTokens := contextLimit - inputTokens - 1000
522
523 // Ensure we don't go below a minimum threshold
524 safeMaxTokens = max(safeMaxTokens, 1000)
525
526 return safeMaxTokens, true
527}
528
529func (a *anthropicProvider) toolCalls(msg anthropic.Message) []message.ToolCall {
530 var toolCalls []message.ToolCall
531
532 for _, block := range msg.Content {
533 switch variant := block.AsAny().(type) {
534 case anthropic.ToolUseBlock:
535 toolCall := message.ToolCall{
536 ID: variant.ID,
537 Name: variant.Name,
538 Input: string(variant.Input),
539 Type: string(variant.Type),
540 Finished: true,
541 }
542 toolCalls = append(toolCalls, toolCall)
543 }
544 }
545
546 return toolCalls
547}
548
549func (a *anthropicProvider) usage(msg anthropic.Message) TokenUsage {
550 return TokenUsage{
551 InputTokens: msg.Usage.InputTokens,
552 OutputTokens: msg.Usage.OutputTokens,
553 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
554 CacheReadTokens: msg.Usage.CacheReadInputTokens,
555 }
556}