1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/charmbracelet/catwalk/pkg/catwalk"
19 "github.com/charmbracelet/crush/internal/config"
20 "github.com/charmbracelet/crush/internal/llm/tools"
21 "github.com/charmbracelet/crush/internal/message"
22)
23
// contextLimitRegex captures the three numbers reported in a "context limit
// exceeded" error message: the input length, the requested max_tokens and the
// context limit, in that order. It is compiled once at package initialization.
var contextLimitRegex = regexp.MustCompile(
	"input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)",
)
26
27type anthropicClient struct {
28 providerOptions providerClientOptions
29 useBedrock bool
30 client anthropic.Client
31 adjustedMaxTokens int // Used when context limit is hit
32}
33
34type AnthropicClient ProviderClient
35
36func newAnthropicClient(opts providerClientOptions, useBedrock bool) AnthropicClient {
37 return &anthropicClient{
38 providerOptions: opts,
39 client: createAnthropicClient(opts, useBedrock),
40 }
41}
42
43func createAnthropicClient(opts providerClientOptions, useBedrock bool) anthropic.Client {
44 anthropicClientOptions := []option.RequestOption{}
45
46 // Check if Authorization header is provided in extra headers
47 hasBearerAuth := false
48 if opts.extraHeaders != nil {
49 for key := range opts.extraHeaders {
50 if strings.ToLower(key) == "authorization" {
51 hasBearerAuth = true
52 break
53 }
54 }
55 }
56
57 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
58
59 if opts.apiKey != "" && !hasBearerAuth {
60 if isBearerToken {
61 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
62 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
63 } else {
64 // Use standard X-Api-Key header
65 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
66 }
67 } else if hasBearerAuth {
68 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
69 }
70 if useBedrock {
71 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
72 }
73 for key, header := range opts.extraHeaders {
74 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
75 }
76 for key, value := range opts.extraBody {
77 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
78 }
79 return anthropic.NewClient(anthropicClientOptions...)
80}
81
82func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
83 for i, msg := range messages {
84 cache := false
85 if i > len(messages)-3 {
86 cache = true
87 }
88 switch msg.Role {
89 case message.User:
90 content := anthropic.NewTextBlock(msg.Content().String())
91 if cache && !a.providerOptions.disableCache {
92 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
93 Type: "ephemeral",
94 }
95 }
96 var contentBlocks []anthropic.ContentBlockParamUnion
97 contentBlocks = append(contentBlocks, content)
98 for _, binaryContent := range msg.BinaryContent() {
99 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
100 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
101 contentBlocks = append(contentBlocks, imageBlock)
102 }
103 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
104
105 case message.Assistant:
106 blocks := []anthropic.ContentBlockParamUnion{}
107
108 // Add thinking blocks first if present (required when thinking is enabled with tool use)
109 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
110 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
111 blocks = append(blocks, thinkingBlock)
112 }
113
114 if msg.Content().String() != "" {
115 content := anthropic.NewTextBlock(msg.Content().String())
116 if cache && !a.providerOptions.disableCache {
117 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
118 Type: "ephemeral",
119 }
120 }
121 blocks = append(blocks, content)
122 }
123
124 for _, toolCall := range msg.ToolCalls() {
125 var inputMap map[string]any
126 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
127 if err != nil {
128 continue
129 }
130 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
131 }
132
133 if len(blocks) == 0 {
134 slog.Warn("There is a message without content, investigate, this should not happen")
135 continue
136 }
137 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
138
139 case message.Tool:
140 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
141 for i, toolResult := range msg.ToolResults() {
142 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
143 }
144 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
145 }
146 }
147 return
148}
149
150func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
151 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
152
153 for i, tool := range tools {
154 info := tool.Info()
155 toolParam := anthropic.ToolParam{
156 Name: info.Name,
157 Description: anthropic.String(info.Description),
158 InputSchema: anthropic.ToolInputSchemaParam{
159 Properties: info.Parameters,
160 // TODO: figure out how we can tell claude the required fields?
161 },
162 }
163
164 if i == len(tools)-1 && !a.providerOptions.disableCache {
165 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
166 Type: "ephemeral",
167 }
168 }
169
170 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
171 }
172
173 return anthropicTools
174}
175
176func (a *anthropicClient) finishReason(reason string) message.FinishReason {
177 switch reason {
178 case "end_turn":
179 return message.FinishReasonEndTurn
180 case "max_tokens":
181 return message.FinishReasonMaxTokens
182 case "tool_use":
183 return message.FinishReasonToolUse
184 case "stop_sequence":
185 return message.FinishReasonEndTurn
186 default:
187 return message.FinishReasonUnknown
188 }
189}
190
191func (a *anthropicClient) isThinkingEnabled() bool {
192 cfg := config.Get()
193 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
194 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
195 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
196 }
197 return a.Model().CanReason && modelConfig.Think
198}
199
200func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
201 model := a.providerOptions.model(a.providerOptions.modelType)
202 var thinkingParam anthropic.ThinkingConfigParamUnion
203 cfg := config.Get()
204 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
205 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
206 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
207 }
208 temperature := anthropic.Float(0)
209
210 maxTokens := model.DefaultMaxTokens
211 if modelConfig.MaxTokens > 0 {
212 maxTokens = modelConfig.MaxTokens
213 }
214 if a.isThinkingEnabled() {
215 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
216 temperature = anthropic.Float(1)
217 }
218 // Override max tokens if set in provider options
219 if a.providerOptions.maxTokens > 0 {
220 maxTokens = a.providerOptions.maxTokens
221 }
222
223 // Use adjusted max tokens if context limit was hit
224 if a.adjustedMaxTokens > 0 {
225 maxTokens = int64(a.adjustedMaxTokens)
226 }
227
228 systemBlocks := []anthropic.TextBlockParam{}
229
230 // Add custom system prompt prefix if configured
231 if a.providerOptions.systemPromptPrefix != "" {
232 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
233 Text: a.providerOptions.systemPromptPrefix,
234 CacheControl: anthropic.CacheControlEphemeralParam{
235 Type: "ephemeral",
236 },
237 })
238 }
239
240 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
241 Text: a.providerOptions.systemMessage,
242 CacheControl: anthropic.CacheControlEphemeralParam{
243 Type: "ephemeral",
244 },
245 })
246
247 return anthropic.MessageNewParams{
248 Model: anthropic.Model(model.ID),
249 MaxTokens: maxTokens,
250 Temperature: temperature,
251 Messages: messages,
252 Tools: tools,
253 Thinking: thinkingParam,
254 System: systemBlocks,
255 }
256}
257
258func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
259 cfg := config.Get()
260
261 attempts := 0
262 for {
263 attempts++
264 // Prepare messages on each attempt in case max_tokens was adjusted
265 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
266 if cfg.Options.Debug {
267 jsonData, _ := json.Marshal(preparedMessages)
268 slog.Debug("Prepared messages", "messages", string(jsonData))
269 }
270
271 var opts []option.RequestOption
272 if a.isThinkingEnabled() {
273 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
274 }
275 anthropicResponse, err := a.client.Messages.New(
276 ctx,
277 preparedMessages,
278 opts...,
279 )
280 // If there is an error we are going to see if we can retry the call
281 if err != nil {
282 slog.Error("Error in Anthropic API call", "error", err)
283 retry, after, retryErr := a.shouldRetry(attempts, err)
284 if retryErr != nil {
285 return nil, retryErr
286 }
287 if retry {
288 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
289 select {
290 case <-ctx.Done():
291 return nil, ctx.Err()
292 case <-time.After(time.Duration(after) * time.Millisecond):
293 continue
294 }
295 }
296 return nil, retryErr
297 }
298
299 content := ""
300 for _, block := range anthropicResponse.Content {
301 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
302 content += text.Text
303 }
304 }
305
306 return &ProviderResponse{
307 Content: content,
308 ToolCalls: a.toolCalls(*anthropicResponse),
309 Usage: a.usage(*anthropicResponse),
310 }, nil
311 }
312}
313
314func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
315 cfg := config.Get()
316 attempts := 0
317 eventChan := make(chan ProviderEvent)
318 go func() {
319 for {
320 attempts++
321 // Prepare messages on each attempt in case max_tokens was adjusted
322 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
323 if cfg.Options.Debug {
324 jsonData, _ := json.Marshal(preparedMessages)
325 slog.Debug("Prepared messages", "messages", string(jsonData))
326 }
327
328 var opts []option.RequestOption
329 if a.isThinkingEnabled() {
330 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
331 }
332
333 anthropicStream := a.client.Messages.NewStreaming(
334 ctx,
335 preparedMessages,
336 opts...,
337 )
338 accumulatedMessage := anthropic.Message{}
339
340 currentToolCallID := ""
341 for anthropicStream.Next() {
342 event := anthropicStream.Current()
343 err := accumulatedMessage.Accumulate(event)
344 if err != nil {
345 slog.Warn("Error accumulating message", "error", err)
346 continue
347 }
348
349 switch event := event.AsAny().(type) {
350 case anthropic.ContentBlockStartEvent:
351 switch event.ContentBlock.Type {
352 case "text":
353 eventChan <- ProviderEvent{Type: EventContentStart}
354 case "tool_use":
355 currentToolCallID = event.ContentBlock.ID
356 eventChan <- ProviderEvent{
357 Type: EventToolUseStart,
358 ToolCall: &message.ToolCall{
359 ID: event.ContentBlock.ID,
360 Name: event.ContentBlock.Name,
361 Finished: false,
362 },
363 }
364 }
365
366 case anthropic.ContentBlockDeltaEvent:
367 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
368 eventChan <- ProviderEvent{
369 Type: EventThinkingDelta,
370 Thinking: event.Delta.Thinking,
371 }
372 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
373 eventChan <- ProviderEvent{
374 Type: EventSignatureDelta,
375 Signature: event.Delta.Signature,
376 }
377 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
378 eventChan <- ProviderEvent{
379 Type: EventContentDelta,
380 Content: event.Delta.Text,
381 }
382 } else if event.Delta.Type == "input_json_delta" {
383 if currentToolCallID != "" {
384 eventChan <- ProviderEvent{
385 Type: EventToolUseDelta,
386 ToolCall: &message.ToolCall{
387 ID: currentToolCallID,
388 Finished: false,
389 Input: event.Delta.PartialJSON,
390 },
391 }
392 }
393 }
394 case anthropic.ContentBlockStopEvent:
395 if currentToolCallID != "" {
396 eventChan <- ProviderEvent{
397 Type: EventToolUseStop,
398 ToolCall: &message.ToolCall{
399 ID: currentToolCallID,
400 },
401 }
402 currentToolCallID = ""
403 } else {
404 eventChan <- ProviderEvent{Type: EventContentStop}
405 }
406
407 case anthropic.MessageStopEvent:
408 content := ""
409 for _, block := range accumulatedMessage.Content {
410 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
411 content += text.Text
412 }
413 }
414
415 eventChan <- ProviderEvent{
416 Type: EventComplete,
417 Response: &ProviderResponse{
418 Content: content,
419 ToolCalls: a.toolCalls(accumulatedMessage),
420 Usage: a.usage(accumulatedMessage),
421 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
422 },
423 Content: content,
424 }
425 }
426 }
427
428 err := anthropicStream.Err()
429 if err == nil || errors.Is(err, io.EOF) {
430 close(eventChan)
431 return
432 }
433
434 // If there is an error we are going to see if we can retry the call
435 retry, after, retryErr := a.shouldRetry(attempts, err)
436 if retryErr != nil {
437 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
438 close(eventChan)
439 return
440 }
441 if retry {
442 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
443 select {
444 case <-ctx.Done():
445 // context cancelled
446 if ctx.Err() != nil {
447 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
448 }
449 close(eventChan)
450 return
451 case <-time.After(time.Duration(after) * time.Millisecond):
452 continue
453 }
454 }
455 if ctx.Err() != nil {
456 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
457 }
458
459 close(eventChan)
460 return
461 }
462 }()
463 return eventChan
464}
465
466func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
467 var apiErr *anthropic.Error
468 if !errors.As(err, &apiErr) {
469 return false, 0, err
470 }
471
472 if attempts > maxRetries {
473 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
474 }
475
476 if apiErr.StatusCode == 401 {
477 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
478 if err != nil {
479 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
480 }
481 a.client = createAnthropicClient(a.providerOptions, a.useBedrock)
482 return true, 0, nil
483 }
484
485 // Handle context limit exceeded error (400 Bad Request)
486 if apiErr.StatusCode == 400 {
487 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
488 a.adjustedMaxTokens = adjusted
489 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
490 return true, 0, nil
491 }
492 }
493
494 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
495 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
496 return false, 0, err
497 }
498
499 retryMs := 0
500 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
501
502 backoffMs := 2000 * (1 << (attempts - 1))
503 jitterMs := int(float64(backoffMs) * 0.2)
504 retryMs = backoffMs + jitterMs
505 if len(retryAfterValues) > 0 {
506 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
507 retryMs = retryMs * 1000
508 }
509 }
510 return true, int64(retryMs), nil
511}
512
513// handleContextLimitError parses context limit error and returns adjusted max_tokens
514func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
515 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
516 errorMsg := apiErr.Error()
517
518 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
519
520 if len(matches) != 4 {
521 return 0, false
522 }
523
524 inputTokens, err1 := strconv.Atoi(matches[1])
525 contextLimit, err2 := strconv.Atoi(matches[3])
526
527 if err1 != nil || err2 != nil {
528 return 0, false
529 }
530
531 // Calculate safe max_tokens with a buffer of 1000 tokens
532 safeMaxTokens := contextLimit - inputTokens - 1000
533
534 // Ensure we don't go below a minimum threshold
535 safeMaxTokens = max(safeMaxTokens, 1000)
536
537 return safeMaxTokens, true
538}
539
540func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
541 var toolCalls []message.ToolCall
542
543 for _, block := range msg.Content {
544 switch variant := block.AsAny().(type) {
545 case anthropic.ToolUseBlock:
546 toolCall := message.ToolCall{
547 ID: variant.ID,
548 Name: variant.Name,
549 Input: string(variant.Input),
550 Type: string(variant.Type),
551 Finished: true,
552 }
553 toolCalls = append(toolCalls, toolCall)
554 }
555 }
556
557 return toolCalls
558}
559
560func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
561 return TokenUsage{
562 InputTokens: msg.Usage.InputTokens,
563 OutputTokens: msg.Usage.OutputTokens,
564 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
565 CacheReadTokens: msg.Usage.CacheReadInputTokens,
566 }
567}
568
569func (a *anthropicClient) Model() catwalk.Model {
570 return a.providerOptions.model(a.providerOptions.modelType)
571}