package provider

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/anthropics/anthropic-sdk-go/bedrock"
	"github.com/anthropics/anthropic-sdk-go/option"
	"github.com/anthropics/anthropic-sdk-go/vertex"
	"github.com/charmbracelet/catwalk/pkg/catwalk"
	"github.com/charmbracelet/crush/internal/config"
	"github.com/charmbracelet/crush/internal/llm/tools"
	"github.com/charmbracelet/crush/internal/message"
)

// Pre-compiled regex for parsing context limit errors.
var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)

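// anthropicClient implements ProviderClient against Anthropic's Messages API,
// covering the native API as well as the Bedrock and Vertex transports.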
type anthropicClient struct {
	providerOptions   providerClientOptions
	tp                AnthropicClientType
	client            anthropic.Client
	adjustedMaxTokens int // Used when the context limit is hit
}

type AnthropicClient ProviderClient

type AnthropicClientType string

const (
	AnthropicClientTypeNormal  AnthropicClientType = "normal"
	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
)

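// newAnthropicClient builds an AnthropicClient for the given transport type
// (normal, Bedrock, or Vertex).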
func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
	return &anthropicClient{
		providerOptions: opts,
		tp:              tp,
		client:          createAnthropicClient(opts, tp),
	}
}

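// createAnthropicClient assembles the underlying SDK client, wiring up
// authentication, transport-specific options, and any extra headers or body
// fields from the provider configuration.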
func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
	anthropicClientOptions := []option.RequestOption{}

	// Check if an Authorization header is provided in the extra headers.
	hasBearerAuth := false
	for key := range opts.extraHeaders {
		if strings.EqualFold(key, "authorization") {
			hasBearerAuth = true
			break
		}
	}

	isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")

	if opts.apiKey != "" && !hasBearerAuth {
		if isBearerToken {
			slog.Debug("API key starts with 'Bearer ', using as Authorization header")
			anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
		} else {
			// Use the standard X-Api-Key header.
			anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
		}
	} else if hasBearerAuth {
		slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
	}
	switch tp {
	case AnthropicClientTypeBedrock:
		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
	case AnthropicClientTypeVertex:
		project := opts.extraParams["project"]
		location := opts.extraParams["location"]
		anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
	}
	for key, header := range opts.extraHeaders {
		anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
	}
	for key, value := range opts.extraBody {
		anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
	}
	return anthropic.NewClient(anthropicClientOptions...)
}

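// convertMessages translates the internal message history into Anthropic
// message params, marking the most recent messages as cacheable when prompt
// caching is enabled.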
func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
	for i, msg := range messages {
		// Mark the last two messages as cacheable so the prompt prefix can
		// be reused by Anthropic's prompt caching.
		cache := i > len(messages)-3
		switch msg.Role {
		case message.User:
			content := anthropic.NewTextBlock(msg.Content().String())
			if cache && !a.providerOptions.disableCache {
				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				}
			}
			var contentBlocks []anthropic.ContentBlockParamUnion
			contentBlocks = append(contentBlocks, content)
			for _, binaryContent := range msg.BinaryContent() {
				base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
				contentBlocks = append(contentBlocks, imageBlock)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))

		case message.Assistant:
			blocks := []anthropic.ContentBlockParamUnion{}

			// Add thinking blocks first if present (required when thinking
			// is enabled alongside tool use).
			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
				blocks = append(blocks, thinkingBlock)
			}

			if msg.Content().String() != "" {
				content := anthropic.NewTextBlock(msg.Content().String())
				if cache && !a.providerOptions.disableCache {
					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
						Type: "ephemeral",
					}
				}
				blocks = append(blocks, content)
			}

			for _, toolCall := range msg.ToolCalls() {
				var inputMap map[string]any
				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
				if err != nil {
					continue
				}
				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
			}

			if len(blocks) == 0 {
				slog.Warn("Skipping assistant message with no content blocks; this should not happen")
				continue
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))

		case message.Tool:
			toolResults := msg.ToolResults()
			results := make([]anthropic.ContentBlockParamUnion, len(toolResults))
			for i, toolResult := range toolResults {
				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
			}
			// Tool results are sent back to the API as a user message.
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
		}
	}
	return
}

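// convertTools maps the internal tool definitions onto Anthropic tool params,
// marking the last tool as cacheable so the tool definitions become part of
// the cached prompt prefix.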
func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))

	for i, tool := range tools {
		info := tool.Info()
		toolParam := anthropic.ToolParam{
			Name:        info.Name,
			Description: anthropic.String(info.Description),
			InputSchema: anthropic.ToolInputSchemaParam{
				Properties: info.Parameters,
				// TODO: figure out how to communicate the required fields to Claude.
			},
		}

		if i == len(tools)-1 && !a.providerOptions.disableCache {
			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
				Type: "ephemeral",
			}
		}

		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
	}

	return anthropicTools
}

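// finishReason maps Anthropic stop reasons onto internal finish reasons.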
func (a *anthropicClient) finishReason(reason string) message.FinishReason {
	switch reason {
	case "end_turn", "stop_sequence":
		return message.FinishReasonEndTurn
	case "max_tokens":
		return message.FinishReasonMaxTokens
	case "tool_use":
		return message.FinishReasonToolUse
	default:
		return message.FinishReasonUnknown
	}
}

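// isThinkingEnabled reports whether extended thinking should be requested:
// the model must be able to reason and the user must have enabled thinking
// for the selected model type.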
func (a *anthropicClient) isThinkingEnabled() bool {
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	return a.Model().CanReason && modelConfig.Think
}

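// preparedMessages assembles the final request params: model, token limits,
// thinking configuration, system prompt blocks, messages, and tools.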
func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
	model := a.providerOptions.model(a.providerOptions.modelType)
	var thinkingParam anthropic.ThinkingConfigParamUnion
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	temperature := anthropic.Float(0)

	maxTokens := model.DefaultMaxTokens
	if modelConfig.MaxTokens > 0 {
		maxTokens = modelConfig.MaxTokens
	}
	if a.isThinkingEnabled() {
		// Give thinking up to 80% of the output budget; the API requires
		// temperature 1 when extended thinking is enabled.
		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
		temperature = anthropic.Float(1)
	}
	// Override max tokens if set in provider options.
	if a.providerOptions.maxTokens > 0 {
		maxTokens = a.providerOptions.maxTokens
	}

	// Use adjusted max tokens if the context limit was hit.
	if a.adjustedMaxTokens > 0 {
		maxTokens = int64(a.adjustedMaxTokens)
	}

	systemBlocks := []anthropic.TextBlockParam{}

	// Add the custom system prompt prefix if configured.
	if a.providerOptions.systemPromptPrefix != "" {
		systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
			Text: a.providerOptions.systemPromptPrefix,
			CacheControl: anthropic.CacheControlEphemeralParam{
				Type: "ephemeral",
			},
		})
	}

	systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
		Text: a.providerOptions.systemMessage,
		CacheControl: anthropic.CacheControlEphemeralParam{
			Type: "ephemeral",
		},
	})

	return anthropic.MessageNewParams{
		Model:       anthropic.Model(model.ID),
		MaxTokens:   maxTokens,
		Temperature: temperature,
		Messages:    messages,
		Tools:       tools,
		Thinking:    thinkingParam,
		System:      systemBlocks,
	}
}

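// send performs a single (non-streaming) completion call, retrying on
// retryable errors such as rate limits and context-limit overruns.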
func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
	cfg := config.Get()

	attempts := 0
	for {
		attempts++
		// Prepare messages on each attempt in case max_tokens was adjusted.
		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
		if cfg.Options.Debug {
			jsonData, _ := json.Marshal(preparedMessages)
			slog.Debug("Prepared messages", "messages", string(jsonData))
		}

		var opts []option.RequestOption
		if a.isThinkingEnabled() {
			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
		}
		anthropicResponse, err := a.client.Messages.New(
			ctx,
			preparedMessages,
			opts...,
		)
		// If there is an error, see whether the call can be retried.
		if err != nil {
			slog.Error("Error in Anthropic API call", "error", err)
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				return nil, retryErr
			}
			if retry {
				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
				select {
				case <-ctx.Done():
					return nil, ctx.Err()
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			return nil, err
		}

		content := ""
		for _, block := range anthropicResponse.Content {
			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
				content += text.Text
			}
		}

		return &ProviderResponse{
			Content:   content,
			ToolCalls: a.toolCalls(*anthropicResponse),
			Usage:     a.usage(*anthropicResponse),
		}, nil
	}
}

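// stream performs a streaming completion call, emitting provider events for
// content, thinking, and tool-use deltas on the returned channel. The channel
// is closed when the stream completes, fails permanently, or the context is
// cancelled.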
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	cfg := config.Get()
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted.
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
			if cfg.Options.Debug {
				jsonData, _ := json.Marshal(preparedMessages)
				slog.Debug("Prepared messages", "messages", string(jsonData))
			}

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			accumulatedMessage := anthropic.Message{}

			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}
				case anthropic.ContentBlockStopEvent:
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}

			// If there is an error, see whether the call can be retried.
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
				select {
				case <-ctx.Done():
					// Context cancelled while waiting to retry.
					eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			// Not retryable: surface the error rather than dropping it.
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			} else {
				eventChan <- ProviderEvent{Type: EventError, Error: err}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}

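// shouldRetry decides whether a failed request should be retried. It handles
// 401s by re-resolving the API key, 400s caused by context-limit overruns by
// shrinking max_tokens, and 429/529/overload errors with exponential backoff,
// honoring the Retry-After header when present. It returns whether to retry,
// the delay in milliseconds, and a terminal error when retrying is not possible.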
func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
	var apiErr *anthropic.Error
	if !errors.As(err, &apiErr) {
		return false, 0, err
	}

	if attempts > maxRetries {
		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
	}

	// On 401, re-resolve the API key (it may have been rotated) and rebuild
	// the client before retrying.
	if apiErr.StatusCode == 401 {
		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
		if err != nil {
			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
		}
		a.client = createAnthropicClient(a.providerOptions, a.tp)
		return true, 0, nil
	}

	// Handle context limit exceeded errors (400 Bad Request).
	if apiErr.StatusCode == 400 {
		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
			a.adjustedMaxTokens = adjusted
			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
			return true, 0, nil
		}
	}

	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
		return false, 0, err
	}

	// Exponential backoff with a 20% margin, overridden by the server's
	// Retry-After header (in seconds) when one is provided.
	backoffMs := 2000 * (1 << (attempts - 1))
	retryMs := backoffMs + int(float64(backoffMs)*0.2)
	if values := apiErr.Response.Header.Values("Retry-After"); len(values) > 0 {
		if seconds, err := strconv.Atoi(values[0]); err == nil {
			retryMs = seconds * 1000
		}
	}
	return true, int64(retryMs), nil
}

// handleContextLimitError parses a context limit error and returns an
// adjusted max_tokens value that should fit within the model's context window.
func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
	// Parse error messages like:
	// "input length and `max_tokens` exceed context limit: 154978 + 50000 > 200000"
	errorMsg := apiErr.Error()

	matches := contextLimitRegex.FindStringSubmatch(errorMsg)
	if len(matches) != 4 {
		return 0, false
	}

	inputTokens, err1 := strconv.Atoi(matches[1])
	contextLimit, err2 := strconv.Atoi(matches[3])
	if err1 != nil || err2 != nil {
		return 0, false
	}

	// Calculate a safe max_tokens with a buffer of 1000 tokens.
	safeMaxTokens := contextLimit - inputTokens - 1000

	// Ensure we don't go below a minimum threshold.
	safeMaxTokens = max(safeMaxTokens, 1000)

	return safeMaxTokens, true
}

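// toolCalls extracts completed tool calls from an Anthropic message.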
func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
	var toolCalls []message.ToolCall

	for _, block := range msg.Content {
		switch variant := block.AsAny().(type) {
		case anthropic.ToolUseBlock:
			toolCall := message.ToolCall{
				ID:       variant.ID,
				Name:     variant.Name,
				Input:    string(variant.Input),
				Type:     string(variant.Type),
				Finished: true,
			}
			toolCalls = append(toolCalls, toolCall)
		}
	}

	return toolCalls
}

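// usage converts Anthropic usage data, including cache statistics, into the
// internal TokenUsage representation.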
func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
	return TokenUsage{
		InputTokens:         msg.Usage.InputTokens,
		OutputTokens:        msg.Usage.OutputTokens,
		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
	}
}

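// Model returns the catwalk model currently selected for this client.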
func (a *anthropicClient) Model() catwalk.Model {
	return a.providerOptions.model(a.providerOptions.modelType)
}