1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "net/http"
11 "regexp"
12 "strconv"
13 "strings"
14 "time"
15
16 "github.com/anthropics/anthropic-sdk-go"
17 "github.com/anthropics/anthropic-sdk-go/bedrock"
18 "github.com/anthropics/anthropic-sdk-go/option"
19 "github.com/anthropics/anthropic-sdk-go/vertex"
20 "github.com/charmbracelet/catwalk/pkg/catwalk"
21 "github.com/charmbracelet/crush/internal/config"
22 "github.com/charmbracelet/crush/internal/llm/tools"
23 "github.com/charmbracelet/crush/internal/log"
24 "github.com/charmbracelet/crush/internal/message"
25)
26
// contextLimitRegex matches the API error text reported when the prompt plus
// the requested `max_tokens` does not fit into the model's context window.
// It has three numeric capture groups, in the order they appear in the
// message ("<a> + <b> > <c>"): handleContextLimitError reads the first as the
// input token count and the third as the context limit.
// It is compiled once at package initialization.
var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
29
// anthropicClient bundles the provider options, the backend variant and the
// SDK client used by the methods in this file to call the Anthropic Messages
// API.
type anthropicClient struct {
	// providerOptions carries the provider settings read throughout this file
	// (API key, extra headers/body, model selection, cache and token options).
	providerOptions providerClientOptions
	// tp is the backend variant; it is passed to createAnthropicClient whenever
	// the SDK client is (re)built.
	tp AnthropicClientType
	// client is the SDK client. shouldRetry replaces it after re-resolving the
	// API key on a 401 response.
	client anthropic.Client
	// adjustedMaxTokens is set by shouldRetry after a context-limit error and,
	// when greater than zero, overrides max_tokens in preparedMessages.
	adjustedMaxTokens int // Used when context limit is hit
}
36
// AnthropicClient is the type returned by newAnthropicClient. It is declared
// in terms of ProviderClient, which is defined elsewhere in this package.
type AnthropicClient ProviderClient
38
// AnthropicClientType selects which backend-specific options
// createAnthropicClient adds when it builds the SDK client.
type AnthropicClientType string

const (
	// AnthropicClientTypeNormal adds no backend-specific client option.
	AnthropicClientTypeNormal AnthropicClientType = "normal"
	// AnthropicClientTypeBedrock adds bedrock.WithLoadDefaultConfig to the
	// client options.
	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
	// AnthropicClientTypeVertex adds vertex.WithGoogleAuth to the client
	// options, using the "location" and "project" extra params.
	AnthropicClientTypeVertex AnthropicClientType = "vertex"
)
46
47func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
48 return &anthropicClient{
49 providerOptions: opts,
50 tp: tp,
51 client: createAnthropicClient(opts, tp),
52 }
53}
54
55func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
56 anthropicClientOptions := []option.RequestOption{}
57
58 // Check if Authorization header is provided in extra headers
59 hasBearerAuth := false
60 if opts.extraHeaders != nil {
61 for key := range opts.extraHeaders {
62 if strings.ToLower(key) == "authorization" {
63 hasBearerAuth = true
64 break
65 }
66 }
67 }
68
69 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
70
71 if opts.apiKey != "" && !hasBearerAuth {
72 if isBearerToken {
73 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
74 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
75 } else {
76 // Use standard X-Api-Key header
77 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
78 }
79 } else if hasBearerAuth {
80 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
81 }
82
83 if opts.baseURL != "" {
84 resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
85 if err == nil && resolvedBaseURL != "" {
86 anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
87 }
88 }
89
90 if config.Get().Options.Debug {
91 httpClient := log.NewHTTPClient()
92 anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
93 }
94
95 switch tp {
96 case AnthropicClientTypeBedrock:
97 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
98 case AnthropicClientTypeVertex:
99 project := opts.extraParams["project"]
100 location := opts.extraParams["location"]
101 anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
102 }
103 for key, header := range opts.extraHeaders {
104 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
105 }
106 for key, value := range opts.extraBody {
107 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
108 }
109 return anthropic.NewClient(anthropicClientOptions...)
110}
111
112func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
113 for i, msg := range messages {
114 cache := false
115 if i > len(messages)-3 {
116 cache = true
117 }
118 switch msg.Role {
119 case message.User:
120 content := anthropic.NewTextBlock(msg.Content().String())
121 if cache && !a.providerOptions.disableCache {
122 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
123 Type: "ephemeral",
124 }
125 }
126 var contentBlocks []anthropic.ContentBlockParamUnion
127 contentBlocks = append(contentBlocks, content)
128 for _, binaryContent := range msg.BinaryContent() {
129 if strings.HasPrefix(binaryContent.MIMEType, "image/") {
130 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
131 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
132 contentBlocks = append(contentBlocks, imageBlock)
133 continue
134 }
135 blk := anthropic.NewDocumentBlock(anthropic.PlainTextSourceParam{
136 Data: string(binaryContent.Data),
137 })
138 contentBlocks = append(contentBlocks, blk)
139 }
140 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
141
142 case message.Assistant:
143 blocks := []anthropic.ContentBlockParamUnion{}
144
145 // Add thinking blocks first if present (required when thinking is enabled with tool use)
146 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
147 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
148 blocks = append(blocks, thinkingBlock)
149 }
150
151 if msg.Content().String() != "" {
152 content := anthropic.NewTextBlock(msg.Content().String())
153 if cache && !a.providerOptions.disableCache {
154 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
155 Type: "ephemeral",
156 }
157 }
158 blocks = append(blocks, content)
159 }
160
161 for _, toolCall := range msg.ToolCalls() {
162 if !toolCall.Finished {
163 continue
164 }
165 var inputMap map[string]any
166 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
167 if err != nil {
168 continue
169 }
170 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
171 }
172
173 if len(blocks) == 0 {
174 continue
175 }
176 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
177
178 case message.Tool:
179 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
180 for i, toolResult := range msg.ToolResults() {
181 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
182 }
183 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
184 }
185 }
186 return anthropicMessages
187}
188
189func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
190 if len(tools) == 0 {
191 return nil
192 }
193 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
194
195 for i, tool := range tools {
196 info := tool.Info()
197 toolParam := anthropic.ToolParam{
198 Name: info.Name,
199 Description: anthropic.String(info.Description),
200 InputSchema: anthropic.ToolInputSchemaParam{
201 Properties: info.Parameters,
202 Required: info.Required,
203 },
204 }
205
206 if i == len(tools)-1 && !a.providerOptions.disableCache {
207 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
208 Type: "ephemeral",
209 }
210 }
211
212 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
213 }
214
215 return anthropicTools
216}
217
218func (a *anthropicClient) finishReason(reason string) message.FinishReason {
219 switch reason {
220 case "end_turn":
221 return message.FinishReasonEndTurn
222 case "max_tokens":
223 return message.FinishReasonMaxTokens
224 case "tool_use":
225 return message.FinishReasonToolUse
226 case "stop_sequence":
227 return message.FinishReasonEndTurn
228 default:
229 return message.FinishReasonUnknown
230 }
231}
232
233func (a *anthropicClient) isThinkingEnabled() bool {
234 cfg := config.Get()
235 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
236 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
237 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
238 }
239 return a.Model().CanReason && modelConfig.Think
240}
241
242func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
243 model := a.providerOptions.model(a.providerOptions.modelType)
244 var thinkingParam anthropic.ThinkingConfigParamUnion
245 cfg := config.Get()
246 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
247 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
248 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
249 }
250 temperature := anthropic.Float(0)
251
252 maxTokens := model.DefaultMaxTokens
253 if modelConfig.MaxTokens > 0 {
254 maxTokens = modelConfig.MaxTokens
255 }
256 if a.isThinkingEnabled() {
257 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
258 temperature = anthropic.Float(1)
259 }
260 // Override max tokens if set in provider options
261 if a.providerOptions.maxTokens > 0 {
262 maxTokens = a.providerOptions.maxTokens
263 }
264
265 // Use adjusted max tokens if context limit was hit
266 if a.adjustedMaxTokens > 0 {
267 maxTokens = int64(a.adjustedMaxTokens)
268 }
269
270 systemBlocks := []anthropic.TextBlockParam{}
271
272 // Add custom system prompt prefix if configured
273 if a.providerOptions.systemPromptPrefix != "" {
274 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
275 Text: a.providerOptions.systemPromptPrefix,
276 })
277 }
278
279 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
280 Text: a.providerOptions.systemMessage,
281 CacheControl: anthropic.CacheControlEphemeralParam{
282 Type: "ephemeral",
283 },
284 })
285
286 return anthropic.MessageNewParams{
287 Model: anthropic.Model(model.ID),
288 MaxTokens: maxTokens,
289 Temperature: temperature,
290 Messages: messages,
291 Tools: tools,
292 Thinking: thinkingParam,
293 System: systemBlocks,
294 }
295}
296
297func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
298 attempts := 0
299 for {
300 attempts++
301 // Prepare messages on each attempt in case max_tokens was adjusted
302 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
303
304 var opts []option.RequestOption
305 if a.isThinkingEnabled() {
306 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
307 }
308 anthropicResponse, err := a.client.Messages.New(
309 ctx,
310 preparedMessages,
311 opts...,
312 )
313 // If there is an error we are going to see if we can retry the call
314 if err != nil {
315 retry, after, retryErr := a.shouldRetry(attempts, err)
316 if retryErr != nil {
317 return nil, retryErr
318 }
319 if retry {
320 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
321 select {
322 case <-ctx.Done():
323 return nil, ctx.Err()
324 case <-time.After(time.Duration(after) * time.Millisecond):
325 continue
326 }
327 }
328 return nil, retryErr
329 }
330
331 content := ""
332 for _, block := range anthropicResponse.Content {
333 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
334 content += text.Text
335 }
336 }
337
338 return &ProviderResponse{
339 Content: content,
340 ToolCalls: a.toolCalls(*anthropicResponse),
341 Usage: a.usage(*anthropicResponse),
342 }, nil
343 }
344}
345
346func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
347 attempts := 0
348 eventChan := make(chan ProviderEvent)
349 go func() {
350 for {
351 attempts++
352 // Prepare messages on each attempt in case max_tokens was adjusted
353 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
354
355 var opts []option.RequestOption
356 if a.isThinkingEnabled() {
357 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
358 }
359
360 anthropicStream := a.client.Messages.NewStreaming(
361 ctx,
362 preparedMessages,
363 opts...,
364 )
365 accumulatedMessage := anthropic.Message{}
366
367 currentToolCallID := ""
368 for anthropicStream.Next() {
369 event := anthropicStream.Current()
370 err := accumulatedMessage.Accumulate(event)
371 if err != nil {
372 slog.Warn("Error accumulating message", "error", err)
373 continue
374 }
375
376 switch event := event.AsAny().(type) {
377 case anthropic.ContentBlockStartEvent:
378 switch event.ContentBlock.Type {
379 case "text":
380 eventChan <- ProviderEvent{Type: EventContentStart}
381 case "tool_use":
382 currentToolCallID = event.ContentBlock.ID
383 eventChan <- ProviderEvent{
384 Type: EventToolUseStart,
385 ToolCall: &message.ToolCall{
386 ID: event.ContentBlock.ID,
387 Name: event.ContentBlock.Name,
388 Finished: false,
389 },
390 }
391 }
392
393 case anthropic.ContentBlockDeltaEvent:
394 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
395 eventChan <- ProviderEvent{
396 Type: EventThinkingDelta,
397 Thinking: event.Delta.Thinking,
398 }
399 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
400 eventChan <- ProviderEvent{
401 Type: EventSignatureDelta,
402 Signature: event.Delta.Signature,
403 }
404 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
405 eventChan <- ProviderEvent{
406 Type: EventContentDelta,
407 Content: event.Delta.Text,
408 }
409 } else if event.Delta.Type == "input_json_delta" {
410 if currentToolCallID != "" {
411 eventChan <- ProviderEvent{
412 Type: EventToolUseDelta,
413 ToolCall: &message.ToolCall{
414 ID: currentToolCallID,
415 Finished: false,
416 Input: event.Delta.PartialJSON,
417 },
418 }
419 }
420 }
421 case anthropic.ContentBlockStopEvent:
422 if currentToolCallID != "" {
423 eventChan <- ProviderEvent{
424 Type: EventToolUseStop,
425 ToolCall: &message.ToolCall{
426 ID: currentToolCallID,
427 },
428 }
429 currentToolCallID = ""
430 } else {
431 eventChan <- ProviderEvent{Type: EventContentStop}
432 }
433
434 case anthropic.MessageStopEvent:
435 content := ""
436 for _, block := range accumulatedMessage.Content {
437 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
438 content += text.Text
439 }
440 }
441
442 eventChan <- ProviderEvent{
443 Type: EventComplete,
444 Response: &ProviderResponse{
445 Content: content,
446 ToolCalls: a.toolCalls(accumulatedMessage),
447 Usage: a.usage(accumulatedMessage),
448 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
449 },
450 Content: content,
451 }
452 }
453 }
454
455 err := anthropicStream.Err()
456 if err == nil || errors.Is(err, io.EOF) {
457 close(eventChan)
458 return
459 }
460
461 // If there is an error we are going to see if we can retry the call
462 retry, after, retryErr := a.shouldRetry(attempts, err)
463 if retryErr != nil {
464 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
465 close(eventChan)
466 return
467 }
468 if retry {
469 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
470 select {
471 case <-ctx.Done():
472 // context cancelled
473 if ctx.Err() != nil {
474 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
475 }
476 close(eventChan)
477 return
478 case <-time.After(time.Duration(after) * time.Millisecond):
479 continue
480 }
481 }
482 if ctx.Err() != nil {
483 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
484 }
485
486 close(eventChan)
487 return
488 }
489 }()
490 return eventChan
491}
492
493func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
494 var apiErr *anthropic.Error
495 if !errors.As(err, &apiErr) {
496 return false, 0, err
497 }
498
499 if attempts > maxRetries {
500 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
501 }
502
503 if apiErr.StatusCode == http.StatusUnauthorized {
504 prev := a.providerOptions.apiKey
505 // in case the key comes from a script, we try to re-evaluate it.
506 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
507 if err != nil {
508 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
509 }
510 // if it didn't change, do not retry.
511 if prev == a.providerOptions.apiKey {
512 return false, 0, err
513 }
514 a.client = createAnthropicClient(a.providerOptions, a.tp)
515 return true, 0, nil
516 }
517
518 // Handle context limit exceeded error (400 Bad Request)
519 if apiErr.StatusCode == http.StatusBadRequest {
520 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
521 a.adjustedMaxTokens = adjusted
522 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
523 return true, 0, nil
524 }
525 }
526
527 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
528 // 529 (unofficial): The service is overloaded
529 if apiErr.StatusCode != http.StatusTooManyRequests && apiErr.StatusCode != 529 && !isOverloaded {
530 return false, 0, err
531 }
532
533 retryMs := 0
534 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
535
536 backoffMs := 2000 * (1 << (attempts - 1))
537 jitterMs := int(float64(backoffMs) * 0.2)
538 retryMs = backoffMs + jitterMs
539 if len(retryAfterValues) > 0 {
540 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
541 retryMs = retryMs * 1000
542 }
543 }
544 return true, int64(retryMs), nil
545}
546
547// handleContextLimitError parses context limit error and returns adjusted max_tokens
548func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
549 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
550 errorMsg := apiErr.Error()
551
552 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
553
554 if len(matches) != 4 {
555 return 0, false
556 }
557
558 inputTokens, err1 := strconv.Atoi(matches[1])
559 contextLimit, err2 := strconv.Atoi(matches[3])
560
561 if err1 != nil || err2 != nil {
562 return 0, false
563 }
564
565 // Calculate safe max_tokens with a buffer of 1000 tokens
566 safeMaxTokens := contextLimit - inputTokens - 1000
567
568 // Ensure we don't go below a minimum threshold
569 safeMaxTokens = max(safeMaxTokens, 1000)
570
571 return safeMaxTokens, true
572}
573
574func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
575 var toolCalls []message.ToolCall
576
577 for _, block := range msg.Content {
578 switch variant := block.AsAny().(type) {
579 case anthropic.ToolUseBlock:
580 toolCall := message.ToolCall{
581 ID: variant.ID,
582 Name: variant.Name,
583 Input: string(variant.Input),
584 Type: string(variant.Type),
585 Finished: true,
586 }
587 toolCalls = append(toolCalls, toolCall)
588 }
589 }
590
591 return toolCalls
592}
593
594func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
595 return TokenUsage{
596 InputTokens: msg.Usage.InputTokens,
597 OutputTokens: msg.Usage.OutputTokens,
598 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
599 CacheReadTokens: msg.Usage.CacheReadInputTokens,
600 }
601}
602
603func (a *anthropicClient) Model() catwalk.Model {
604 return a.providerOptions.model(a.providerOptions.modelType)
605}