1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/anthropics/anthropic-sdk-go/vertex"
19 "github.com/charmbracelet/catwalk/pkg/catwalk"
20 "github.com/charmbracelet/crush/internal/config"
21 "github.com/charmbracelet/crush/internal/llm/tools"
22 "github.com/charmbracelet/crush/internal/log"
23 "github.com/charmbracelet/crush/internal/message"
24)
25
// contextLimitRegex recognizes the API's "context limit exceeded" error text.
// It is compiled once at package scope and exposes three capture groups: the
// two numbers on the left of the comparison and the limit on the right
// ("<input> + <max_tokens> > <limit>").
var contextLimitRegex = regexp.MustCompile(
	"input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)",
)
28
// anthropicClient is the provider client implementation that talks to the
// Anthropic Messages API through the Anthropic Go SDK.
type anthropicClient struct {
	// providerOptions holds the options the client was constructed with.
	providerOptions providerClientOptions
	// tp is the backend flavor handed to createAnthropicClient.
	tp AnthropicClientType
	// client is the SDK client; shouldRetry replaces it after a 401 response.
	client anthropic.Client
	// adjustedMaxTokens, when greater than zero, overrides max_tokens in
	// preparedMessages. shouldRetry sets it after a context-limit error.
	adjustedMaxTokens int // Used when context limit is hit
}
35
// AnthropicClient is a named type whose underlying type is ProviderClient
// (declared elsewhere in this package). newAnthropicClient returns its
// implementation as this type.
type AnthropicClient ProviderClient
37
// AnthropicClientType selects which backend-specific request options
// createAnthropicClient adds to the SDK client.
type AnthropicClientType string

const (
	// AnthropicClientTypeNormal adds no backend-specific options.
	AnthropicClientTypeNormal AnthropicClientType = "normal"
	// AnthropicClientTypeBedrock adds bedrock.WithLoadDefaultConfig.
	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
	// AnthropicClientTypeVertex adds vertex.WithGoogleAuth, using the
	// "location" and "project" entries of the provider's extra params.
	AnthropicClientTypeVertex AnthropicClientType = "vertex"
)
45
46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
47 return &anthropicClient{
48 providerOptions: opts,
49 tp: tp,
50 client: createAnthropicClient(opts, tp),
51 }
52}
53
54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
55 anthropicClientOptions := []option.RequestOption{}
56
57 // Check if Authorization header is provided in extra headers
58 hasBearerAuth := false
59 if opts.extraHeaders != nil {
60 for key := range opts.extraHeaders {
61 if strings.ToLower(key) == "authorization" {
62 hasBearerAuth = true
63 break
64 }
65 }
66 }
67
68 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
69
70 if opts.apiKey != "" && !hasBearerAuth {
71 if isBearerToken {
72 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
73 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
74 } else {
75 // Use standard X-Api-Key header
76 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
77 }
78 } else if hasBearerAuth {
79 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
80 }
81
82 if opts.baseURL != "" {
83 resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
84 if err == nil && resolvedBaseURL != "" {
85 anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
86 }
87 }
88
89 if config.Get().Options.Debug {
90 httpClient := log.NewHTTPClient()
91 anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
92 }
93
94 switch tp {
95 case AnthropicClientTypeBedrock:
96 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
97 case AnthropicClientTypeVertex:
98 project := opts.extraParams["project"]
99 location := opts.extraParams["location"]
100 anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
101 }
102 for key, header := range opts.extraHeaders {
103 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
104 }
105 for key, value := range opts.extraBody {
106 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
107 }
108 return anthropic.NewClient(anthropicClientOptions...)
109}
110
111func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
112 for i, msg := range messages {
113 cache := false
114 if i > len(messages)-3 {
115 cache = true
116 }
117 switch msg.Role {
118 case message.User:
119 content := anthropic.NewTextBlock(msg.Content().String())
120 if cache && !a.providerOptions.disableCache {
121 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
122 Type: "ephemeral",
123 }
124 }
125 var contentBlocks []anthropic.ContentBlockParamUnion
126 contentBlocks = append(contentBlocks, content)
127 for _, binaryContent := range msg.BinaryContent() {
128 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
129 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
130 contentBlocks = append(contentBlocks, imageBlock)
131 }
132 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
133
134 case message.Assistant:
135 blocks := []anthropic.ContentBlockParamUnion{}
136
137 // Add thinking blocks first if present (required when thinking is enabled with tool use)
138 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
139 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
140 blocks = append(blocks, thinkingBlock)
141 }
142
143 if msg.Content().String() != "" {
144 content := anthropic.NewTextBlock(msg.Content().String())
145 if cache && !a.providerOptions.disableCache {
146 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
147 Type: "ephemeral",
148 }
149 }
150 blocks = append(blocks, content)
151 }
152
153 for _, toolCall := range msg.ToolCalls() {
154 var inputMap map[string]any
155 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
156 if err != nil {
157 continue
158 }
159 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
160 }
161
162 if len(blocks) == 0 {
163 continue
164 }
165 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
166
167 case message.Tool:
168 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
169 for i, toolResult := range msg.ToolResults() {
170 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
171 }
172 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
173 }
174 }
175 return
176}
177
178func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
179 if len(tools) == 0 {
180 return nil
181 }
182 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
183
184 for i, tool := range tools {
185 info := tool.Info()
186 toolParam := anthropic.ToolParam{
187 Name: info.Name,
188 Description: anthropic.String(info.Description),
189 InputSchema: anthropic.ToolInputSchemaParam{
190 Properties: info.Parameters,
191 Required: info.Required,
192 },
193 }
194
195 if i == len(tools)-1 && !a.providerOptions.disableCache {
196 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
197 Type: "ephemeral",
198 }
199 }
200
201 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
202 }
203
204 return anthropicTools
205}
206
207func (a *anthropicClient) finishReason(reason string) message.FinishReason {
208 switch reason {
209 case "end_turn":
210 return message.FinishReasonEndTurn
211 case "max_tokens":
212 return message.FinishReasonMaxTokens
213 case "tool_use":
214 return message.FinishReasonToolUse
215 case "stop_sequence":
216 return message.FinishReasonEndTurn
217 default:
218 return message.FinishReasonUnknown
219 }
220}
221
222func (a *anthropicClient) isThinkingEnabled() bool {
223 cfg := config.Get()
224 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
225 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
226 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
227 }
228 return a.Model().CanReason && modelConfig.Think
229}
230
231func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
232 model := a.providerOptions.model(a.providerOptions.modelType)
233 var thinkingParam anthropic.ThinkingConfigParamUnion
234 cfg := config.Get()
235 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
236 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
237 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
238 }
239 temperature := anthropic.Float(0)
240
241 maxTokens := model.DefaultMaxTokens
242 if modelConfig.MaxTokens > 0 {
243 maxTokens = modelConfig.MaxTokens
244 }
245 if a.isThinkingEnabled() {
246 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
247 temperature = anthropic.Float(1)
248 }
249 // Override max tokens if set in provider options
250 if a.providerOptions.maxTokens > 0 {
251 maxTokens = a.providerOptions.maxTokens
252 }
253
254 // Use adjusted max tokens if context limit was hit
255 if a.adjustedMaxTokens > 0 {
256 maxTokens = int64(a.adjustedMaxTokens)
257 }
258
259 systemBlocks := []anthropic.TextBlockParam{}
260
261 // Add custom system prompt prefix if configured
262 if a.providerOptions.systemPromptPrefix != "" {
263 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
264 Text: a.providerOptions.systemPromptPrefix,
265 })
266 }
267
268 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
269 Text: a.providerOptions.systemMessage,
270 CacheControl: anthropic.CacheControlEphemeralParam{
271 Type: "ephemeral",
272 },
273 })
274
275 return anthropic.MessageNewParams{
276 Model: anthropic.Model(model.ID),
277 MaxTokens: maxTokens,
278 Temperature: temperature,
279 Messages: messages,
280 Tools: tools,
281 Thinking: thinkingParam,
282 System: systemBlocks,
283 }
284}
285
286func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
287 attempts := 0
288 for {
289 attempts++
290 // Prepare messages on each attempt in case max_tokens was adjusted
291 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
292
293 var opts []option.RequestOption
294 if a.isThinkingEnabled() {
295 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
296 }
297 anthropicResponse, err := a.client.Messages.New(
298 ctx,
299 preparedMessages,
300 opts...,
301 )
302 // If there is an error we are going to see if we can retry the call
303 if err != nil {
304 retry, after, retryErr := a.shouldRetry(attempts, err)
305 if retryErr != nil {
306 return nil, retryErr
307 }
308 if retry {
309 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
310 select {
311 case <-ctx.Done():
312 return nil, ctx.Err()
313 case <-time.After(time.Duration(after) * time.Millisecond):
314 continue
315 }
316 }
317 return nil, retryErr
318 }
319
320 content := ""
321 for _, block := range anthropicResponse.Content {
322 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
323 content += text.Text
324 }
325 }
326
327 return &ProviderResponse{
328 Content: content,
329 ToolCalls: a.toolCalls(*anthropicResponse),
330 Usage: a.usage(*anthropicResponse),
331 }, nil
332 }
333}
334
// stream performs a streaming Messages API call in a background goroutine and
// returns the channel on which it reports progress.
//
// The channel is unbuffered, so each send blocks until the consumer receives
// it. The goroutine closes the channel on every exit path: after the stream
// ends without error (or with io.EOF), after emitting an EventError, or after
// a non-retryable failure.
//
// Emitted events:
//   - EventContentStart when a text block starts;
//   - EventToolUseStart / EventToolUseDelta / EventToolUseStop for tool_use
//     blocks, identified by the tool call ID;
//   - EventThinkingDelta, EventSignatureDelta and EventContentDelta for the
//     corresponding non-empty deltas;
//   - EventContentStop when a block stops while no tool call is in progress;
//   - EventComplete, carrying the accumulated response, on message stop;
//   - EventError when shouldRetry returns an error or ctx is done.
//
// The request is rebuilt on each attempt. When a stream fails and shouldRetry
// allows a retry, a new stream is started after the indicated delay; events
// already sent for the failed attempt are not retracted.
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			// Rebuilt from stream events; read when the message stops.
			accumulatedMessage := anthropic.Message{}

			// ID of the tool_use block currently being streamed, or "" when
			// the current block is not a tool call.
			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					// An event that cannot be accumulated is logged and not
					// forwarded to the consumer.
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					// Empty thinking/signature/text deltas are not forwarded.
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						// Partial tool input is only forwarded while a tool
						// call is in progress.
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}
				case anthropic.ContentBlockStopEvent:
					// A stop closes the in-progress tool call if there is one;
					// otherwise it is reported as a content stop.
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					// Concatenate the text blocks of the accumulated message.
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			// A nil error or io.EOF means the stream finished normally.
			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}

			// If there is an error we are going to see if we can retry the call
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
				// Wait out the delay, giving up early if ctx is done.
				select {
				case <-ctx.Done():
					// context cancelled
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			// Neither retryable nor reported as an error by shouldRetry:
			// surface a context error if there is one, then finish.
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}
481
482func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
483 var apiErr *anthropic.Error
484 if !errors.As(err, &apiErr) {
485 return false, 0, err
486 }
487
488 if attempts > maxRetries {
489 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
490 }
491
492 if apiErr.StatusCode == 401 {
493 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
494 if err != nil {
495 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
496 }
497 a.client = createAnthropicClient(a.providerOptions, a.tp)
498 return true, 0, nil
499 }
500
501 // Handle context limit exceeded error (400 Bad Request)
502 if apiErr.StatusCode == 400 {
503 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
504 a.adjustedMaxTokens = adjusted
505 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
506 return true, 0, nil
507 }
508 }
509
510 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
511 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
512 return false, 0, err
513 }
514
515 retryMs := 0
516 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
517
518 backoffMs := 2000 * (1 << (attempts - 1))
519 jitterMs := int(float64(backoffMs) * 0.2)
520 retryMs = backoffMs + jitterMs
521 if len(retryAfterValues) > 0 {
522 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
523 retryMs = retryMs * 1000
524 }
525 }
526 return true, int64(retryMs), nil
527}
528
529// handleContextLimitError parses context limit error and returns adjusted max_tokens
530func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
531 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
532 errorMsg := apiErr.Error()
533
534 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
535
536 if len(matches) != 4 {
537 return 0, false
538 }
539
540 inputTokens, err1 := strconv.Atoi(matches[1])
541 contextLimit, err2 := strconv.Atoi(matches[3])
542
543 if err1 != nil || err2 != nil {
544 return 0, false
545 }
546
547 // Calculate safe max_tokens with a buffer of 1000 tokens
548 safeMaxTokens := contextLimit - inputTokens - 1000
549
550 // Ensure we don't go below a minimum threshold
551 safeMaxTokens = max(safeMaxTokens, 1000)
552
553 return safeMaxTokens, true
554}
555
556func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
557 var toolCalls []message.ToolCall
558
559 for _, block := range msg.Content {
560 switch variant := block.AsAny().(type) {
561 case anthropic.ToolUseBlock:
562 toolCall := message.ToolCall{
563 ID: variant.ID,
564 Name: variant.Name,
565 Input: string(variant.Input),
566 Type: string(variant.Type),
567 Finished: true,
568 }
569 toolCalls = append(toolCalls, toolCall)
570 }
571 }
572
573 return toolCalls
574}
575
576func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
577 return TokenUsage{
578 InputTokens: msg.Usage.InputTokens,
579 OutputTokens: msg.Usage.OutputTokens,
580 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
581 CacheReadTokens: msg.Usage.CacheReadInputTokens,
582 }
583}
584
585func (a *anthropicClient) Model() catwalk.Model {
586 return a.providerOptions.model(a.providerOptions.modelType)
587}