1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/anthropics/anthropic-sdk-go/vertex"
19 "github.com/charmbracelet/catwalk/pkg/catwalk"
20 "github.com/charmbracelet/crush/internal/config"
21 "github.com/charmbracelet/crush/internal/llm/tools"
22 "github.com/charmbracelet/crush/internal/log"
23 "github.com/charmbracelet/crush/internal/message"
24)
25
// contextLimitRegex matches the API error text reported when the input length
// plus the requested max_tokens exceeds the model's context limit. Its three
// capture groups are, in order: the input length, the requested max_tokens and
// the context limit. It is compiled once at package scope so the error path
// never has to recompile it.
var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
28
// anthropicClient is the provider client backed by the Anthropic SDK.
type anthropicClient struct {
	// providerOptions holds the options the client was constructed with.
	providerOptions providerClientOptions
	// tp records which backend flavor the SDK client was configured for.
	tp AnthropicClientType
	// client is the SDK client built by createAnthropicClient; shouldRetry
	// rebuilds it after a 401 response.
	client anthropic.Client
	// adjustedMaxTokens, when greater than zero, overrides max_tokens in
	// preparedMessages. shouldRetry sets it after a context-limit error.
	adjustedMaxTokens int // Used when context limit is hit
}
35
// AnthropicClient is the type returned by newAnthropicClient. It is defined in
// terms of ProviderClient, which is declared elsewhere in this package.
type AnthropicClient ProviderClient
37
// AnthropicClientType selects which backend createAnthropicClient configures
// the SDK client for.
type AnthropicClientType string

// Supported client types.
const (
	// AnthropicClientTypeNormal adds no backend-specific options.
	AnthropicClientTypeNormal AnthropicClientType = "normal"
	// AnthropicClientTypeBedrock adds the SDK's Bedrock default-config option.
	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
	// AnthropicClientTypeVertex adds the SDK's Vertex Google-auth option, using
	// the "project" and "location" extra params.
	AnthropicClientTypeVertex AnthropicClientType = "vertex"
)
45
46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
47 return &anthropicClient{
48 providerOptions: opts,
49 tp: tp,
50 client: createAnthropicClient(opts, tp),
51 }
52}
53
54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
55 anthropicClientOptions := []option.RequestOption{}
56
57 // Check if Authorization header is provided in extra headers
58 hasBearerAuth := false
59 if opts.extraHeaders != nil {
60 for key := range opts.extraHeaders {
61 if strings.ToLower(key) == "authorization" {
62 hasBearerAuth = true
63 break
64 }
65 }
66 }
67
68 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
69
70 if opts.apiKey != "" && !hasBearerAuth {
71 if isBearerToken {
72 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
73 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
74 } else {
75 // Use standard X-Api-Key header
76 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
77 }
78 } else if hasBearerAuth {
79 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
80 }
81
82 if opts.baseURL != "" {
83 anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(opts.baseURL))
84 }
85
86 if config.Get().Options.Debug {
87 httpClient := log.NewHTTPClient()
88 anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
89 }
90
91 switch tp {
92 case AnthropicClientTypeBedrock:
93 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
94 case AnthropicClientTypeVertex:
95 project := opts.extraParams["project"]
96 location := opts.extraParams["location"]
97 anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
98 }
99 for key, header := range opts.extraHeaders {
100 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
101 }
102 for key, value := range opts.extraBody {
103 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
104 }
105 return anthropic.NewClient(anthropicClientOptions...)
106}
107
108func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
109 for i, msg := range messages {
110 cache := false
111 if i > len(messages)-3 {
112 cache = true
113 }
114 switch msg.Role {
115 case message.User:
116 content := anthropic.NewTextBlock(msg.Content().String())
117 if cache && !a.providerOptions.disableCache {
118 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
119 Type: "ephemeral",
120 }
121 }
122 var contentBlocks []anthropic.ContentBlockParamUnion
123 contentBlocks = append(contentBlocks, content)
124 for _, binaryContent := range msg.BinaryContent() {
125 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
126 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
127 contentBlocks = append(contentBlocks, imageBlock)
128 }
129 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
130
131 case message.Assistant:
132 blocks := []anthropic.ContentBlockParamUnion{}
133
134 // Add thinking blocks first if present (required when thinking is enabled with tool use)
135 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
136 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
137 blocks = append(blocks, thinkingBlock)
138 }
139
140 if msg.Content().String() != "" {
141 content := anthropic.NewTextBlock(msg.Content().String())
142 if cache && !a.providerOptions.disableCache {
143 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
144 Type: "ephemeral",
145 }
146 }
147 blocks = append(blocks, content)
148 }
149
150 for _, toolCall := range msg.ToolCalls() {
151 var inputMap map[string]any
152 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
153 if err != nil {
154 continue
155 }
156 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
157 }
158
159 if len(blocks) == 0 {
160 continue
161 }
162 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
163
164 case message.Tool:
165 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
166 for i, toolResult := range msg.ToolResults() {
167 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
168 }
169 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
170 }
171 }
172 return
173}
174
175func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
176 if len(tools) == 0 {
177 return nil
178 }
179 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
180
181 for i, tool := range tools {
182 info := tool.Info()
183 toolParam := anthropic.ToolParam{
184 Name: info.Name,
185 Description: anthropic.String(info.Description),
186 InputSchema: anthropic.ToolInputSchemaParam{
187 Properties: info.Parameters,
188 Required: info.Required,
189 },
190 }
191
192 if i == len(tools)-1 && !a.providerOptions.disableCache {
193 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
194 Type: "ephemeral",
195 }
196 }
197
198 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
199 }
200
201 return anthropicTools
202}
203
204func (a *anthropicClient) finishReason(reason string) message.FinishReason {
205 switch reason {
206 case "end_turn":
207 return message.FinishReasonEndTurn
208 case "max_tokens":
209 return message.FinishReasonMaxTokens
210 case "tool_use":
211 return message.FinishReasonToolUse
212 case "stop_sequence":
213 return message.FinishReasonEndTurn
214 default:
215 return message.FinishReasonUnknown
216 }
217}
218
219func (a *anthropicClient) isThinkingEnabled() bool {
220 cfg := config.Get()
221 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
222 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
223 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
224 }
225 return a.Model().CanReason && modelConfig.Think
226}
227
228func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
229 model := a.providerOptions.model(a.providerOptions.modelType)
230 var thinkingParam anthropic.ThinkingConfigParamUnion
231 cfg := config.Get()
232 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
233 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
234 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
235 }
236 temperature := anthropic.Float(0)
237
238 maxTokens := model.DefaultMaxTokens
239 if modelConfig.MaxTokens > 0 {
240 maxTokens = modelConfig.MaxTokens
241 }
242 if a.isThinkingEnabled() {
243 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
244 temperature = anthropic.Float(1)
245 }
246 // Override max tokens if set in provider options
247 if a.providerOptions.maxTokens > 0 {
248 maxTokens = a.providerOptions.maxTokens
249 }
250
251 // Use adjusted max tokens if context limit was hit
252 if a.adjustedMaxTokens > 0 {
253 maxTokens = int64(a.adjustedMaxTokens)
254 }
255
256 systemBlocks := []anthropic.TextBlockParam{}
257
258 // Add custom system prompt prefix if configured
259 if a.providerOptions.systemPromptPrefix != "" {
260 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
261 Text: a.providerOptions.systemPromptPrefix,
262 })
263 }
264
265 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
266 Text: a.providerOptions.systemMessage,
267 CacheControl: anthropic.CacheControlEphemeralParam{
268 Type: "ephemeral",
269 },
270 })
271
272 return anthropic.MessageNewParams{
273 Model: anthropic.Model(model.ID),
274 MaxTokens: maxTokens,
275 Temperature: temperature,
276 Messages: messages,
277 Tools: tools,
278 Thinking: thinkingParam,
279 System: systemBlocks,
280 }
281}
282
283func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
284 attempts := 0
285 for {
286 attempts++
287 // Prepare messages on each attempt in case max_tokens was adjusted
288 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
289
290 var opts []option.RequestOption
291 if a.isThinkingEnabled() {
292 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
293 }
294 anthropicResponse, err := a.client.Messages.New(
295 ctx,
296 preparedMessages,
297 opts...,
298 )
299 // If there is an error we are going to see if we can retry the call
300 if err != nil {
301 retry, after, retryErr := a.shouldRetry(attempts, err)
302 if retryErr != nil {
303 return nil, retryErr
304 }
305 if retry {
306 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
307 select {
308 case <-ctx.Done():
309 return nil, ctx.Err()
310 case <-time.After(time.Duration(after) * time.Millisecond):
311 continue
312 }
313 }
314 return nil, retryErr
315 }
316
317 content := ""
318 for _, block := range anthropicResponse.Content {
319 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
320 content += text.Text
321 }
322 }
323
324 return &ProviderResponse{
325 Content: content,
326 ToolCalls: a.toolCalls(*anthropicResponse),
327 Usage: a.usage(*anthropicResponse),
328 }, nil
329 }
330}
331
// stream performs a streaming Messages call in a new goroutine and returns a
// channel of provider events. The goroutine closes the channel when it is done.
//
// For each attempt the request parameters are rebuilt, the SDK stream is
// consumed and SDK events are translated into ProviderEvents. When the stream
// ends with an error, shouldRetry decides whether to try again after a delay
// (in milliseconds); otherwise an EventError is emitted before the channel is
// closed.
//
// The channel is unbuffered and sends are not guarded by ctx, so the goroutine
// blocks until each event is received.
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			// Accumulates every stream event so the final response can be
			// built from the complete message.
			accumulatedMessage := anthropic.Message{}

			// ID of the tool-use block currently being streamed; empty when
			// the current block is not a tool call.
			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					// An event that fails to accumulate is logged and not
					// dispatched.
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					// Deltas with an empty payload are dropped, except for
					// input_json_delta, which is forwarded whenever a tool
					// call is in progress.
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}
				case anthropic.ContentBlockStopEvent:
					// A stop while a tool call is in progress ends that tool
					// call; a stop for any other block is reported as a
					// content stop.
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					// Build the final response from the accumulated message:
					// concatenated text blocks plus tool calls, usage and
					// finish reason.
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			// A nil error or io.EOF means the stream finished normally.
			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}

			// If there is an error we are going to see if we can retry the call
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
				select {
				case <-ctx.Done():
					// context cancelled
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			// Reached only when shouldRetry returns neither a retry nor an
			// error; report a context error if there is one.
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}
478
479func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
480 var apiErr *anthropic.Error
481 if !errors.As(err, &apiErr) {
482 return false, 0, err
483 }
484
485 if attempts > maxRetries {
486 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
487 }
488
489 if apiErr.StatusCode == 401 {
490 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
491 if err != nil {
492 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
493 }
494 a.client = createAnthropicClient(a.providerOptions, a.tp)
495 return true, 0, nil
496 }
497
498 // Handle context limit exceeded error (400 Bad Request)
499 if apiErr.StatusCode == 400 {
500 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
501 a.adjustedMaxTokens = adjusted
502 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
503 return true, 0, nil
504 }
505 }
506
507 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
508 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
509 return false, 0, err
510 }
511
512 retryMs := 0
513 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
514
515 backoffMs := 2000 * (1 << (attempts - 1))
516 jitterMs := int(float64(backoffMs) * 0.2)
517 retryMs = backoffMs + jitterMs
518 if len(retryAfterValues) > 0 {
519 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
520 retryMs = retryMs * 1000
521 }
522 }
523 return true, int64(retryMs), nil
524}
525
526// handleContextLimitError parses context limit error and returns adjusted max_tokens
527func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
528 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
529 errorMsg := apiErr.Error()
530
531 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
532
533 if len(matches) != 4 {
534 return 0, false
535 }
536
537 inputTokens, err1 := strconv.Atoi(matches[1])
538 contextLimit, err2 := strconv.Atoi(matches[3])
539
540 if err1 != nil || err2 != nil {
541 return 0, false
542 }
543
544 // Calculate safe max_tokens with a buffer of 1000 tokens
545 safeMaxTokens := contextLimit - inputTokens - 1000
546
547 // Ensure we don't go below a minimum threshold
548 safeMaxTokens = max(safeMaxTokens, 1000)
549
550 return safeMaxTokens, true
551}
552
553func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
554 var toolCalls []message.ToolCall
555
556 for _, block := range msg.Content {
557 switch variant := block.AsAny().(type) {
558 case anthropic.ToolUseBlock:
559 toolCall := message.ToolCall{
560 ID: variant.ID,
561 Name: variant.Name,
562 Input: string(variant.Input),
563 Type: string(variant.Type),
564 Finished: true,
565 }
566 toolCalls = append(toolCalls, toolCall)
567 }
568 }
569
570 return toolCalls
571}
572
573func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
574 return TokenUsage{
575 InputTokens: msg.Usage.InputTokens,
576 OutputTokens: msg.Usage.OutputTokens,
577 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
578 CacheReadTokens: msg.Usage.CacheReadInputTokens,
579 }
580}
581
582func (a *anthropicClient) Model() catwalk.Model {
583 return a.providerOptions.model(a.providerOptions.modelType)
584}