1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/anthropics/anthropic-sdk-go/vertex"
19 "github.com/charmbracelet/catwalk/pkg/catwalk"
20 "github.com/charmbracelet/crush/internal/config"
21 "github.com/charmbracelet/crush/internal/llm/tools"
22 "github.com/charmbracelet/crush/internal/log"
23 "github.com/charmbracelet/crush/internal/message"
24)
25
// contextLimitRegex is compiled once at package scope and recognises the API
// error text reported when the prompt plus the requested max_tokens does not
// fit in the context window, e.g.
//
//	input length and `max_tokens` exceed context limit: 154978 + 50000 > 200000
//
// The three capture groups are, in order: the input token count, the requested
// max_tokens and the context limit.
var contextLimitRegex = regexp.MustCompile(
	`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`,
)
28
29type anthropicClient struct {
30 providerOptions providerClientOptions
31 tp AnthropicClientType
32 client anthropic.Client
33 adjustedMaxTokens int // Used when context limit is hit
34}
35
36type AnthropicClient ProviderClient
37
38type AnthropicClientType string
39
40const (
41 AnthropicClientTypeNormal AnthropicClientType = "normal"
42 AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
43 AnthropicClientTypeVertex AnthropicClientType = "vertex"
44)
45
46func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
47 return &anthropicClient{
48 providerOptions: opts,
49 tp: tp,
50 client: createAnthropicClient(opts, tp),
51 }
52}
53
54func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
55 anthropicClientOptions := []option.RequestOption{}
56
57 // Check if Authorization header is provided in extra headers
58 hasBearerAuth := false
59 if opts.extraHeaders != nil {
60 for key := range opts.extraHeaders {
61 if strings.ToLower(key) == "authorization" {
62 hasBearerAuth = true
63 break
64 }
65 }
66 }
67
68 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
69
70 if opts.apiKey != "" && !hasBearerAuth {
71 if isBearerToken {
72 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
73 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
74 } else {
75 // Use standard X-Api-Key header
76 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
77 }
78 } else if hasBearerAuth {
79 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
80 }
81
82 if config.Get().Options.Debug {
83 httpClient := log.NewHTTPClient()
84 anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
85 }
86
87 switch tp {
88 case AnthropicClientTypeBedrock:
89 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
90 case AnthropicClientTypeVertex:
91 project := opts.extraParams["project"]
92 location := opts.extraParams["location"]
93 anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
94 }
95 for key, header := range opts.extraHeaders {
96 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
97 }
98 for key, value := range opts.extraBody {
99 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
100 }
101 return anthropic.NewClient(anthropicClientOptions...)
102}
103
104func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
105 for i, msg := range messages {
106 cache := false
107 if i > len(messages)-3 {
108 cache = true
109 }
110 switch msg.Role {
111 case message.User:
112 content := anthropic.NewTextBlock(msg.Content().String())
113 if cache && !a.providerOptions.disableCache {
114 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
115 Type: "ephemeral",
116 }
117 }
118 var contentBlocks []anthropic.ContentBlockParamUnion
119 contentBlocks = append(contentBlocks, content)
120 for _, binaryContent := range msg.BinaryContent() {
121 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
122 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
123 contentBlocks = append(contentBlocks, imageBlock)
124 }
125 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
126
127 case message.Assistant:
128 blocks := []anthropic.ContentBlockParamUnion{}
129
130 // Add thinking blocks first if present (required when thinking is enabled with tool use)
131 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
132 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
133 blocks = append(blocks, thinkingBlock)
134 }
135
136 if msg.Content().String() != "" {
137 content := anthropic.NewTextBlock(msg.Content().String())
138 if cache && !a.providerOptions.disableCache {
139 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
140 Type: "ephemeral",
141 }
142 }
143 blocks = append(blocks, content)
144 }
145
146 for _, toolCall := range msg.ToolCalls() {
147 var inputMap map[string]any
148 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
149 if err != nil {
150 continue
151 }
152 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
153 }
154
155 if len(blocks) == 0 {
156 slog.Warn("There is a message without content, investigate, this should not happen")
157 continue
158 }
159 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
160
161 case message.Tool:
162 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
163 for i, toolResult := range msg.ToolResults() {
164 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
165 }
166 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
167 }
168 }
169 return
170}
171
172func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
173 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
174
175 for i, tool := range tools {
176 info := tool.Info()
177 toolParam := anthropic.ToolParam{
178 Name: info.Name,
179 Description: anthropic.String(info.Description),
180 InputSchema: anthropic.ToolInputSchemaParam{
181 Properties: info.Parameters,
182 // TODO: figure out how we can tell claude the required fields?
183 },
184 }
185
186 if i == len(tools)-1 && !a.providerOptions.disableCache {
187 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
188 Type: "ephemeral",
189 }
190 }
191
192 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
193 }
194
195 return anthropicTools
196}
197
198func (a *anthropicClient) finishReason(reason string) message.FinishReason {
199 switch reason {
200 case "end_turn":
201 return message.FinishReasonEndTurn
202 case "max_tokens":
203 return message.FinishReasonMaxTokens
204 case "tool_use":
205 return message.FinishReasonToolUse
206 case "stop_sequence":
207 return message.FinishReasonEndTurn
208 default:
209 return message.FinishReasonUnknown
210 }
211}
212
213func (a *anthropicClient) isThinkingEnabled() bool {
214 cfg := config.Get()
215 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
216 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
217 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
218 }
219 return a.Model().CanReason && modelConfig.Think
220}
221
222func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
223 model := a.providerOptions.model(a.providerOptions.modelType)
224 var thinkingParam anthropic.ThinkingConfigParamUnion
225 cfg := config.Get()
226 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
227 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
228 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
229 }
230 temperature := anthropic.Float(0)
231
232 maxTokens := model.DefaultMaxTokens
233 if modelConfig.MaxTokens > 0 {
234 maxTokens = modelConfig.MaxTokens
235 }
236 if a.isThinkingEnabled() {
237 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
238 temperature = anthropic.Float(1)
239 }
240 // Override max tokens if set in provider options
241 if a.providerOptions.maxTokens > 0 {
242 maxTokens = a.providerOptions.maxTokens
243 }
244
245 // Use adjusted max tokens if context limit was hit
246 if a.adjustedMaxTokens > 0 {
247 maxTokens = int64(a.adjustedMaxTokens)
248 }
249
250 systemBlocks := []anthropic.TextBlockParam{}
251
252 // Add custom system prompt prefix if configured
253 if a.providerOptions.systemPromptPrefix != "" {
254 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
255 Text: a.providerOptions.systemPromptPrefix,
256 CacheControl: anthropic.CacheControlEphemeralParam{
257 Type: "ephemeral",
258 },
259 })
260 }
261
262 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
263 Text: a.providerOptions.systemMessage,
264 CacheControl: anthropic.CacheControlEphemeralParam{
265 Type: "ephemeral",
266 },
267 })
268
269 return anthropic.MessageNewParams{
270 Model: anthropic.Model(model.ID),
271 MaxTokens: maxTokens,
272 Temperature: temperature,
273 Messages: messages,
274 Tools: tools,
275 Thinking: thinkingParam,
276 System: systemBlocks,
277 }
278}
279
280func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
281 attempts := 0
282 for {
283 attempts++
284 // Prepare messages on each attempt in case max_tokens was adjusted
285 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
286
287 var opts []option.RequestOption
288 if a.isThinkingEnabled() {
289 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
290 }
291 anthropicResponse, err := a.client.Messages.New(
292 ctx,
293 preparedMessages,
294 opts...,
295 )
296 // If there is an error we are going to see if we can retry the call
297 if err != nil {
298 slog.Error("Anthropic API error", "error", err.Error(), "attempt", attempts, "max_retries", maxRetries)
299 retry, after, retryErr := a.shouldRetry(attempts, err)
300 if retryErr != nil {
301 return nil, retryErr
302 }
303 if retry {
304 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
305 select {
306 case <-ctx.Done():
307 return nil, ctx.Err()
308 case <-time.After(time.Duration(after) * time.Millisecond):
309 continue
310 }
311 }
312 return nil, retryErr
313 }
314
315 content := ""
316 for _, block := range anthropicResponse.Content {
317 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
318 content += text.Text
319 }
320 }
321
322 return &ProviderResponse{
323 Content: content,
324 ToolCalls: a.toolCalls(*anthropicResponse),
325 Usage: a.usage(*anthropicResponse),
326 }, nil
327 }
328}
329
330func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
331 attempts := 0
332 eventChan := make(chan ProviderEvent)
333 go func() {
334 for {
335 attempts++
336 // Prepare messages on each attempt in case max_tokens was adjusted
337 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
338
339 var opts []option.RequestOption
340 if a.isThinkingEnabled() {
341 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
342 }
343
344 anthropicStream := a.client.Messages.NewStreaming(
345 ctx,
346 preparedMessages,
347 opts...,
348 )
349 accumulatedMessage := anthropic.Message{}
350
351 currentToolCallID := ""
352 for anthropicStream.Next() {
353 event := anthropicStream.Current()
354 err := accumulatedMessage.Accumulate(event)
355 if err != nil {
356 slog.Warn("Error accumulating message", "error", err)
357 continue
358 }
359
360 switch event := event.AsAny().(type) {
361 case anthropic.ContentBlockStartEvent:
362 switch event.ContentBlock.Type {
363 case "text":
364 eventChan <- ProviderEvent{Type: EventContentStart}
365 case "tool_use":
366 currentToolCallID = event.ContentBlock.ID
367 eventChan <- ProviderEvent{
368 Type: EventToolUseStart,
369 ToolCall: &message.ToolCall{
370 ID: event.ContentBlock.ID,
371 Name: event.ContentBlock.Name,
372 Finished: false,
373 },
374 }
375 }
376
377 case anthropic.ContentBlockDeltaEvent:
378 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
379 eventChan <- ProviderEvent{
380 Type: EventThinkingDelta,
381 Thinking: event.Delta.Thinking,
382 }
383 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
384 eventChan <- ProviderEvent{
385 Type: EventSignatureDelta,
386 Signature: event.Delta.Signature,
387 }
388 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
389 eventChan <- ProviderEvent{
390 Type: EventContentDelta,
391 Content: event.Delta.Text,
392 }
393 } else if event.Delta.Type == "input_json_delta" {
394 if currentToolCallID != "" {
395 eventChan <- ProviderEvent{
396 Type: EventToolUseDelta,
397 ToolCall: &message.ToolCall{
398 ID: currentToolCallID,
399 Finished: false,
400 Input: event.Delta.PartialJSON,
401 },
402 }
403 }
404 }
405 case anthropic.ContentBlockStopEvent:
406 if currentToolCallID != "" {
407 eventChan <- ProviderEvent{
408 Type: EventToolUseStop,
409 ToolCall: &message.ToolCall{
410 ID: currentToolCallID,
411 },
412 }
413 currentToolCallID = ""
414 } else {
415 eventChan <- ProviderEvent{Type: EventContentStop}
416 }
417
418 case anthropic.MessageStopEvent:
419 content := ""
420 for _, block := range accumulatedMessage.Content {
421 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
422 content += text.Text
423 }
424 }
425
426 eventChan <- ProviderEvent{
427 Type: EventComplete,
428 Response: &ProviderResponse{
429 Content: content,
430 ToolCalls: a.toolCalls(accumulatedMessage),
431 Usage: a.usage(accumulatedMessage),
432 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
433 },
434 Content: content,
435 }
436 }
437 }
438
439 err := anthropicStream.Err()
440 if err == nil || errors.Is(err, io.EOF) {
441 close(eventChan)
442 return
443 }
444
445 // If there is an error we are going to see if we can retry the call
446 retry, after, retryErr := a.shouldRetry(attempts, err)
447 if retryErr != nil {
448 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
449 close(eventChan)
450 return
451 }
452 if retry {
453 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
454 select {
455 case <-ctx.Done():
456 // context cancelled
457 if ctx.Err() != nil {
458 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
459 }
460 close(eventChan)
461 return
462 case <-time.After(time.Duration(after) * time.Millisecond):
463 continue
464 }
465 }
466 if ctx.Err() != nil {
467 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
468 }
469
470 close(eventChan)
471 return
472 }
473 }()
474 return eventChan
475}
476
477func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
478 var apiErr *anthropic.Error
479 if !errors.As(err, &apiErr) {
480 return false, 0, err
481 }
482
483 if attempts > maxRetries {
484 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
485 }
486
487 if apiErr.StatusCode == 401 {
488 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
489 if err != nil {
490 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
491 }
492 a.client = createAnthropicClient(a.providerOptions, a.tp)
493 return true, 0, nil
494 }
495
496 // Handle context limit exceeded error (400 Bad Request)
497 if apiErr.StatusCode == 400 {
498 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
499 a.adjustedMaxTokens = adjusted
500 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
501 return true, 0, nil
502 }
503 }
504
505 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
506 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
507 return false, 0, err
508 }
509
510 retryMs := 0
511 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
512
513 backoffMs := 2000 * (1 << (attempts - 1))
514 jitterMs := int(float64(backoffMs) * 0.2)
515 retryMs = backoffMs + jitterMs
516 if len(retryAfterValues) > 0 {
517 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
518 retryMs = retryMs * 1000
519 }
520 }
521 return true, int64(retryMs), nil
522}
523
524// handleContextLimitError parses context limit error and returns adjusted max_tokens
525func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
526 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
527 errorMsg := apiErr.Error()
528
529 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
530
531 if len(matches) != 4 {
532 return 0, false
533 }
534
535 inputTokens, err1 := strconv.Atoi(matches[1])
536 contextLimit, err2 := strconv.Atoi(matches[3])
537
538 if err1 != nil || err2 != nil {
539 return 0, false
540 }
541
542 // Calculate safe max_tokens with a buffer of 1000 tokens
543 safeMaxTokens := contextLimit - inputTokens - 1000
544
545 // Ensure we don't go below a minimum threshold
546 safeMaxTokens = max(safeMaxTokens, 1000)
547
548 return safeMaxTokens, true
549}
550
551func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
552 var toolCalls []message.ToolCall
553
554 for _, block := range msg.Content {
555 switch variant := block.AsAny().(type) {
556 case anthropic.ToolUseBlock:
557 toolCall := message.ToolCall{
558 ID: variant.ID,
559 Name: variant.Name,
560 Input: string(variant.Input),
561 Type: string(variant.Type),
562 Finished: true,
563 }
564 toolCalls = append(toolCalls, toolCall)
565 }
566 }
567
568 return toolCalls
569}
570
571func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
572 return TokenUsage{
573 InputTokens: msg.Usage.InputTokens,
574 OutputTokens: msg.Usage.OutputTokens,
575 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
576 CacheReadTokens: msg.Usage.CacheReadInputTokens,
577 }
578}
579
580func (a *anthropicClient) Model() catwalk.Model {
581 return a.providerOptions.model(a.providerOptions.modelType)
582}