1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "net/http"
11 "regexp"
12 "strconv"
13 "strings"
14 "time"
15
16 "github.com/anthropics/anthropic-sdk-go"
17 "github.com/anthropics/anthropic-sdk-go/bedrock"
18 "github.com/anthropics/anthropic-sdk-go/option"
19 "github.com/anthropics/anthropic-sdk-go/vertex"
20 "github.com/charmbracelet/catwalk/pkg/catwalk"
21 "github.com/charmbracelet/crush/internal/config"
22 "github.com/charmbracelet/crush/internal/llm/tools"
23 "github.com/charmbracelet/crush/internal/log"
24 "github.com/charmbracelet/crush/internal/message"
25)
26
// contextLimitRegex matches the API error text reported when the prompt plus
// the requested max_tokens do not fit in the context window, for example:
//
//	input length and `max_tokens` exceed context limit: 154978 + 50000 > 200000
//
// The three capture groups are, in order, the input length, the requested
// max_tokens and the context limit. It is compiled once at package scope so
// that error handling never recompiles the pattern.
var contextLimitRegex = regexp.MustCompile(`input length and ` + "`max_tokens`" + ` exceed context limit: (\d+) \+ (\d+) > (\d+)`)
29
// anthropicClient is the provider client implementation that talks to the
// Anthropic Messages API through the Anthropic Go SDK.
type anthropicClient struct {
	providerOptions   providerClientOptions // options supplied when the client was constructed
	tp                AnthropicClientType   // backend variant passed to newAnthropicClient
	client            anthropic.Client      // SDK client used for every request
	adjustedMaxTokens int                   // max_tokens override set by shouldRetry after a context limit error; 0 means unset
}
36
// AnthropicClient is the ProviderClient type returned by newAnthropicClient.
type AnthropicClient ProviderClient
38
// AnthropicClientType selects which backend-specific request options
// createAnthropicClient adds when it builds the SDK client.
type AnthropicClientType string
40
// Supported AnthropicClientType values.
//
// AnthropicClientTypeNormal adds no backend-specific options,
// AnthropicClientTypeBedrock adds bedrock.WithLoadDefaultConfig, and
// AnthropicClientTypeVertex adds vertex.WithGoogleAuth using the "project"
// and "location" extra params.
const (
	AnthropicClientTypeNormal  AnthropicClientType = "normal"
	AnthropicClientTypeBedrock AnthropicClientType = "bedrock"
	AnthropicClientTypeVertex  AnthropicClientType = "vertex"
)
46
47func newAnthropicClient(opts providerClientOptions, tp AnthropicClientType) AnthropicClient {
48 return &anthropicClient{
49 providerOptions: opts,
50 tp: tp,
51 client: createAnthropicClient(opts, tp),
52 }
53}
54
55func createAnthropicClient(opts providerClientOptions, tp AnthropicClientType) anthropic.Client {
56 anthropicClientOptions := []option.RequestOption{}
57
58 // Check if Authorization header is provided in extra headers
59 hasBearerAuth := false
60 if opts.extraHeaders != nil {
61 for key := range opts.extraHeaders {
62 if strings.ToLower(key) == "authorization" {
63 hasBearerAuth = true
64 break
65 }
66 }
67 }
68
69 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
70
71 if opts.apiKey != "" && !hasBearerAuth {
72 if isBearerToken {
73 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
74 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
75 } else {
76 // Use standard X-Api-Key header
77 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
78 }
79 } else if hasBearerAuth {
80 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
81 }
82
83 if opts.baseURL != "" {
84 resolvedBaseURL, err := config.Get().Resolve(opts.baseURL)
85 if err == nil && resolvedBaseURL != "" {
86 anthropicClientOptions = append(anthropicClientOptions, option.WithBaseURL(resolvedBaseURL))
87 }
88 }
89
90 if config.Get().Options.Debug {
91 httpClient := log.NewHTTPClient()
92 anthropicClientOptions = append(anthropicClientOptions, option.WithHTTPClient(httpClient))
93 }
94
95 switch tp {
96 case AnthropicClientTypeBedrock:
97 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
98 case AnthropicClientTypeVertex:
99 project := opts.extraParams["project"]
100 location := opts.extraParams["location"]
101 anthropicClientOptions = append(anthropicClientOptions, vertex.WithGoogleAuth(context.Background(), location, project))
102 }
103 for key, header := range opts.extraHeaders {
104 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
105 }
106 for key, value := range opts.extraBody {
107 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
108 }
109 return anthropic.NewClient(anthropicClientOptions...)
110}
111
112func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
113 for i, msg := range messages {
114 cache := false
115 if i > len(messages)-3 {
116 cache = true
117 }
118 switch msg.Role {
119 case message.User:
120 content := anthropic.NewTextBlock(msg.Content().String())
121 if cache && !a.providerOptions.disableCache {
122 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
123 Type: "ephemeral",
124 }
125 }
126 var contentBlocks []anthropic.ContentBlockParamUnion
127 contentBlocks = append(contentBlocks, content)
128 for _, binaryContent := range msg.BinaryContent() {
129 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
130 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
131 contentBlocks = append(contentBlocks, imageBlock)
132 }
133 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
134
135 case message.Assistant:
136 blocks := []anthropic.ContentBlockParamUnion{}
137
138 // Add thinking blocks first if present (required when thinking is enabled with tool use)
139 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
140 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
141 blocks = append(blocks, thinkingBlock)
142 }
143
144 if msg.Content().String() != "" {
145 content := anthropic.NewTextBlock(msg.Content().String())
146 if cache && !a.providerOptions.disableCache {
147 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
148 Type: "ephemeral",
149 }
150 }
151 blocks = append(blocks, content)
152 }
153
154 for _, toolCall := range msg.ToolCalls() {
155 if !toolCall.Finished {
156 continue
157 }
158 var inputMap map[string]any
159 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
160 if err != nil {
161 continue
162 }
163 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
164 }
165
166 if len(blocks) == 0 {
167 continue
168 }
169 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
170
171 case message.Tool:
172 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
173 for i, toolResult := range msg.ToolResults() {
174 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
175 }
176 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
177 }
178 }
179 return anthropicMessages
180}
181
182func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
183 if len(tools) == 0 {
184 return nil
185 }
186 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
187
188 for i, tool := range tools {
189 info := tool.Info()
190 toolParam := anthropic.ToolParam{
191 Name: info.Name,
192 Description: anthropic.String(info.Description),
193 InputSchema: anthropic.ToolInputSchemaParam{
194 Properties: info.Parameters,
195 Required: info.Required,
196 },
197 }
198
199 if i == len(tools)-1 && !a.providerOptions.disableCache {
200 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
201 Type: "ephemeral",
202 }
203 }
204
205 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
206 }
207
208 return anthropicTools
209}
210
211func (a *anthropicClient) finishReason(reason string) message.FinishReason {
212 switch reason {
213 case "end_turn":
214 return message.FinishReasonEndTurn
215 case "max_tokens":
216 return message.FinishReasonMaxTokens
217 case "tool_use":
218 return message.FinishReasonToolUse
219 case "stop_sequence":
220 return message.FinishReasonEndTurn
221 default:
222 return message.FinishReasonUnknown
223 }
224}
225
226func (a *anthropicClient) isThinkingEnabled() bool {
227 cfg := config.Get()
228 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
229 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
230 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
231 }
232 return a.Model().CanReason && modelConfig.Think
233}
234
235func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
236 model := a.providerOptions.model(a.providerOptions.modelType)
237 var thinkingParam anthropic.ThinkingConfigParamUnion
238 cfg := config.Get()
239 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
240 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
241 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
242 }
243 temperature := anthropic.Float(0)
244
245 maxTokens := model.DefaultMaxTokens
246 if modelConfig.MaxTokens > 0 {
247 maxTokens = modelConfig.MaxTokens
248 }
249 if a.isThinkingEnabled() {
250 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
251 temperature = anthropic.Float(1)
252 }
253 // Override max tokens if set in provider options
254 if a.providerOptions.maxTokens > 0 {
255 maxTokens = a.providerOptions.maxTokens
256 }
257
258 // Use adjusted max tokens if context limit was hit
259 if a.adjustedMaxTokens > 0 {
260 maxTokens = int64(a.adjustedMaxTokens)
261 }
262
263 systemBlocks := []anthropic.TextBlockParam{}
264
265 // Add custom system prompt prefix if configured
266 if a.providerOptions.systemPromptPrefix != "" {
267 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
268 Text: a.providerOptions.systemPromptPrefix,
269 })
270 }
271
272 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
273 Text: a.providerOptions.systemMessage,
274 CacheControl: anthropic.CacheControlEphemeralParam{
275 Type: "ephemeral",
276 },
277 })
278
279 return anthropic.MessageNewParams{
280 Model: anthropic.Model(model.ID),
281 MaxTokens: maxTokens,
282 Temperature: temperature,
283 Messages: messages,
284 Tools: tools,
285 Thinking: thinkingParam,
286 System: systemBlocks,
287 }
288}
289
290func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
291 attempts := 0
292 for {
293 attempts++
294 // Prepare messages on each attempt in case max_tokens was adjusted
295 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
296
297 var opts []option.RequestOption
298 if a.isThinkingEnabled() {
299 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
300 }
301 anthropicResponse, err := a.client.Messages.New(
302 ctx,
303 preparedMessages,
304 opts...,
305 )
306 // If there is an error we are going to see if we can retry the call
307 if err != nil {
308 retry, after, retryErr := a.shouldRetry(attempts, err)
309 if retryErr != nil {
310 return nil, retryErr
311 }
312 if retry {
313 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
314 select {
315 case <-ctx.Done():
316 return nil, ctx.Err()
317 case <-time.After(time.Duration(after) * time.Millisecond):
318 continue
319 }
320 }
321 return nil, retryErr
322 }
323
324 content := ""
325 for _, block := range anthropicResponse.Content {
326 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
327 content += text.Text
328 }
329 }
330
331 return &ProviderResponse{
332 Content: content,
333 ToolCalls: a.toolCalls(*anthropicResponse),
334 Usage: a.usage(*anthropicResponse),
335 }, nil
336 }
337}
338
339func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
340 attempts := 0
341 eventChan := make(chan ProviderEvent)
342 go func() {
343 for {
344 attempts++
345 // Prepare messages on each attempt in case max_tokens was adjusted
346 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
347
348 var opts []option.RequestOption
349 if a.isThinkingEnabled() {
350 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
351 }
352
353 anthropicStream := a.client.Messages.NewStreaming(
354 ctx,
355 preparedMessages,
356 opts...,
357 )
358 accumulatedMessage := anthropic.Message{}
359
360 currentToolCallID := ""
361 for anthropicStream.Next() {
362 event := anthropicStream.Current()
363 err := accumulatedMessage.Accumulate(event)
364 if err != nil {
365 slog.Warn("Error accumulating message", "error", err)
366 continue
367 }
368
369 switch event := event.AsAny().(type) {
370 case anthropic.ContentBlockStartEvent:
371 switch event.ContentBlock.Type {
372 case "text":
373 eventChan <- ProviderEvent{Type: EventContentStart}
374 case "tool_use":
375 currentToolCallID = event.ContentBlock.ID
376 eventChan <- ProviderEvent{
377 Type: EventToolUseStart,
378 ToolCall: &message.ToolCall{
379 ID: event.ContentBlock.ID,
380 Name: event.ContentBlock.Name,
381 Finished: false,
382 },
383 }
384 }
385
386 case anthropic.ContentBlockDeltaEvent:
387 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
388 eventChan <- ProviderEvent{
389 Type: EventThinkingDelta,
390 Thinking: event.Delta.Thinking,
391 }
392 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
393 eventChan <- ProviderEvent{
394 Type: EventSignatureDelta,
395 Signature: event.Delta.Signature,
396 }
397 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
398 eventChan <- ProviderEvent{
399 Type: EventContentDelta,
400 Content: event.Delta.Text,
401 }
402 } else if event.Delta.Type == "input_json_delta" {
403 if currentToolCallID != "" {
404 eventChan <- ProviderEvent{
405 Type: EventToolUseDelta,
406 ToolCall: &message.ToolCall{
407 ID: currentToolCallID,
408 Finished: false,
409 Input: event.Delta.PartialJSON,
410 },
411 }
412 }
413 }
414 case anthropic.ContentBlockStopEvent:
415 if currentToolCallID != "" {
416 eventChan <- ProviderEvent{
417 Type: EventToolUseStop,
418 ToolCall: &message.ToolCall{
419 ID: currentToolCallID,
420 },
421 }
422 currentToolCallID = ""
423 } else {
424 eventChan <- ProviderEvent{Type: EventContentStop}
425 }
426
427 case anthropic.MessageStopEvent:
428 content := ""
429 for _, block := range accumulatedMessage.Content {
430 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
431 content += text.Text
432 }
433 }
434
435 eventChan <- ProviderEvent{
436 Type: EventComplete,
437 Response: &ProviderResponse{
438 Content: content,
439 ToolCalls: a.toolCalls(accumulatedMessage),
440 Usage: a.usage(accumulatedMessage),
441 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
442 },
443 Content: content,
444 }
445 }
446 }
447
448 err := anthropicStream.Err()
449 if err == nil || errors.Is(err, io.EOF) {
450 close(eventChan)
451 return
452 }
453
454 // If there is an error we are going to see if we can retry the call
455 retry, after, retryErr := a.shouldRetry(attempts, err)
456 if retryErr != nil {
457 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
458 close(eventChan)
459 return
460 }
461 if retry {
462 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries, "error", err)
463 select {
464 case <-ctx.Done():
465 // context cancelled
466 if ctx.Err() != nil {
467 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
468 }
469 close(eventChan)
470 return
471 case <-time.After(time.Duration(after) * time.Millisecond):
472 continue
473 }
474 }
475 if ctx.Err() != nil {
476 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
477 }
478
479 close(eventChan)
480 return
481 }
482 }()
483 return eventChan
484}
485
486func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
487 var apiErr *anthropic.Error
488 if !errors.As(err, &apiErr) {
489 return false, 0, err
490 }
491
492 if attempts > maxRetries {
493 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
494 }
495
496 if apiErr.StatusCode == http.StatusUnauthorized {
497 return false, 0, err
498 }
499
500 // Handle context limit exceeded error (400 Bad Request)
501 if apiErr.StatusCode == http.StatusBadRequest {
502 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
503 a.adjustedMaxTokens = adjusted
504 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
505 return true, 0, nil
506 }
507 }
508
509 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
510 if apiErr.StatusCode != http.StatusTooManyRequests && apiErr.StatusCode != 529 && !isOverloaded {
511 return false, 0, err
512 }
513
514 retryMs := 0
515 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
516
517 backoffMs := 2000 * (1 << (attempts - 1))
518 jitterMs := int(float64(backoffMs) * 0.2)
519 retryMs = backoffMs + jitterMs
520 if len(retryAfterValues) > 0 {
521 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
522 retryMs = retryMs * 1000
523 }
524 }
525 return true, int64(retryMs), nil
526}
527
528// handleContextLimitError parses context limit error and returns adjusted max_tokens
529func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
530 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
531 errorMsg := apiErr.Error()
532
533 matches := contextLimitRegex.FindStringSubmatch(errorMsg)
534
535 if len(matches) != 4 {
536 return 0, false
537 }
538
539 inputTokens, err1 := strconv.Atoi(matches[1])
540 contextLimit, err2 := strconv.Atoi(matches[3])
541
542 if err1 != nil || err2 != nil {
543 return 0, false
544 }
545
546 // Calculate safe max_tokens with a buffer of 1000 tokens
547 safeMaxTokens := contextLimit - inputTokens - 1000
548
549 // Ensure we don't go below a minimum threshold
550 safeMaxTokens = max(safeMaxTokens, 1000)
551
552 return safeMaxTokens, true
553}
554
555func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
556 var toolCalls []message.ToolCall
557
558 for _, block := range msg.Content {
559 switch variant := block.AsAny().(type) {
560 case anthropic.ToolUseBlock:
561 toolCall := message.ToolCall{
562 ID: variant.ID,
563 Name: variant.Name,
564 Input: string(variant.Input),
565 Type: string(variant.Type),
566 Finished: true,
567 }
568 toolCalls = append(toolCalls, toolCall)
569 }
570 }
571
572 return toolCalls
573}
574
575func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
576 return TokenUsage{
577 InputTokens: msg.Usage.InputTokens,
578 OutputTokens: msg.Usage.OutputTokens,
579 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
580 CacheReadTokens: msg.Usage.CacheReadInputTokens,
581 }
582}
583
584func (a *anthropicClient) Model() catwalk.Model {
585 return a.providerOptions.model(a.providerOptions.modelType)
586}