1package provider
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "regexp"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/anthropics/anthropic-sdk-go"
16 "github.com/anthropics/anthropic-sdk-go/bedrock"
17 "github.com/anthropics/anthropic-sdk-go/option"
18 "github.com/charmbracelet/catwalk/pkg/catwalk"
19 "github.com/charmbracelet/crush/internal/config"
20 "github.com/charmbracelet/crush/internal/llm/tools"
21 "github.com/charmbracelet/crush/internal/message"
22)
23
24type anthropicClient struct {
25 providerOptions providerClientOptions
26 useBedrock bool
27 client anthropic.Client
28 adjustedMaxTokens int // Used when context limit is hit
29}
30
31type AnthropicClient ProviderClient
32
33func newAnthropicClient(opts providerClientOptions, useBedrock bool) AnthropicClient {
34 return &anthropicClient{
35 providerOptions: opts,
36 client: createAnthropicClient(opts, useBedrock),
37 }
38}
39
40func createAnthropicClient(opts providerClientOptions, useBedrock bool) anthropic.Client {
41 anthropicClientOptions := []option.RequestOption{}
42
43 // Check if Authorization header is provided in extra headers
44 hasBearerAuth := false
45 if opts.extraHeaders != nil {
46 for key := range opts.extraHeaders {
47 if strings.ToLower(key) == "authorization" {
48 hasBearerAuth = true
49 break
50 }
51 }
52 }
53
54 isBearerToken := strings.HasPrefix(opts.apiKey, "Bearer ")
55
56 if opts.apiKey != "" && !hasBearerAuth {
57 if isBearerToken {
58 slog.Debug("API key starts with 'Bearer ', using as Authorization header")
59 anthropicClientOptions = append(anthropicClientOptions, option.WithHeader("Authorization", opts.apiKey))
60 } else {
61 // Use standard X-Api-Key header
62 anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
63 }
64 } else if hasBearerAuth {
65 slog.Debug("Skipping X-Api-Key header because Authorization header is provided")
66 }
67 if useBedrock {
68 anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
69 }
70 for key, header := range opts.extraHeaders {
71 anthropicClientOptions = append(anthropicClientOptions, option.WithHeaderAdd(key, header))
72 }
73 for key, value := range opts.extraBody {
74 anthropicClientOptions = append(anthropicClientOptions, option.WithJSONSet(key, value))
75 }
76 return anthropic.NewClient(anthropicClientOptions...)
77}
78
79func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
80 for i, msg := range messages {
81 cache := false
82 if i > len(messages)-3 {
83 cache = true
84 }
85 switch msg.Role {
86 case message.User:
87 content := anthropic.NewTextBlock(msg.Content().String())
88 if cache && !a.providerOptions.disableCache {
89 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
90 Type: "ephemeral",
91 }
92 }
93 var contentBlocks []anthropic.ContentBlockParamUnion
94 contentBlocks = append(contentBlocks, content)
95 for _, binaryContent := range msg.BinaryContent() {
96 base64Image := binaryContent.String(catwalk.InferenceProviderAnthropic)
97 imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
98 contentBlocks = append(contentBlocks, imageBlock)
99 }
100 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))
101
102 case message.Assistant:
103 blocks := []anthropic.ContentBlockParamUnion{}
104
105 // Add thinking blocks first if present (required when thinking is enabled with tool use)
106 if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
107 thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
108 blocks = append(blocks, thinkingBlock)
109 }
110
111 if msg.Content().String() != "" {
112 content := anthropic.NewTextBlock(msg.Content().String())
113 if cache && !a.providerOptions.disableCache {
114 content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
115 Type: "ephemeral",
116 }
117 }
118 blocks = append(blocks, content)
119 }
120
121 for _, toolCall := range msg.ToolCalls() {
122 var inputMap map[string]any
123 err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
124 if err != nil {
125 continue
126 }
127 blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
128 }
129
130 if len(blocks) == 0 {
131 slog.Warn("There is a message without content, investigate, this should not happen")
132 continue
133 }
134 anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))
135
136 case message.Tool:
137 results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
138 for i, toolResult := range msg.ToolResults() {
139 results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
140 }
141 anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
142 }
143 }
144 return
145}
146
147func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
148 anthropicTools := make([]anthropic.ToolUnionParam, len(tools))
149
150 for i, tool := range tools {
151 info := tool.Info()
152 toolParam := anthropic.ToolParam{
153 Name: info.Name,
154 Description: anthropic.String(info.Description),
155 InputSchema: anthropic.ToolInputSchemaParam{
156 Properties: info.Parameters,
157 // TODO: figure out how we can tell claude the required fields?
158 },
159 }
160
161 if i == len(tools)-1 && !a.providerOptions.disableCache {
162 toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
163 Type: "ephemeral",
164 }
165 }
166
167 anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
168 }
169
170 return anthropicTools
171}
172
173func (a *anthropicClient) finishReason(reason string) message.FinishReason {
174 switch reason {
175 case "end_turn":
176 return message.FinishReasonEndTurn
177 case "max_tokens":
178 return message.FinishReasonMaxTokens
179 case "tool_use":
180 return message.FinishReasonToolUse
181 case "stop_sequence":
182 return message.FinishReasonEndTurn
183 default:
184 return message.FinishReasonUnknown
185 }
186}
187
188func (a *anthropicClient) isThinkingEnabled() bool {
189 cfg := config.Get()
190 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
191 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
192 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
193 }
194 return a.Model().CanReason && modelConfig.Think
195}
196
197func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
198 model := a.providerOptions.model(a.providerOptions.modelType)
199 var thinkingParam anthropic.ThinkingConfigParamUnion
200 cfg := config.Get()
201 modelConfig := cfg.Models[config.SelectedModelTypeLarge]
202 if a.providerOptions.modelType == config.SelectedModelTypeSmall {
203 modelConfig = cfg.Models[config.SelectedModelTypeSmall]
204 }
205 temperature := anthropic.Float(0)
206
207 maxTokens := model.DefaultMaxTokens
208 if modelConfig.MaxTokens > 0 {
209 maxTokens = modelConfig.MaxTokens
210 }
211 if a.isThinkingEnabled() {
212 thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
213 temperature = anthropic.Float(1)
214 }
215 // Override max tokens if set in provider options
216 if a.providerOptions.maxTokens > 0 {
217 maxTokens = a.providerOptions.maxTokens
218 }
219
220 // Use adjusted max tokens if context limit was hit
221 if a.adjustedMaxTokens > 0 {
222 maxTokens = int64(a.adjustedMaxTokens)
223 }
224
225 systemBlocks := []anthropic.TextBlockParam{}
226 slog.Info("Testing", "prefix", a.providerOptions.systemPromptPrefix)
227
228 // Add custom system prompt prefix if configured
229 if a.providerOptions.systemPromptPrefix != "" {
230 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
231 Text: a.providerOptions.systemPromptPrefix,
232 CacheControl: anthropic.CacheControlEphemeralParam{
233 Type: "ephemeral",
234 },
235 })
236 }
237
238 systemBlocks = append(systemBlocks, anthropic.TextBlockParam{
239 Text: a.providerOptions.systemMessage,
240 CacheControl: anthropic.CacheControlEphemeralParam{
241 Type: "ephemeral",
242 },
243 })
244
245 return anthropic.MessageNewParams{
246 Model: anthropic.Model(model.ID),
247 MaxTokens: maxTokens,
248 Temperature: temperature,
249 Messages: messages,
250 Tools: tools,
251 Thinking: thinkingParam,
252 System: systemBlocks,
253 }
254}
255
256func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
257 cfg := config.Get()
258
259 attempts := 0
260 for {
261 attempts++
262 // Prepare messages on each attempt in case max_tokens was adjusted
263 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
264 if cfg.Options.Debug {
265 jsonData, _ := json.Marshal(preparedMessages)
266 slog.Debug("Prepared messages", "messages", string(jsonData))
267 }
268
269 var opts []option.RequestOption
270 if a.isThinkingEnabled() {
271 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
272 }
273 anthropicResponse, err := a.client.Messages.New(
274 ctx,
275 preparedMessages,
276 opts...,
277 )
278 // If there is an error we are going to see if we can retry the call
279 if err != nil {
280 slog.Error("Error in Anthropic API call", "error", err)
281 retry, after, retryErr := a.shouldRetry(attempts, err)
282 if retryErr != nil {
283 return nil, retryErr
284 }
285 if retry {
286 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
287 select {
288 case <-ctx.Done():
289 return nil, ctx.Err()
290 case <-time.After(time.Duration(after) * time.Millisecond):
291 continue
292 }
293 }
294 return nil, retryErr
295 }
296
297 content := ""
298 for _, block := range anthropicResponse.Content {
299 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
300 content += text.Text
301 }
302 }
303
304 return &ProviderResponse{
305 Content: content,
306 ToolCalls: a.toolCalls(*anthropicResponse),
307 Usage: a.usage(*anthropicResponse),
308 }, nil
309 }
310}
311
312func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
313 cfg := config.Get()
314 attempts := 0
315 eventChan := make(chan ProviderEvent)
316 go func() {
317 for {
318 attempts++
319 // Prepare messages on each attempt in case max_tokens was adjusted
320 preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
321 if cfg.Options.Debug {
322 jsonData, _ := json.Marshal(preparedMessages)
323 slog.Debug("Prepared messages", "messages", string(jsonData))
324 }
325
326 var opts []option.RequestOption
327 if a.isThinkingEnabled() {
328 opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
329 }
330
331 anthropicStream := a.client.Messages.NewStreaming(
332 ctx,
333 preparedMessages,
334 opts...,
335 )
336 accumulatedMessage := anthropic.Message{}
337
338 currentToolCallID := ""
339 for anthropicStream.Next() {
340 event := anthropicStream.Current()
341 err := accumulatedMessage.Accumulate(event)
342 if err != nil {
343 slog.Warn("Error accumulating message", "error", err)
344 continue
345 }
346
347 switch event := event.AsAny().(type) {
348 case anthropic.ContentBlockStartEvent:
349 switch event.ContentBlock.Type {
350 case "text":
351 eventChan <- ProviderEvent{Type: EventContentStart}
352 case "tool_use":
353 currentToolCallID = event.ContentBlock.ID
354 eventChan <- ProviderEvent{
355 Type: EventToolUseStart,
356 ToolCall: &message.ToolCall{
357 ID: event.ContentBlock.ID,
358 Name: event.ContentBlock.Name,
359 Finished: false,
360 },
361 }
362 }
363
364 case anthropic.ContentBlockDeltaEvent:
365 if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
366 eventChan <- ProviderEvent{
367 Type: EventThinkingDelta,
368 Thinking: event.Delta.Thinking,
369 }
370 } else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
371 eventChan <- ProviderEvent{
372 Type: EventSignatureDelta,
373 Signature: event.Delta.Signature,
374 }
375 } else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
376 eventChan <- ProviderEvent{
377 Type: EventContentDelta,
378 Content: event.Delta.Text,
379 }
380 } else if event.Delta.Type == "input_json_delta" {
381 if currentToolCallID != "" {
382 eventChan <- ProviderEvent{
383 Type: EventToolUseDelta,
384 ToolCall: &message.ToolCall{
385 ID: currentToolCallID,
386 Finished: false,
387 Input: event.Delta.PartialJSON,
388 },
389 }
390 }
391 }
392 case anthropic.ContentBlockStopEvent:
393 if currentToolCallID != "" {
394 eventChan <- ProviderEvent{
395 Type: EventToolUseStop,
396 ToolCall: &message.ToolCall{
397 ID: currentToolCallID,
398 },
399 }
400 currentToolCallID = ""
401 } else {
402 eventChan <- ProviderEvent{Type: EventContentStop}
403 }
404
405 case anthropic.MessageStopEvent:
406 content := ""
407 for _, block := range accumulatedMessage.Content {
408 if text, ok := block.AsAny().(anthropic.TextBlock); ok {
409 content += text.Text
410 }
411 }
412
413 eventChan <- ProviderEvent{
414 Type: EventComplete,
415 Response: &ProviderResponse{
416 Content: content,
417 ToolCalls: a.toolCalls(accumulatedMessage),
418 Usage: a.usage(accumulatedMessage),
419 FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
420 },
421 Content: content,
422 }
423 }
424 }
425
426 err := anthropicStream.Err()
427 if err == nil || errors.Is(err, io.EOF) {
428 close(eventChan)
429 return
430 }
431
432 // If there is an error we are going to see if we can retry the call
433 retry, after, retryErr := a.shouldRetry(attempts, err)
434 if retryErr != nil {
435 eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
436 close(eventChan)
437 return
438 }
439 if retry {
440 slog.Warn("Retrying due to rate limit", "attempt", attempts, "max_retries", maxRetries)
441 select {
442 case <-ctx.Done():
443 // context cancelled
444 if ctx.Err() != nil {
445 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
446 }
447 close(eventChan)
448 return
449 case <-time.After(time.Duration(after) * time.Millisecond):
450 continue
451 }
452 }
453 if ctx.Err() != nil {
454 eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
455 }
456
457 close(eventChan)
458 return
459 }
460 }()
461 return eventChan
462}
463
464func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
465 var apiErr *anthropic.Error
466 if !errors.As(err, &apiErr) {
467 return false, 0, err
468 }
469
470 if attempts > maxRetries {
471 return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
472 }
473
474 if apiErr.StatusCode == 401 {
475 a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
476 if err != nil {
477 return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
478 }
479 a.client = createAnthropicClient(a.providerOptions, a.useBedrock)
480 return true, 0, nil
481 }
482
483 // Handle context limit exceeded error (400 Bad Request)
484 if apiErr.StatusCode == 400 {
485 if adjusted, ok := a.handleContextLimitError(apiErr); ok {
486 a.adjustedMaxTokens = adjusted
487 slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
488 return true, 0, nil
489 }
490 }
491
492 isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
493 if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
494 return false, 0, err
495 }
496
497 retryMs := 0
498 retryAfterValues := apiErr.Response.Header.Values("Retry-After")
499
500 backoffMs := 2000 * (1 << (attempts - 1))
501 jitterMs := int(float64(backoffMs) * 0.2)
502 retryMs = backoffMs + jitterMs
503 if len(retryAfterValues) > 0 {
504 if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
505 retryMs = retryMs * 1000
506 }
507 }
508 return true, int64(retryMs), nil
509}
510
511// handleContextLimitError parses context limit error and returns adjusted max_tokens
512func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
513 // Parse error message like: "input length and max_tokens exceed context limit: 154978 + 50000 > 200000"
514 errorMsg := apiErr.Error()
515
516 re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
517 matches := re.FindStringSubmatch(errorMsg)
518
519 if len(matches) != 4 {
520 return 0, false
521 }
522
523 inputTokens, err1 := strconv.Atoi(matches[1])
524 contextLimit, err2 := strconv.Atoi(matches[3])
525
526 if err1 != nil || err2 != nil {
527 return 0, false
528 }
529
530 // Calculate safe max_tokens with a buffer of 1000 tokens
531 safeMaxTokens := contextLimit - inputTokens - 1000
532
533 // Ensure we don't go below a minimum threshold
534 safeMaxTokens = max(safeMaxTokens, 1000)
535
536 return safeMaxTokens, true
537}
538
539func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
540 var toolCalls []message.ToolCall
541
542 for _, block := range msg.Content {
543 switch variant := block.AsAny().(type) {
544 case anthropic.ToolUseBlock:
545 toolCall := message.ToolCall{
546 ID: variant.ID,
547 Name: variant.Name,
548 Input: string(variant.Input),
549 Type: string(variant.Type),
550 Finished: true,
551 }
552 toolCalls = append(toolCalls, toolCall)
553 }
554 }
555
556 return toolCalls
557}
558
559func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
560 return TokenUsage{
561 InputTokens: msg.Usage.InputTokens,
562 OutputTokens: msg.Usage.OutputTokens,
563 CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
564 CacheReadTokens: msg.Usage.CacheReadInputTokens,
565 }
566}
567
568func (a *anthropicClient) Model() catwalk.Model {
569 return a.providerOptions.model(a.providerOptions.modelType)
570}