package provider

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/anthropics/anthropic-sdk-go/bedrock"
	"github.com/anthropics/anthropic-sdk-go/option"
	"github.com/charmbracelet/crush/internal/config"
	"github.com/charmbracelet/crush/internal/fur/provider"
	"github.com/charmbracelet/crush/internal/llm/tools"
	"github.com/charmbracelet/crush/internal/message"
)

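// anthropicClient implements ProviderClient against the Anthropic API,
// optionally routed through AWS Bedrock.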
type anthropicClient struct {
	providerOptions   providerClientOptions
	useBedrock        bool
	client            anthropic.Client
	adjustedMaxTokens int // Used when the context limit is hit
}

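// AnthropicClient is the ProviderClient type exposed by this provider.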
type AnthropicClient ProviderClient

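// newAnthropicClient builds an anthropicClient from the given options,
// optionally targeting AWS Bedrock instead of the Anthropic API.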
func newAnthropicClient(opts providerClientOptions, useBedrock bool) AnthropicClient {
	return &anthropicClient{
		providerOptions: opts,
		useBedrock:      useBedrock, // must be stored: shouldRetry uses it to rebuild the client
		client:          createAnthropicClient(opts, useBedrock),
	}
}

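// createAnthropicClient constructs the underlying SDK client, wiring in the
// API key and, when requested, the default AWS Bedrock configuration.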
func createAnthropicClient(opts providerClientOptions, useBedrock bool) anthropic.Client {
	anthropicClientOptions := []option.RequestOption{}
	if opts.apiKey != "" {
		anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
	}
	if useBedrock {
		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
	}
	return anthropic.NewClient(anthropicClientOptions...)
}

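// convertMessages translates internal messages into Anthropic message
// parameters, marking the most recent messages as cacheable.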
func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
	for i, msg := range messages {
		// Mark the last two messages as cacheable so the API can reuse the prompt prefix.
		cache := false
		if i > len(messages)-3 {
			cache = true
		}
		switch msg.Role {
		case message.User:
			content := anthropic.NewTextBlock(msg.Content().String())
			if cache && !a.providerOptions.disableCache {
				content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				}
			}
			var contentBlocks []anthropic.ContentBlockParamUnion
			contentBlocks = append(contentBlocks, content)
			for _, binaryContent := range msg.BinaryContent() {
				base64Image := binaryContent.String(provider.InferenceProviderAnthropic)
				imageBlock := anthropic.NewImageBlockBase64(binaryContent.MIMEType, base64Image)
				contentBlocks = append(contentBlocks, imageBlock)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(contentBlocks...))

		case message.Assistant:
			blocks := []anthropic.ContentBlockParamUnion{}

			// Add thinking blocks first if present (required when thinking is enabled with tool use)
			if reasoningContent := msg.ReasoningContent(); reasoningContent.Thinking != "" {
				thinkingBlock := anthropic.NewThinkingBlock(reasoningContent.Signature, reasoningContent.Thinking)
				blocks = append(blocks, thinkingBlock)
			}

			if msg.Content().String() != "" {
				content := anthropic.NewTextBlock(msg.Content().String())
				if cache && !a.providerOptions.disableCache {
					content.OfText.CacheControl = anthropic.CacheControlEphemeralParam{
						Type: "ephemeral",
					}
				}
				blocks = append(blocks, content)
			}

			for _, toolCall := range msg.ToolCalls() {
				var inputMap map[string]any
				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
				if err != nil {
					continue
				}
				blocks = append(blocks, anthropic.NewToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
			}

			if len(blocks) == 0 {
				slog.Warn("Assistant message has no content blocks; skipping (this should not happen)")
				continue
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))

		case message.Tool:
			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
			for i, toolResult := range msg.ToolResults() {
				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
		}
	}
	return
}

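// convertTools translates internal tool definitions into Anthropic tool
// parameters, marking the last tool as cacheable.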
func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))

	for i, tool := range tools {
		info := tool.Info()
		toolParam := anthropic.ToolParam{
			Name:        info.Name,
			Description: anthropic.String(info.Description),
			InputSchema: anthropic.ToolInputSchemaParam{
				Properties: info.Parameters,
				// TODO: figure out how to tell Claude which fields are required.
			},
		}

		if i == len(tools)-1 && !a.providerOptions.disableCache {
			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
				Type: "ephemeral",
			}
		}

		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
	}

	return anthropicTools
}

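// finishReason maps an Anthropic stop reason to the internal FinishReason.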
func (a *anthropicClient) finishReason(reason string) message.FinishReason {
	switch reason {
	case "end_turn":
		return message.FinishReasonEndTurn
	case "max_tokens":
		return message.FinishReasonMaxTokens
	case "tool_use":
		return message.FinishReasonToolUse
	case "stop_sequence":
		return message.FinishReasonEndTurn
	default:
		return message.FinishReasonUnknown
	}
}

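// isThinkingEnabled reports whether extended thinking is enabled for the
// currently selected model.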
func (a *anthropicClient) isThinkingEnabled() bool {
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	return a.Model().CanReason && modelConfig.Think
}

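// preparedMessages assembles the full request parameters: model, token
// limits, temperature, thinking configuration, and the cached system prompt.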
func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
	model := a.providerOptions.model(a.providerOptions.modelType)
	var thinkingParam anthropic.ThinkingConfigParamUnion
	cfg := config.Get()
	modelConfig := cfg.Models[config.SelectedModelTypeLarge]
	if a.providerOptions.modelType == config.SelectedModelTypeSmall {
		modelConfig = cfg.Models[config.SelectedModelTypeSmall]
	}
	temperature := anthropic.Float(0)

	maxTokens := model.DefaultMaxTokens
	if modelConfig.MaxTokens > 0 {
		maxTokens = modelConfig.MaxTokens
	}
	if a.isThinkingEnabled() {
		// Reserve 80% of the output budget for thinking; the API requires temperature 1 when thinking is enabled.
		thinkingParam = anthropic.ThinkingConfigParamOfEnabled(int64(float64(maxTokens) * 0.8))
		temperature = anthropic.Float(1)
	}
	// Override max tokens if set in provider options
	if a.providerOptions.maxTokens > 0 {
		maxTokens = a.providerOptions.maxTokens
	}

	// Use adjusted max tokens if the context limit was hit
	if a.adjustedMaxTokens > 0 {
		maxTokens = int64(a.adjustedMaxTokens)
	}

	return anthropic.MessageNewParams{
		Model:       anthropic.Model(model.ID),
		MaxTokens:   maxTokens,
		Temperature: temperature,
		Messages:    messages,
		Tools:       tools,
		Thinking:    thinkingParam,
		System: []anthropic.TextBlockParam{
			{
				Text: a.providerOptions.systemMessage,
				CacheControl: anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				},
			},
		},
	}
}

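// send performs a non-streaming completion request, retrying on retryable
// errors and adjusting max_tokens when the context limit is exceeded.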
func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
	cfg := config.Get()

	attempts := 0
	for {
		attempts++
		// Prepare messages on each attempt in case max_tokens was adjusted
		preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
		if cfg.Options.Debug {
			jsonData, _ := json.Marshal(preparedMessages)
			slog.Debug("Prepared messages", "messages", string(jsonData))
		}

		var opts []option.RequestOption
		if a.isThinkingEnabled() {
			opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
		}
		anthropicResponse, err := a.client.Messages.New(
			ctx,
			preparedMessages,
			opts...,
		)
		// If there is an error, check whether the call can be retried
		if err != nil {
			slog.Error("Error in Anthropic API call", "error", err)
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				return nil, retryErr
			}
			if retry {
				slog.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries))
				select {
				case <-ctx.Done():
					return nil, ctx.Err()
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			return nil, err
		}

		content := ""
		for _, block := range anthropicResponse.Content {
			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
				content += text.Text
			}
		}

		return &ProviderResponse{
			Content:   content,
			ToolCalls: a.toolCalls(*anthropicResponse),
			Usage:     a.usage(*anthropicResponse),
		}, nil
	}
}

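// stream performs a streaming completion request and emits provider events
// on the returned channel, retrying on retryable errors where possible.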
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	cfg := config.Get()
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			// Prepare messages on each attempt in case max_tokens was adjusted
			preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
			if cfg.Options.Debug {
				jsonData, _ := json.Marshal(preparedMessages)
				slog.Debug("Prepared messages", "messages", string(jsonData))
			}

			var opts []option.RequestOption
			if a.isThinkingEnabled() {
				opts = append(opts, option.WithHeaderAdd("anthropic-beta", "interleaved-thinking-2025-05-14"))
			}

			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
				opts...,
			)
			accumulatedMessage := anthropic.Message{}

			currentToolCallID := ""
			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					slog.Warn("Error accumulating message", "error", err)
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					switch event.ContentBlock.Type {
					case "text":
						eventChan <- ProviderEvent{Type: EventContentStart}
					case "tool_use":
						currentToolCallID = event.ContentBlock.ID
						eventChan <- ProviderEvent{
							Type: EventToolUseStart,
							ToolCall: &message.ToolCall{
								ID:       event.ContentBlock.ID,
								Name:     event.ContentBlock.Name,
								Finished: false,
							},
						}
					}

				case anthropic.ContentBlockDeltaEvent:
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "signature_delta" && event.Delta.Signature != "" {
						eventChan <- ProviderEvent{
							Type:      EventSignatureDelta,
							Signature: event.Delta.Signature,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					} else if event.Delta.Type == "input_json_delta" {
						if currentToolCallID != "" {
							eventChan <- ProviderEvent{
								Type: EventToolUseDelta,
								ToolCall: &message.ToolCall{
									ID:       currentToolCallID,
									Finished: false,
									Input:    event.Delta.PartialJSON,
								},
							}
						}
					}

				case anthropic.ContentBlockStopEvent:
					if currentToolCallID != "" {
						eventChan <- ProviderEvent{
							Type: EventToolUseStop,
							ToolCall: &message.ToolCall{
								ID: currentToolCallID,
							},
						}
						currentToolCallID = ""
					} else {
						eventChan <- ProviderEvent{Type: EventContentStop}
					}

				case anthropic.MessageStopEvent:
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
						Content: content,
					}
				}
			}

			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}
			// If there is an error, check whether the call can be retried
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				slog.Warn(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries))
				select {
				case <-ctx.Done():
					// context cancelled
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}

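// shouldRetry decides whether a failed request should be retried and, if so,
// after how many milliseconds.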
func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
	var apiErr *anthropic.Error
	if !errors.As(err, &apiErr) {
		return false, 0, err
	}

	if attempts > maxRetries {
		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
	}

	// On 401, re-resolve the API key and rebuild the client before retrying.
	if apiErr.StatusCode == 401 {
		a.providerOptions.apiKey, err = config.Get().Resolve(a.providerOptions.config.APIKey)
		if err != nil {
			return false, 0, fmt.Errorf("failed to resolve API key: %w", err)
		}
		a.client = createAnthropicClient(a.providerOptions, a.useBedrock)
		return true, 0, nil
	}

	// Handle context limit exceeded error (400 Bad Request)
	if apiErr.StatusCode == 400 {
		if adjusted, ok := a.handleContextLimitError(apiErr); ok {
			a.adjustedMaxTokens = adjusted
			slog.Debug("Adjusted max_tokens due to context limit", "new_max_tokens", adjusted)
			return true, 0, nil
		}
	}

	isOverloaded := strings.Contains(apiErr.Error(), "overloaded") || strings.Contains(apiErr.Error(), "rate limit exceeded")
	if apiErr.StatusCode != 429 && apiErr.StatusCode != 529 && !isOverloaded {
		return false, 0, err
	}

	// Exponential backoff with 20% jitter; a Retry-After header (in seconds) takes precedence.
	backoffMs := 2000 * (1 << (attempts - 1))
	jitterMs := int(float64(backoffMs) * 0.2)
	retryMs := backoffMs + jitterMs
	if retryAfterValues := apiErr.Response.Header.Values("Retry-After"); len(retryAfterValues) > 0 {
		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
			retryMs = retryMs * 1000
		}
	}
	return true, int64(retryMs), nil
}

// handleContextLimitError parses a context-limit error message and returns an
// adjusted max_tokens value that fits within the model's context window.
func (a *anthropicClient) handleContextLimitError(apiErr *anthropic.Error) (int, bool) {
	// Parse error messages like: "input length and `max_tokens` exceed context limit: 154978 + 50000 > 200000"
	errorMsg := apiErr.Error()

	re := regexp.MustCompile("input length and `max_tokens` exceed context limit: (\\d+) \\+ (\\d+) > (\\d+)")
	matches := re.FindStringSubmatch(errorMsg)

	if len(matches) != 4 {
		return 0, false
	}

	inputTokens, err1 := strconv.Atoi(matches[1])
	contextLimit, err2 := strconv.Atoi(matches[3])

	if err1 != nil || err2 != nil {
		return 0, false
	}

	// Calculate safe max_tokens with a buffer of 1000 tokens
	safeMaxTokens := contextLimit - inputTokens - 1000

	// Ensure we don't go below a minimum threshold
	safeMaxTokens = max(safeMaxTokens, 1000)

	return safeMaxTokens, true
}

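// toolCalls extracts tool calls from an Anthropic message.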
func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
	var toolCalls []message.ToolCall

	for _, block := range msg.Content {
		switch variant := block.AsAny().(type) {
		case anthropic.ToolUseBlock:
			toolCall := message.ToolCall{
				ID:       variant.ID,
				Name:     variant.Name,
				Input:    string(variant.Input),
				Type:     string(variant.Type),
				Finished: true,
			}
			toolCalls = append(toolCalls, toolCall)
		}
	}

	return toolCalls
}

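// usage extracts token usage, including cache statistics, from a response.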
func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
	return TokenUsage{
		InputTokens:         msg.Usage.InputTokens,
		OutputTokens:        msg.Usage.OutputTokens,
		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
	}
}

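// Model returns the currently selected model for this client.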
func (a *anthropicClient) Model() provider.Model {
	return a.providerOptions.model(a.providerOptions.modelType)
}