anthropic.go

package provider

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
	"github.com/anthropics/anthropic-sdk-go/bedrock"
	"github.com/anthropics/anthropic-sdk-go/option"
	"github.com/kujtimiihoxha/termai/internal/config"
	"github.com/kujtimiihoxha/termai/internal/llm/tools"
	"github.com/kujtimiihoxha/termai/internal/logging"
	"github.com/kujtimiihoxha/termai/internal/message"
)

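// anthropicOptions holds the Anthropic-specific settings applied through AnthropicOption functions.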
type anthropicOptions struct {
	useBedrock   bool
	disableCache bool
	shouldThink  func(userMessage string) bool
}

type AnthropicOption func(*anthropicOptions)

type anthropicClient struct {
	providerOptions providerClientOptions
	options         anthropicOptions
	client          anthropic.Client
}

type AnthropicClient ProviderClient

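// newAnthropicClient applies the Anthropic-specific options, configures the SDK
// client (optionally against AWS Bedrock), and returns an AnthropicClient.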
func newAnthropicClient(opts providerClientOptions) AnthropicClient {
	anthropicOpts := anthropicOptions{}
	for _, o := range opts.anthropicOptions {
		o(&anthropicOpts)
	}

	anthropicClientOptions := []option.RequestOption{}
	if opts.apiKey != "" {
		anthropicClientOptions = append(anthropicClientOptions, option.WithAPIKey(opts.apiKey))
	}
	if anthropicOpts.useBedrock {
		anthropicClientOptions = append(anthropicClientOptions, bedrock.WithLoadDefaultConfig(context.Background()))
	}

	client := anthropic.NewClient(anthropicClientOptions...)
	return &anthropicClient{
		providerOptions: opts,
		options:         anthropicOpts,
		client:          client,
	}
}

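// convertMessages maps the internal message history to Anthropic message
// params, marking up to two blocks with ephemeral cache control unless
// caching is disabled.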
func (a *anthropicClient) convertMessages(messages []message.Message) (anthropicMessages []anthropic.MessageParam) {
	cachedBlocks := 0
	for _, msg := range messages {
		switch msg.Role {
		case message.User:
			content := anthropic.NewTextBlock(msg.Content().String())
			if cachedBlocks < 2 && !a.options.disableCache {
				content.OfRequestTextBlock.CacheControl = anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				}
				cachedBlocks++
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(content))

		case message.Assistant:
			blocks := []anthropic.ContentBlockParamUnion{}
			if msg.Content().String() != "" {
				content := anthropic.NewTextBlock(msg.Content().String())
				if cachedBlocks < 2 && !a.options.disableCache {
					content.OfRequestTextBlock.CacheControl = anthropic.CacheControlEphemeralParam{
						Type: "ephemeral",
					}
					cachedBlocks++
				}
				blocks = append(blocks, content)
			}

			for _, toolCall := range msg.ToolCalls() {
				var inputMap map[string]any
				err := json.Unmarshal([]byte(toolCall.Input), &inputMap)
				if err != nil {
					continue
				}
				blocks = append(blocks, anthropic.ContentBlockParamOfRequestToolUseBlock(toolCall.ID, inputMap, toolCall.Name))
			}

			if len(blocks) == 0 {
				// This should never happen; we log it because it may indicate a bug in our cleanup method.
				logging.Warn("There is a message without content, investigate")
				continue
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewAssistantMessage(blocks...))

		case message.Tool:
			results := make([]anthropic.ContentBlockParamUnion, len(msg.ToolResults()))
			for i, toolResult := range msg.ToolResults() {
				results[i] = anthropic.NewToolResultBlock(toolResult.ToolCallID, toolResult.Content, toolResult.IsError)
			}
			anthropicMessages = append(anthropicMessages, anthropic.NewUserMessage(results...))
		}
	}
	return
}

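// convertTools maps the internal tool definitions to Anthropic tool params,
// adding an ephemeral cache breakpoint on the last tool unless caching is disabled.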
func (a *anthropicClient) convertTools(tools []tools.BaseTool) []anthropic.ToolUnionParam {
	anthropicTools := make([]anthropic.ToolUnionParam, len(tools))

	for i, tool := range tools {
		info := tool.Info()
		toolParam := anthropic.ToolParam{
			Name:        info.Name,
			Description: anthropic.String(info.Description),
			InputSchema: anthropic.ToolInputSchemaParam{
				Properties: info.Parameters,
				// TODO: figure out how to tell Claude which fields are required.
			},
		}

		if i == len(tools)-1 && !a.options.disableCache {
			toolParam.CacheControl = anthropic.CacheControlEphemeralParam{
				Type: "ephemeral",
			}
		}

		anthropicTools[i] = anthropic.ToolUnionParam{OfTool: &toolParam}
	}

	return anthropicTools
}

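// finishReason translates Anthropic stop reasons into the provider-agnostic
// message.FinishReason values.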
func (a *anthropicClient) finishReason(reason string) message.FinishReason {
	switch reason {
	case "end_turn":
		return message.FinishReasonEndTurn
	case "max_tokens":
		return message.FinishReasonMaxTokens
	case "tool_use":
		return message.FinishReasonToolUse
	case "stop_sequence":
		return message.FinishReasonEndTurn
	default:
		return message.FinishReasonUnknown
	}
}

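// preparedMessages assembles the final request params: model, system prompt
// (with a cache breakpoint), tools, and, when shouldThink approves the last
// user message, extended thinking with a budget of 80% of maxTokens.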
func (a *anthropicClient) preparedMessages(messages []anthropic.MessageParam, tools []anthropic.ToolUnionParam) anthropic.MessageNewParams {
	var thinkingParam anthropic.ThinkingConfigParamUnion
	lastMessage := messages[len(messages)-1]
	isUser := lastMessage.Role == anthropic.MessageParamRoleUser
	messageContent := ""
	temperature := anthropic.Float(0)
	if isUser {
		for _, m := range lastMessage.Content {
			if m.OfRequestTextBlock != nil && m.OfRequestTextBlock.Text != "" {
				messageContent = m.OfRequestTextBlock.Text
			}
		}
		if messageContent != "" && a.options.shouldThink != nil && a.options.shouldThink(messageContent) {
			thinkingParam = anthropic.ThinkingConfigParamUnion{
				OfThinkingConfigEnabled: &anthropic.ThinkingConfigEnabledParam{
					BudgetTokens: int64(float64(a.providerOptions.maxTokens) * 0.8),
					Type:         "enabled",
				},
			}
			temperature = anthropic.Float(1)
		}
	}

	return anthropic.MessageNewParams{
		Model:       anthropic.Model(a.providerOptions.model.APIModel),
		MaxTokens:   a.providerOptions.maxTokens,
		Temperature: temperature,
		Messages:    messages,
		Tools:       tools,
		Thinking:    thinkingParam,
		System: []anthropic.TextBlockParam{
			{
				Text: a.providerOptions.systemMessage,
				CacheControl: anthropic.CacheControlEphemeralParam{
					Type: "ephemeral",
				},
			},
		},
	}
}

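// send performs a blocking (non-streaming) Messages.New call, retrying on
// rate-limit and overload errors as decided by shouldRetry.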
func (a *anthropicClient) send(ctx context.Context, messages []message.Message, tools []tools.BaseTool) (response *ProviderResponse, err error) {
	preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
	cfg := config.Get()
	if cfg.Debug {
		jsonData, _ := json.Marshal(preparedMessages)
		logging.Debug("Prepared messages", "messages", string(jsonData))
	}
	attempts := 0
	for {
		attempts++
		anthropicResponse, err := a.client.Messages.New(
			ctx,
			preparedMessages,
		)
		// If there is an error, check whether we can retry the call.
		if err != nil {
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				return nil, retryErr
			}
			if retry {
				logging.WarnPersist(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), logging.PersistTimeArg, time.Millisecond*time.Duration(after+100))
				select {
				case <-ctx.Done():
					return nil, ctx.Err()
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			return nil, retryErr
		}

		content := ""
		for _, block := range anthropicResponse.Content {
			if text, ok := block.AsAny().(anthropic.TextBlock); ok {
				content += text.Text
			}
		}

		return &ProviderResponse{
			Content:   content,
			ToolCalls: a.toolCalls(*anthropicResponse),
			Usage:     a.usage(*anthropicResponse),
		}, nil
	}
}

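// stream opens a streaming Messages request and forwards thinking, content,
// and completion events on the returned channel, retrying retryable errors.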
func (a *anthropicClient) stream(ctx context.Context, messages []message.Message, tools []tools.BaseTool) <-chan ProviderEvent {
	preparedMessages := a.preparedMessages(a.convertMessages(messages), a.convertTools(tools))
	cfg := config.Get()
	if cfg.Debug {
		jsonData, _ := json.Marshal(preparedMessages)
		logging.Debug("Prepared messages", "messages", string(jsonData))
	}
	attempts := 0
	eventChan := make(chan ProviderEvent)
	go func() {
		for {
			attempts++
			anthropicStream := a.client.Messages.NewStreaming(
				ctx,
				preparedMessages,
			)
			accumulatedMessage := anthropic.Message{}

			for anthropicStream.Next() {
				event := anthropicStream.Current()
				err := accumulatedMessage.Accumulate(event)
				if err != nil {
					eventChan <- ProviderEvent{Type: EventError, Error: err}
					continue
				}

				switch event := event.AsAny().(type) {
				case anthropic.ContentBlockStartEvent:
					eventChan <- ProviderEvent{Type: EventContentStart}

				case anthropic.ContentBlockDeltaEvent:
					// TODO: check if we can somehow stream tool calls.
					if event.Delta.Type == "thinking_delta" && event.Delta.Thinking != "" {
						eventChan <- ProviderEvent{
							Type:     EventThinkingDelta,
							Thinking: event.Delta.Thinking,
						}
					} else if event.Delta.Type == "text_delta" && event.Delta.Text != "" {
						eventChan <- ProviderEvent{
							Type:    EventContentDelta,
							Content: event.Delta.Text,
						}
					}

				case anthropic.ContentBlockStopEvent:
					eventChan <- ProviderEvent{Type: EventContentStop}

				case anthropic.MessageStopEvent:
					content := ""
					for _, block := range accumulatedMessage.Content {
						if text, ok := block.AsAny().(anthropic.TextBlock); ok {
							content += text.Text
						}
					}

					eventChan <- ProviderEvent{
						Type: EventComplete,
						Response: &ProviderResponse{
							Content:      content,
							ToolCalls:    a.toolCalls(accumulatedMessage),
							Usage:        a.usage(accumulatedMessage),
							FinishReason: a.finishReason(string(accumulatedMessage.StopReason)),
						},
					}
				}
			}

			err := anthropicStream.Err()
			if err == nil || errors.Is(err, io.EOF) {
				close(eventChan)
				return
			}
			// If there is an error, check whether we can retry the call.
			retry, after, retryErr := a.shouldRetry(attempts, err)
			if retryErr != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: retryErr}
				close(eventChan)
				return
			}
			if retry {
				logging.WarnPersist(fmt.Sprintf("Retrying due to rate limit... attempt %d of %d", attempts, maxRetries), logging.PersistTimeArg, time.Millisecond*time.Duration(after+100))
				select {
				case <-ctx.Done():
					// Context cancelled while waiting to retry.
					if ctx.Err() != nil {
						eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
					}
					close(eventChan)
					return
				case <-time.After(time.Duration(after) * time.Millisecond):
					continue
				}
			}
			if ctx.Err() != nil {
				eventChan <- ProviderEvent{Type: EventError, Error: ctx.Err()}
			}

			close(eventChan)
			return
		}
	}()
	return eventChan
}

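// shouldRetry reports whether the request should be retried and after how many
// milliseconds. Only 429 (rate limit) and 529 (overloaded) responses are retried,
// up to maxRetries attempts, honouring the Retry-After header when present and
// otherwise falling back to exponential backoff with jitter.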
func (a *anthropicClient) shouldRetry(attempts int, err error) (bool, int64, error) {
	var apierr *anthropic.Error
	if !errors.As(err, &apierr) {
		return false, 0, err
	}

	// Only retry rate-limit (429) and overloaded (529) responses.
	if apierr.StatusCode != 429 && apierr.StatusCode != 529 {
		return false, 0, err
	}

	if attempts > maxRetries {
		return false, 0, fmt.Errorf("maximum retry attempts reached for rate limit: %d retries", maxRetries)
	}

	retryMs := 0
	retryAfterValues := apierr.Response.Header.Values("Retry-After")

	// Exponential backoff with 20% jitter, overridden by Retry-After (seconds) when present.
	backoffMs := 2000 * (1 << (attempts - 1))
	jitterMs := int(float64(backoffMs) * 0.2)
	retryMs = backoffMs + jitterMs
	if len(retryAfterValues) > 0 {
		if _, err := fmt.Sscanf(retryAfterValues[0], "%d", &retryMs); err == nil {
			retryMs = retryMs * 1000
		}
	}
	return true, int64(retryMs), nil
}

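// toolCalls extracts tool use blocks from an Anthropic message as internal ToolCall values.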
func (a *anthropicClient) toolCalls(msg anthropic.Message) []message.ToolCall {
	var toolCalls []message.ToolCall

	for _, block := range msg.Content {
		switch variant := block.AsAny().(type) {
		case anthropic.ToolUseBlock:
			toolCall := message.ToolCall{
				ID:    variant.ID,
				Name:  variant.Name,
				Input: string(variant.Input),
				Type:  string(variant.Type),
			}
			toolCalls = append(toolCalls, toolCall)
		}
	}

	return toolCalls
}

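// usage converts Anthropic token usage, including prompt cache statistics, into TokenUsage.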
func (a *anthropicClient) usage(msg anthropic.Message) TokenUsage {
	return TokenUsage{
		InputTokens:         msg.Usage.InputTokens,
		OutputTokens:        msg.Usage.OutputTokens,
		CacheCreationTokens: msg.Usage.CacheCreationInputTokens,
		CacheReadTokens:     msg.Usage.CacheReadInputTokens,
	}
}

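// WithAnthropicBedrock routes requests through AWS Bedrock instead of the Anthropic API.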
func WithAnthropicBedrock(useBedrock bool) AnthropicOption {
	return func(options *anthropicOptions) {
		options.useBedrock = useBedrock
	}
}

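// WithAnthropicDisableCache turns off prompt caching (no ephemeral cache control blocks are set).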
func WithAnthropicDisableCache() AnthropicOption {
	return func(options *anthropicOptions) {
		options.disableCache = true
	}
}

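// DefaultShouldThinkFn enables extended thinking when the user message contains the word "think".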
func DefaultShouldThinkFn(s string) bool {
	return strings.Contains(strings.ToLower(s), "think")
}

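// WithAnthropicShouldThinkFn sets the predicate that decides whether a user message should enable extended thinking.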
func WithAnthropicShouldThinkFn(fn func(string) bool) AnthropicOption {
	return func(options *anthropicOptions) {
		options.shouldThink = fn
	}
}