From c35e7000176bab3815e5e8f50a817486c04454d2 Mon Sep 17 00:00:00 2001 From: Kujtim Hoxha Date: Mon, 28 Jul 2025 20:27:23 +0200 Subject: [PATCH] chore: proper caching for anthropic models with openrouter --- internal/llm/provider/openai.go | 45 ++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/internal/llm/provider/openai.go b/internal/llm/provider/openai.go index 23e247830a48ba1860ba7bde5059da69fab6d3ac..498831d92b76b3a6f1c6c155923b2d35e40e7f78 100644 --- a/internal/llm/provider/openai.go +++ b/internal/llm/provider/openai.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "log/slog" + "strings" "time" "github.com/charmbracelet/catwalk/pkg/catwalk" @@ -56,14 +57,33 @@ func createOpenAIClient(opts providerClientOptions) openai.Client { } func (o *openaiClient) convertMessages(messages []message.Message) (openaiMessages []openai.ChatCompletionMessageParamUnion) { + isAnthropicModel := o.providerOptions.config.ID == "openrouter" && strings.HasPrefix(o.Model().ID, "anthropic/") // Add system message first systemMessage := o.providerOptions.systemMessage if o.providerOptions.systemPromptPrefix != "" { systemMessage = o.providerOptions.systemPromptPrefix + "\n" + systemMessage } - openaiMessages = append(openaiMessages, openai.SystemMessage(systemMessage)) - for _, msg := range messages { + systemTextBlock := openai.ChatCompletionContentPartTextParam{Text: systemMessage} + if isAnthropicModel && !o.providerOptions.disableCache { + systemTextBlock.SetExtraFields( + map[string]any{ + "cache_control": map[string]string{ + "type": "ephemeral", + }, + }, + ) + } + var content []openai.ChatCompletionContentPartTextParam + content = append(content, systemTextBlock) + system := openai.SystemMessage(content) + openaiMessages = append(openaiMessages, system) + + for i, msg := range messages { + cache := false + if i > len(messages)-3 { + cache = true + } switch msg.Role { case message.User: var content []openai.ChatCompletionContentPartUnionParam @@ -75,6 +95,13 @@ func (o *openaiClient) convertMessages(messages []message.Message) (openaiMessag content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock}) } + if cache && !o.providerOptions.disableCache && isAnthropicModel { + textBlock.SetExtraFields(map[string]any{ + "cache_control": map[string]string{ + "type": "ephemeral", + }, + }) + } openaiMessages = append(openaiMessages, openai.UserMessage(content)) @@ -86,8 +113,20 @@ func (o *openaiClient) convertMessages(messages []message.Message) (openaiMessag hasContent := false if msg.Content().String() != "" { hasContent = true + textBlock := openai.ChatCompletionContentPartTextParam{Text: msg.Content().String()} + if cache && !o.providerOptions.disableCache && isAnthropicModel { + textBlock.SetExtraFields(map[string]any{ + "cache_control": map[string]string{ + "type": "ephemeral", + }, + }) + } assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{ - OfString: openai.String(msg.Content().String()), + OfArrayOfContentParts: []openai.ChatCompletionAssistantMessageParamContentArrayOfContentPartUnion{ + { + OfText: &textBlock, + }, + }, } }