1// Package agent is the core orchestration layer for Crush AI agents.
2//
3// It provides session-based AI agent functionality for managing
4// conversations, tool execution, and message handling. It coordinates
5// interactions between language models, messages, sessions, and tools while
6// handling features like automatic summarization, queuing, and token
7// management.
8package agent
9
10import (
11 "cmp"
12 "context"
13 _ "embed"
14 "encoding/base64"
15 "errors"
16 "fmt"
17 "log/slog"
18 "net/http"
19 "os"
20 "regexp"
21 "strconv"
22 "strings"
23 "sync"
24 "time"
25
26 "charm.land/catwalk/pkg/catwalk"
27 "charm.land/fantasy"
28 "charm.land/fantasy/providers/anthropic"
29 "charm.land/fantasy/providers/bedrock"
30 "charm.land/fantasy/providers/google"
31 "charm.land/fantasy/providers/openai"
32 "charm.land/fantasy/providers/openrouter"
33 "charm.land/fantasy/providers/vercel"
34 "charm.land/lipgloss/v2"
35 "github.com/charmbracelet/crush/internal/agent/hyper"
36 "github.com/charmbracelet/crush/internal/agent/notify"
37 "github.com/charmbracelet/crush/internal/agent/tools"
38 "github.com/charmbracelet/crush/internal/agent/tools/mcp"
39 "github.com/charmbracelet/crush/internal/config"
40 "github.com/charmbracelet/crush/internal/csync"
41 "github.com/charmbracelet/crush/internal/message"
42 "github.com/charmbracelet/crush/internal/pubsub"
43 "github.com/charmbracelet/crush/internal/session"
44 "github.com/charmbracelet/crush/internal/stringext"
45 "github.com/charmbracelet/crush/internal/version"
46 "github.com/charmbracelet/x/exp/charmtone"
47)
48
const (
	// DefaultSessionName is the placeholder name for a session that has no
	// title yet.
	DefaultSessionName = "Untitled Session"

	// Constants for auto-summarization thresholds.
	//
	// Models whose context window is larger than
	// largeContextWindowThreshold trigger summarization once fewer than
	// largeContextWindowBuffer tokens remain; smaller models trigger it once
	// the remaining tokens drop to smallContextWindowRatio of the window.
	largeContextWindowThreshold = 200_000
	largeContextWindowBuffer    = 20_000
	smallContextWindowRatio     = 0.2
)
57
// userAgent is the User-Agent string handed to every fantasy agent created
// in this package. It embeds the current Crush version.
var userAgent = fmt.Sprintf("Charm-Crush/%s (https://charm.land/crush)", version.Version)

// titlePrompt holds the embedded contents of templates/title.md.
//
//go:embed templates/title.md
var titlePrompt []byte

// summaryPrompt holds the embedded contents of templates/summary.md. It is
// used as the system prompt when summarizing a session.
//
//go:embed templates/summary.md
var summaryPrompt []byte

// Used to remove <think> tags from generated titles.
var (
	// thinkTagRegex matches a complete <think>...</think> section, including
	// newlines inside it ((?s) lets "." match "\n"), non-greedily.
	thinkTagRegex = regexp.MustCompile(`(?s)<think>.*?</think>`)
	// orphanThinkTagRegex matches a lone opening or closing think tag.
	orphanThinkTagRegex = regexp.MustCompile(`</?think>`)
)
71
// SessionAgentCall describes a single prompt submitted to a SessionAgent.
type SessionAgentCall struct {
	// SessionID identifies the session the prompt belongs to. Run rejects
	// calls with an empty SessionID.
	SessionID string
	// Prompt is the user's text. It may only be empty when Attachments
	// contains a text attachment.
	Prompt string
	// ProviderOptions are forwarded unchanged to the model provider.
	ProviderOptions fantasy.ProviderOptions
	// Attachments are stored with the user message; non-text attachments are
	// also sent to the model as files.
	Attachments []message.Attachment
	// MaxOutputTokens caps the response length. Values <= 0 are not sent to
	// the provider at all.
	MaxOutputTokens int64
	// Sampling parameters, forwarded to the provider as-is (nil means unset).
	Temperature      *float64
	TopP             *float64
	TopK             *int64
	FrequencyPenalty *float64
	PresencePenalty  *float64
	// NonInteractive suppresses the "agent finished" notification that Run
	// otherwise publishes at the end of a successful turn.
	NonInteractive bool
}
85
// SessionAgent is the session-scoped agent API: it runs prompts against a
// session, manages per-session queues and cancellation, and can summarize a
// session's history.
type SessionAgent interface {
	// Run processes a call for its session. If the session is busy the call
	// is queued and Run returns (nil, nil).
	Run(context.Context, SessionAgentCall) (*fantasy.AgentResult, error)
	SetModels(large Model, small Model)
	SetTools(tools []fantasy.AgentTool)
	SetSystemPrompt(systemPrompt string)
	Cancel(sessionID string)
	CancelAll()
	IsSessionBusy(sessionID string) bool
	IsBusy() bool
	QueuedPrompts(sessionID string) int
	QueuedPromptsList(sessionID string) []string
	ClearQueue(sessionID string)
	// Summarize replaces the session's history with a model-generated
	// summary message. It returns ErrSessionBusy if the session is busy.
	Summarize(context.Context, string, fantasy.ProviderOptions) error
	Model() Model
}
101
// Model bundles a language model with the metadata the agent needs about it.
type Model struct {
	// Model is the language model implementation used for generation.
	Model fantasy.LanguageModel
	// CatwalkCfg is the catalog metadata; the agent reads its context
	// window, image support, and display name.
	CatwalkCfg catwalk.Model
	// ModelCfg is the selected-model configuration; the agent records its
	// Model and Provider on assistant messages.
	ModelCfg config.SelectedModel
}
107
// sessionAgent is the SessionAgent implementation returned by
// NewSessionAgent.
type sessionAgent struct {
	// Mutable configuration, wrapped in csync containers so that it can be
	// replaced while requests are in flight.
	largeModel         *csync.Value[Model] // model used by Run and Summarize
	smallModel         *csync.Value[Model] // model read by generateTitle
	systemPromptPrefix *csync.Value[string]
	systemPrompt       *csync.Value[string]
	tools              *csync.Slice[fantasy.AgentTool]

	// isSubAgent disables the todo-list system reminder that preparePrompt
	// otherwise prepends to the history.
	isSubAgent bool
	sessions   session.Service
	messages   message.Service
	// disableAutoSummarize turns off the context-window stop condition in Run.
	disableAutoSummarize bool
	isYolo               bool
	// notify may be nil; Run checks before publishing.
	notify pubsub.Publisher[notify.Notification]

	// messageQueue holds, per session ID, the calls received while that
	// session was busy.
	messageQueue *csync.Map[string, []SessionAgentCall]
	// activeRequests holds, per session ID, the cancel function of the
	// request currently in flight.
	activeRequests *csync.Map[string, context.CancelFunc]
}
125
// SessionAgentOptions holds the dependencies and initial configuration
// consumed by NewSessionAgent. Each field is copied into the corresponding
// field of the created agent.
type SessionAgentOptions struct {
	LargeModel           Model
	SmallModel           Model
	SystemPromptPrefix   string
	SystemPrompt         string
	IsSubAgent           bool
	DisableAutoSummarize bool
	IsYolo               bool
	Sessions             session.Service
	Messages             message.Service
	Tools                []fantasy.AgentTool
	// Notify is optional; Run skips notifications when it is nil.
	Notify pubsub.Publisher[notify.Notification]
}
139
140func NewSessionAgent(
141 opts SessionAgentOptions,
142) SessionAgent {
143 return &sessionAgent{
144 largeModel: csync.NewValue(opts.LargeModel),
145 smallModel: csync.NewValue(opts.SmallModel),
146 systemPromptPrefix: csync.NewValue(opts.SystemPromptPrefix),
147 systemPrompt: csync.NewValue(opts.SystemPrompt),
148 isSubAgent: opts.IsSubAgent,
149 sessions: opts.Sessions,
150 messages: opts.Messages,
151 disableAutoSummarize: opts.DisableAutoSummarize,
152 tools: csync.NewSliceFrom(opts.Tools),
153 isYolo: opts.IsYolo,
154 notify: opts.Notify,
155 messageQueue: csync.NewMap[string, []SessionAgentCall](),
156 activeRequests: csync.NewMap[string, context.CancelFunc](),
157 }
158}
159
// Run sends call to the large model for the call's session and streams the
// response, persisting messages as they arrive.
//
// It returns ErrEmptyPrompt when the prompt is empty and no text attachment
// is present, and ErrSessionMissing when SessionID is empty. If the session
// is already busy, the call is appended to the session's queue and Run
// returns (nil, nil).
//
// Generation stops early when the remaining context window drops to the
// auto-summarize threshold (unless auto-summarize is disabled), in which case
// the session is summarized afterwards, or when repeated tool calls are
// detected. On a streaming error the current assistant message is finalized
// with an error (or canceled) finish reason and any tool call without a
// result receives an error result. After a successful turn, the next queued
// call for the session, if any, is run recursively.
func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy.AgentResult, error) {
	if call.Prompt == "" && !message.ContainsTextAttachment(call.Attachments) {
		return nil, ErrEmptyPrompt
	}
	if call.SessionID == "" {
		return nil, ErrSessionMissing
	}

	// Queue the message if busy
	if a.IsSessionBusy(call.SessionID) {
		existing, ok := a.messageQueue.Get(call.SessionID)
		if !ok {
			existing = []SessionAgentCall{}
		}
		existing = append(existing, call)
		a.messageQueue.Set(call.SessionID, existing)
		return nil, nil
	}

	// Copy mutable fields under lock to avoid races with SetTools/SetModels.
	agentTools := a.tools.Copy()
	largeModel := a.largeModel.Get()
	systemPrompt := a.systemPrompt.Get()
	promptPrefix := a.systemPromptPrefix.Get()
	var instructions strings.Builder

	// Collect the instructions advertised by every connected MCP server.
	for _, server := range mcp.GetStates() {
		if server.State != mcp.StateConnected {
			continue
		}
		if s := server.Client.InitializeResult().Instructions; s != "" {
			instructions.WriteString(s)
			instructions.WriteString("\n\n")
		}
	}

	if s := instructions.String(); s != "" {
		systemPrompt += "\n\n<mcp-instructions>\n" + s + "\n</mcp-instructions>"
	}

	if len(agentTools) > 0 {
		// Add Anthropic caching to the last tool.
		agentTools[len(agentTools)-1].SetProviderOptions(a.getCacheControlOptions())
	}

	agent := fantasy.NewAgent(
		largeModel.Model,
		fantasy.WithSystemPrompt(systemPrompt),
		fantasy.WithTools(agentTools...),
		fantasy.WithUserAgent(userAgent),
	)

	sessionLock := sync.Mutex{}
	currentSession, err := a.sessions.Get(ctx, call.SessionID)
	if err != nil {
		return nil, fmt.Errorf("failed to get session: %w", err)
	}

	msgs, err := a.getSessionMessages(ctx, currentSession)
	if err != nil {
		return nil, fmt.Errorf("failed to get session messages: %w", err)
	}

	var wg sync.WaitGroup
	// Generate title if first message.
	if len(msgs) == 0 {
		titleCtx := ctx // Copy to avoid race with ctx reassignment below.
		wg.Go(func() {
			a.generateTitle(titleCtx, call.SessionID, call.Prompt)
		})
	}
	// Do not return before title generation has finished.
	defer wg.Wait()

	// Add the user message to the session.
	_, err = a.createUserMessage(ctx, call)
	if err != nil {
		return nil, err
	}

	// Add the session to the context.
	ctx = context.WithValue(ctx, tools.SessionIDContextKey, call.SessionID)

	// Register the cancel function; the entry is removed again when Run
	// returns.
	genCtx, cancel := context.WithCancel(ctx)
	a.activeRequests.Set(call.SessionID, cancel)

	defer cancel()
	defer a.activeRequests.Del(call.SessionID)

	history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...)

	startTime := time.Now()
	a.eventPromptSent(call.SessionID)

	// currentAssistant is assigned by PrepareStep at the start of every step
	// and read by the streaming callbacks below.
	var currentAssistant *message.Message
	var shouldSummarize bool
	// Don't send MaxOutputTokens if 0 — some providers (e.g. LM Studio) reject it
	var maxOutputTokens *int64
	if call.MaxOutputTokens > 0 {
		maxOutputTokens = &call.MaxOutputTokens
	}
	result, err := agent.Stream(genCtx, fantasy.AgentStreamCall{
		Prompt:           message.PromptWithTextAttachments(call.Prompt, call.Attachments),
		Files:            files,
		Messages:         history,
		ProviderOptions:  call.ProviderOptions,
		MaxOutputTokens:  maxOutputTokens,
		TopP:             call.TopP,
		Temperature:      call.Temperature,
		PresencePenalty:  call.PresencePenalty,
		TopK:             call.TopK,
		FrequencyPenalty: call.FrequencyPenalty,
		PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) {
			// Start from the incoming messages with all provider options
			// cleared; cache control is re-applied further down.
			prepared.Messages = options.Messages
			for i := range prepared.Messages {
				prepared.Messages[i].ProviderOptions = nil
			}

			// Use latest tools (updated by SetTools when MCP tools change).
			prepared.Tools = a.tools.Copy()

			// Drain the queue: prompts received while this session was busy
			// are persisted and appended to this step's messages.
			queuedCalls, _ := a.messageQueue.Get(call.SessionID)
			a.messageQueue.Del(call.SessionID)
			for _, queued := range queuedCalls {
				userMessage, createErr := a.createUserMessage(callContext, queued)
				if createErr != nil {
					return callContext, prepared, createErr
				}
				prepared.Messages = append(prepared.Messages, userMessage.ToAIMessage()...)
			}

			prepared.Messages = a.workaroundProviderMediaLimitations(prepared.Messages, largeModel)

			lastSystemRoleInx := 0
			systemMessageUpdated := false
			for i, msg := range prepared.Messages {
				// Add cache control to the last system message that precedes
				// the first non-system message (index 0 if there is none).
				if msg.Role == fantasy.MessageRoleSystem {
					lastSystemRoleInx = i
				} else if !systemMessageUpdated {
					prepared.Messages[lastSystemRoleInx].ProviderOptions = a.getCacheControlOptions()
					systemMessageUpdated = true
				}
				// Then add cache control to the last 2 messages.
				if i > len(prepared.Messages)-3 {
					prepared.Messages[i].ProviderOptions = a.getCacheControlOptions()
				}
			}

			// The prefix is prepended after cache control has been assigned,
			// so it never carries cache-control options itself.
			if promptPrefix != "" {
				prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(promptPrefix)}, prepared.Messages...)
			}

			// Create the (initially empty) assistant message that the
			// streaming callbacks fill in for this step.
			var assistantMsg message.Message
			assistantMsg, err = a.messages.Create(callContext, call.SessionID, message.CreateMessageParams{
				Role:     message.Assistant,
				Parts:    []message.ContentPart{},
				Model:    largeModel.ModelCfg.Model,
				Provider: largeModel.ModelCfg.Provider,
			})
			if err != nil {
				return callContext, prepared, err
			}
			callContext = context.WithValue(callContext, tools.MessageIDContextKey, assistantMsg.ID)
			callContext = context.WithValue(callContext, tools.SupportsImagesContextKey, largeModel.CatwalkCfg.SupportsImages)
			callContext = context.WithValue(callContext, tools.ModelNameContextKey, largeModel.CatwalkCfg.Name)
			currentAssistant = &assistantMsg
			return callContext, prepared, err
		},
		OnReasoningStart: func(id string, reasoning fantasy.ReasoningContent) error {
			currentAssistant.AppendReasoningContent(reasoning.Text)
			return a.messages.Update(genCtx, *currentAssistant)
		},
		OnReasoningDelta: func(id string, text string) error {
			currentAssistant.AppendReasoningContent(text)
			return a.messages.Update(genCtx, *currentAssistant)
		},
		OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error {
			// handle anthropic signature
			if anthropicData, ok := reasoning.ProviderMetadata[anthropic.Name]; ok {
				if reasoning, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok {
					currentAssistant.AppendReasoningSignature(reasoning.Signature)
				}
			}
			// handle google thought signature
			if googleData, ok := reasoning.ProviderMetadata[google.Name]; ok {
				if reasoning, ok := googleData.(*google.ReasoningMetadata); ok {
					currentAssistant.AppendThoughtSignature(reasoning.Signature, reasoning.ToolID)
				}
			}
			// handle openai responses reasoning metadata
			if openaiData, ok := reasoning.ProviderMetadata[openai.Name]; ok {
				if reasoning, ok := openaiData.(*openai.ResponsesReasoningMetadata); ok {
					currentAssistant.SetReasoningResponsesData(reasoning)
				}
			}
			currentAssistant.FinishThinking()
			return a.messages.Update(genCtx, *currentAssistant)
		},
		OnTextDelta: func(id string, text string) error {
			// Strip leading newline from initial text content. This is
			// particularly important in non-interactive mode where leading
			// newlines are very visible.
			if len(currentAssistant.Parts) == 0 {
				text = strings.TrimPrefix(text, "\n")
			}

			currentAssistant.AppendContent(text)
			return a.messages.Update(genCtx, *currentAssistant)
		},
		OnToolInputStart: func(id string, toolName string) error {
			toolCall := message.ToolCall{
				ID:               id,
				Name:             toolName,
				ProviderExecuted: false,
				Finished:         false,
			}
			currentAssistant.AddToolCall(toolCall)
			// Use parent ctx instead of genCtx to ensure the update succeeds
			// even if the request is canceled mid-stream
			return a.messages.Update(ctx, *currentAssistant)
		},
		OnRetry: func(err *fantasy.ProviderError, delay time.Duration) {
			slog.Warn("Provider request failed, retrying", providerRetryLogFields(err, delay)...)
		},
		OnToolCall: func(tc fantasy.ToolCallContent) error {
			toolCall := message.ToolCall{
				ID:               tc.ToolCallID,
				Name:             tc.ToolName,
				Input:            tc.Input,
				ProviderExecuted: false,
				Finished:         true,
			}
			currentAssistant.AddToolCall(toolCall)
			// Use parent ctx instead of genCtx to ensure the update succeeds
			// even if the request is canceled mid-stream
			return a.messages.Update(ctx, *currentAssistant)
		},
		OnToolResult: func(result fantasy.ToolResultContent) error {
			toolResult := a.convertToToolResult(result)
			// Use parent ctx instead of genCtx to ensure the message is created
			// even if the request is canceled mid-stream
			_, createMsgErr := a.messages.Create(ctx, currentAssistant.SessionID, message.CreateMessageParams{
				Role: message.Tool,
				Parts: []message.ContentPart{
					toolResult,
				},
			})
			return createMsgErr
		},
		OnStepFinish: func(stepResult fantasy.StepResult) error {
			// Map the provider finish reason onto the message finish reason;
			// anything not listed stays "unknown".
			finishReason := message.FinishReasonUnknown
			switch stepResult.FinishReason {
			case fantasy.FinishReasonLength:
				finishReason = message.FinishReasonMaxTokens
			case fantasy.FinishReasonStop:
				finishReason = message.FinishReasonEndTurn
			case fantasy.FinishReasonToolCalls:
				finishReason = message.FinishReasonToolUse
			}
			// If a tool result halted the turn (e.g. a hook halt or a
			// permission denial), the step ends on FinishReasonToolCalls but
			// the model will not be called again. Treat it as the end of the
			// turn so the UI can render the assistant footer.
			if finishReason == message.FinishReasonToolUse {
				for _, tr := range stepResult.Content.ToolResults() {
					if tr.StopTurn {
						finishReason = message.FinishReasonEndTurn
						break
					}
				}
			}
			currentAssistant.AddFinish(finishReason, "", "")
			sessionLock.Lock()
			defer sessionLock.Unlock()

			// Re-read the session before adding this step's usage so that the
			// latest stored state is the one updated and saved.
			updatedSession, getSessionErr := a.sessions.Get(ctx, call.SessionID)
			if getSessionErr != nil {
				return getSessionErr
			}
			a.updateSessionUsage(largeModel, &updatedSession, stepResult.Usage, a.openrouterCost(stepResult.ProviderMetadata))
			_, sessionErr := a.sessions.Save(ctx, updatedSession)
			if sessionErr != nil {
				return sessionErr
			}
			currentSession = updatedSession
			return a.messages.Update(genCtx, *currentAssistant)
		},
		StopWhen: []fantasy.StopCondition{
			// Stop when the context window is nearly exhausted so the session
			// can be summarized.
			func(_ []fantasy.StepResult) bool {
				cw := int64(largeModel.CatwalkCfg.ContextWindow)
				// If context window is unknown (0), skip auto-summarize
				// to avoid immediately truncating custom/local models.
				if cw == 0 {
					return false
				}
				tokens := currentSession.CompletionTokens + currentSession.PromptTokens
				remaining := cw - tokens
				var threshold int64
				if cw > largeContextWindowThreshold {
					threshold = largeContextWindowBuffer
				} else {
					threshold = int64(float64(cw) * smallContextWindowRatio)
				}
				if (remaining <= threshold) && !a.disableAutoSummarize {
					shouldSummarize = true
					return true
				}
				return false
			},
			// Stop when the model keeps repeating the same tool calls.
			func(steps []fantasy.StepResult) bool {
				return hasRepeatedToolCalls(steps, loopDetectionWindowSize, loopDetectionMaxRepeats)
			},
		},
	})

	a.eventPromptResponded(call.SessionID, time.Since(startTime).Truncate(time.Second))

	if err != nil {
		isHyper := largeModel.ModelCfg.Provider == hyper.Name
		isCancelErr := errors.Is(err, context.Canceled)
		// No assistant message was created yet, so there is nothing to
		// finalize.
		if currentAssistant == nil {
			return result, err
		}
		// Ensure we finish thinking on error to close the reasoning state.
		currentAssistant.FinishThinking()
		toolCalls := currentAssistant.ToolCalls()
		// INFO: we use the parent context here because the genCtx has been cancelled.
		msgs, createErr := a.messages.List(ctx, currentAssistant.SessionID)
		if createErr != nil {
			return nil, createErr
		}
		for _, tc := range toolCalls {
			// Close tool calls whose input never finished streaming.
			if !tc.Finished {
				tc.Finished = true
				tc.Input = "{}"
				currentAssistant.AddToolCall(tc)
				updateErr := a.messages.Update(ctx, *currentAssistant)
				if updateErr != nil {
					return nil, updateErr
				}
			}

			// Look for an already stored result for this tool call.
			found := false
			for _, msg := range msgs {
				if msg.Role == message.Tool {
					for _, tr := range msg.ToolResults() {
						if tr.ToolCallID == tc.ID {
							found = true
							break
						}
					}
				}
				if found {
					break
				}
			}
			if found {
				continue
			}
			// No result exists: store an error result so the tool call is
			// not left without one.
			content := "There was an error while executing the tool"
			if isCancelErr {
				content = "Error: user cancelled assistant tool calling"
			}
			toolResult := message.ToolResult{
				ToolCallID: tc.ID,
				Name:       tc.Name,
				Content:    content,
				IsError:    true,
			}
			_, createErr = a.messages.Create(ctx, currentAssistant.SessionID, message.CreateMessageParams{
				Role: message.Tool,
				Parts: []message.ContentPart{
					toolResult,
				},
			})
			if createErr != nil {
				return nil, createErr
			}
		}
		// Translate the error into a finish reason, title, and detail text
		// on the assistant message.
		var fantasyErr *fantasy.Error
		var providerErr *fantasy.ProviderError
		const defaultTitle = "Provider Error"
		linkStyle := lipgloss.NewStyle().Foreground(charmtone.Guac).Underline(true)
		if isCancelErr {
			currentAssistant.AddFinish(message.FinishReasonCanceled, "User canceled request", "")
		} else if isHyper && errors.As(err, &providerErr) && providerErr.StatusCode == http.StatusUnauthorized {
			currentAssistant.AddFinish(message.FinishReasonError, "Unauthorized", `Please re-authenticate with Hyper. You can also run "crush auth" to re-authenticate.`)
			if a.notify != nil {
				a.notify.Publish(pubsub.CreatedEvent, notify.Notification{
					SessionID:    call.SessionID,
					SessionTitle: currentSession.Title,
					Type:         notify.TypeReAuthenticate,
					ProviderID:   largeModel.ModelCfg.Provider,
				})
			}
		} else if isHyper && errors.As(err, &providerErr) && providerErr.StatusCode == http.StatusPaymentRequired {
			url := hyper.BaseURL()
			link := linkStyle.Hyperlink(url, "id=hyper").Render(url)
			currentAssistant.AddFinish(message.FinishReasonError, "No credits", "You're out of credits. Add more at "+link)
		} else if errors.As(err, &providerErr) {
			if providerErr.Message == "The requested model is not supported." {
				url := "https://github.com/settings/copilot/features"
				link := linkStyle.Hyperlink(url, "id=copilot").Render(url)
				currentAssistant.AddFinish(
					message.FinishReasonError,
					"Copilot model not enabled",
					fmt.Sprintf("%q is not enabled in Copilot. Go to the following page to enable it. Then, wait 5 minutes before trying again. %s", largeModel.CatwalkCfg.Name, link),
				)
			} else {
				currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(providerErr.Title), defaultTitle), providerErr.Message)
			}
		} else if errors.As(err, &fantasyErr) {
			currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(fantasyErr.Title), defaultTitle), fantasyErr.Message)
		} else {
			currentAssistant.AddFinish(message.FinishReasonError, defaultTitle, err.Error())
		}
		// Note: we use the parent context here because the genCtx has been
		// cancelled.
		updateErr := a.messages.Update(ctx, *currentAssistant)
		if updateErr != nil {
			return nil, updateErr
		}
		return nil, err
	}

	// Send notification that agent has finished its turn (skip for
	// nested/non-interactive sessions).
	if !call.NonInteractive && a.notify != nil {
		a.notify.Publish(pubsub.CreatedEvent, notify.Notification{
			SessionID:    call.SessionID,
			SessionTitle: currentSession.Title,
			Type:         notify.TypeAgentFinished,
		})
	}

	if shouldSummarize {
		// Summarize refuses to run on a busy session, so release the active
		// request first.
		a.activeRequests.Del(call.SessionID)
		if summarizeErr := a.Summarize(genCtx, call.SessionID, call.ProviderOptions); summarizeErr != nil {
			return nil, summarizeErr
		}
		// If the agent wasn't done...
		if len(currentAssistant.ToolCalls()) > 0 {
			existing, ok := a.messageQueue.Get(call.SessionID)
			if !ok {
				existing = []SessionAgentCall{}
			}
			call.Prompt = fmt.Sprintf("The previous session was interrupted because it got too long, the initial user request was: `%s`", call.Prompt)
			existing = append(existing, call)
			a.messageQueue.Set(call.SessionID, existing)
		}
	}

	// Release active request before processing queued messages.
	a.activeRequests.Del(call.SessionID)
	cancel()

	queuedMessages, ok := a.messageQueue.Get(call.SessionID)
	if !ok || len(queuedMessages) == 0 {
		return result, err
	}
	// There are queued messages; restart the loop.
	firstQueuedMessage := queuedMessages[0]
	a.messageQueue.Set(call.SessionID, queuedMessages[1:])
	return a.Run(ctx, firstQueuedMessage)
}
623
624func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fantasy.ProviderOptions) error {
625 if a.IsSessionBusy(sessionID) {
626 return ErrSessionBusy
627 }
628
629 // Copy mutable fields under lock to avoid races with SetModels.
630 largeModel := a.largeModel.Get()
631 systemPromptPrefix := a.systemPromptPrefix.Get()
632
633 currentSession, err := a.sessions.Get(ctx, sessionID)
634 if err != nil {
635 return fmt.Errorf("failed to get session: %w", err)
636 }
637 msgs, err := a.getSessionMessages(ctx, currentSession)
638 if err != nil {
639 return err
640 }
641 if len(msgs) == 0 {
642 // Nothing to summarize.
643 return nil
644 }
645
646 aiMsgs, _ := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages)
647
648 genCtx, cancel := context.WithCancel(ctx)
649 a.activeRequests.Set(sessionID, cancel)
650 defer a.activeRequests.Del(sessionID)
651 defer cancel()
652
653 agent := fantasy.NewAgent(largeModel.Model,
654 fantasy.WithSystemPrompt(string(summaryPrompt)),
655 fantasy.WithUserAgent(userAgent),
656 )
657 summaryMessage, err := a.messages.Create(ctx, sessionID, message.CreateMessageParams{
658 Role: message.Assistant,
659 Model: largeModel.Model.Model(),
660 Provider: largeModel.Model.Provider(),
661 IsSummaryMessage: true,
662 })
663 if err != nil {
664 return err
665 }
666
667 summaryPromptText := buildSummaryPrompt(currentSession.Todos)
668
669 resp, err := agent.Stream(genCtx, fantasy.AgentStreamCall{
670 Prompt: summaryPromptText,
671 Messages: aiMsgs,
672 ProviderOptions: opts,
673 PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) {
674 prepared.Messages = options.Messages
675 if systemPromptPrefix != "" {
676 prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(systemPromptPrefix)}, prepared.Messages...)
677 }
678 return callContext, prepared, nil
679 },
680 OnReasoningDelta: func(id string, text string) error {
681 summaryMessage.AppendReasoningContent(text)
682 return a.messages.Update(genCtx, summaryMessage)
683 },
684 OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error {
685 // Handle anthropic signature.
686 if anthropicData, ok := reasoning.ProviderMetadata["anthropic"]; ok {
687 if signature, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok && signature.Signature != "" {
688 summaryMessage.AppendReasoningSignature(signature.Signature)
689 }
690 }
691 summaryMessage.FinishThinking()
692 return a.messages.Update(genCtx, summaryMessage)
693 },
694 OnTextDelta: func(id, text string) error {
695 summaryMessage.AppendContent(text)
696 return a.messages.Update(genCtx, summaryMessage)
697 },
698 })
699 if err != nil {
700 isCancelErr := errors.Is(err, context.Canceled)
701 if isCancelErr {
702 // User cancelled summarize we need to remove the summary message.
703 deleteErr := a.messages.Delete(ctx, summaryMessage.ID)
704 return deleteErr
705 }
706 return err
707 }
708
709 summaryMessage.AddFinish(message.FinishReasonEndTurn, "", "")
710 err = a.messages.Update(genCtx, summaryMessage)
711 if err != nil {
712 return err
713 }
714
715 var openrouterCost *float64
716 for _, step := range resp.Steps {
717 stepCost := a.openrouterCost(step.ProviderMetadata)
718 if stepCost != nil {
719 newCost := *stepCost
720 if openrouterCost != nil {
721 newCost += *openrouterCost
722 }
723 openrouterCost = &newCost
724 }
725 }
726
727 a.updateSessionUsage(largeModel, ¤tSession, resp.TotalUsage, openrouterCost)
728
729 // Just in case, get just the last usage info.
730 usage := resp.Response.Usage
731 currentSession.SummaryMessageID = summaryMessage.ID
732 currentSession.CompletionTokens = usage.OutputTokens
733 currentSession.PromptTokens = 0
734 _, err = a.sessions.Save(genCtx, currentSession)
735 if err != nil {
736 return err
737 }
738
739 // Release the active request before processing queued messages so that
740 // Run() does not see the session as busy.
741 a.activeRequests.Del(sessionID)
742 cancel()
743
744 // Process any messages that were queued while summarizing.
745 queuedMessages, ok := a.messageQueue.Get(sessionID)
746 if !ok || len(queuedMessages) == 0 {
747 return nil
748 }
749 firstQueuedMessage := queuedMessages[0]
750 a.messageQueue.Set(sessionID, queuedMessages[1:])
751 _, qErr := a.Run(ctx, firstQueuedMessage)
752 return qErr
753}
754
755func (a *sessionAgent) getCacheControlOptions() fantasy.ProviderOptions {
756 if t, _ := strconv.ParseBool(os.Getenv("CRUSH_DISABLE_ANTHROPIC_CACHE")); t {
757 return fantasy.ProviderOptions{}
758 }
759 return fantasy.ProviderOptions{
760 anthropic.Name: &anthropic.ProviderCacheControlOptions{
761 CacheControl: anthropic.CacheControl{Type: "ephemeral"},
762 },
763 bedrock.Name: &anthropic.ProviderCacheControlOptions{
764 CacheControl: anthropic.CacheControl{Type: "ephemeral"},
765 },
766 vercel.Name: &anthropic.ProviderCacheControlOptions{
767 CacheControl: anthropic.CacheControl{Type: "ephemeral"},
768 },
769 }
770}
771
772func (a *sessionAgent) createUserMessage(ctx context.Context, call SessionAgentCall) (message.Message, error) {
773 parts := []message.ContentPart{message.TextContent{Text: call.Prompt}}
774 var attachmentParts []message.ContentPart
775 for _, attachment := range call.Attachments {
776 attachmentParts = append(attachmentParts, message.BinaryContent{Path: attachment.FilePath, MIMEType: attachment.MimeType, Data: attachment.Content})
777 }
778 parts = append(parts, attachmentParts...)
779 msg, err := a.messages.Create(ctx, call.SessionID, message.CreateMessageParams{
780 Role: message.User,
781 Parts: parts,
782 })
783 if err != nil {
784 return message.Message{}, fmt.Errorf("failed to create user message: %w", err)
785 }
786 return msg, nil
787}
788
789func (a *sessionAgent) preparePrompt(msgs []message.Message, supportsImages bool, attachments ...message.Attachment) ([]fantasy.Message, []fantasy.FilePart) {
790 var history []fantasy.Message
791 if !a.isSubAgent {
792 history = append(history, fantasy.NewUserMessage(
793 fmt.Sprintf("<system_reminder>%s</system_reminder>",
794 `This is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware.
795If you are working on tasks that would benefit from a todo list please use the "todos" tool to create one.
796If not, please feel free to ignore. Again do not mention this message to the user.`,
797 ),
798 ))
799 }
800 // Collect all tool call IDs present in assistant messages and all tool
801 // result IDs present in tool messages. This lets us detect both orphaned
802 // tool results (result without a call) and orphaned tool calls (call
803 // without a result).
804 knownToolCallIDs := make(map[string]struct{})
805 knownToolResultIDs := make(map[string]struct{})
806 for _, m := range msgs {
807 switch m.Role {
808 case message.Assistant:
809 for _, tc := range m.ToolCalls() {
810 knownToolCallIDs[tc.ID] = struct{}{}
811 }
812 case message.Tool:
813 for _, tr := range m.ToolResults() {
814 knownToolResultIDs[tr.ToolCallID] = struct{}{}
815 }
816 }
817 }
818
819 for _, m := range msgs {
820 if len(m.Parts) == 0 {
821 continue
822 }
823 // Assistant message without content or tool calls (cancelled before it returned anything).
824 if m.Role == message.Assistant && len(m.ToolCalls()) == 0 && m.Content().Text == "" && m.ReasoningContent().String() == "" {
825 continue
826 }
827 if m.Role == message.Tool {
828 if msg, ok := filterOrphanedToolResults(m, knownToolCallIDs); ok {
829 history = append(history, msg)
830 }
831 continue
832 }
833 aiMsgs := m.ToAIMessage()
834 if !supportsImages {
835 for i := range aiMsgs {
836 if aiMsgs[i].Role == fantasy.MessageRoleUser {
837 aiMsgs[i].Content = filterFileParts(aiMsgs[i].Content)
838 }
839 }
840 }
841 history = append(history, aiMsgs...)
842
843 if m.Role == message.Assistant {
844 if msg, ok := syntheticToolResultsForOrphanedCalls(m, knownToolResultIDs); ok {
845 history = append(history, msg)
846 }
847 }
848 }
849
850 var files []fantasy.FilePart
851 for _, attachment := range attachments {
852 if attachment.IsText() {
853 continue
854 }
855 files = append(files, fantasy.FilePart{
856 Filename: attachment.FileName,
857 Data: attachment.Content,
858 MediaType: attachment.MimeType,
859 })
860 }
861
862 return history, files
863}
864
865// filterFileParts removes fantasy.FilePart entries from a slice of message
866// parts. Used to strip image attachments from historical user messages when
867// the current model does not support them.
868func filterFileParts(parts []fantasy.MessagePart) []fantasy.MessagePart {
869 filtered := make([]fantasy.MessagePart, 0, len(parts))
870 for _, part := range parts {
871 if _, ok := fantasy.AsMessagePart[fantasy.FilePart](part); ok {
872 continue
873 }
874 filtered = append(filtered, part)
875 }
876 return filtered
877}
878
879// filterOrphanedToolResults converts a tool message to a fantasy.Message,
880// dropping any tool result parts whose tool_call_id has no matching tool call
881// in the known set. An orphaned result causes API validation to fail on every
882// subsequent turn, permanently locking the session. Returns the filtered
883// message and true if at least one valid part remains.
884func filterOrphanedToolResults(m message.Message, knownToolCallIDs map[string]struct{}) (fantasy.Message, bool) {
885 aiMsgs := m.ToAIMessage()
886 if len(aiMsgs) == 0 {
887 return fantasy.Message{}, false
888 }
889 var validParts []fantasy.MessagePart
890 for _, part := range aiMsgs[0].Content {
891 tr, ok := fantasy.AsMessagePart[fantasy.ToolResultPart](part)
892 if !ok {
893 validParts = append(validParts, part)
894 continue
895 }
896 if _, known := knownToolCallIDs[tr.ToolCallID]; known {
897 validParts = append(validParts, part)
898 } else {
899 slog.Warn("Dropping orphaned tool result with no matching tool call",
900 "tool_call_id", tr.ToolCallID,
901 )
902 }
903 }
904 if len(validParts) == 0 {
905 return fantasy.Message{}, false
906 }
907 msg := aiMsgs[0]
908 msg.Content = validParts
909 return msg, true
910}
911
912// syntheticToolResultsForOrphanedCalls returns a tool message containing
913// synthetic tool results for any tool calls in the assistant message that
914// have no matching result in knownToolResultIDs. LLM APIs require every
915// tool_use to be immediately followed by a tool_result; an interrupted
916// session can leave orphaned tool_use blocks that permanently lock the
917// conversation. Returns the message and true if any synthetic results were
918// produced.
919func syntheticToolResultsForOrphanedCalls(m message.Message, knownToolResultIDs map[string]struct{}) (fantasy.Message, bool) {
920 var syntheticParts []fantasy.MessagePart
921 for _, tc := range m.ToolCalls() {
922 if _, hasResult := knownToolResultIDs[tc.ID]; hasResult {
923 continue
924 }
925 slog.Warn("Injecting synthetic tool result for orphaned tool call",
926 "tool_call_id", tc.ID,
927 "tool_name", tc.Name,
928 )
929 syntheticParts = append(syntheticParts, fantasy.ToolResultPart{
930 ToolCallID: tc.ID,
931 Output: fantasy.ToolResultOutputContentError{
932 Error: errors.New("tool call was interrupted and did not produce a result, you may retry this call if the result is still needed"),
933 },
934 })
935 }
936 if len(syntheticParts) == 0 {
937 return fantasy.Message{}, false
938 }
939 return fantasy.Message{
940 Role: fantasy.MessageRoleTool,
941 Content: syntheticParts,
942 }, true
943}
944
945func (a *sessionAgent) getSessionMessages(ctx context.Context, session session.Session) ([]message.Message, error) {
946 msgs, err := a.messages.List(ctx, session.ID)
947 if err != nil {
948 return nil, fmt.Errorf("failed to list messages: %w", err)
949 }
950
951 if session.SummaryMessageID != "" {
952 summaryMsgIndex := -1
953 for i, msg := range msgs {
954 if msg.ID == session.SummaryMessageID {
955 summaryMsgIndex = i
956 break
957 }
958 }
959 if summaryMsgIndex != -1 {
960 msgs = msgs[summaryMsgIndex:]
961 msgs[0].Role = message.User
962 }
963 }
964 return msgs, nil
965}
966
967// generateTitle generates a session titled based on the initial prompt.
968func (a *sessionAgent) generateTitle(ctx context.Context, sessionID string, userPrompt string) {
969 if userPrompt == "" {
970 return
971 }
972
973 smallModel := a.smallModel.Get()
974 largeModel := a.largeModel.Get()
975 systemPromptPrefix := a.systemPromptPrefix.Get()
976
977 var maxOutputTokens int64 = 40
978 if smallModel.CatwalkCfg.CanReason {
979 maxOutputTokens = smallModel.CatwalkCfg.DefaultMaxTokens
980 }
981
982 newAgent := func(m fantasy.LanguageModel, p []byte, tok int64) fantasy.Agent {
983 return fantasy.NewAgent(m,
984 fantasy.WithSystemPrompt(string(p)+"\n /no_think"),
985 fantasy.WithMaxOutputTokens(tok),
986 fantasy.WithUserAgent(userAgent),
987 )
988 }
989
990 streamCall := fantasy.AgentStreamCall{
991 Prompt: fmt.Sprintf("Generate a concise title for the following content:\n\n%s\n <think>\n\n</think>", userPrompt),
992 PrepareStep: func(callCtx context.Context, opts fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) {
993 prepared.Messages = opts.Messages
994 if systemPromptPrefix != "" {
995 prepared.Messages = append([]fantasy.Message{
996 fantasy.NewSystemMessage(systemPromptPrefix),
997 }, prepared.Messages...)
998 }
999 return callCtx, prepared, nil
1000 },
1001 }
1002
1003 // Use the small model to generate the title.
1004 model := smallModel
1005 agent := newAgent(model.Model, titlePrompt, maxOutputTokens)
1006 resp, err := agent.Stream(ctx, streamCall)
1007 if err == nil {
1008 // We successfully generated a title with the small model.
1009 slog.Debug("Generated title with small model")
1010 } else {
1011 // It didn't work. Let's try with the big model.
1012 slog.Error("Error generating title with small model; trying big model", "err", err)
1013 model = largeModel
1014 agent = newAgent(model.Model, titlePrompt, maxOutputTokens)
1015 resp, err = agent.Stream(ctx, streamCall)
1016 if err == nil {
1017 slog.Debug("Generated title with large model")
1018 } else {
1019 // Welp, the large model didn't work either. Use the default
1020 // session name and return.
1021 slog.Error("Error generating title with large model", "err", err)
1022 saveErr := a.sessions.Rename(ctx, sessionID, DefaultSessionName)
1023 if saveErr != nil {
1024 slog.Error("Failed to save session title", "error", saveErr)
1025 }
1026 return
1027 }
1028 }
1029
1030 if resp == nil {
1031 // Actually, we didn't get a response so we can't. Use the default
1032 // session name and return.
1033 slog.Error("Response is nil; can't generate title")
1034 saveErr := a.sessions.Rename(ctx, sessionID, DefaultSessionName)
1035 if saveErr != nil {
1036 slog.Error("Failed to save session title", "error", saveErr)
1037 }
1038 return
1039 }
1040
1041 // Clean up title.
1042 var title string
1043 title = strings.ReplaceAll(resp.Response.Content.Text(), "\n", " ")
1044
1045 // Remove thinking tags if present.
1046 title = thinkTagRegex.ReplaceAllString(title, "")
1047 title = orphanThinkTagRegex.ReplaceAllString(title, "")
1048
1049 title = strings.TrimSpace(title)
1050 title = cmp.Or(title, DefaultSessionName)
1051
1052 // Calculate usage and cost.
1053 var openrouterCost *float64
1054 for _, step := range resp.Steps {
1055 stepCost := a.openrouterCost(step.ProviderMetadata)
1056 if stepCost != nil {
1057 newCost := *stepCost
1058 if openrouterCost != nil {
1059 newCost += *openrouterCost
1060 }
1061 openrouterCost = &newCost
1062 }
1063 }
1064
1065 modelConfig := model.CatwalkCfg
1066 cost := modelConfig.CostPer1MInCached/1e6*float64(resp.TotalUsage.CacheCreationTokens) +
1067 modelConfig.CostPer1MOutCached/1e6*float64(resp.TotalUsage.CacheReadTokens) +
1068 modelConfig.CostPer1MIn/1e6*float64(resp.TotalUsage.InputTokens) +
1069 modelConfig.CostPer1MOut/1e6*float64(resp.TotalUsage.OutputTokens)
1070
1071 // Use override cost if available (e.g., from OpenRouter).
1072 if openrouterCost != nil {
1073 cost = *openrouterCost
1074 }
1075
1076 promptTokens := resp.TotalUsage.InputTokens + resp.TotalUsage.CacheCreationTokens
1077 completionTokens := resp.TotalUsage.OutputTokens
1078
1079 // Atomically update only title and usage fields to avoid overriding other
1080 // concurrent session updates.
1081 saveErr := a.sessions.UpdateTitleAndUsage(ctx, sessionID, title, promptTokens, completionTokens, cost)
1082 if saveErr != nil {
1083 slog.Error("Failed to save session title and usage", "error", saveErr)
1084 return
1085 }
1086}
1087
1088func (a *sessionAgent) openrouterCost(metadata fantasy.ProviderMetadata) *float64 {
1089 openrouterMetadata, ok := metadata[openrouter.Name]
1090 if !ok {
1091 return nil
1092 }
1093
1094 opts, ok := openrouterMetadata.(*openrouter.ProviderMetadata)
1095 if !ok {
1096 return nil
1097 }
1098 return &opts.Usage.Cost
1099}
1100
1101func (a *sessionAgent) updateSessionUsage(model Model, session *session.Session, usage fantasy.Usage, overrideCost *float64) {
1102 modelConfig := model.CatwalkCfg
1103 cost := modelConfig.CostPer1MInCached/1e6*float64(usage.CacheCreationTokens) +
1104 modelConfig.CostPer1MOutCached/1e6*float64(usage.CacheReadTokens) +
1105 modelConfig.CostPer1MIn/1e6*float64(usage.InputTokens) +
1106 modelConfig.CostPer1MOut/1e6*float64(usage.OutputTokens)
1107
1108 a.eventTokensUsed(session.ID, model, usage, cost)
1109
1110 if overrideCost != nil {
1111 session.Cost += *overrideCost
1112 } else {
1113 session.Cost += cost
1114 }
1115
1116 session.CompletionTokens = usage.OutputTokens
1117 session.PromptTokens = usage.InputTokens + usage.CacheReadTokens
1118}
1119
1120func (a *sessionAgent) Cancel(sessionID string) {
1121 // Cancel regular requests. Don't use Take() here - we need the entry to
1122 // remain in activeRequests so IsBusy() returns true until the goroutine
1123 // fully completes (including error handling that may access the DB).
1124 // The defer in processRequest will clean up the entry.
1125 if cancel, ok := a.activeRequests.Get(sessionID); ok && cancel != nil {
1126 slog.Debug("Request cancellation initiated", "session_id", sessionID)
1127 cancel()
1128 }
1129
1130 // Also check for summarize requests.
1131 if cancel, ok := a.activeRequests.Get(sessionID + "-summarize"); ok && cancel != nil {
1132 slog.Debug("Summarize cancellation initiated", "session_id", sessionID)
1133 cancel()
1134 }
1135
1136 if a.QueuedPrompts(sessionID) > 0 {
1137 slog.Debug("Clearing queued prompts", "session_id", sessionID)
1138 a.messageQueue.Del(sessionID)
1139 }
1140}
1141
1142func (a *sessionAgent) ClearQueue(sessionID string) {
1143 if a.QueuedPrompts(sessionID) > 0 {
1144 slog.Debug("Clearing queued prompts", "session_id", sessionID)
1145 a.messageQueue.Del(sessionID)
1146 }
1147}
1148
1149func (a *sessionAgent) CancelAll() {
1150 if !a.IsBusy() {
1151 return
1152 }
1153 for key := range a.activeRequests.Seq2() {
1154 a.Cancel(key) // key is sessionID
1155 }
1156
1157 timeout := time.After(5 * time.Second)
1158 for a.IsBusy() {
1159 select {
1160 case <-timeout:
1161 return
1162 default:
1163 time.Sleep(200 * time.Millisecond)
1164 }
1165 }
1166}
1167
1168func (a *sessionAgent) IsBusy() bool {
1169 var busy bool
1170 for cancelFunc := range a.activeRequests.Seq() {
1171 if cancelFunc != nil {
1172 busy = true
1173 break
1174 }
1175 }
1176 return busy
1177}
1178
1179func (a *sessionAgent) IsSessionBusy(sessionID string) bool {
1180 _, busy := a.activeRequests.Get(sessionID)
1181 return busy
1182}
1183
1184func (a *sessionAgent) QueuedPrompts(sessionID string) int {
1185 l, ok := a.messageQueue.Get(sessionID)
1186 if !ok {
1187 return 0
1188 }
1189 return len(l)
1190}
1191
1192func (a *sessionAgent) QueuedPromptsList(sessionID string) []string {
1193 l, ok := a.messageQueue.Get(sessionID)
1194 if !ok {
1195 return nil
1196 }
1197 prompts := make([]string, len(l))
1198 for i, call := range l {
1199 prompts[i] = call.Prompt
1200 }
1201 return prompts
1202}
1203
// SetModels replaces the agent's large and small models with the given
// values. The large model is stored first, then the small one.
func (a *sessionAgent) SetModels(large Model, small Model) {
	a.largeModel.Set(large)
	a.smallModel.Set(small)
}
1208
// SetTools hands tools to the agent's tool collection via SetSlice.
// NOTE(review): presumably this replaces the previous tool set — confirm
// against the SetSlice implementation.
func (a *sessionAgent) SetTools(tools []fantasy.AgentTool) {
	a.tools.SetSlice(tools)
}
1212
// SetSystemPrompt stores systemPrompt as the agent's system prompt.
func (a *sessionAgent) SetSystemPrompt(systemPrompt string) {
	a.systemPrompt.Set(systemPrompt)
}
1216
// Model returns the agent's current large model.
func (a *sessionAgent) Model() Model {
	return a.largeModel.Get()
}
1220
1221// convertToToolResult converts a fantasy tool result to a message tool result.
1222func (a *sessionAgent) convertToToolResult(result fantasy.ToolResultContent) message.ToolResult {
1223 baseResult := message.ToolResult{
1224 ToolCallID: result.ToolCallID,
1225 Name: result.ToolName,
1226 Metadata: result.ClientMetadata,
1227 }
1228
1229 switch result.Result.GetType() {
1230 case fantasy.ToolResultContentTypeText:
1231 if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](result.Result); ok {
1232 baseResult.Content = r.Text
1233 }
1234 case fantasy.ToolResultContentTypeError:
1235 if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](result.Result); ok {
1236 baseResult.Content = r.Error.Error()
1237 baseResult.IsError = true
1238 }
1239 case fantasy.ToolResultContentTypeMedia:
1240 if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](result.Result); ok {
1241 if !stringext.IsValidBase64(r.Data) {
1242 slog.Warn("Tool returned media with invalid base64 data, discarding image",
1243 "tool", result.ToolName,
1244 "tool_call_id", result.ToolCallID,
1245 )
1246 baseResult.Content = "Tool returned image data with invalid encoding"
1247 baseResult.IsError = true
1248 } else {
1249 content := r.Text
1250 if content == "" {
1251 content = fmt.Sprintf("Loaded %s content", r.MediaType)
1252 }
1253 baseResult.Content = content
1254 baseResult.Data = r.Data
1255 baseResult.MIMEType = r.MediaType
1256 }
1257 }
1258 }
1259
1260 return baseResult
1261}
1262
1263// workaroundProviderMediaLimitations converts media content in tool results to
1264// user messages for providers that don't natively support images in tool results.
1265//
1266// Problem: OpenAI, Google, OpenRouter, and other OpenAI-compatible providers
1267// don't support sending images/media in tool result messages - they only accept
1268// text in tool results. However, they DO support images in user messages.
1269//
1270// If we send media in tool results to these providers, the API returns an error.
1271//
1272// Solution: For these providers, we:
1273// 1. Replace the media in the tool result with a text placeholder
1274// 2. Inject a user message immediately after with the image as a file attachment
1275// 3. This maintains the tool execution flow while working around API limitations
1276//
1277// Anthropic and Bedrock support images natively in tool results, so we skip
1278// this workaround for them.
1279//
1280// Example transformation:
1281//
1282// BEFORE: [tool result: image data]
1283// AFTER: [tool result: "Image loaded - see attached"], [user: image attachment]
1284func (a *sessionAgent) workaroundProviderMediaLimitations(messages []fantasy.Message, largeModel Model) []fantasy.Message {
1285 providerSupportsMedia := largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderAnthropic) ||
1286 largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderBedrock)
1287
1288 if providerSupportsMedia {
1289 return messages
1290 }
1291
1292 convertedMessages := make([]fantasy.Message, 0, len(messages))
1293
1294 for _, msg := range messages {
1295 if msg.Role != fantasy.MessageRoleTool {
1296 convertedMessages = append(convertedMessages, msg)
1297 continue
1298 }
1299
1300 textParts := make([]fantasy.MessagePart, 0, len(msg.Content))
1301 var mediaFiles []fantasy.FilePart
1302
1303 for _, part := range msg.Content {
1304 toolResult, ok := fantasy.AsMessagePart[fantasy.ToolResultPart](part)
1305 if !ok {
1306 textParts = append(textParts, part)
1307 continue
1308 }
1309
1310 if media, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](toolResult.Output); ok {
1311 decoded, err := base64.StdEncoding.DecodeString(media.Data)
1312 if err != nil {
1313 slog.Warn("Failed to decode media data", "error", err)
1314 textParts = append(textParts, part)
1315 continue
1316 }
1317
1318 mediaFiles = append(mediaFiles, fantasy.FilePart{
1319 Data: decoded,
1320 MediaType: media.MediaType,
1321 Filename: fmt.Sprintf("tool-result-%s", toolResult.ToolCallID),
1322 })
1323
1324 textParts = append(textParts, fantasy.ToolResultPart{
1325 ToolCallID: toolResult.ToolCallID,
1326 Output: fantasy.ToolResultOutputContentText{
1327 Text: "[Image/media content loaded - see attached file]",
1328 },
1329 ProviderOptions: toolResult.ProviderOptions,
1330 })
1331 } else {
1332 textParts = append(textParts, part)
1333 }
1334 }
1335
1336 convertedMessages = append(convertedMessages, fantasy.Message{
1337 Role: fantasy.MessageRoleTool,
1338 Content: textParts,
1339 })
1340
1341 if len(mediaFiles) > 0 {
1342 convertedMessages = append(convertedMessages, fantasy.NewUserMessage(
1343 "Here is the media content from the tool result:",
1344 mediaFiles...,
1345 ))
1346 }
1347 }
1348
1349 return convertedMessages
1350}
1351
1352// buildSummaryPrompt constructs the prompt text for session summarization.
1353func buildSummaryPrompt(todos []session.Todo) string {
1354 var sb strings.Builder
1355 sb.WriteString("Provide a detailed summary of our conversation above.")
1356 if len(todos) > 0 {
1357 sb.WriteString("\n\n## Current Todo List\n\n")
1358 for _, t := range todos {
1359 fmt.Fprintf(&sb, "- [%s] %s\n", t.Status, t.Content)
1360 }
1361 sb.WriteString("\nInclude these tasks and their statuses in your summary. ")
1362 sb.WriteString("Instruct the resuming assistant to use the `todos` tool to continue tracking progress on these tasks.")
1363 }
1364 return sb.String()
1365}
1366
1367func providerRetryLogFields(err *fantasy.ProviderError, delay time.Duration) []any {
1368 fields := []any{
1369 "retry_delay", delay.String(),
1370 }
1371 if err == nil {
1372 return fields
1373 }
1374 fields = append(fields, "status_code", err.StatusCode)
1375 if err.Title != "" {
1376 fields = append(fields, "title", err.Title)
1377 }
1378 if err.Message != "" {
1379 fields = append(fields, "message", err.Message)
1380 }
1381 return fields
1382}