// Package agent is the core orchestration layer for Crush AI agents. // // It provides session-based AI agent functionality for managing // conversations, tool execution, and message handling. It coordinates // interactions between language models, messages, sessions, and tools while // handling features like automatic summarization, queuing, and token // management. package agent import ( "cmp" "context" _ "embed" "encoding/base64" "errors" "fmt" "log/slog" "os" "regexp" "strconv" "strings" "sync" "time" "charm.land/fantasy" "charm.land/fantasy/providers/anthropic" "charm.land/fantasy/providers/bedrock" "charm.land/fantasy/providers/google" "charm.land/fantasy/providers/openai" "charm.land/fantasy/providers/openrouter" "charm.land/lipgloss/v2" "github.com/charmbracelet/catwalk/pkg/catwalk" "github.com/charmbracelet/crush/internal/agent/hyper" "github.com/charmbracelet/crush/internal/agent/tools" "github.com/charmbracelet/crush/internal/config" "github.com/charmbracelet/crush/internal/csync" "github.com/charmbracelet/crush/internal/message" "github.com/charmbracelet/crush/internal/permission" "github.com/charmbracelet/crush/internal/session" "github.com/charmbracelet/crush/internal/stringext" "github.com/charmbracelet/x/exp/charmtone" ) const ( defaultSessionName = "Untitled Session" // Constants for auto-summarization thresholds largeContextWindowThreshold = 200_000 largeContextWindowBuffer = 20_000 smallContextWindowRatio = 0.2 ) //go:embed templates/title.md var titlePrompt []byte //go:embed templates/summary.md var summaryPrompt []byte // Used to remove tags from generated titles. var thinkTagRegex = regexp.MustCompile(`.*?`) type SessionAgentCall struct { SessionID string Prompt string ProviderOptions fantasy.ProviderOptions Attachments []message.Attachment MaxOutputTokens int64 Temperature *float64 TopP *float64 TopK *int64 FrequencyPenalty *float64 PresencePenalty *float64 } type SessionAgent interface { Run(context.Context, SessionAgentCall) (*fantasy.AgentResult, error) SetModels(large Model, small Model) SetTools(tools []fantasy.AgentTool) SetSystemPrompt(systemPrompt string) Cancel(sessionID string) CancelAll() IsSessionBusy(sessionID string) bool IsBusy() bool QueuedPrompts(sessionID string) int QueuedPromptsList(sessionID string) []string ClearQueue(sessionID string) Summarize(context.Context, string, fantasy.ProviderOptions) error Model() Model } type Model struct { Model fantasy.LanguageModel CatwalkCfg catwalk.Model ModelCfg config.SelectedModel } type sessionAgent struct { largeModel *csync.Value[Model] smallModel *csync.Value[Model] systemPromptPrefix *csync.Value[string] systemPrompt *csync.Value[string] tools *csync.Slice[fantasy.AgentTool] isSubAgent bool sessions session.Service messages message.Service disableAutoSummarize bool isYolo bool messageQueue *csync.Map[string, []SessionAgentCall] activeRequests *csync.Map[string, context.CancelFunc] } type SessionAgentOptions struct { LargeModel Model SmallModel Model SystemPromptPrefix string SystemPrompt string IsSubAgent bool DisableAutoSummarize bool IsYolo bool Sessions session.Service Messages message.Service Tools []fantasy.AgentTool } func NewSessionAgent( opts SessionAgentOptions, ) SessionAgent { return &sessionAgent{ largeModel: csync.NewValue(opts.LargeModel), smallModel: csync.NewValue(opts.SmallModel), systemPromptPrefix: csync.NewValue(opts.SystemPromptPrefix), systemPrompt: csync.NewValue(opts.SystemPrompt), isSubAgent: opts.IsSubAgent, sessions: opts.Sessions, messages: opts.Messages, disableAutoSummarize: opts.DisableAutoSummarize, tools: csync.NewSliceFrom(opts.Tools), isYolo: opts.IsYolo, messageQueue: csync.NewMap[string, []SessionAgentCall](), activeRequests: csync.NewMap[string, context.CancelFunc](), } } func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy.AgentResult, error) { if call.Prompt == "" && !message.ContainsTextAttachment(call.Attachments) { return nil, ErrEmptyPrompt } if call.SessionID == "" { return nil, ErrSessionMissing } // Queue the message if busy if a.IsSessionBusy(call.SessionID) { existing, ok := a.messageQueue.Get(call.SessionID) if !ok { existing = []SessionAgentCall{} } existing = append(existing, call) a.messageQueue.Set(call.SessionID, existing) return nil, nil } // Copy mutable fields under lock to avoid races with SetTools/SetModels. agentTools := a.tools.Copy() largeModel := a.largeModel.Get() systemPrompt := a.systemPrompt.Get() promptPrefix := a.systemPromptPrefix.Get() if len(agentTools) > 0 { // Add Anthropic caching to the last tool. agentTools[len(agentTools)-1].SetProviderOptions(a.getCacheControlOptions()) } agent := fantasy.NewAgent( largeModel.Model, fantasy.WithSystemPrompt(systemPrompt), fantasy.WithTools(agentTools...), ) sessionLock := sync.Mutex{} currentSession, err := a.sessions.Get(ctx, call.SessionID) if err != nil { return nil, fmt.Errorf("failed to get session: %w", err) } msgs, err := a.getSessionMessages(ctx, currentSession) if err != nil { return nil, fmt.Errorf("failed to get session messages: %w", err) } var wg sync.WaitGroup // Generate title if first message. if len(msgs) == 0 { titleCtx := ctx // Copy to avoid race with ctx reassignment below. wg.Go(func() { a.generateTitle(titleCtx, call.SessionID, call.Prompt) }) } defer wg.Wait() // Add the user message to the session. _, err = a.createUserMessage(ctx, call) if err != nil { return nil, err } // Add the session to the context. ctx = context.WithValue(ctx, tools.SessionIDContextKey, call.SessionID) genCtx, cancel := context.WithCancel(ctx) a.activeRequests.Set(call.SessionID, cancel) defer cancel() defer a.activeRequests.Del(call.SessionID) history, files := a.preparePrompt(msgs, call.Attachments...) startTime := time.Now() a.eventPromptSent(call.SessionID) var currentAssistant *message.Message var shouldSummarize bool result, err := agent.Stream(genCtx, fantasy.AgentStreamCall{ Prompt: message.PromptWithTextAttachments(call.Prompt, call.Attachments), Files: files, Messages: history, ProviderOptions: call.ProviderOptions, MaxOutputTokens: &call.MaxOutputTokens, TopP: call.TopP, Temperature: call.Temperature, PresencePenalty: call.PresencePenalty, TopK: call.TopK, FrequencyPenalty: call.FrequencyPenalty, PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = options.Messages for i := range prepared.Messages { prepared.Messages[i].ProviderOptions = nil } queuedCalls, _ := a.messageQueue.Get(call.SessionID) a.messageQueue.Del(call.SessionID) for _, queued := range queuedCalls { userMessage, createErr := a.createUserMessage(callContext, queued) if createErr != nil { return callContext, prepared, createErr } prepared.Messages = append(prepared.Messages, userMessage.ToAIMessage()...) } prepared.Messages = a.workaroundProviderMediaLimitations(prepared.Messages, largeModel) lastSystemRoleInx := 0 systemMessageUpdated := false for i, msg := range prepared.Messages { // Only add cache control to the last message. if msg.Role == fantasy.MessageRoleSystem { lastSystemRoleInx = i } else if !systemMessageUpdated { prepared.Messages[lastSystemRoleInx].ProviderOptions = a.getCacheControlOptions() systemMessageUpdated = true } // Than add cache control to the last 2 messages. if i > len(prepared.Messages)-3 { prepared.Messages[i].ProviderOptions = a.getCacheControlOptions() } } if promptPrefix != "" { prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(promptPrefix)}, prepared.Messages...) } var assistantMsg message.Message assistantMsg, err = a.messages.Create(callContext, call.SessionID, message.CreateMessageParams{ Role: message.Assistant, Parts: []message.ContentPart{}, Model: largeModel.ModelCfg.Model, Provider: largeModel.ModelCfg.Provider, }) if err != nil { return callContext, prepared, err } callContext = context.WithValue(callContext, tools.MessageIDContextKey, assistantMsg.ID) callContext = context.WithValue(callContext, tools.SupportsImagesContextKey, largeModel.CatwalkCfg.SupportsImages) callContext = context.WithValue(callContext, tools.ModelNameContextKey, largeModel.CatwalkCfg.Name) currentAssistant = &assistantMsg return callContext, prepared, err }, OnReasoningStart: func(id string, reasoning fantasy.ReasoningContent) error { currentAssistant.AppendReasoningContent(reasoning.Text) return a.messages.Update(genCtx, *currentAssistant) }, OnReasoningDelta: func(id string, text string) error { currentAssistant.AppendReasoningContent(text) return a.messages.Update(genCtx, *currentAssistant) }, OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error { // handle anthropic signature if anthropicData, ok := reasoning.ProviderMetadata[anthropic.Name]; ok { if reasoning, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok { currentAssistant.AppendReasoningSignature(reasoning.Signature) } } if googleData, ok := reasoning.ProviderMetadata[google.Name]; ok { if reasoning, ok := googleData.(*google.ReasoningMetadata); ok { currentAssistant.AppendThoughtSignature(reasoning.Signature, reasoning.ToolID) } } if openaiData, ok := reasoning.ProviderMetadata[openai.Name]; ok { if reasoning, ok := openaiData.(*openai.ResponsesReasoningMetadata); ok { currentAssistant.SetReasoningResponsesData(reasoning) } } currentAssistant.FinishThinking() return a.messages.Update(genCtx, *currentAssistant) }, OnTextDelta: func(id string, text string) error { // Strip leading newline from initial text content. This is is // particularly important in non-interactive mode where leading // newlines are very visible. if len(currentAssistant.Parts) == 0 { text = strings.TrimPrefix(text, "\n") } currentAssistant.AppendContent(text) return a.messages.Update(genCtx, *currentAssistant) }, OnToolInputStart: func(id string, toolName string) error { toolCall := message.ToolCall{ ID: id, Name: toolName, ProviderExecuted: false, Finished: false, } currentAssistant.AddToolCall(toolCall) return a.messages.Update(genCtx, *currentAssistant) }, OnRetry: func(err *fantasy.ProviderError, delay time.Duration) { // TODO: implement }, OnToolCall: func(tc fantasy.ToolCallContent) error { toolCall := message.ToolCall{ ID: tc.ToolCallID, Name: tc.ToolName, Input: tc.Input, ProviderExecuted: false, Finished: true, } currentAssistant.AddToolCall(toolCall) return a.messages.Update(genCtx, *currentAssistant) }, OnToolResult: func(result fantasy.ToolResultContent) error { toolResult := a.convertToToolResult(result) _, createMsgErr := a.messages.Create(genCtx, currentAssistant.SessionID, message.CreateMessageParams{ Role: message.Tool, Parts: []message.ContentPart{ toolResult, }, }) return createMsgErr }, OnStepFinish: func(stepResult fantasy.StepResult) error { finishReason := message.FinishReasonUnknown switch stepResult.FinishReason { case fantasy.FinishReasonLength: finishReason = message.FinishReasonMaxTokens case fantasy.FinishReasonStop: finishReason = message.FinishReasonEndTurn case fantasy.FinishReasonToolCalls: finishReason = message.FinishReasonToolUse } currentAssistant.AddFinish(finishReason, "", "") sessionLock.Lock() updatedSession, getSessionErr := a.sessions.Get(genCtx, call.SessionID) if getSessionErr != nil { sessionLock.Unlock() return getSessionErr } a.updateSessionUsage(largeModel, &updatedSession, stepResult.Usage, a.openrouterCost(stepResult.ProviderMetadata)) _, sessionErr := a.sessions.Save(genCtx, updatedSession) sessionLock.Unlock() if sessionErr != nil { return sessionErr } return a.messages.Update(genCtx, *currentAssistant) }, StopWhen: []fantasy.StopCondition{ func(_ []fantasy.StepResult) bool { cw := int64(largeModel.CatwalkCfg.ContextWindow) tokens := currentSession.CompletionTokens + currentSession.PromptTokens remaining := cw - tokens var threshold int64 if cw > largeContextWindowThreshold { threshold = largeContextWindowBuffer } else { threshold = int64(float64(cw) * smallContextWindowRatio) } if (remaining <= threshold) && !a.disableAutoSummarize { shouldSummarize = true return true } return false }, }, }) a.eventPromptResponded(call.SessionID, time.Since(startTime).Truncate(time.Second)) if err != nil { isCancelErr := errors.Is(err, context.Canceled) isPermissionErr := errors.Is(err, permission.ErrorPermissionDenied) if currentAssistant == nil { return result, err } // Ensure we finish thinking on error to close the reasoning state. currentAssistant.FinishThinking() toolCalls := currentAssistant.ToolCalls() // INFO: we use the parent context here because the genCtx has been cancelled. msgs, createErr := a.messages.List(ctx, currentAssistant.SessionID) if createErr != nil { return nil, createErr } for _, tc := range toolCalls { if !tc.Finished { tc.Finished = true tc.Input = "{}" currentAssistant.AddToolCall(tc) updateErr := a.messages.Update(ctx, *currentAssistant) if updateErr != nil { return nil, updateErr } } found := false for _, msg := range msgs { if msg.Role == message.Tool { for _, tr := range msg.ToolResults() { if tr.ToolCallID == tc.ID { found = true break } } } if found { break } } if found { continue } content := "There was an error while executing the tool" if isCancelErr { content = "Tool execution canceled by user" } else if isPermissionErr { content = "User denied permission" } toolResult := message.ToolResult{ ToolCallID: tc.ID, Name: tc.Name, Content: content, IsError: true, } _, createErr = a.messages.Create(ctx, currentAssistant.SessionID, message.CreateMessageParams{ Role: message.Tool, Parts: []message.ContentPart{ toolResult, }, }) if createErr != nil { return nil, createErr } } var fantasyErr *fantasy.Error var providerErr *fantasy.ProviderError const defaultTitle = "Provider Error" linkStyle := lipgloss.NewStyle().Foreground(charmtone.Guac).Underline(true) if isCancelErr { currentAssistant.AddFinish(message.FinishReasonCanceled, "User canceled request", "") } else if isPermissionErr { currentAssistant.AddFinish(message.FinishReasonPermissionDenied, "User denied permission", "") } else if errors.Is(err, hyper.ErrNoCredits) { url := hyper.BaseURL() link := linkStyle.Hyperlink(url, "id=hyper").Render(url) currentAssistant.AddFinish(message.FinishReasonError, "No credits", "You're out of credits. Add more at "+link) } else if errors.As(err, &providerErr) { if providerErr.Message == "The requested model is not supported." { url := "https://github.com/settings/copilot/features" link := linkStyle.Hyperlink(url, "id=copilot").Render(url) currentAssistant.AddFinish( message.FinishReasonError, "Copilot model not enabled", fmt.Sprintf("%q is not enabled in Copilot. Go to the following page to enable it. Then, wait 5 minutes before trying again. %s", largeModel.CatwalkCfg.Name, link), ) } else { currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(providerErr.Title), defaultTitle), providerErr.Message) } } else if errors.As(err, &fantasyErr) { currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(fantasyErr.Title), defaultTitle), fantasyErr.Message) } else { currentAssistant.AddFinish(message.FinishReasonError, defaultTitle, err.Error()) } // Note: we use the parent context here because the genCtx has been // cancelled. updateErr := a.messages.Update(ctx, *currentAssistant) if updateErr != nil { return nil, updateErr } return nil, err } if shouldSummarize { a.activeRequests.Del(call.SessionID) if summarizeErr := a.Summarize(genCtx, call.SessionID, call.ProviderOptions); summarizeErr != nil { return nil, summarizeErr } // If the agent wasn't done... if len(currentAssistant.ToolCalls()) > 0 { existing, ok := a.messageQueue.Get(call.SessionID) if !ok { existing = []SessionAgentCall{} } call.Prompt = fmt.Sprintf("The previous session was interrupted because it got too long, the initial user request was: `%s`", call.Prompt) existing = append(existing, call) a.messageQueue.Set(call.SessionID, existing) } } // Release active request before processing queued messages. a.activeRequests.Del(call.SessionID) cancel() queuedMessages, ok := a.messageQueue.Get(call.SessionID) if !ok || len(queuedMessages) == 0 { return result, err } // There are queued messages restart the loop. firstQueuedMessage := queuedMessages[0] a.messageQueue.Set(call.SessionID, queuedMessages[1:]) return a.Run(ctx, firstQueuedMessage) } func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fantasy.ProviderOptions) error { if a.IsSessionBusy(sessionID) { return ErrSessionBusy } // Copy mutable fields under lock to avoid races with SetModels. largeModel := a.largeModel.Get() systemPromptPrefix := a.systemPromptPrefix.Get() currentSession, err := a.sessions.Get(ctx, sessionID) if err != nil { return fmt.Errorf("failed to get session: %w", err) } msgs, err := a.getSessionMessages(ctx, currentSession) if err != nil { return err } if len(msgs) == 0 { // Nothing to summarize. return nil } aiMsgs, _ := a.preparePrompt(msgs) genCtx, cancel := context.WithCancel(ctx) a.activeRequests.Set(sessionID, cancel) defer a.activeRequests.Del(sessionID) defer cancel() agent := fantasy.NewAgent(largeModel.Model, fantasy.WithSystemPrompt(string(summaryPrompt)), ) summaryMessage, err := a.messages.Create(ctx, sessionID, message.CreateMessageParams{ Role: message.Assistant, Model: largeModel.Model.Model(), Provider: largeModel.Model.Provider(), IsSummaryMessage: true, }) if err != nil { return err } summaryPromptText := buildSummaryPrompt(currentSession.Todos) resp, err := agent.Stream(genCtx, fantasy.AgentStreamCall{ Prompt: summaryPromptText, Messages: aiMsgs, ProviderOptions: opts, PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = options.Messages if systemPromptPrefix != "" { prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(systemPromptPrefix)}, prepared.Messages...) } return callContext, prepared, nil }, OnReasoningDelta: func(id string, text string) error { summaryMessage.AppendReasoningContent(text) return a.messages.Update(genCtx, summaryMessage) }, OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error { // Handle anthropic signature. if anthropicData, ok := reasoning.ProviderMetadata["anthropic"]; ok { if signature, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok && signature.Signature != "" { summaryMessage.AppendReasoningSignature(signature.Signature) } } summaryMessage.FinishThinking() return a.messages.Update(genCtx, summaryMessage) }, OnTextDelta: func(id, text string) error { summaryMessage.AppendContent(text) return a.messages.Update(genCtx, summaryMessage) }, }) if err != nil { isCancelErr := errors.Is(err, context.Canceled) if isCancelErr { // User cancelled summarize we need to remove the summary message. deleteErr := a.messages.Delete(ctx, summaryMessage.ID) return deleteErr } return err } summaryMessage.AddFinish(message.FinishReasonEndTurn, "", "") err = a.messages.Update(genCtx, summaryMessage) if err != nil { return err } var openrouterCost *float64 for _, step := range resp.Steps { stepCost := a.openrouterCost(step.ProviderMetadata) if stepCost != nil { newCost := *stepCost if openrouterCost != nil { newCost += *openrouterCost } openrouterCost = &newCost } } a.updateSessionUsage(largeModel, ¤tSession, resp.TotalUsage, openrouterCost) // Just in case, get just the last usage info. usage := resp.Response.Usage currentSession.SummaryMessageID = summaryMessage.ID currentSession.CompletionTokens = usage.OutputTokens currentSession.PromptTokens = 0 _, err = a.sessions.Save(genCtx, currentSession) return err } func (a *sessionAgent) getCacheControlOptions() fantasy.ProviderOptions { if t, _ := strconv.ParseBool(os.Getenv("CRUSH_DISABLE_ANTHROPIC_CACHE")); t { return fantasy.ProviderOptions{} } return fantasy.ProviderOptions{ anthropic.Name: &anthropic.ProviderCacheControlOptions{ CacheControl: anthropic.CacheControl{Type: "ephemeral"}, }, bedrock.Name: &anthropic.ProviderCacheControlOptions{ CacheControl: anthropic.CacheControl{Type: "ephemeral"}, }, } } func (a *sessionAgent) createUserMessage(ctx context.Context, call SessionAgentCall) (message.Message, error) { parts := []message.ContentPart{message.TextContent{Text: call.Prompt}} var attachmentParts []message.ContentPart for _, attachment := range call.Attachments { attachmentParts = append(attachmentParts, message.BinaryContent{Path: attachment.FilePath, MIMEType: attachment.MimeType, Data: attachment.Content}) } parts = append(parts, attachmentParts...) msg, err := a.messages.Create(ctx, call.SessionID, message.CreateMessageParams{ Role: message.User, Parts: parts, }) if err != nil { return message.Message{}, fmt.Errorf("failed to create user message: %w", err) } return msg, nil } func (a *sessionAgent) preparePrompt(msgs []message.Message, attachments ...message.Attachment) ([]fantasy.Message, []fantasy.FilePart) { var history []fantasy.Message if !a.isSubAgent { history = append(history, fantasy.NewUserMessage( fmt.Sprintf("%s", `This is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the "todos" tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.`, ), )) } for _, m := range msgs { if len(m.Parts) == 0 { continue } // Assistant message without content or tool calls (cancelled before it // returned anything). if m.Role == message.Assistant && len(m.ToolCalls()) == 0 && m.Content().Text == "" && m.ReasoningContent().String() == "" { continue } history = append(history, m.ToAIMessage()...) } var files []fantasy.FilePart for _, attachment := range attachments { if attachment.IsText() { continue } files = append(files, fantasy.FilePart{ Filename: attachment.FileName, Data: attachment.Content, MediaType: attachment.MimeType, }) } return history, files } func (a *sessionAgent) getSessionMessages(ctx context.Context, session session.Session) ([]message.Message, error) { msgs, err := a.messages.List(ctx, session.ID) if err != nil { return nil, fmt.Errorf("failed to list messages: %w", err) } if session.SummaryMessageID != "" { summaryMsgIndex := -1 for i, msg := range msgs { if msg.ID == session.SummaryMessageID { summaryMsgIndex = i break } } if summaryMsgIndex != -1 { msgs = msgs[summaryMsgIndex:] msgs[0].Role = message.User } } return msgs, nil } // generateTitle generates a session titled based on the initial prompt. func (a *sessionAgent) generateTitle(ctx context.Context, sessionID string, userPrompt string) { if userPrompt == "" { return } smallModel := a.smallModel.Get() largeModel := a.largeModel.Get() systemPromptPrefix := a.systemPromptPrefix.Get() var maxOutputTokens int64 = 40 if smallModel.CatwalkCfg.CanReason { maxOutputTokens = smallModel.CatwalkCfg.DefaultMaxTokens } newAgent := func(m fantasy.LanguageModel, p []byte, tok int64) fantasy.Agent { return fantasy.NewAgent(m, fantasy.WithSystemPrompt(string(p)+"\n /no_think"), fantasy.WithMaxOutputTokens(tok), ) } streamCall := fantasy.AgentStreamCall{ Prompt: fmt.Sprintf("Generate a concise title for the following content:\n\n%s\n \n\n", userPrompt), PrepareStep: func(callCtx context.Context, opts fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = opts.Messages if systemPromptPrefix != "" { prepared.Messages = append([]fantasy.Message{ fantasy.NewSystemMessage(systemPromptPrefix), }, prepared.Messages...) } return callCtx, prepared, nil }, } // Use the small model to generate the title. model := smallModel agent := newAgent(model.Model, titlePrompt, maxOutputTokens) resp, err := agent.Stream(ctx, streamCall) if err == nil { // We successfully generated a title with the small model. slog.Info("generated title with small model") } else { // It didn't work. Let's try with the big model. slog.Error("error generating title with small model; trying big model", "err", err) model = largeModel agent = newAgent(model.Model, titlePrompt, maxOutputTokens) resp, err = agent.Stream(ctx, streamCall) if err == nil { slog.Info("generated title with large model") } else { // Welp, the large model didn't work either. Use the default // session name and return. slog.Error("error generating title with large model", "err", err) saveErr := a.sessions.UpdateTitleAndUsage(ctx, sessionID, defaultSessionName, 0, 0, 0) if saveErr != nil { slog.Error("failed to save session title and usage", "error", saveErr) } return } } if resp == nil { // Actually, we didn't get a response so we can't. Use the default // session name and return. slog.Error("response is nil; can't generate title") saveErr := a.sessions.UpdateTitleAndUsage(ctx, sessionID, defaultSessionName, 0, 0, 0) if saveErr != nil { slog.Error("failed to save session title and usage", "error", saveErr) } return } // Clean up title. var title string title = strings.ReplaceAll(resp.Response.Content.Text(), "\n", " ") // Remove thinking tags if present. title = thinkTagRegex.ReplaceAllString(title, "") title = strings.TrimSpace(title) if title == "" { slog.Warn("empty title; using fallback") title = defaultSessionName } // Calculate usage and cost. var openrouterCost *float64 for _, step := range resp.Steps { stepCost := a.openrouterCost(step.ProviderMetadata) if stepCost != nil { newCost := *stepCost if openrouterCost != nil { newCost += *openrouterCost } openrouterCost = &newCost } } modelConfig := model.CatwalkCfg cost := modelConfig.CostPer1MInCached/1e6*float64(resp.TotalUsage.CacheCreationTokens) + modelConfig.CostPer1MOutCached/1e6*float64(resp.TotalUsage.CacheReadTokens) + modelConfig.CostPer1MIn/1e6*float64(resp.TotalUsage.InputTokens) + modelConfig.CostPer1MOut/1e6*float64(resp.TotalUsage.OutputTokens) // Use override cost if available (e.g., from OpenRouter). if openrouterCost != nil { cost = *openrouterCost } promptTokens := resp.TotalUsage.InputTokens + resp.TotalUsage.CacheCreationTokens completionTokens := resp.TotalUsage.OutputTokens + resp.TotalUsage.CacheReadTokens // Atomically update only title and usage fields to avoid overriding other // concurrent session updates. saveErr := a.sessions.UpdateTitleAndUsage(ctx, sessionID, title, promptTokens, completionTokens, cost) if saveErr != nil { slog.Error("failed to save session title and usage", "error", saveErr) return } } func (a *sessionAgent) openrouterCost(metadata fantasy.ProviderMetadata) *float64 { openrouterMetadata, ok := metadata[openrouter.Name] if !ok { return nil } opts, ok := openrouterMetadata.(*openrouter.ProviderMetadata) if !ok { return nil } return &opts.Usage.Cost } func (a *sessionAgent) updateSessionUsage(model Model, session *session.Session, usage fantasy.Usage, overrideCost *float64) { modelConfig := model.CatwalkCfg cost := modelConfig.CostPer1MInCached/1e6*float64(usage.CacheCreationTokens) + modelConfig.CostPer1MOutCached/1e6*float64(usage.CacheReadTokens) + modelConfig.CostPer1MIn/1e6*float64(usage.InputTokens) + modelConfig.CostPer1MOut/1e6*float64(usage.OutputTokens) a.eventTokensUsed(session.ID, model, usage, cost) if overrideCost != nil { session.Cost += *overrideCost } else { session.Cost += cost } session.CompletionTokens = usage.OutputTokens + usage.CacheReadTokens session.PromptTokens = usage.InputTokens + usage.CacheCreationTokens } func (a *sessionAgent) Cancel(sessionID string) { // Cancel regular requests. Don't use Take() here - we need the entry to // remain in activeRequests so IsBusy() returns true until the goroutine // fully completes (including error handling that may access the DB). // The defer in processRequest will clean up the entry. if cancel, ok := a.activeRequests.Get(sessionID); ok && cancel != nil { slog.Info("Request cancellation initiated", "session_id", sessionID) cancel() } // Also check for summarize requests. if cancel, ok := a.activeRequests.Get(sessionID + "-summarize"); ok && cancel != nil { slog.Info("Summarize cancellation initiated", "session_id", sessionID) cancel() } if a.QueuedPrompts(sessionID) > 0 { slog.Info("Clearing queued prompts", "session_id", sessionID) a.messageQueue.Del(sessionID) } } func (a *sessionAgent) ClearQueue(sessionID string) { if a.QueuedPrompts(sessionID) > 0 { slog.Info("Clearing queued prompts", "session_id", sessionID) a.messageQueue.Del(sessionID) } } func (a *sessionAgent) CancelAll() { if !a.IsBusy() { return } for key := range a.activeRequests.Seq2() { a.Cancel(key) // key is sessionID } timeout := time.After(5 * time.Second) for a.IsBusy() { select { case <-timeout: return default: time.Sleep(200 * time.Millisecond) } } } func (a *sessionAgent) IsBusy() bool { var busy bool for cancelFunc := range a.activeRequests.Seq() { if cancelFunc != nil { busy = true break } } return busy } func (a *sessionAgent) IsSessionBusy(sessionID string) bool { _, busy := a.activeRequests.Get(sessionID) return busy } func (a *sessionAgent) QueuedPrompts(sessionID string) int { l, ok := a.messageQueue.Get(sessionID) if !ok { return 0 } return len(l) } func (a *sessionAgent) QueuedPromptsList(sessionID string) []string { l, ok := a.messageQueue.Get(sessionID) if !ok { return nil } prompts := make([]string, len(l)) for i, call := range l { prompts[i] = call.Prompt } return prompts } func (a *sessionAgent) SetModels(large Model, small Model) { a.largeModel.Set(large) a.smallModel.Set(small) } func (a *sessionAgent) SetTools(tools []fantasy.AgentTool) { a.tools.SetSlice(tools) } func (a *sessionAgent) SetSystemPrompt(systemPrompt string) { a.systemPrompt.Set(systemPrompt) } func (a *sessionAgent) Model() Model { return a.largeModel.Get() } // convertToToolResult converts a fantasy tool result to a message tool result. func (a *sessionAgent) convertToToolResult(result fantasy.ToolResultContent) message.ToolResult { baseResult := message.ToolResult{ ToolCallID: result.ToolCallID, Name: result.ToolName, Metadata: result.ClientMetadata, } switch result.Result.GetType() { case fantasy.ToolResultContentTypeText: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](result.Result); ok { baseResult.Content = r.Text } case fantasy.ToolResultContentTypeError: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](result.Result); ok { baseResult.Content = r.Error.Error() baseResult.IsError = true } case fantasy.ToolResultContentTypeMedia: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](result.Result); ok { content := r.Text if content == "" { content = fmt.Sprintf("Loaded %s content", r.MediaType) } baseResult.Content = content baseResult.Data = r.Data baseResult.MIMEType = r.MediaType } } return baseResult } // workaroundProviderMediaLimitations converts media content in tool results to // user messages for providers that don't natively support images in tool results. // // Problem: OpenAI, Google, OpenRouter, and other OpenAI-compatible providers // don't support sending images/media in tool result messages - they only accept // text in tool results. However, they DO support images in user messages. // // If we send media in tool results to these providers, the API returns an error. // // Solution: For these providers, we: // 1. Replace the media in the tool result with a text placeholder // 2. Inject a user message immediately after with the image as a file attachment // 3. This maintains the tool execution flow while working around API limitations // // Anthropic and Bedrock support images natively in tool results, so we skip // this workaround for them. // // Example transformation: // // BEFORE: [tool result: image data] // AFTER: [tool result: "Image loaded - see attached"], [user: image attachment] func (a *sessionAgent) workaroundProviderMediaLimitations(messages []fantasy.Message, largeModel Model) []fantasy.Message { providerSupportsMedia := largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderAnthropic) || largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderBedrock) if providerSupportsMedia { return messages } convertedMessages := make([]fantasy.Message, 0, len(messages)) for _, msg := range messages { if msg.Role != fantasy.MessageRoleTool { convertedMessages = append(convertedMessages, msg) continue } textParts := make([]fantasy.MessagePart, 0, len(msg.Content)) var mediaFiles []fantasy.FilePart for _, part := range msg.Content { toolResult, ok := fantasy.AsMessagePart[fantasy.ToolResultPart](part) if !ok { textParts = append(textParts, part) continue } if media, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](toolResult.Output); ok { decoded, err := base64.StdEncoding.DecodeString(media.Data) if err != nil { slog.Warn("failed to decode media data", "error", err) textParts = append(textParts, part) continue } mediaFiles = append(mediaFiles, fantasy.FilePart{ Data: decoded, MediaType: media.MediaType, Filename: fmt.Sprintf("tool-result-%s", toolResult.ToolCallID), }) textParts = append(textParts, fantasy.ToolResultPart{ ToolCallID: toolResult.ToolCallID, Output: fantasy.ToolResultOutputContentText{ Text: "[Image/media content loaded - see attached file]", }, ProviderOptions: toolResult.ProviderOptions, }) } else { textParts = append(textParts, part) } } convertedMessages = append(convertedMessages, fantasy.Message{ Role: fantasy.MessageRoleTool, Content: textParts, }) if len(mediaFiles) > 0 { convertedMessages = append(convertedMessages, fantasy.NewUserMessage( "Here is the media content from the tool result:", mediaFiles..., )) } } return convertedMessages } // buildSummaryPrompt constructs the prompt text for session summarization. func buildSummaryPrompt(todos []session.Todo) string { var sb strings.Builder sb.WriteString("Provide a detailed summary of our conversation above.") if len(todos) > 0 { sb.WriteString("\n\n## Current Todo List\n\n") for _, t := range todos { fmt.Fprintf(&sb, "- [%s] %s\n", t.Status, t.Content) } sb.WriteString("\nInclude these tasks and their statuses in your summary. ") sb.WriteString("Instruct the resuming assistant to use the `todos` tool to continue tracking progress on these tasks.") } return sb.String() }