// Package agent is the core orchestration layer for Crush AI agents. // // It provides session-based AI agent functionality for managing // conversations, tool execution, and message handling. It coordinates // interactions between language models, messages, sessions, and tools while // handling features like automatic summarization, queuing, and token // management. package agent import ( "cmp" "context" _ "embed" "encoding/base64" "errors" "fmt" "log/slog" "net/http" "os" "regexp" "strconv" "strings" "sync" "sync/atomic" "time" "charm.land/catwalk/pkg/catwalk" "charm.land/fantasy" "charm.land/fantasy/providers/anthropic" "charm.land/fantasy/providers/bedrock" "charm.land/fantasy/providers/google" "charm.land/fantasy/providers/openai" "charm.land/fantasy/providers/openrouter" "charm.land/fantasy/providers/vercel" "charm.land/lipgloss/v2" "github.com/charmbracelet/crush/internal/agent/hyper" "github.com/charmbracelet/crush/internal/agent/notify" "github.com/charmbracelet/crush/internal/agent/tools" "github.com/charmbracelet/crush/internal/agent/tools/mcp" "github.com/charmbracelet/crush/internal/config" "github.com/charmbracelet/crush/internal/csync" "github.com/charmbracelet/crush/internal/message" "github.com/charmbracelet/crush/internal/pubsub" "github.com/charmbracelet/crush/internal/session" "github.com/charmbracelet/crush/internal/stringext" "github.com/charmbracelet/crush/internal/version" "github.com/charmbracelet/x/exp/charmtone" ) const ( DefaultSessionName = "Untitled Session" // Constants for auto-summarization thresholds largeContextWindowThreshold = 200_000 largeContextWindowBuffer = 20_000 smallContextWindowRatio = 0.2 ) var userAgent = fmt.Sprintf("Charm-Crush/%s (https://charm.land/crush)", version.Version) //go:embed templates/title.md var titlePrompt []byte //go:embed templates/summary.md var summaryPrompt []byte // Used to remove tags from generated titles. var ( thinkTagRegex = regexp.MustCompile(`(?s).*?`) orphanThinkTagRegex = regexp.MustCompile(``) ) type SessionAgentCall struct { SessionID string // RunID, when non-empty, is the caller-supplied correlator that // gets echoed back on the notify.RunComplete event emitted for // this turn. It is preserved when the call is enqueued behind a // busy session so the queued turn's terminal event is still // recognisable to the original caller. Callers that need a // reliable completion contract (e.g. `crush run` against a // session that may be busy) MUST set it; SessionID alone is // ambiguous when concurrent turns share the same session. RunID string Prompt string ProviderOptions fantasy.ProviderOptions Attachments []message.Attachment MaxOutputTokens int64 Temperature *float64 TopP *float64 TopK *int64 FrequencyPenalty *float64 PresencePenalty *float64 NonInteractive bool // OnComplete, when non-nil, replaces the default RunComplete // publish path: the inner Run hands the terminal payload to this // callback instead of emitting it on the RunComplete broker. The // coordinator uses this hook to coalesce the unauthorized → // re-auth → retry chain into a single user-visible terminal // event, so non-interactive clients (e.g. `crush run`) don't // exit on a stale failed-attempt RunComplete before the // successful retry. It is intentionally stripped when queueing // a busy-session call (see Run): the originating // coordinator.Run has long returned by the time the queued // recursion drains, so falling back to the default broker // publish keeps the event visible to subscribers. OnComplete func(notify.RunComplete) // Accepted, when non-nil, is the accept reservation taken by // BeginAccepted before the call was dispatched onto a goroutine // (the client/server fire-and-forget path). Run consumes it under // dispatchMu[SessionID] once the accepted -> (cancel-on-entry | // queued | active) transition has been chosen. When nil // (in-process / local callers like AppWorkspace), behavior is // unchanged and no accept tracking applies. Accepted *AcceptedRun // acceptSeq carries the accept sequence of the handle that produced // this call after it has been enqueued and its Accepted handle // stripped. The queue-drain paths compare it against a session's // cancel mark so a follow-up queued before a cancel is dropped while // one queued after the cancel survives. 0 means untracked (an // in-process enqueue with no accept reservation), which the drain // paths treat as covered by any present mark, preserving the // pre-sequence behavior. acceptSeq uint64 } type SessionAgent interface { Run(context.Context, SessionAgentCall) (*fantasy.AgentResult, error) BeginAccepted(sessionID string) *AcceptedRun SetModels(large Model, small Model) SetTools(tools []fantasy.AgentTool) SetSystemPrompt(systemPrompt string) Cancel(sessionID string) CancelAll() IsSessionBusy(sessionID string) bool IsBusy() bool QueuedPrompts(sessionID string) int QueuedPromptsList(sessionID string) []string ClearQueue(sessionID string) Summarize(context.Context, string, fantasy.ProviderOptions) error Model() Model } type Model struct { Model fantasy.LanguageModel CatwalkCfg catwalk.Model ModelCfg config.SelectedModel FlatRate bool } type sessionAgent struct { largeModel *csync.Value[Model] smallModel *csync.Value[Model] systemPromptPrefix *csync.Value[string] systemPrompt *csync.Value[string] tools *csync.Slice[fantasy.AgentTool] isSubAgent bool sessions session.Service messages message.Service disableAutoSummarize bool isYolo bool notify pubsub.Publisher[notify.Notification] runComplete pubsub.Publisher[notify.RunComplete] messageQueue *csync.Map[string, []SessionAgentCall] activeRequests *csync.Map[string, context.CancelFunc] // dispatchMu holds a per-session mutex that serializes the // accepted -> (cancel-on-entry | queued | active) transition in // Run against a concurrent Cancel. The lock is held only during // the brief handoff (no DB or LLM I/O under the lock). dispatchMu *csync.Map[string, *sync.Mutex] // acceptedRuns counts dispatched-but-not-yet-active runs per // session. A counter > 0 means a dispatched prompt is in flight // and has not yet completed the dispatch handoff in Run. Only // BeginAccepted increments it; only AcceptedRun.Close decrements // it. acceptedRuns *csync.Map[string, int] // cancelMark records, per session, a high-water accept sequence: an // accepted handle is canceled by it iff the handle's sequence is at // or below the mark. Cancel raises the mark to the latest sequence // assigned at cancel time, so a single Cancel covers every prompt // accepted-but-not-yet-active then, while a prompt accepted later // (higher sequence) is never poisoned. Absent or 0 means no pending // cancel. It is only raised by Cancel when acceptedRuns > 0, so an // idle Escape never records a mark. cancelMark *csync.Map[string, uint64] // dispatchMuCreate guards lazy creation of per-session entries in // dispatchMu so two goroutines can't race to lock different mutex // instances for the same session. dispatchMuCreate sync.Mutex // acceptedMu serializes increments/decrements of acceptedRuns and // the assignment of accept sequence numbers from acceptSeqGen. It // is separate from dispatchMu so AcceptedRun.Close (which may run // while Run holds dispatchMu for the same session) does not // deadlock by re-entering the dispatch lock. acceptedMu sync.Mutex // acceptSeqGen is the monotonic source of accept sequence numbers. // Each BeginAccepted increments it under acceptedMu and stamps the // returned handle, so sequences strictly increase in accept order // across the agent. Cancel uses its current value as the per-session // high-water mark. acceptSeqGen uint64 } type SessionAgentOptions struct { LargeModel Model SmallModel Model SystemPromptPrefix string SystemPrompt string IsSubAgent bool DisableAutoSummarize bool IsYolo bool Sessions session.Service Messages message.Service Tools []fantasy.AgentTool Notify pubsub.Publisher[notify.Notification] RunComplete pubsub.Publisher[notify.RunComplete] } func NewSessionAgent( opts SessionAgentOptions, ) SessionAgent { return &sessionAgent{ largeModel: csync.NewValue(opts.LargeModel), smallModel: csync.NewValue(opts.SmallModel), systemPromptPrefix: csync.NewValue(opts.SystemPromptPrefix), systemPrompt: csync.NewValue(opts.SystemPrompt), isSubAgent: opts.IsSubAgent, sessions: opts.Sessions, messages: opts.Messages, disableAutoSummarize: opts.DisableAutoSummarize, tools: csync.NewSliceFrom(opts.Tools), isYolo: opts.IsYolo, notify: opts.Notify, runComplete: opts.RunComplete, messageQueue: csync.NewMap[string, []SessionAgentCall](), activeRequests: csync.NewMap[string, context.CancelFunc](), dispatchMu: csync.NewMap[string, *sync.Mutex](), acceptedRuns: csync.NewMap[string, int](), cancelMark: csync.NewMap[string, uint64](), } } // AcceptedRun owns exactly one accept reservation taken by // BeginAccepted. It is the only carrier of accept-state across the // backend.runAgent / Coordinator.Run / sessionAgent.Run layers: a // counter > 0 means a dispatched prompt is in flight and has not yet // completed the dispatch handoff in Run. Close is the only way to // release the reservation and is idempotent. type AcceptedRun struct { agent *sessionAgent sessionID string // seq is the monotonic accept sequence stamped by BeginAccepted. A // cancel covers this handle iff seq is at or below the session's // cancel mark, so a handle accepted after a cancel (higher seq) is // never poisoned by it. seq uint64 done atomic.Bool } // Close decrements the accept counter for this reservation. It is safe // to call multiple times; only the first call has effect. func (r *AcceptedRun) Close() { if r == nil { return } if !r.done.CompareAndSwap(false, true) { return } r.agent.endAccepted(r.sessionID) } // SessionID exposes the session this reservation is for so the run path // can use it without an extra parameter. func (r *AcceptedRun) SessionID() string { if r == nil { return "" } return r.sessionID } // BeginAccepted increments the accept counter for sessionID and returns // a handle whose Close is the only way to decrement it. It is the only // entry point that mutates acceptedRuns. func (a *sessionAgent) BeginAccepted(sessionID string) *AcceptedRun { a.acceptedMu.Lock() defer a.acceptedMu.Unlock() count, _ := a.acceptedRuns.Get(sessionID) a.acceptedRuns.Set(sessionID, count+1) a.acceptSeqGen++ return &AcceptedRun{agent: a, sessionID: sessionID, seq: a.acceptSeqGen} } // endAccepted decrements the accept counter for sessionID. It is only // called via AcceptedRun.Close. It uses a dedicated lock (not the // per-session dispatch mutex) so it can run while Run holds dispatchMu // for the same session without deadlocking. // // When the count reaches zero the session's cancel mark is dropped: no // accepted handle remains for it to cover, and any handle accepted later // gets a strictly higher sequence that the mark would not match anyway. // Handles canceled on entry never reach RunComplete, so this is the only // place that clears the mark for an all-canceled batch. Sibling handles // covered by the same mark are serialized on the per-session dispatch // mutex and read the mark before they Close, so this never clears it out // from under a covered handle still waiting to enter Run. func (a *sessionAgent) endAccepted(sessionID string) { a.acceptedMu.Lock() defer a.acceptedMu.Unlock() count, ok := a.acceptedRuns.Get(sessionID) if !ok || count <= 1 { a.acceptedRuns.Del(sessionID) a.cancelMark.Del(sessionID) return } a.acceptedRuns.Set(sessionID, count-1) } // sessionMu returns the per-session dispatch mutex, creating it on first // use. Creation is guarded so concurrent callers always observe the same // mutex instance for a given session. func (a *sessionAgent) sessionMu(sessionID string) *sync.Mutex { if mu, ok := a.dispatchMu.Get(sessionID); ok { return mu } a.dispatchMuCreate.Lock() defer a.dispatchMuCreate.Unlock() if mu, ok := a.dispatchMu.Get(sessionID); ok { return mu } mu := &sync.Mutex{} a.dispatchMu.Set(sessionID, mu) return mu } // enqueueCall appends call to the session's message queue. The // OnComplete hook is stripped: the caller that supplied it (typically // coordinator.Run) has its own retry/coalesce scope that ends when it // returns, so by the time the queue drains nobody is left to consume the // buffered terminal event. The recursive Run falls back to the default // broker publish, which is what existing subscribers expect for queued // turns. func (a *sessionAgent) enqueueCall(call SessionAgentCall) { existing, ok := a.messageQueue.Get(call.SessionID) if !ok { existing = []SessionAgentCall{} } queued := call if call.Accepted != nil { // Preserve the accept sequence after the handle is stripped so // the queue-drain paths can tell a follow-up queued before a // cancel (covered by the mark) from one queued after it. queued.acceptSeq = call.Accepted.seq } queued.OnComplete = nil queued.Accepted = nil existing = append(existing, queued) a.messageQueue.Set(call.SessionID, existing) } // drainQueueForStep partitions the session's queued calls for the current // streaming step under the per-session dispatch mutex so the filtering is // atomic against a concurrent Cancel: canceledBySeq requires the caller to // hold that mutex, and evaluating it here (rather than after unlocking) // prevents a cancel recorded between the drain and the check from being // observed inconsistently. // // Calls covered by a pending cancel are dropped; the dropped ones that // carry a RunID are returned in canceledWithRunID so the caller can // publish their terminal cancelled RunComplete (a caller waiting on that // RunID, e.g. `crush run`, would otherwise hang). Uncanceled calls without // a RunID are returned in fold to be folded into the active turn, // preserving the existing follow-up behavior. Uncanceled calls that carry // a RunID are left in the queue so each runs as its own turn via the // recursive run path and publishes its own RunComplete, giving every // RunID-bearing prompt an explicit lifecycle instead of being silently // absorbed into another turn. fold is processed by the caller without the // lock held. func (a *sessionAgent) drainQueueForStep(sessionID string) (fold, canceledWithRunID []SessionAgentCall) { dispatchLock := a.sessionMu(sessionID) dispatchLock.Lock() defer dispatchLock.Unlock() queuedCalls, _ := a.messageQueue.Get(sessionID) var keep []SessionAgentCall for _, queued := range queuedCalls { if a.canceledBySeq(sessionID, queued.acceptSeq) { if queued.RunID != "" { canceledWithRunID = append(canceledWithRunID, queued) } continue } if queued.RunID != "" { keep = append(keep, queued) continue } fold = append(fold, queued) } if len(keep) == 0 { a.messageQueue.Del(sessionID) } else { a.messageQueue.Set(sessionID, keep) } return fold, canceledWithRunID } // publishCanceledQueueDrops emits a terminal cancelled RunComplete for // every dropped queued call that carries a RunID. A queued prompt removed // from the queue without ever running — covered by a pending cancel, or // cleared by Cancel/ClearQueue — would otherwise leave a caller blocked on // that RunID: `crush run` ignores live message events and exits only on a // RunComplete whose RunID matches. Calls without a RunID had no such waiter // and are dropped silently as before. A detached, bounded context keeps the // must-deliver publish alive even when the run context that triggered the // drop is already canceled. func (a *sessionAgent) publishCanceledQueueDrops(drops []SessionAgentCall) { var hasRunID bool for _, d := range drops { if d.RunID != "" { hasRunID = true break } } if !hasRunID { return } ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() for _, d := range drops { if d.RunID == "" { continue } a.publishRunComplete(ctx, d, notify.RunComplete{ SessionID: d.SessionID, RunID: d.RunID, Cancelled: true, }) } } // clearQueueAndNotify removes all queued prompts for the session and // publishes a terminal cancelled RunComplete for any that carried a RunID, // so callers waiting on those RunIDs (e.g. `crush run`) are not left // hanging when their queued prompt is discarded without running. func (a *sessionAgent) clearQueueAndNotify(sessionID string) { queued, ok := a.messageQueue.Get(sessionID) a.messageQueue.Del(sessionID) if !ok { return } a.publishCanceledQueueDrops(queued) } // clearPendingCancel removes any pending-cancel mark for sessionID. It // takes the per-session dispatch lock so it is ordered against Cancel // and the dispatch handoff. func (a *sessionAgent) clearPendingCancel(sessionID string) { mu := a.sessionMu(sessionID) mu.Lock() defer mu.Unlock() a.cancelMark.Del(sessionID) } // canceledBySeq reports whether an accepted handle or queued call with // the given accept sequence is covered by a pending cancel for the // session. Callers must hold the session's dispatch mutex. A tracked // sequence (seq > 0) is covered only when it is at or below the cancel // high-water mark, so a prompt accepted after the cancel (higher seq) is // never poisoned. An untracked sequence (seq == 0, an in-process enqueue // with no accept reservation) is covered whenever any mark is present, // preserving the pre-sequence behavior. The mark is not consumed: it // stays so every sibling handle it covers observes the same cancel, and // a later handle (higher seq) ignores it regardless. func (a *sessionAgent) canceledBySeq(sessionID string, seq uint64) bool { mark, ok := a.cancelMark.Get(sessionID) if !ok || mark == 0 { return false } return seq == 0 || seq <= mark } // persistCanceledTurn writes the user/assistant records for a turn that // was canceled before (or just as) streaming would have produced them. // It creates the user message only when it was not already created by an // earlier createUserMessage call (userMsgCreated), then writes an // assistant message with FinishReasonCanceled. Both writes use // context.WithoutCancel(ctx) so workspace shutdown (which cancels the run // context) can't drop them. func (a *sessionAgent) persistCanceledTurn(ctx context.Context, call SessionAgentCall, userMsgCreated bool) error { writeCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second) defer cancel() if !userMsgCreated { if _, err := a.createUserMessage(writeCtx, call); err != nil { return err } } largeModel := a.largeModel.Get() assistant, err := a.messages.Create(writeCtx, call.SessionID, message.CreateMessageParams{ Role: message.Assistant, Parts: []message.ContentPart{}, Model: largeModel.ModelCfg.Model, Provider: largeModel.ModelCfg.Provider, }) if err != nil { return err } assistant.AddFinish(message.FinishReasonCanceled, "User canceled request", "") return a.messages.Update(writeCtx, assistant) } // publishRunComplete emits the authoritative terminal event for a turn. // It honors the per-call OnComplete hook when set (so the coordinator can // coalesce retries) and otherwise falls back to the RunComplete broker. // ctx is used only for the bounded-blocking must-deliver publish; the // terminal payload is supplied by the caller. This is the single emit path // shared by the streaming defer and the cancel-on-entry early return so a // caller waiting on RunComplete (e.g. `crush run` with a RunID) always // observes exactly one terminal event regardless of which Run branch ends // the turn. func (a *sessionAgent) publishRunComplete(ctx context.Context, call SessionAgentCall, complete notify.RunComplete) { if call.OnComplete != nil { call.OnComplete(complete) return } if a.runComplete == nil { return } a.runComplete.PublishMustDeliver(ctx, pubsub.UpdatedEvent, complete) } // ValidateCall performs the cheap structural validation that // sessionAgent.Run requires before a call can be dispatched: a call must // carry either a non-empty prompt or a text attachment, and it must name a // session. It is exported so callers that accept a run before dispatching it // (e.g. backend.SendMessage) can apply the same checks and keep the error // contract consistent. func ValidateCall(call SessionAgentCall) error { if call.Prompt == "" && !message.ContainsTextAttachment(call.Attachments) { return ErrEmptyPrompt } if call.SessionID == "" { return ErrSessionMissing } return nil } func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (result *fantasy.AgentResult, retErr error) { if err := ValidateCall(call); err != nil { return nil, err } // genCtx/cancel are the run context and its cancel func. For the // accepted (fire-and-forget) dispatch path they are created under // dispatchMu below so a concurrent Cancel can observe the // activeRequests entry before the assistant message exists. For // the in-process path they stay nil here and are created later, // preserving the original ordering. var ( genCtx context.Context cancel context.CancelFunc activeRegistered bool userMsgCreated bool ) if call.Accepted != nil { // Serialize the accepted -> (cancel-on-entry | queued | // active) transition against a concurrent Cancel. Cancel takes // the same per-session lock, so every cancel observes at least // one of: a cancel mark, an activeRequests entry, or a // messageQueue entry it then clears. mu := a.sessionMu(call.SessionID) mu.Lock() if a.canceledBySeq(call.SessionID, call.Accepted.seq) { // Cancel-on-entry: a cancel arrived while this run was // dispatched but not yet active, and this handle's accept // sequence is at or below the session's cancel mark. The // mark is left in place so sibling handles it also covers // observe the same cancel; release the accept reservation, // drop the lock, and persist a canceled turn without // entering Stream. // // This path returns before the streaming defer that // publishes RunComplete is installed, so emit the terminal // event explicitly. Without it, a caller waiting on // RunComplete for this RunID (e.g. `crush run`, which // ignores message events and blocks on RunComplete) would // hang on an immediately-canceled accepted run. call.Accepted.Close() mu.Unlock() complete := notify.RunComplete{ SessionID: call.SessionID, RunID: call.RunID, Cancelled: true, } if err := a.persistCanceledTurn(ctx, call, false); err != nil { complete.Error = err.Error() a.publishRunComplete(ctx, call, complete) return nil, err } a.publishRunComplete(ctx, call, complete) return nil, nil } if a.IsSessionBusy(call.SessionID) { // Busy: an earlier prompt is active. Queue this call and // release the accept reservation. A Cancel arriving after // this point sees the active entry and clears the queue. a.enqueueCall(call) call.Accepted.Close() mu.Unlock() return nil, nil } // Idle: become the active run. Register the cancel func before // dropping the lock so a Cancel that arrives between here and // assistant creation is not lost. runCtx := context.WithValue(ctx, tools.SessionIDContextKey, call.SessionID) genCtx, cancel = context.WithCancel(runCtx) a.activeRequests.Set(call.SessionID, cancel) activeRegistered = true call.Accepted.Close() mu.Unlock() defer cancel() defer a.activeRequests.Del(call.SessionID) } else if a.IsSessionBusy(call.SessionID) { // Queue the message if busy. Strip OnComplete: the caller that // supplied the hook (typically coordinator.Run) has its own // retry/coalesce scope that ends when it returns, so by the time // the queue drains nobody is left to consume the buffered // terminal event. The recursive Run will fall back to the // default broker publish, which is what existing subscribers // expect for queued turns. a.enqueueCall(call) return nil, nil } // Copy mutable fields under lock to avoid races with SetTools/SetModels. agentTools := a.tools.Copy() largeModel := a.largeModel.Get() systemPrompt := a.systemPrompt.Get() promptPrefix := a.systemPromptPrefix.Get() var instructions strings.Builder for _, server := range mcp.GetStates() { if server.State != mcp.StateConnected { continue } if s := server.Client.InitializeResult().Instructions; s != "" { instructions.WriteString(s) instructions.WriteString("\n\n") } } if s := instructions.String(); s != "" { systemPrompt += "\n\n\n" + s + "\n" } if len(agentTools) > 0 { // Add Anthropic caching to the last tool. agentTools[len(agentTools)-1].SetProviderOptions(a.getCacheControlOptions()) } agent := fantasy.NewAgent( largeModel.Model, fantasy.WithSystemPrompt(systemPrompt), fantasy.WithTools(agentTools...), fantasy.WithUserAgent(userAgent), ) sessionLock := sync.Mutex{} currentSession, err := a.sessions.Get(ctx, call.SessionID) if err != nil { return nil, fmt.Errorf("failed to get session: %w", err) } msgs, err := a.getSessionMessages(ctx, currentSession) if err != nil { return nil, fmt.Errorf("failed to get session messages: %w", err) } var wg sync.WaitGroup // Generate title if first message. if len(msgs) == 0 { titleCtx := ctx // Copy to avoid race with ctx reassignment below. wg.Go(func() { a.generateTitle(titleCtx, call.SessionID, call.Prompt) }) } defer wg.Wait() // Add the user message to the session. _, err = a.createUserMessage(ctx, call) if err != nil { return nil, err } userMsgCreated = true // Add the session to the context. ctx = context.WithValue(ctx, tools.SessionIDContextKey, call.SessionID) // For the accepted dispatch path the run context and cancel func // were already created and registered under dispatchMu above; reuse // them. For the in-process path create them here, preserving the // original ordering. if !activeRegistered { genCtx, cancel = context.WithCancel(ctx) a.activeRequests.Set(call.SessionID, cancel) defer cancel() defer a.activeRequests.Del(call.SessionID) } // skipRunComplete is set just before the queued-recursion path so // the outer Run doesn't publish a RunComplete that would race // with — and be superseded by — the recursive call's own // RunComplete (each queued user prompt is its own turn and // publishes exactly one terminal event). var skipRunComplete bool // currentAssistant is declared here so the deferred RunComplete // publish below can capture the pointer that PrepareStep will // later (re)assign for each streaming step. The final assistant // message of the turn is the value reachable through this // pointer when the defer runs. var currentAssistant *message.Message // Drain any debounced message updates before returning. message.Service // already flushes synchronously on terminal updates, but a defer here // guarantees the contract at every Run exit (success, error, panic // recovery upstream) without callers needing to know. // // After the flush completes — meaning all per-message // Publish(UpdatedEvent) calls have fired and been buffered into // every subscriber's channel — publish the authoritative // RunComplete event for this turn. The flush-then-publish order // gives well-behaved clients the best chance of seeing the final // message event before RunComplete; the embedded Text field // reconciles for clients that observe the events out of order // (the pubsub broker fan-in does not serialize publishes from // different upstream brokers). defer func() { // Use a context detached from the run context: workspace // shutdown cancels ctx before this goroutine returns, but the // buffered streaming deltas must still land before the DB is // closed. A short timeout bounds the flush. flushCtx, flushCancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second) defer flushCancel() if flushErr := a.messages.FlushAll(flushCtx); flushErr != nil { slog.Error("Failed to flush pending message updates after run", "error", flushErr) } if skipRunComplete { return } complete := notify.RunComplete{SessionID: call.SessionID, RunID: call.RunID} if currentAssistant != nil { complete.MessageID = currentAssistant.ID complete.Text = currentAssistant.Content().String() } if retErr != nil { complete.Error = retErr.Error() complete.Cancelled = errors.Is(retErr, context.Canceled) } else if ctx.Err() != nil { complete.Cancelled = true } // Prefer the per-call hook when supplied so the coordinator // can coalesce retries (e.g. unauthorized → re-auth → retry) // into a single user-visible terminal event. The fallback // must-deliver publish applies bounded-blocking semantics to // the authoritative terminal event so a momentarily-full // subscriber channel can't silently drop it and hang // non-interactive clients waiting on RunComplete. a.publishRunComplete(ctx, call, complete) }() history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...) startTime := time.Now() a.eventPromptSent(call.SessionID) var stepMessages []fantasy.Message var shouldSummarize bool // Don't send MaxOutputTokens if 0 — some providers (e.g. LM Studio) reject it var maxOutputTokens *int64 if call.MaxOutputTokens > 0 { maxOutputTokens = &call.MaxOutputTokens } result, err = agent.Stream(genCtx, fantasy.AgentStreamCall{ Prompt: message.PromptWithTextAttachments(call.Prompt, call.Attachments), Files: files, Messages: history, ProviderOptions: call.ProviderOptions, MaxOutputTokens: maxOutputTokens, TopP: call.TopP, Temperature: call.Temperature, PresencePenalty: call.PresencePenalty, TopK: call.TopK, FrequencyPenalty: call.FrequencyPenalty, PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = options.Messages for i := range prepared.Messages { prepared.Messages[i].ProviderOptions = nil } // Use latest tools (updated by SetTools when MCP tools change). prepared.Tools = a.tools.Copy() // Drain queued follow-up prompts for this step. Calls covered // by a cancel recorded while they sat in the queue are dropped: // a cancel that arrived after a prompt was queued must not let // it run as part of this step. Coverage is per-call by accept // sequence so a follow-up queued after the cancel (higher seq) // is not dropped. A dropped prompt carrying a RunID still gets // its terminal cancelled RunComplete so a caller waiting on it // does not hang. Uncanceled prompts without a RunID are folded // into this turn; uncanceled prompts with a RunID are left // queued so each runs as its own turn (with its own // RunComplete) via the recursive run path below. fold, canceledRunIDs := a.drainQueueForStep(call.SessionID) a.publishCanceledQueueDrops(canceledRunIDs) for _, queued := range fold { userMessage, createErr := a.createUserMessage(callContext, queued) if createErr != nil { return callContext, prepared, createErr } prepared.Messages = append(prepared.Messages, userMessage.ToAIMessage()...) } prepared.Messages = a.workaroundProviderMediaLimitations(prepared.Messages, largeModel) lastSystemRoleInx := 0 systemMessageUpdated := false for i, msg := range prepared.Messages { // Only add cache control to the last message. if msg.Role == fantasy.MessageRoleSystem { lastSystemRoleInx = i } else if !systemMessageUpdated { prepared.Messages[lastSystemRoleInx].ProviderOptions = a.getCacheControlOptions() systemMessageUpdated = true } // Than add cache control to the last 2 messages. if i > len(prepared.Messages)-3 { prepared.Messages[i].ProviderOptions = a.getCacheControlOptions() } } if promptPrefix != "" { prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(promptPrefix)}, prepared.Messages...) } sessionLock.Lock() stepMessages = cloneFantasyMessages(prepared.Messages) sessionLock.Unlock() var assistantMsg message.Message assistantMsg, err = a.messages.Create(callContext, call.SessionID, message.CreateMessageParams{ Role: message.Assistant, Parts: []message.ContentPart{}, Model: largeModel.ModelCfg.Model, Provider: largeModel.ModelCfg.Provider, }) if err != nil { return callContext, prepared, err } callContext = context.WithValue(callContext, tools.MessageIDContextKey, assistantMsg.ID) callContext = context.WithValue(callContext, tools.SupportsImagesContextKey, largeModel.CatwalkCfg.SupportsImages) callContext = context.WithValue(callContext, tools.ModelNameContextKey, largeModel.CatwalkCfg.Name) currentAssistant = &assistantMsg return callContext, prepared, err }, OnReasoningStart: func(id string, reasoning fantasy.ReasoningContent) error { currentAssistant.AppendReasoningContent(reasoning.Text) return a.messages.Update(genCtx, *currentAssistant) }, OnReasoningDelta: func(id string, text string) error { currentAssistant.AppendReasoningContent(text) return a.messages.Update(genCtx, *currentAssistant) }, OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error { // handle anthropic signature if anthropicData, ok := reasoning.ProviderMetadata[anthropic.Name]; ok { if reasoning, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok { currentAssistant.AppendReasoningSignature(reasoning.Signature) } } if googleData, ok := reasoning.ProviderMetadata[google.Name]; ok { if reasoning, ok := googleData.(*google.ReasoningMetadata); ok { currentAssistant.AppendThoughtSignature(reasoning.Signature, reasoning.ToolID) } } if openaiData, ok := reasoning.ProviderMetadata[openai.Name]; ok { if reasoning, ok := openaiData.(*openai.ResponsesReasoningMetadata); ok { currentAssistant.SetReasoningResponsesData(reasoning) } } currentAssistant.FinishThinking() return a.messages.Update(genCtx, *currentAssistant) }, OnTextDelta: func(id string, text string) error { // Strip leading newline from initial text content. This is is // particularly important in non-interactive mode where leading // newlines are very visible. if len(currentAssistant.Parts) == 0 { text = strings.TrimPrefix(text, "\n") } currentAssistant.AppendContent(text) return a.messages.Update(genCtx, *currentAssistant) }, OnToolInputStart: func(id string, toolName string) error { toolCall := message.ToolCall{ ID: id, Name: toolName, ProviderExecuted: false, Finished: false, } currentAssistant.AddToolCall(toolCall) // Use parent ctx instead of genCtx to ensure the update succeeds // even if the request is canceled mid-stream return a.messages.Update(ctx, *currentAssistant) }, OnRetry: func(err *fantasy.ProviderError, delay time.Duration) { slog.Warn("Provider request failed, retrying", providerRetryLogFields(err, delay)...) }, OnToolCall: func(tc fantasy.ToolCallContent) error { toolCall := message.ToolCall{ ID: tc.ToolCallID, Name: tc.ToolName, Input: tc.Input, ProviderExecuted: false, Finished: true, } currentAssistant.AddToolCall(toolCall) // Use parent ctx instead of genCtx to ensure the update succeeds // even if the request is canceled mid-stream return a.messages.Update(ctx, *currentAssistant) }, OnToolResult: func(result fantasy.ToolResultContent) error { toolResult := a.convertToToolResult(result) // Use parent ctx instead of genCtx to ensure the message is created // even if the request is canceled mid-stream _, createMsgErr := a.messages.Create(ctx, currentAssistant.SessionID, message.CreateMessageParams{ Role: message.Tool, Parts: []message.ContentPart{ toolResult, }, }) return createMsgErr }, OnStepFinish: func(stepResult fantasy.StepResult) error { finishReason := message.FinishReasonUnknown switch stepResult.FinishReason { case fantasy.FinishReasonLength: finishReason = message.FinishReasonMaxTokens case fantasy.FinishReasonStop: finishReason = message.FinishReasonEndTurn case fantasy.FinishReasonToolCalls: finishReason = message.FinishReasonToolUse } // If a tool result halted the turn (e.g. a hook halt or a // permission denial), the step ends on FinishReasonToolCalls but // the model will not be called again. Treat it as the end of the // turn so the UI can render the assistant footer. if finishReason == message.FinishReasonToolUse { for _, tr := range stepResult.Content.ToolResults() { if tr.StopTurn { finishReason = message.FinishReasonEndTurn break } } } currentAssistant.AddFinish(finishReason, "", "") sessionLock.Lock() defer sessionLock.Unlock() updatedSession, getSessionErr := a.sessions.Get(ctx, call.SessionID) if getSessionErr != nil { return getSessionErr } usage, estimated := fallbackStepUsage(stepMessages, stepResult) a.updateSessionUsage(largeModel, &updatedSession, usage, a.openrouterCost(stepResult.ProviderMetadata), estimated) _, sessionErr := a.sessions.Save(ctx, updatedSession) if sessionErr != nil { return sessionErr } currentSession = updatedSession return a.messages.Update(genCtx, *currentAssistant) }, StopWhen: []fantasy.StopCondition{ func(_ []fantasy.StepResult) bool { cw := int64(largeModel.CatwalkCfg.ContextWindow) // If context window is unknown (0), skip auto-summarize // to avoid immediately truncating custom/local models. if cw == 0 { return false } tokens := currentSession.CompletionTokens + currentSession.PromptTokens remaining := cw - tokens var threshold int64 if cw > largeContextWindowThreshold { threshold = largeContextWindowBuffer } else { threshold = int64(float64(cw) * smallContextWindowRatio) } if (remaining <= threshold) && !a.disableAutoSummarize { shouldSummarize = true return true } return false }, func(steps []fantasy.StepResult) bool { return hasRepeatedToolCalls(steps, loopDetectionWindowSize, loopDetectionMaxRepeats) }, }, }) a.eventPromptResponded(call.SessionID, time.Since(startTime).Truncate(time.Second)) if err != nil { isHyper := largeModel.ModelCfg.Provider == hyper.Name isCancelErr := errors.Is(err, context.Canceled) if currentAssistant == nil { // Cancel-before-assistant-creation window: the run was // canceled after activeRequests.Set but before PrepareStep // created the assistant message. Without this, the turn // would return with no FinishReasonCanceled marker and no // user-visible record. The user message was already created // above, so persistCanceledTurn only writes the assistant // record. if isCancelErr { if persistErr := a.persistCanceledTurn(ctx, call, userMsgCreated); persistErr != nil { return nil, persistErr } } return result, err } // Persist final state with a context detached from the run // context. The run context (ctx) is derived from the // workspace context, which workspace shutdown cancels before // agent goroutines finish; using ctx here would drop the // final assistant state. WithoutCancel keeps the values // (e.g. session ID) while ignoring cancellation, and a short // timeout bounds the cleanup writes. cleanupCtx, cleanupCancel := context.WithTimeout(context.WithoutCancel(ctx), 5*time.Second) defer cleanupCancel() // Ensure we finish thinking on error to close the reasoning state. currentAssistant.FinishThinking() toolCalls := currentAssistant.ToolCalls() // INFO: we use the cleanup context here because the genCtx has been cancelled. msgs, createErr := a.messages.List(cleanupCtx, currentAssistant.SessionID) if createErr != nil { return nil, createErr } for _, tc := range toolCalls { if !tc.Finished { tc.Finished = true tc.Input = "{}" currentAssistant.AddToolCall(tc) updateErr := a.messages.Update(cleanupCtx, *currentAssistant) if updateErr != nil { return nil, updateErr } } found := false for _, msg := range msgs { if msg.Role == message.Tool { for _, tr := range msg.ToolResults() { if tr.ToolCallID == tc.ID { found = true break } } } if found { break } } if found { continue } content := "There was an error while executing the tool" if isCancelErr { content = "Error: user cancelled assistant tool calling" } toolResult := message.ToolResult{ ToolCallID: tc.ID, Name: tc.Name, Content: content, IsError: true, } _, createErr = a.messages.Create(cleanupCtx, currentAssistant.SessionID, message.CreateMessageParams{ Role: message.Tool, Parts: []message.ContentPart{ toolResult, }, }) if createErr != nil { return nil, createErr } } var fantasyErr *fantasy.Error var providerErr *fantasy.ProviderError const defaultTitle = "Provider Error" linkStyle := lipgloss.NewStyle().Foreground(charmtone.Guac).Underline(true) if isCancelErr { currentAssistant.AddFinish(message.FinishReasonCanceled, "User canceled request", "") } else if isHyper && errors.As(err, &providerErr) && providerErr.StatusCode == http.StatusUnauthorized { currentAssistant.AddFinish(message.FinishReasonError, "Unauthorized", `Please re-authenticate with Hyper. You can also run "crush auth" to re-authenticate.`) } else if isHyper && errors.As(err, &providerErr) && providerErr.StatusCode == http.StatusPaymentRequired { url := hyper.BaseURL() link := linkStyle.Hyperlink(url, "id=hyper").Render(url) currentAssistant.AddFinish(message.FinishReasonError, "No credits", "You're out of credits. Add more at "+link) } else if errors.As(err, &providerErr) { if providerErr.Message == "The requested model is not supported." { url := "https://github.com/settings/copilot/features" link := linkStyle.Hyperlink(url, "id=copilot").Render(url) currentAssistant.AddFinish( message.FinishReasonError, "Copilot model not enabled", fmt.Sprintf("%q is not enabled in Copilot. Go to the following page to enable it. Then, wait 5 minutes before trying again. %s", largeModel.CatwalkCfg.Name, link), ) } else { currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(providerErr.Title), defaultTitle), providerErr.Message) } } else if errors.As(err, &fantasyErr) { currentAssistant.AddFinish(message.FinishReasonError, cmp.Or(stringext.Capitalize(fantasyErr.Title), defaultTitle), fantasyErr.Message) } else { currentAssistant.AddFinish(message.FinishReasonError, defaultTitle, err.Error()) } // Note: we use the cleanup context here because the genCtx has been // cancelled. updateErr := a.messages.Update(cleanupCtx, *currentAssistant) if updateErr != nil { return nil, updateErr } return nil, err } if shouldSummarize { a.activeRequests.Del(call.SessionID) if summarizeErr := a.Summarize(genCtx, call.SessionID, call.ProviderOptions); summarizeErr != nil { return nil, summarizeErr } // If the agent wasn't done... if len(currentAssistant.ToolCalls()) > 0 { existing, ok := a.messageQueue.Get(call.SessionID) if !ok { existing = []SessionAgentCall{} } call.Prompt = fmt.Sprintf("The previous session was interrupted because it got too long, the initial user request was: `%s`", call.Prompt) existing = append(existing, call) a.messageQueue.Set(call.SessionID, existing) } } // Release active request before publishing the notification. // TUI handlers poll IsSessionBusy() and only re-evaluate when a // tea.Msg arrives, so the cleanup must precede the notify or // subscribers see stale busy state at the moment of receipt. a.activeRequests.Del(call.SessionID) cancel() // Send notification that agent has finished its turn (skip for // nested/non-interactive sessions). if !call.NonInteractive && a.notify != nil { a.notify.Publish(pubsub.CreatedEvent, notify.Notification{ SessionID: call.SessionID, SessionTitle: currentSession.Title, Type: notify.TypeAgentFinished, }) } // Hand off to the next queued prompt (if any) under dispatchMu so // the transition from this finished run to the queued run is atomic // against a concurrent Cancel. activeRequests for this session was // just deleted above, so without the lock there is a window in // which the session looks idle and a cancel becomes a no-op that // fails to stop the queued prompt. Holding the lock lets us observe // a pending cancel recorded against the session and drop the queue // instead of running it, and (for the recursion) hand a fresh // accept reservation to the dequeued call so acceptedRuns stays > 0 // across the recursive Run's own dispatch handoff — keeping the // session observable to Cancel for the entire transition and // closing the dequeue -> re-register window. mu := a.sessionMu(call.SessionID) mu.Lock() queuedMessages, _ := a.messageQueue.Get(call.SessionID) if mark, ok := a.cancelMark.Get(call.SessionID); ok && mark > 0 && len(queuedMessages) > 0 { // A cancel was recorded for this session (e.g. it arrived while // this run was active and follow-ups had been queued). Drop the // queued prompts it covers (accept sequence at or below the // mark, or untracked); keep any queued after the cancel (higher // sequence) so they still run. var kept []SessionAgentCall var canceledRunIDDrops []SessionAgentCall for _, q := range queuedMessages { if q.acceptSeq == 0 || q.acceptSeq <= mark { if q.RunID != "" { canceledRunIDDrops = append(canceledRunIDDrops, q) } continue } kept = append(kept, q) } queuedMessages = kept a.messageQueue.Set(call.SessionID, kept) // A dropped prompt carrying a RunID must still publish its // terminal cancelled RunComplete so a caller waiting on that // RunID does not hang. a.publishCanceledQueueDrops(canceledRunIDDrops) } if len(queuedMessages) == 0 { // No queued work. Clear the cancel mark only when no accepted // run remains in flight that it might still cover; otherwise a // sibling prompt (sequence at or below the mark) waiting to // enter Run would lose its cancellation. When accepted runs are // gone, this also clears a stale mark so it can't catch a // future run. a.messageQueue.Del(call.SessionID) a.acceptedMu.Lock() inFlight, _ := a.acceptedRuns.Get(call.SessionID) a.acceptedMu.Unlock() if inFlight == 0 { a.cancelMark.Del(call.SessionID) } mu.Unlock() return result, err } // There are queued messages, restart the loop. Suppress the outer // defer's emit: it would otherwise observe the recursive Run's retErr // (named-return clobbering through the return below) against this // turn's MessageID/Text and publish a mixed, racing event. skipRunComplete = true // Decide whether this turn still owes its own terminal RunComplete. // Each submitted prompt with a RunID has its own lifecycle, so a turn // that is finished and handing off to a *different* queued prompt must // publish its own RunComplete here — leaving it to the recursive turn // (which carries a different RunID) would hang a caller waiting on // this turn's RunID. The exception is the summarize-continuation path, // which re-queues this same call (same RunID) to resume after a // summary; in that case the eventual terminal turn for this RunID // publishes, so publishing now would double-emit. outerOwesRunComplete := call.RunID != "" if outerOwesRunComplete { for _, q := range queuedMessages { if q.RunID == call.RunID { outerOwesRunComplete = false break } } } firstQueuedMessage := queuedMessages[0] a.messageQueue.Set(call.SessionID, queuedMessages[1:]) // Reserve a fresh accept for the dequeued prompt before dropping the // lock so acceptedRuns > 0 across the handoff into the recursive // Run. This closes the window between this dequeue and the recursive // Run registering its activeRequests entry: a cancel arriving in // that window now records a pending cancel (acceptedRuns > 0) that // the recursive Run's accepted path observes as cancel-on-entry. firstQueuedMessage.Accepted = a.BeginAccepted(call.SessionID) mu.Unlock() if outerOwesRunComplete { complete := notify.RunComplete{SessionID: call.SessionID, RunID: call.RunID} if currentAssistant != nil { complete.MessageID = currentAssistant.ID complete.Text = currentAssistant.Content().String() } if ctx.Err() != nil { complete.Cancelled = true } a.publishRunComplete(ctx, call, complete) } return a.Run(ctx, firstQueuedMessage) } func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fantasy.ProviderOptions) error { if a.IsSessionBusy(sessionID) { return ErrSessionBusy } // Copy mutable fields under lock to avoid races with SetModels. largeModel := a.largeModel.Get() systemPromptPrefix := a.systemPromptPrefix.Get() currentSession, err := a.sessions.Get(ctx, sessionID) if err != nil { return fmt.Errorf("failed to get session: %w", err) } msgs, err := a.getSessionMessages(ctx, currentSession) if err != nil { return err } if len(msgs) == 0 { // Nothing to summarize. return nil } aiMsgs, _ := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages) genCtx, cancel := context.WithCancel(ctx) a.activeRequests.Set(sessionID, cancel) defer a.activeRequests.Del(sessionID) defer cancel() defer func() { if flushErr := a.messages.FlushAll(ctx); flushErr != nil { slog.Error("Failed to flush pending message updates after summarize", "error", flushErr) } }() agent := fantasy.NewAgent( largeModel.Model, fantasy.WithSystemPrompt(string(summaryPrompt)), fantasy.WithUserAgent(userAgent), ) summaryMessage, err := a.messages.Create(ctx, sessionID, message.CreateMessageParams{ Role: message.Assistant, Model: largeModel.ModelCfg.Model, Provider: largeModel.ModelCfg.Provider, IsSummaryMessage: true, }) if err != nil { return err } summaryPromptText := buildSummaryPrompt(currentSession.Todos) resp, err := agent.Stream(genCtx, fantasy.AgentStreamCall{ Prompt: summaryPromptText, Messages: aiMsgs, ProviderOptions: opts, PrepareStep: func(callContext context.Context, options fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = options.Messages if systemPromptPrefix != "" { prepared.Messages = append([]fantasy.Message{fantasy.NewSystemMessage(systemPromptPrefix)}, prepared.Messages...) } return callContext, prepared, nil }, OnReasoningDelta: func(id string, text string) error { summaryMessage.AppendReasoningContent(text) return a.messages.Update(genCtx, summaryMessage) }, OnReasoningEnd: func(id string, reasoning fantasy.ReasoningContent) error { // Handle anthropic signature. if anthropicData, ok := reasoning.ProviderMetadata["anthropic"]; ok { if signature, ok := anthropicData.(*anthropic.ReasoningOptionMetadata); ok && signature.Signature != "" { summaryMessage.AppendReasoningSignature(signature.Signature) } } summaryMessage.FinishThinking() return a.messages.Update(genCtx, summaryMessage) }, OnTextDelta: func(id, text string) error { summaryMessage.AppendContent(text) return a.messages.Update(genCtx, summaryMessage) }, }) if err != nil { isCancelErr := errors.Is(err, context.Canceled) if isCancelErr { // User cancelled summarize we need to remove the summary message. deleteErr := a.messages.Delete(ctx, summaryMessage.ID) return deleteErr } // Mark the summary message as finished with an error so the UI // stops spinning. summaryMessage.AddFinish(message.FinishReasonError, "Summarization Error", err.Error()) if updateErr := a.messages.Update(ctx, summaryMessage); updateErr != nil { return updateErr } return err } summaryMessage.AddFinish(message.FinishReasonEndTurn, "", "") err = a.messages.Update(genCtx, summaryMessage) if err != nil { return err } var openrouterCost *float64 for _, step := range resp.Steps { stepCost := a.openrouterCost(step.ProviderMetadata) if stepCost != nil { newCost := *stepCost if openrouterCost != nil { newCost += *openrouterCost } openrouterCost = &newCost } } a.updateSessionUsage(largeModel, ¤tSession, resp.TotalUsage, openrouterCost, false) // Just in case, get just the last usage info. usage := resp.Response.Usage currentSession.SummaryMessageID = summaryMessage.ID currentSession.CompletionTokens = summaryCompletionTokens(usage, summaryMessage) currentSession.PromptTokens = 0 currentSession.EstimatedUsage = usageIsZero(usage) _, err = a.sessions.Save(genCtx, currentSession) if err != nil { return err } // Release the active request before processing queued messages so that // Run() does not see the session as busy. a.activeRequests.Del(sessionID) cancel() // Process any messages that were queued while summarizing. queuedMessages, ok := a.messageQueue.Get(sessionID) if !ok || len(queuedMessages) == 0 { return nil } firstQueuedMessage := queuedMessages[0] a.messageQueue.Set(sessionID, queuedMessages[1:]) _, qErr := a.Run(ctx, firstQueuedMessage) return qErr } func (a *sessionAgent) getCacheControlOptions() fantasy.ProviderOptions { if t, _ := strconv.ParseBool(os.Getenv("CRUSH_DISABLE_ANTHROPIC_CACHE")); t { return fantasy.ProviderOptions{} } return fantasy.ProviderOptions{ anthropic.Name: &anthropic.ProviderCacheControlOptions{ CacheControl: anthropic.CacheControl{Type: "ephemeral"}, }, bedrock.Name: &anthropic.ProviderCacheControlOptions{ CacheControl: anthropic.CacheControl{Type: "ephemeral"}, }, vercel.Name: &anthropic.ProviderCacheControlOptions{ CacheControl: anthropic.CacheControl{Type: "ephemeral"}, }, } } func (a *sessionAgent) createUserMessage(ctx context.Context, call SessionAgentCall) (message.Message, error) { parts := []message.ContentPart{message.TextContent{Text: call.Prompt}} var attachmentParts []message.ContentPart for _, attachment := range call.Attachments { attachmentParts = append(attachmentParts, message.BinaryContent{Path: attachment.FilePath, MIMEType: attachment.MimeType, Data: attachment.Content}) } parts = append(parts, attachmentParts...) msg, err := a.messages.Create(ctx, call.SessionID, message.CreateMessageParams{ Role: message.User, Parts: parts, }) if err != nil { return message.Message{}, fmt.Errorf("failed to create user message: %w", err) } return msg, nil } func (a *sessionAgent) preparePrompt(msgs []message.Message, supportsImages bool, attachments ...message.Attachment) ([]fantasy.Message, []fantasy.FilePart) { var history []fantasy.Message if !a.isSubAgent { history = append(history, fantasy.NewUserMessage( fmt.Sprintf( "%s", `This is a reminder that your todo list is currently empty. DO NOT mention this to the user explicitly because they are already aware. If you are working on tasks that would benefit from a todo list please use the "todos" tool to create one. If not, please feel free to ignore. Again do not mention this message to the user.`, ), )) } // Collect all tool call IDs present in assistant messages and all tool // result IDs present in tool messages. This lets us detect both orphaned // tool results (result without a call) and orphaned tool calls (call // without a result). knownToolCallIDs := make(map[string]struct{}) knownToolResultIDs := make(map[string]struct{}) for _, m := range msgs { switch m.Role { case message.Assistant: for _, tc := range m.ToolCalls() { knownToolCallIDs[tc.ID] = struct{}{} } case message.Tool: for _, tr := range m.ToolResults() { knownToolResultIDs[tr.ToolCallID] = struct{}{} } } } for _, m := range msgs { if len(m.Parts) == 0 { continue } // Assistant message without content or tool calls (cancelled before it returned anything). if m.Role == message.Assistant && len(m.ToolCalls()) == 0 && m.Content().Text == "" && m.ReasoningContent().String() == "" { continue } if m.Role == message.Tool { if msg, ok := filterOrphanedToolResults(m, knownToolCallIDs); ok { history = append(history, msg) } continue } aiMsgs := m.ToAIMessage() if !supportsImages { for i := range aiMsgs { if aiMsgs[i].Role == fantasy.MessageRoleUser { aiMsgs[i].Content = filterFileParts(aiMsgs[i].Content) } } } history = append(history, aiMsgs...) if m.Role == message.Assistant { if msg, ok := syntheticToolResultsForOrphanedCalls(m, knownToolResultIDs); ok { history = append(history, msg) } } } var files []fantasy.FilePart for _, attachment := range attachments { if attachment.IsText() { continue } files = append(files, fantasy.FilePart{ Filename: attachment.FileName, Data: attachment.Content, MediaType: attachment.MimeType, }) } return history, files } // filterFileParts removes fantasy.FilePart entries from a slice of message // parts. Used to strip image attachments from historical user messages when // the current model does not support them. func filterFileParts(parts []fantasy.MessagePart) []fantasy.MessagePart { filtered := make([]fantasy.MessagePart, 0, len(parts)) for _, part := range parts { if _, ok := fantasy.AsMessagePart[fantasy.FilePart](part); ok { continue } filtered = append(filtered, part) } return filtered } // filterOrphanedToolResults converts a tool message to a fantasy.Message, // dropping any tool result parts whose tool_call_id has no matching tool call // in the known set. An orphaned result causes API validation to fail on every // subsequent turn, permanently locking the session. Returns the filtered // message and true if at least one valid part remains. func filterOrphanedToolResults(m message.Message, knownToolCallIDs map[string]struct{}) (fantasy.Message, bool) { aiMsgs := m.ToAIMessage() if len(aiMsgs) == 0 { return fantasy.Message{}, false } var validParts []fantasy.MessagePart for _, part := range aiMsgs[0].Content { tr, ok := fantasy.AsMessagePart[fantasy.ToolResultPart](part) if !ok { validParts = append(validParts, part) continue } if _, known := knownToolCallIDs[tr.ToolCallID]; known { validParts = append(validParts, part) } else { slog.Warn( "Dropping orphaned tool result with no matching tool call", "tool_call_id", tr.ToolCallID, ) } } if len(validParts) == 0 { return fantasy.Message{}, false } msg := aiMsgs[0] msg.Content = validParts return msg, true } // syntheticToolResultsForOrphanedCalls returns a tool message containing // synthetic tool results for any tool calls in the assistant message that // have no matching result in knownToolResultIDs. LLM APIs require every // tool_use to be immediately followed by a tool_result; an interrupted // session can leave orphaned tool_use blocks that permanently lock the // conversation. Returns the message and true if any synthetic results were // produced. func syntheticToolResultsForOrphanedCalls(m message.Message, knownToolResultIDs map[string]struct{}) (fantasy.Message, bool) { var syntheticParts []fantasy.MessagePart for _, tc := range m.ToolCalls() { if _, hasResult := knownToolResultIDs[tc.ID]; hasResult { continue } slog.Warn( "Injecting synthetic tool result for orphaned tool call", "tool_call_id", tc.ID, "tool_name", tc.Name, ) syntheticParts = append(syntheticParts, fantasy.ToolResultPart{ ToolCallID: tc.ID, Output: fantasy.ToolResultOutputContentError{ Error: errors.New("tool call was interrupted and did not produce a result, you may retry this call if the result is still needed"), }, }) } if len(syntheticParts) == 0 { return fantasy.Message{}, false } return fantasy.Message{ Role: fantasy.MessageRoleTool, Content: syntheticParts, }, true } func (a *sessionAgent) getSessionMessages(ctx context.Context, session session.Session) ([]message.Message, error) { msgs, err := a.messages.List(ctx, session.ID) if err != nil { return nil, fmt.Errorf("failed to list messages: %w", err) } if session.SummaryMessageID != "" { summaryMsgIndex := -1 for i, msg := range msgs { if msg.ID == session.SummaryMessageID { summaryMsgIndex = i break } } if summaryMsgIndex != -1 { msgs = msgs[summaryMsgIndex:] msgs[0].Role = message.User } } return msgs, nil } // generateTitle generates a session titled based on the initial prompt. func (a *sessionAgent) generateTitle(ctx context.Context, sessionID string, userPrompt string) { if userPrompt == "" { return } smallModel := a.smallModel.Get() largeModel := a.largeModel.Get() systemPromptPrefix := a.systemPromptPrefix.Get() var maxOutputTokens int64 = 40 if smallModel.CatwalkCfg.CanReason { maxOutputTokens = smallModel.CatwalkCfg.DefaultMaxTokens } newAgent := func(m fantasy.LanguageModel, p []byte, tok int64) fantasy.Agent { return fantasy.NewAgent( m, fantasy.WithSystemPrompt(string(p)+"\n /no_think"), fantasy.WithMaxOutputTokens(tok), fantasy.WithUserAgent(userAgent), ) } streamCall := fantasy.AgentStreamCall{ Prompt: fmt.Sprintf("Generate a concise title for the following content:\n\n%s\n \n\n", userPrompt), PrepareStep: func(callCtx context.Context, opts fantasy.PrepareStepFunctionOptions) (_ context.Context, prepared fantasy.PrepareStepResult, err error) { prepared.Messages = opts.Messages if systemPromptPrefix != "" { prepared.Messages = append([]fantasy.Message{ fantasy.NewSystemMessage(systemPromptPrefix), }, prepared.Messages...) } return callCtx, prepared, nil }, } // Use the small model to generate the title. model := smallModel agent := newAgent(model.Model, titlePrompt, maxOutputTokens) resp, err := agent.Stream(ctx, streamCall) if err == nil { // We successfully generated a title with the small model. slog.Debug("Generated title with small model") } else { // It didn't work. Let's try with the big model. slog.Error("Error generating title with small model; trying big model", "err", err) model = largeModel agent = newAgent(model.Model, titlePrompt, maxOutputTokens) resp, err = agent.Stream(ctx, streamCall) if err == nil { slog.Debug("Generated title with large model") } else { // Welp, the large model didn't work either. Use the default // session name and return. slog.Error("Error generating title with large model", "err", err) saveErr := a.sessions.Rename(ctx, sessionID, DefaultSessionName) if saveErr != nil { slog.Error("Failed to save session title", "error", saveErr) } return } } if resp == nil { // Actually, we didn't get a response so we can't. Use the default // session name and return. slog.Error("Response is nil; can't generate title") saveErr := a.sessions.Rename(ctx, sessionID, DefaultSessionName) if saveErr != nil { slog.Error("Failed to save session title", "error", saveErr) } return } // Clean up title. var title string title = strings.ReplaceAll(resp.Response.Content.Text(), "\n", " ") // Remove thinking tags if present. title = thinkTagRegex.ReplaceAllString(title, "") title = orphanThinkTagRegex.ReplaceAllString(title, "") title = strings.TrimSpace(title) title = cmp.Or(title, DefaultSessionName) // Calculate usage and cost. var openrouterCost *float64 for _, step := range resp.Steps { stepCost := a.openrouterCost(step.ProviderMetadata) if stepCost != nil { newCost := *stepCost if openrouterCost != nil { newCost += *openrouterCost } openrouterCost = &newCost } } modelConfig := model.CatwalkCfg cost := modelConfig.CostPer1MInCached/1e6*float64(resp.TotalUsage.CacheCreationTokens) + modelConfig.CostPer1MOutCached/1e6*float64(resp.TotalUsage.CacheReadTokens) + modelConfig.CostPer1MIn/1e6*float64(resp.TotalUsage.InputTokens) + modelConfig.CostPer1MOut/1e6*float64(resp.TotalUsage.OutputTokens) // Use override cost if available (e.g., from OpenRouter). if openrouterCost != nil { cost = *openrouterCost } // Skip cost accumulation if model.FlatRate { cost = 0 } promptTokens := resp.TotalUsage.InputTokens + resp.TotalUsage.CacheCreationTokens completionTokens := resp.TotalUsage.OutputTokens // Atomically update only title and usage fields to avoid overriding other // concurrent session updates. saveErr := a.sessions.UpdateTitleAndUsage(ctx, sessionID, title, promptTokens, completionTokens, cost) if saveErr != nil { slog.Error("Failed to save session title and usage", "error", saveErr) return } } func (a *sessionAgent) openrouterCost(metadata fantasy.ProviderMetadata) *float64 { openrouterMetadata, ok := metadata[openrouter.Name] if !ok { return nil } opts, ok := openrouterMetadata.(*openrouter.ProviderMetadata) if !ok { return nil } return &opts.Usage.Cost } func (a *sessionAgent) updateSessionUsage(model Model, session *session.Session, usage fantasy.Usage, overrideCost *float64, estimated bool) { if !usageIsZero(usage) { session.EstimatedUsage = estimated } modelConfig := model.CatwalkCfg cost := modelConfig.CostPer1MInCached/1e6*float64(usage.CacheCreationTokens) + modelConfig.CostPer1MOutCached/1e6*float64(usage.CacheReadTokens) + modelConfig.CostPer1MIn/1e6*float64(usage.InputTokens) + modelConfig.CostPer1MOut/1e6*float64(usage.OutputTokens) if !estimated { a.eventTokensUsed(session.ID, model, usage, cost) } if estimated { cost = 0 } else { // Use override cost if available (e.g., from OpenRouter). if overrideCost != nil { cost = *overrideCost } // Skip cost accumulation if model.FlatRate { cost = 0 } } session.Cost += cost updateSessionTokenCounters(session, usage) } func updateSessionTokenCounters(session *session.Session, usage fantasy.Usage) { if usage.OutputTokens != 0 { session.CompletionTokens = usage.OutputTokens } if promptTokens := usage.InputTokens + usage.CacheReadTokens; promptTokens != 0 { session.PromptTokens = promptTokens } } func summaryCompletionTokens(usage fantasy.Usage, summaryMessage message.Message) int64 { if usage.OutputTokens != 0 { return usage.OutputTokens } return approxTokenCount(summaryMessage.Content().Text) + approxTokenCount(summaryMessage.ReasoningContent().String()) } func (a *sessionAgent) Cancel(sessionID string) { // Serialize against the dispatch handoff in Run so the accepted -> // (cancel-on-entry | queued | active) transition is atomic against // this cancel. Every cancel observes at least one of: an active // request, an accepted run (recorded as a pending cancel), or a // queue entry it then clears. If none of those hold, an idle Escape // is a true no-op and must not poison the next prompt. mu := a.sessionMu(sessionID) mu.Lock() defer mu.Unlock() // Cancel regular requests. Don't use Take() here - we need the entry to // remain in activeRequests so IsBusy() returns true until the goroutine // fully completes (including error handling that may access the DB). // The defer in processRequest will clean up the entry. if cancel, ok := a.activeRequests.Get(sessionID); ok && cancel != nil { slog.Debug("Request cancellation initiated", "session_id", sessionID) cancel() } // Also check for summarize requests. if cancel, ok := a.activeRequests.Get(sessionID + "-summarize"); ok && cancel != nil { slog.Debug("Summarize cancellation initiated", "session_id", sessionID) cancel() } // Record a pending cancel only when a dispatched-but-not-yet-active // run exists. This catches runs still in the goroutine scheduler or // about to enter Run's busy-queue branch, while leaving an idle // session untouched. Active and accepted are not mutually exclusive: // when a run is active and a follow-up has been accepted, both the // cancel above and this pending record fire. // // Raise the session's cancel mark to the latest accept sequence // assigned so far. Every prompt currently accepted-but-not-yet- // active has a sequence at or below that value, so one cancel covers // all of them; a prompt accepted after this cancel gets a strictly // higher sequence and is never poisoned. Using max keeps repeated // cancels idempotent while the same prompts are in flight and lets a // later cancel extend coverage to prompts accepted since. a.acceptedMu.Lock() count, ok := a.acceptedRuns.Get(sessionID) mark := a.acceptSeqGen a.acceptedMu.Unlock() if ok && count > 0 { slog.Debug("Recording cancel mark for accepted runs", "session_id", sessionID, "count", count, "mark", mark) existing, _ := a.cancelMark.Get(sessionID) a.cancelMark.Set(sessionID, max(existing, mark)) } if a.QueuedPrompts(sessionID) > 0 { slog.Debug("Clearing queued prompts", "session_id", sessionID) a.clearQueueAndNotify(sessionID) } } func (a *sessionAgent) ClearQueue(sessionID string) { if a.QueuedPrompts(sessionID) > 0 { slog.Debug("Clearing queued prompts", "session_id", sessionID) a.clearQueueAndNotify(sessionID) } } func (a *sessionAgent) CancelAll() { if !a.IsBusy() { return } for key := range a.activeRequests.Seq2() { a.Cancel(key) // key is sessionID } timeout := time.After(5 * time.Second) for a.IsBusy() { select { case <-timeout: return default: time.Sleep(200 * time.Millisecond) } } } func (a *sessionAgent) IsBusy() bool { var busy bool for cancelFunc := range a.activeRequests.Seq() { if cancelFunc != nil { busy = true break } } return busy } func (a *sessionAgent) IsSessionBusy(sessionID string) bool { _, busy := a.activeRequests.Get(sessionID) return busy } func (a *sessionAgent) QueuedPrompts(sessionID string) int { l, ok := a.messageQueue.Get(sessionID) if !ok { return 0 } return len(l) } func (a *sessionAgent) QueuedPromptsList(sessionID string) []string { l, ok := a.messageQueue.Get(sessionID) if !ok { return nil } prompts := make([]string, len(l)) for i, call := range l { prompts[i] = call.Prompt } return prompts } func (a *sessionAgent) SetModels(large Model, small Model) { a.largeModel.Set(large) a.smallModel.Set(small) } func (a *sessionAgent) SetTools(tools []fantasy.AgentTool) { a.tools.SetSlice(tools) } func (a *sessionAgent) SetSystemPrompt(systemPrompt string) { a.systemPrompt.Set(systemPrompt) } func (a *sessionAgent) Model() Model { return a.largeModel.Get() } // convertToToolResult converts a fantasy tool result to a message tool result. func (a *sessionAgent) convertToToolResult(result fantasy.ToolResultContent) message.ToolResult { baseResult := message.ToolResult{ ToolCallID: result.ToolCallID, Name: result.ToolName, Metadata: result.ClientMetadata, } switch result.Result.GetType() { case fantasy.ToolResultContentTypeText: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](result.Result); ok { baseResult.Content = r.Text } case fantasy.ToolResultContentTypeError: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](result.Result); ok { baseResult.Content = r.Error.Error() baseResult.IsError = true } case fantasy.ToolResultContentTypeMedia: if r, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](result.Result); ok { if !stringext.IsValidBase64(r.Data) { slog.Warn( "Tool returned media with invalid base64 data, discarding image", "tool", result.ToolName, "tool_call_id", result.ToolCallID, ) baseResult.Content = "Tool returned image data with invalid encoding" baseResult.IsError = true } else { content := r.Text if content == "" { content = fmt.Sprintf("Loaded %s content", r.MediaType) } baseResult.Content = content baseResult.Data = r.Data baseResult.MIMEType = r.MediaType } } } return baseResult } // workaroundProviderMediaLimitations converts media content in tool results to // user messages for providers that don't natively support images in tool results. // // Problem: OpenAI, Google, OpenRouter, and other OpenAI-compatible providers // don't support sending images/media in tool result messages - they only accept // text in tool results. However, they DO support images in user messages. // // If we send media in tool results to these providers, the API returns an error. // // Solution: For these providers, we: // 1. Replace the media in the tool result with a text placeholder // 2. Inject a user message immediately after with the image as a file attachment // 3. This maintains the tool execution flow while working around API limitations // // Anthropic and Bedrock support images natively in tool results, so we skip // this workaround for them. // // Example transformation: // // BEFORE: [tool result: image data] // AFTER: [tool result: "Image loaded - see attached"], [user: image attachment] func (a *sessionAgent) workaroundProviderMediaLimitations(messages []fantasy.Message, largeModel Model) []fantasy.Message { providerSupportsMedia := largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderAnthropic) || largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderBedrock) || largeModel.ModelCfg.Provider == string(catwalk.InferenceProviderBedrockEurope) if providerSupportsMedia { return messages } convertedMessages := make([]fantasy.Message, 0, len(messages)) for _, msg := range messages { if msg.Role != fantasy.MessageRoleTool { convertedMessages = append(convertedMessages, msg) continue } textParts := make([]fantasy.MessagePart, 0, len(msg.Content)) var mediaFiles []fantasy.FilePart for _, part := range msg.Content { toolResult, ok := fantasy.AsMessagePart[fantasy.ToolResultPart](part) if !ok { textParts = append(textParts, part) continue } if media, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentMedia](toolResult.Output); ok { decoded, err := base64.StdEncoding.DecodeString(media.Data) if err != nil { slog.Warn("Failed to decode media data", "error", err) textParts = append(textParts, part) continue } mediaFiles = append(mediaFiles, fantasy.FilePart{ Data: decoded, MediaType: media.MediaType, Filename: fmt.Sprintf("tool-result-%s", toolResult.ToolCallID), }) textParts = append(textParts, fantasy.ToolResultPart{ ToolCallID: toolResult.ToolCallID, Output: fantasy.ToolResultOutputContentText{ Text: "[Image/media content loaded - see attached file]", }, ProviderOptions: toolResult.ProviderOptions, }) } else { textParts = append(textParts, part) } } convertedMessages = append(convertedMessages, fantasy.Message{ Role: fantasy.MessageRoleTool, Content: textParts, }) if len(mediaFiles) > 0 { convertedMessages = append(convertedMessages, fantasy.NewUserMessage( "Here is the media content from the tool result:", mediaFiles..., )) } } return convertedMessages } // buildSummaryPrompt constructs the prompt text for session summarization. func buildSummaryPrompt(todos []session.Todo) string { var sb strings.Builder sb.WriteString("Provide a detailed summary of our conversation above.") if len(todos) > 0 { sb.WriteString("\n\n## Current Todo List\n\n") for _, t := range todos { fmt.Fprintf(&sb, "- [%s] %s\n", t.Status, t.Content) } sb.WriteString("\nInclude these tasks and their statuses in your summary. ") sb.WriteString("Instruct the resuming assistant to use the `todos` tool to continue tracking progress on these tasks.") } return sb.String() } func providerRetryLogFields(err *fantasy.ProviderError, delay time.Duration) []any { fields := []any{ "retry_delay", delay.String(), } if err == nil { return fields } fields = append(fields, "status_code", err.StatusCode) if err.Title != "" { fields = append(fields, "title", err.Title) } if err.Message != "" { fields = append(fields, "message", err.Message) } return fields }