coordinator.go

   1package agent
   2
   3import (
   4	"bytes"
   5	"cmp"
   6	"context"
   7	"encoding/json"
   8	"errors"
   9	"fmt"
  10	"io"
  11	"log/slog"
  12	"maps"
  13	"net/http"
  14	"os"
  15	"path/filepath"
  16	"slices"
  17	"strings"
  18
  19	"charm.land/catwalk/pkg/catwalk"
  20	"charm.land/fantasy"
  21	"github.com/charmbracelet/crush/internal/agent/hyper"
  22	"github.com/charmbracelet/crush/internal/agent/notify"
  23	"github.com/charmbracelet/crush/internal/agent/prompt"
  24	"github.com/charmbracelet/crush/internal/agent/tools"
  25	"github.com/charmbracelet/crush/internal/config"
  26	"github.com/charmbracelet/crush/internal/event"
  27	"github.com/charmbracelet/crush/internal/filetracker"
  28	"github.com/charmbracelet/crush/internal/history"
  29	"github.com/charmbracelet/crush/internal/hooks"
  30	"github.com/charmbracelet/crush/internal/log"
  31	"github.com/charmbracelet/crush/internal/lsp"
  32	"github.com/charmbracelet/crush/internal/message"
  33	"github.com/charmbracelet/crush/internal/oauth/copilot"
  34	"github.com/charmbracelet/crush/internal/permission"
  35	"github.com/charmbracelet/crush/internal/pubsub"
  36	"github.com/charmbracelet/crush/internal/session"
  37	"github.com/charmbracelet/crush/internal/skills"
  38	"golang.org/x/sync/errgroup"
  39
  40	"charm.land/fantasy/providers/anthropic"
  41	"charm.land/fantasy/providers/azure"
  42	"charm.land/fantasy/providers/bedrock"
  43	"charm.land/fantasy/providers/google"
  44	"charm.land/fantasy/providers/openai"
  45	"charm.land/fantasy/providers/openaicompat"
  46	"charm.land/fantasy/providers/openrouter"
  47	"charm.land/fantasy/providers/vercel"
  48	openaisdk "github.com/charmbracelet/openai-go/option"
  49	"github.com/qjebbs/go-jsons"
  50)
  51
// Coordinator errors.
//
// Sentinel errors returned by this file: errCoderAgentNotConfigured comes
// from NewCoordinator, errModelProviderNotConfigured from run, and the
// large/small variants from buildAgentModels.
var (
	errCoderAgentNotConfigured         = errors.New("coder agent not configured")
	errModelProviderNotConfigured      = errors.New("model provider not configured")
	errLargeModelNotSelected           = errors.New("large model not selected")
	errSmallModelNotSelected           = errors.New("small model not selected")
	errLargeModelProviderNotConfigured = errors.New("large model provider not configured")
	errSmallModelProviderNotConfigured = errors.New("small model provider not configured")
	errLargeModelNotFound              = errors.New("large model not found in provider config")
	errSmallModelNotFound              = errors.New("small model not found in provider config")
)
  63
// Copilot models that use the Responses API instead of Chat Completions.
// The map is keyed by model ID and is consulted by the per-model callback
// registered in buildOpenaiCompatProvider; IDs that are absent yield false.
var copilotResponsesModels = map[string]bool{
	"gpt-5.2":       true,
	"gpt-5.2-codex": true,
	"gpt-5.3-codex": true,
	"gpt-5.4":       true,
	"gpt-5.4-mini":  true,
	"gpt-5.5":       true,
	"gpt-5-mini":    true,
}
  74
// OpenCode models that use the Anthropic Messages API instead of Chat
// Completions. The map is keyed by model ID.
var opencodeMessagesModels = map[string]bool{
	"qwen3.7-max": true,
}
  79
// Coordinator is the interface through which callers run prompts in a
// session, cancel or inspect work, summarize a session, and refresh the
// models in use. The coordinator type in this file implements it.
type Coordinator interface {
	// INFO: (kujtim) this is not used yet we will use this when we have multiple agents
	// SetMainAgent(string)
	Run(ctx context.Context, sessionID, prompt string, attachments ...message.Attachment) (*fantasy.AgentResult, error)
	// RunAccepted runs a call that was already accepted via
	// BeginAccepted on the fire-and-forget dispatch path. The handle is
	// the only carrier of accept-state across the backend.runAgent /
	// Coordinator / sessionAgent.Run layers: it reaches
	// sessionAgent.Run as SessionAgentCall.Accepted, where it is
	// consumed under dispatchMu once the accepted -> (cancel-on-entry |
	// queued | active) transition is chosen.
	RunAccepted(ctx context.Context, accept *AcceptedRun, sessionID, prompt string, attachments ...message.Attachment) (*fantasy.AgentResult, error)
	// BeginAccepted returns the handle that RunAccepted later takes.
	BeginAccepted(sessionID string) *AcceptedRun
	Cancel(sessionID string)
	CancelAll()
	IsSessionBusy(sessionID string) bool
	IsBusy() bool
	QueuedPrompts(sessionID string) int
	QueuedPromptsList(sessionID string) []string
	ClearQueue(sessionID string)
	Summarize(context.Context, string) error
	Model() Model
	// UpdateModels is also invoked by the coordinator itself before every
	// run.
	UpdateModels(ctx context.Context) error
}
 104
// coordinator is the Coordinator implementation returned by NewCoordinator.
// It holds the services handed to NewCoordinator and the agents built from
// them.
type coordinator struct {
	cfg         *config.ConfigStore
	sessions    session.Service
	messages    message.Service
	permissions permission.Service
	history     history.Service
	filetracker filetracker.Service
	lspManager  *lsp.Manager
	notify      pubsub.Publisher[notify.Notification]
	runComplete pubsub.Publisher[notify.RunComplete]

	// currentAgent is the agent that run dispatches to; agents maps agent
	// names to built agents (NewCoordinator registers config.AgentCoder).
	currentAgent SessionAgent
	agents       map[string]SessionAgent

	// Skills discovery results (session-start snapshot).
	allSkills    []*skills.Skill // Pre-filter: all discovered after dedup.
	activeSkills []*skills.Skill // Post-filter: active skills only.
	skillTracker *skills.Tracker

	// readyWg tracks the background system-prompt and tool construction
	// started by buildAgent; run waits on it before dispatching.
	readyWg errgroup.Group
}
 126
 127func NewCoordinator(
 128	ctx context.Context,
 129	cfg *config.ConfigStore,
 130	sessions session.Service,
 131	messages message.Service,
 132	permissions permission.Service,
 133	history history.Service,
 134	filetracker filetracker.Service,
 135	lspManager *lsp.Manager,
 136	notify pubsub.Publisher[notify.Notification],
 137	runComplete pubsub.Publisher[notify.RunComplete],
 138	skillsMgr *skills.Manager,
 139) (Coordinator, error) {
 140	// Skills are pre-discovered by the caller (see app.New /
 141	// backend.CreateWorkspace) and passed in via the manager. If no
 142	// manager was provided (legacy callers), fall back to an in-line
 143	// discovery so the coordinator still works.
 144	var allSkills, activeSkills []*skills.Skill
 145	if skillsMgr != nil {
 146		allSkills = skillsMgr.AllSkills()
 147		activeSkills = skillsMgr.ActiveSkills()
 148	} else {
 149		allSkills, activeSkills = discoverSkills(cfg)
 150	}
 151	skillTracker := skills.NewTracker(activeSkills)
 152
 153	c := &coordinator{
 154		cfg:          cfg,
 155		sessions:     sessions,
 156		messages:     messages,
 157		permissions:  permissions,
 158		history:      history,
 159		filetracker:  filetracker,
 160		lspManager:   lspManager,
 161		notify:       notify,
 162		runComplete:  runComplete,
 163		agents:       make(map[string]SessionAgent),
 164		allSkills:    allSkills,
 165		activeSkills: activeSkills,
 166		skillTracker: skillTracker,
 167	}
 168
 169	agentCfg, ok := cfg.Config().Agents[config.AgentCoder]
 170	if !ok {
 171		return nil, errCoderAgentNotConfigured
 172	}
 173
 174	// TODO: make this dynamic when we support multiple agents
 175	prompt, err := coderPrompt(prompt.WithWorkingDir(c.cfg.WorkingDir()))
 176	if err != nil {
 177		return nil, err
 178	}
 179
 180	agent, err := c.buildAgent(ctx, prompt, agentCfg, false)
 181	if err != nil {
 182		return nil, err
 183	}
 184	c.currentAgent = agent
 185	c.agents[config.AgentCoder] = agent
 186	return c, nil
 187}
 188
 189// Run implements Coordinator.
 190func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string, attachments ...message.Attachment) (*fantasy.AgentResult, error) {
 191	return c.run(ctx, nil, sessionID, prompt, attachments...)
 192}
 193
 194// RunAccepted implements Coordinator.
 195func (c *coordinator) RunAccepted(ctx context.Context, accept *AcceptedRun, sessionID string, prompt string, attachments ...message.Attachment) (*fantasy.AgentResult, error) {
 196	return c.run(ctx, accept, sessionID, prompt, attachments...)
 197}
 198
 199// run is the shared implementation behind Run and RunAccepted. When
 200// accept is non-nil it is threaded onto the SessionAgentCall as
 201// Accepted so sessionAgent.Run can consume the accept reservation under
 202// dispatchMu; when nil (the in-process/local path) no accept tracking
 203// applies.
 204func (c *coordinator) run(ctx context.Context, accept *AcceptedRun, sessionID string, prompt string, attachments ...message.Attachment) (*fantasy.AgentResult, error) {
 205	if err := c.readyWg.Wait(); err != nil {
 206		return nil, err
 207	}
 208
 209	// refresh models before each run
 210	if err := c.UpdateModels(ctx); err != nil {
 211		return nil, fmt.Errorf("failed to update models: %w", err)
 212	}
 213
 214	model := c.currentAgent.Model()
 215	maxTokens := model.CatwalkCfg.DefaultMaxTokens
 216	if model.ModelCfg.MaxTokens != 0 {
 217		maxTokens = model.ModelCfg.MaxTokens
 218	}
 219
 220	if !model.CatwalkCfg.SupportsImages && attachments != nil {
 221		// filter out image attachments
 222		filteredAttachments := make([]message.Attachment, 0, len(attachments))
 223		for _, att := range attachments {
 224			if att.IsText() {
 225				filteredAttachments = append(filteredAttachments, att)
 226			}
 227		}
 228		attachments = filteredAttachments
 229	}
 230
 231	providerCfg, ok := c.cfg.Config().Providers.Get(model.ModelCfg.Provider)
 232	if !ok {
 233		return nil, errModelProviderNotConfigured
 234	}
 235
 236	mergedOptions, temp, topP, topK, freqPenalty, presPenalty := mergeCallOptions(model, providerCfg)
 237
 238	if err := c.refreshTokenIfExpired(ctx, providerCfg); err != nil {
 239		// NOTE(@andreynering): We don't return here because the event handling to ask the user to reauthenticate
 240		// depends on the flow below. If refresh fails, proceed with the token we have.
 241		slog.Error("Failed to refresh OAuth2 token. Proceeding with existing token.", "error", err)
 242	}
 243
 244	// Coalesce per-attempt RunComplete payloads so only the final
 245	// outcome reaches subscribers. Without this, the first attempt's
 246	// failed RunComplete (unauthorized) would race ahead of the
 247	// retry's success, and `crush run` would exit on the stale error
 248	// before ever seeing the retry result. Each attempt's
 249	// SessionAgentCall.OnComplete hook overwrites latest; we publish
 250	// exactly once after retries resolve, via PublishMustDeliver, so
 251	// a momentarily-full subscriber buffer can't silently drop the
 252	// terminal event.
 253	var (
 254		latest    notify.RunComplete
 255		hasLatest bool
 256	)
 257	onComplete := func(rc notify.RunComplete) {
 258		latest = rc
 259		hasLatest = true
 260	}
 261	// Propagate the caller-supplied RunID (set via agent.WithRunID
 262	// at the HTTP boundary in backend.SendMessage) onto the
 263	// SessionAgentCall so the terminal RunComplete event echoes it
 264	// back. Both attempts in the retry chain reuse the same RunID;
 265	// the coalesce closure publishes the final outcome under that
 266	// same correlator.
 267	runID := RunIDFromContext(ctx)
 268	run := func() (*fantasy.AgentResult, error) {
 269		return c.currentAgent.Run(ctx, SessionAgentCall{
 270			SessionID:        sessionID,
 271			RunID:            runID,
 272			Prompt:           prompt,
 273			Attachments:      attachments,
 274			MaxOutputTokens:  maxTokens,
 275			ProviderOptions:  mergedOptions,
 276			Temperature:      temp,
 277			TopP:             topP,
 278			TopK:             topK,
 279			FrequencyPenalty: freqPenalty,
 280			PresencePenalty:  presPenalty,
 281			OnComplete:       onComplete,
 282			Accepted:         accept,
 283		})
 284	}
 285	beforeLoaded := c.skillTracker.LoadedNames()
 286	result, originalErr := run()
 287	logTurnSkillUsage(sessionID, prompt, c.activeSkills, c.skillTracker, beforeLoaded)
 288
 289	if c.isUnauthorized(originalErr) {
 290		if err := c.retryAfterUnauthorized(ctx, providerCfg); err == nil {
 291			result, originalErr = run()
 292		}
 293	}
 294
 295	if hasLatest && c.runComplete != nil {
 296		c.runComplete.PublishMustDeliver(ctx, pubsub.UpdatedEvent, latest)
 297		// Signal to the dispatcher (backend.runAgent) that the
 298		// authoritative terminal RunComplete for this run was already
 299		// emitted, so it does not publish a duplicate fallback for the
 300		// error it is about to receive.
 301		MarkRunCompletePublished(ctx)
 302	}
 303	return result, originalErr
 304}
 305
// getProviderOptions builds the provider-specific call options for model.
//
// Three option sources are marshalled to JSON and merged with jsons.Merge in
// this order: the catalog model's options, the provider config's options,
// and the selected model's options (presumably later sources override
// earlier ones; confirm against go-jsons). Reasoning/thinking defaults are
// then applied per providerCfg.Type, only for keys the merged options do not
// already contain (the openai-compatible branch additionally always replaces
// "extra_body"), and the parsed result is stored under that provider's key.
//
// Failures are best-effort: a marshal error leaves that source as "{}", a
// merge or unmarshal error is logged and yields empty options, and a parse
// error leaves the provider's entry unset. A provider type not handled by
// the switch also yields empty options.
func getProviderOptions(model Model, providerCfg config.ProviderConfig) fantasy.ProviderOptions {
	options := fantasy.ProviderOptions{}

	// Each source defaults to an empty JSON object so the merge below always
	// has three valid documents.
	cfgOpts := []byte("{}")
	providerCfgOpts := []byte("{}")
	catwalkOpts := []byte("{}")

	if model.ModelCfg.ProviderOptions != nil {
		data, err := json.Marshal(model.ModelCfg.ProviderOptions)
		if err == nil {
			cfgOpts = data
		}
	}

	if providerCfg.ProviderOptions != nil {
		data, err := json.Marshal(providerCfg.ProviderOptions)
		if err == nil {
			providerCfgOpts = data
		}
	}

	if model.CatwalkCfg.Options.ProviderOptions != nil {
		data, err := json.Marshal(model.CatwalkCfg.Options.ProviderOptions)
		if err == nil {
			catwalkOpts = data
		}
	}

	// Merge order: catalog defaults, then provider config, then model config.
	readers := []io.Reader{
		bytes.NewReader(catwalkOpts),
		bytes.NewReader(providerCfgOpts),
		bytes.NewReader(cfgOpts),
	}

	got, err := jsons.Merge(readers)
	if err != nil {
		slog.Error("Could not merge call config", "err", err)
		return options
	}

	mergedOptions := make(map[string]any)

	err = json.Unmarshal([]byte(got), &mergedOptions)
	if err != nil {
		slog.Error("Could not create config for call", "err", err)
		return options
	}

	// A reasoning effort is only injected when the model can reason and the
	// configured effort is one of the levels the catalog lists for it.
	shouldSetEffort := model.CatwalkCfg.CanReason &&
		slices.Contains(model.CatwalkCfg.ReasoningLevels, model.ModelCfg.ReasoningEffort)

	switch providerCfg.Type {
	case openai.Name, azure.Name:
		_, hasReasoningEffort := mergedOptions["reasoning_effort"]
		if !hasReasoningEffort && shouldSetEffort {
			mergedOptions["reasoning_effort"] = model.ModelCfg.ReasoningEffort
		}
		// Responses-API models use a different options parser; both
		// variants are stored under the openai key.
		if openai.IsResponsesModel(model.CatwalkCfg.ID) {
			if openai.IsResponsesReasoningModel(model.CatwalkCfg.ID) {
				mergedOptions["reasoning_summary"] = "auto"
				mergedOptions["include"] = []openai.IncludeType{openai.IncludeReasoningEncryptedContent}
			}
			parsed, err := openai.ParseResponsesOptions(mergedOptions)
			if err == nil {
				options[openai.Name] = parsed
			}
		} else {
			parsed, err := openai.ParseOptions(mergedOptions)
			if err == nil {
				options[openai.Name] = parsed
			}
		}
	case anthropic.Name, bedrock.Name:
		var (
			_, hasEffort = mergedOptions["effort"]
			_, hasThink  = mergedOptions["thinking"]
		)
		// Effort takes precedence: the thinking budget is only considered
		// when no effort was injected by the first case.
		switch {
		case !hasEffort && shouldSetEffort:
			mergedOptions["effort"] = model.ModelCfg.ReasoningEffort
		case !hasThink && model.ModelCfg.Think:
			mergedOptions["thinking"] = map[string]any{"budget_tokens": 2000}
		}
		// Bedrock options are parsed with the anthropic parser and stored
		// under the anthropic key.
		parsed, err := anthropic.ParseOptions(mergedOptions)
		if err == nil {
			options[anthropic.Name] = parsed
		}

	case openrouter.Name:
		_, hasReasoning := mergedOptions["reasoning"]
		if !hasReasoning && shouldSetEffort {
			mergedOptions["reasoning"] = map[string]any{
				"enabled": true,
				"effort":  model.ModelCfg.ReasoningEffort,
			}
		}
		parsed, err := openrouter.ParseOptions(mergedOptions)
		if err == nil {
			options[openrouter.Name] = parsed
		}
	case vercel.Name:
		_, hasReasoning := mergedOptions["reasoning"]
		if !hasReasoning && shouldSetEffort {
			mergedOptions["reasoning"] = map[string]any{
				"enabled": true,
				"effort":  model.ModelCfg.ReasoningEffort,
			}
		}
		parsed, err := vercel.ParseOptions(mergedOptions)
		if err == nil {
			options[vercel.Name] = parsed
		}
	case google.Name:
		// NOTE(review): unlike the other branches, this one does not consult
		// shouldSetEffort; a thinking_config is always injected when absent.
		_, hasReasoning := mergedOptions["thinking_config"]
		if !hasReasoning {
			if strings.HasPrefix(model.CatwalkCfg.ID, "gemini-2") {
				mergedOptions["thinking_config"] = map[string]any{
					"thinking_budget":  2000,
					"include_thoughts": true,
				}
			} else {
				mergedOptions["thinking_config"] = map[string]any{
					"thinking_level":   model.ModelCfg.ReasoningEffort,
					"include_thoughts": true,
				}
			}
		}
		parsed, err := google.ParseOptions(mergedOptions)
		if err == nil {
			options[google.Name] = parsed
		}
	case openaicompat.Name, hyper.Name:
		// Provider-specific fields are collected here and stored as
		// "extra_body" below.
		extraBody := make(map[string]any)

		_, hasReasoningEffort := mergedOptions["reasoning_effort"]
		if !hasReasoningEffort && shouldSetEffort {
			switch providerCfg.ID {
			case string(catwalk.InferenceProviderIoNet):
				extraBody["reasoning"] = map[string]string{"effort": model.ModelCfg.ReasoningEffort}
			default:
				mergedOptions["reasoning_effort"] = model.ModelCfg.ReasoningEffort
			}
		}

		// "reasoning effort" is a standard OpenAI field, but "thinking" is not.
		// Setting it in the right way for each provider.
		// TODO: Abstract this in Fantasy somehow?
		// TODO: Allow custom providers to specify how to set this?
		switch providerCfg.ID {
		case hyper.Name:
			extraBody["thinking"] = model.ModelCfg.Think
		case string(catwalk.InferenceProviderIoNet):
			// Only applies when no explicit effort was placed above.
			if _, ok := extraBody["reasoning"]; !ok && model.CatwalkCfg.CanReason {
				if model.ModelCfg.Think {
					extraBody["reasoning"] = map[string]string{"effort": "medium"}
				} else {
					extraBody["reasoning"] = map[string]string{"effort": "none"}
				}
			}
		case string(catwalk.InferenceProviderZAI), string(catwalk.InferenceProviderDeepSeek):
			if model.ModelCfg.Think || model.ModelCfg.ReasoningEffort != "" {
				extraBody["thinking"] = map[string]any{
					"type": "enabled",
				}
			} else {
				extraBody["thinking"] = map[string]any{
					"type": "disabled",
				}
			}
		case string(catwalk.InferenceProviderAlibabaSingapore):
			if model.CatwalkCfg.CanReason {
				extraBody["enable_thinking"] = model.ModelCfg.Think
			}
		}

		// NOTE(review): this replaces any "extra_body" key already present
		// in the merged options; confirm that is intended.
		mergedOptions["extra_body"] = extraBody

		parsed, err := openaicompat.ParseOptions(mergedOptions)
		if err == nil {
			options[openaicompat.Name] = parsed
		}
	}

	return options
}
 491
 492func mergeCallOptions(model Model, cfg config.ProviderConfig) (fantasy.ProviderOptions, *float64, *float64, *int64, *float64, *float64) {
 493	modelOptions := getProviderOptions(model, cfg)
 494	temp := cmp.Or(model.ModelCfg.Temperature, model.CatwalkCfg.Options.Temperature)
 495	topP := cmp.Or(model.ModelCfg.TopP, model.CatwalkCfg.Options.TopP)
 496	topK := cmp.Or(model.ModelCfg.TopK, model.CatwalkCfg.Options.TopK)
 497	freqPenalty := cmp.Or(model.ModelCfg.FrequencyPenalty, model.CatwalkCfg.Options.FrequencyPenalty)
 498	presPenalty := cmp.Or(model.ModelCfg.PresencePenalty, model.CatwalkCfg.Options.PresencePenalty)
 499	return modelOptions, temp, topP, topK, freqPenalty, presPenalty
 500}
 501
 502func (c *coordinator) buildAgent(ctx context.Context, prompt *prompt.Prompt, agent config.Agent, isSubAgent bool) (SessionAgent, error) {
 503	large, small, err := c.buildAgentModels(ctx, isSubAgent)
 504	if err != nil {
 505		return nil, err
 506	}
 507
 508	largeProviderCfg, _ := c.cfg.Config().Providers.Get(large.ModelCfg.Provider)
 509	result := NewSessionAgent(SessionAgentOptions{
 510		LargeModel:           large,
 511		SmallModel:           small,
 512		SystemPromptPrefix:   largeProviderCfg.SystemPromptPrefix,
 513		SystemPrompt:         "",
 514		IsSubAgent:           isSubAgent,
 515		DisableAutoSummarize: c.cfg.Config().Options.DisableAutoSummarize,
 516		IsYolo:               c.permissions.SkipRequests(),
 517		Sessions:             c.sessions,
 518		Messages:             c.messages,
 519		Tools:                nil,
 520		Notify:               c.notify,
 521		RunComplete:          c.runComplete,
 522	})
 523
 524	c.readyWg.Go(func() error {
 525		systemPrompt, err := prompt.Build(ctx, large.Model.Provider(), large.Model.Model(), c.cfg)
 526		if err != nil {
 527			return err
 528		}
 529		result.SetSystemPrompt(systemPrompt)
 530		return nil
 531	})
 532
 533	c.readyWg.Go(func() error {
 534		tools, err := c.buildTools(ctx, agent, isSubAgent)
 535		if err != nil {
 536			return err
 537		}
 538		result.SetTools(tools)
 539		return nil
 540	})
 541
 542	return result, nil
 543}
 544
 545func (c *coordinator) buildTools(ctx context.Context, agent config.Agent, isSubAgent bool) ([]fantasy.AgentTool, error) {
 546	var allTools []fantasy.AgentTool
 547	if slices.Contains(agent.AllowedTools, AgentToolName) {
 548		agentTool, err := c.agentTool(ctx)
 549		if err != nil {
 550			return nil, err
 551		}
 552		allTools = append(allTools, agentTool)
 553	}
 554
 555	if slices.Contains(agent.AllowedTools, tools.AgenticFetchToolName) {
 556		agenticFetchTool, err := c.agenticFetchTool(ctx, nil)
 557		if err != nil {
 558			return nil, err
 559		}
 560		allTools = append(allTools, agenticFetchTool)
 561	}
 562
 563	// Get the model name for the agent
 564	modelID := ""
 565	if modelCfg, ok := c.cfg.Config().Models[agent.Model]; ok {
 566		if model := c.cfg.Config().GetModel(modelCfg.Provider, modelCfg.Model); model != nil {
 567			modelID = model.ID
 568		}
 569	}
 570
 571	logFile := filepath.Join(c.cfg.Config().Options.DataDirectory, "logs", "crush.log")
 572
 573	// Build hook runner if PreToolUse hooks are configured.
 574	var hookRunner *hooks.Runner
 575	if preToolHooks := c.cfg.Config().Hooks[hooks.EventPreToolUse]; len(preToolHooks) > 0 {
 576		hookRunner = hooks.NewRunner(preToolHooks, c.cfg.WorkingDir(), c.cfg.WorkingDir())
 577	}
 578
 579	allTools = append(
 580		allTools,
 581		tools.NewBashTool(c.permissions, c.cfg.WorkingDir(), c.cfg.Config().Options.Attribution, modelID),
 582		tools.NewCrushInfoTool(c.cfg, c.lspManager, c.allSkills, c.activeSkills, c.skillTracker),
 583		tools.NewCrushLogsTool(logFile),
 584		tools.NewJobOutputTool(),
 585		tools.NewJobKillTool(),
 586		tools.NewDownloadTool(c.permissions, c.cfg.WorkingDir(), nil),
 587		tools.NewEditTool(c.lspManager, c.permissions, c.history, c.filetracker, c.cfg.WorkingDir()),
 588		tools.NewMultiEditTool(c.lspManager, c.permissions, c.history, c.filetracker, c.cfg.WorkingDir()),
 589		tools.NewFetchTool(c.permissions, c.cfg.WorkingDir(), nil),
 590		tools.NewGlobTool(c.cfg.WorkingDir()),
 591		tools.NewGrepTool(c.cfg.WorkingDir(), c.cfg.Config().Tools.Grep),
 592		tools.NewLsTool(c.permissions, c.cfg.WorkingDir(), c.cfg.Config().Tools.Ls),
 593		tools.NewSourcegraphTool(nil),
 594		tools.NewTodosTool(c.sessions),
 595		tools.NewViewTool(c.lspManager, c.permissions, c.filetracker, c.skillTracker, c.cfg.WorkingDir(), c.cfg.Config().Options.SkillsPaths...),
 596		tools.NewWriteTool(c.lspManager, c.permissions, c.history, c.filetracker, c.cfg.WorkingDir()),
 597	)
 598
 599	// Add LSP tools if user has configured LSPs or auto_lsp is enabled (nil or true).
 600	if len(c.cfg.Config().LSP) > 0 || c.cfg.Config().Options.AutoLSP == nil || *c.cfg.Config().Options.AutoLSP {
 601		allTools = append(allTools, tools.NewDiagnosticsTool(c.lspManager), tools.NewReferencesTool(c.lspManager), tools.NewLSPRestartTool(c.lspManager))
 602	}
 603
 604	if len(c.cfg.Config().MCP) > 0 {
 605		allTools = append(
 606			allTools,
 607			tools.NewListMCPResourcesTool(c.cfg, c.permissions),
 608			tools.NewReadMCPResourceTool(c.cfg, c.permissions),
 609		)
 610	}
 611
 612	var filteredTools []fantasy.AgentTool
 613	for _, tool := range allTools {
 614		if slices.Contains(agent.AllowedTools, tool.Info().Name) {
 615			filteredTools = append(filteredTools, tool)
 616		}
 617	}
 618
 619	for _, tool := range tools.GetMCPTools(c.permissions, c.cfg, c.cfg.WorkingDir()) {
 620		if agent.AllowedMCP == nil {
 621			// No MCP restrictions
 622			filteredTools = append(filteredTools, tool)
 623			continue
 624		}
 625		if len(agent.AllowedMCP) == 0 {
 626			// No MCPs allowed
 627			slog.Debug("No MCPs allowed", "tool", tool.Name(), "agent", agent.Name)
 628			break
 629		}
 630
 631		for mcp, tools := range agent.AllowedMCP {
 632			if mcp != tool.MCP() {
 633				continue
 634			}
 635			if len(tools) == 0 || slices.Contains(tools, tool.MCPToolName()) {
 636				filteredTools = append(filteredTools, tool)
 637				break
 638			}
 639			slog.Debug("MCP not allowed", "tool", tool.Name(), "agent", agent.Name)
 640		}
 641	}
 642	slices.SortFunc(filteredTools, func(a, b fantasy.AgentTool) int {
 643		return strings.Compare(a.Info().Name, b.Info().Name)
 644	})
 645
 646	// Wrap tools with hook interception for the top-level agent only.
 647	// Sub-agents (the `agent` task tool, `agentic_fetch`, etc.) run
 648	// without hook interception to avoid firing the user's hook N times
 649	// per delegated turn. The top-level invocation of the sub-agent tool
 650	// itself is still wrapped from the coder's side.
 651	filteredTools = wrapToolsWithHooks(filteredTools, hookRunner, isSubAgent)
 652
 653	return filteredTools, nil
 654}
 655
 656// TODO: when we support multiple agents we need to change this so that we pass in the agent specific model config
 657func (c *coordinator) buildAgentModels(ctx context.Context, isSubAgent bool) (Model, Model, error) {
 658	largeModelCfg, ok := c.cfg.Config().Models[config.SelectedModelTypeLarge]
 659	if !ok {
 660		return Model{}, Model{}, errLargeModelNotSelected
 661	}
 662	smallModelCfg, ok := c.cfg.Config().Models[config.SelectedModelTypeSmall]
 663	if !ok {
 664		return Model{}, Model{}, errSmallModelNotSelected
 665	}
 666
 667	largeProviderCfg, ok := c.cfg.Config().Providers.Get(largeModelCfg.Provider)
 668	if !ok {
 669		return Model{}, Model{}, errLargeModelProviderNotConfigured
 670	}
 671
 672	largeProvider, err := c.buildProvider(largeProviderCfg, largeModelCfg, isSubAgent)
 673	if err != nil {
 674		return Model{}, Model{}, err
 675	}
 676
 677	smallProviderCfg, ok := c.cfg.Config().Providers.Get(smallModelCfg.Provider)
 678	if !ok {
 679		return Model{}, Model{}, errSmallModelProviderNotConfigured
 680	}
 681
 682	smallProvider, err := c.buildProvider(smallProviderCfg, smallModelCfg, true)
 683	if err != nil {
 684		return Model{}, Model{}, err
 685	}
 686
 687	var largeCatwalkModel *catwalk.Model
 688	var smallCatwalkModel *catwalk.Model
 689
 690	for _, m := range largeProviderCfg.Models {
 691		if m.ID == largeModelCfg.Model {
 692			largeCatwalkModel = &m
 693		}
 694	}
 695	for _, m := range smallProviderCfg.Models {
 696		if m.ID == smallModelCfg.Model {
 697			smallCatwalkModel = &m
 698		}
 699	}
 700
 701	if largeCatwalkModel == nil {
 702		return Model{}, Model{}, errLargeModelNotFound
 703	}
 704
 705	if smallCatwalkModel == nil {
 706		return Model{}, Model{}, errSmallModelNotFound
 707	}
 708
 709	largeModelID := largeModelCfg.Model
 710	smallModelID := smallModelCfg.Model
 711
 712	if largeModelCfg.Provider == openrouter.Name && isExactoSupported(largeModelID) {
 713		largeModelID += ":exacto"
 714	}
 715
 716	if smallModelCfg.Provider == openrouter.Name && isExactoSupported(smallModelID) {
 717		smallModelID += ":exacto"
 718	}
 719
 720	largeModel, err := largeProvider.LanguageModel(ctx, largeModelID)
 721	if err != nil {
 722		return Model{}, Model{}, err
 723	}
 724	smallModel, err := smallProvider.LanguageModel(ctx, smallModelID)
 725	if err != nil {
 726		return Model{}, Model{}, err
 727	}
 728
 729	return Model{
 730			Model:      largeModel,
 731			CatwalkCfg: *largeCatwalkModel,
 732			ModelCfg:   largeModelCfg,
 733			FlatRate:   largeProviderCfg.FlatRate,
 734		}, Model{
 735			Model:      smallModel,
 736			CatwalkCfg: *smallCatwalkModel,
 737			ModelCfg:   smallModelCfg,
 738			FlatRate:   smallProviderCfg.FlatRate,
 739		}, nil
 740}
 741
 742func (c *coordinator) buildAnthropicProvider(baseURL, apiKey string, headers map[string]string, providerID string) (fantasy.Provider, error) {
 743	var opts []anthropic.Option
 744
 745	switch {
 746	case strings.HasPrefix(apiKey, "Bearer "):
 747		// NOTE: Prevent the SDK from picking up the API key from env.
 748		os.Setenv("ANTHROPIC_API_KEY", "")
 749		headers["Authorization"] = apiKey
 750	case providerID == string(catwalk.InferenceProviderMiniMax) || providerID == string(catwalk.InferenceProviderMiniMaxChina):
 751		// NOTE: Prevent the SDK from picking up the API key from env.
 752		os.Setenv("ANTHROPIC_API_KEY", "")
 753		headers["Authorization"] = "Bearer " + apiKey
 754	case apiKey != "":
 755		// X-Api-Key header
 756		opts = append(opts, anthropic.WithAPIKey(apiKey))
 757	}
 758
 759	if len(headers) > 0 {
 760		opts = append(opts, anthropic.WithHeaders(headers))
 761	}
 762
 763	if baseURL != "" {
 764		opts = append(opts, anthropic.WithBaseURL(baseURL))
 765	}
 766
 767	if c.cfg.Config().Options.Debug {
 768		httpClient := log.NewHTTPClient()
 769		opts = append(opts, anthropic.WithHTTPClient(httpClient))
 770	}
 771	return anthropic.New(opts...)
 772}
 773
 774func (c *coordinator) buildOpenaiProvider(baseURL, apiKey string, headers map[string]string) (fantasy.Provider, error) {
 775	opts := []openai.Option{
 776		openai.WithAPIKey(apiKey),
 777		openai.WithUseResponsesAPI(),
 778	}
 779	if c.cfg.Config().Options.Debug {
 780		httpClient := log.NewHTTPClient()
 781		opts = append(opts, openai.WithHTTPClient(httpClient))
 782	}
 783	if len(headers) > 0 {
 784		opts = append(opts, openai.WithHeaders(headers))
 785	}
 786	if baseURL != "" {
 787		opts = append(opts, openai.WithBaseURL(baseURL))
 788	}
 789	return openai.New(opts...)
 790}
 791
 792func (c *coordinator) buildOpenrouterProvider(_, apiKey string, headers map[string]string) (fantasy.Provider, error) {
 793	opts := []openrouter.Option{
 794		openrouter.WithAPIKey(apiKey),
 795	}
 796	if c.cfg.Config().Options.Debug {
 797		httpClient := log.NewHTTPClient()
 798		opts = append(opts, openrouter.WithHTTPClient(httpClient))
 799	}
 800	if len(headers) > 0 {
 801		opts = append(opts, openrouter.WithHeaders(headers))
 802	}
 803	return openrouter.New(opts...)
 804}
 805
 806func (c *coordinator) buildVercelProvider(_, apiKey string, headers map[string]string) (fantasy.Provider, error) {
 807	opts := []vercel.Option{
 808		vercel.WithAPIKey(apiKey),
 809	}
 810	if c.cfg.Config().Options.Debug {
 811		httpClient := log.NewHTTPClient()
 812		opts = append(opts, vercel.WithHTTPClient(httpClient))
 813	}
 814	if len(headers) > 0 {
 815		opts = append(opts, vercel.WithHeaders(headers))
 816	}
 817	return vercel.New(opts...)
 818}
 819
 820func (c *coordinator) buildOpenaiCompatProvider(baseURL, apiKey string, headers map[string]string, extraBody map[string]any, providerID string, isSubAgent bool) (fantasy.Provider, error) {
 821	opts := []openaicompat.Option{
 822		openaicompat.WithBaseURL(baseURL),
 823		openaicompat.WithAPIKey(apiKey),
 824	}
 825
 826	// Set HTTP client based on provider and debug mode.
 827	var httpClient *http.Client
 828	switch providerID {
 829	case string(catwalk.InferenceProviderCopilot):
 830		opts = append(
 831			opts,
 832			openaicompat.WithUseResponsesAPI(),
 833			openaicompat.WithResponsesAPIFunc(func(modelID string) bool {
 834				return copilotResponsesModels[modelID]
 835			}),
 836		)
 837		httpClient = copilot.NewClient(isSubAgent, c.cfg.Config().Options.Debug)
 838	}
 839	if httpClient == nil && c.cfg.Config().Options.Debug {
 840		httpClient = log.NewHTTPClient()
 841	}
 842	if httpClient != nil {
 843		opts = append(opts, openaicompat.WithHTTPClient(httpClient))
 844	}
 845
 846	if len(headers) > 0 {
 847		opts = append(opts, openaicompat.WithHeaders(headers))
 848	}
 849
 850	for extraKey, extraValue := range extraBody {
 851		opts = append(opts, openaicompat.WithSDKOptions(openaisdk.WithJSONSet(extraKey, extraValue)))
 852	}
 853
 854	return openaicompat.New(opts...)
 855}
 856
 857func (c *coordinator) buildAzureProvider(baseURL, apiKey string, headers map[string]string, options map[string]string) (fantasy.Provider, error) {
 858	opts := []azure.Option{
 859		azure.WithBaseURL(baseURL),
 860		azure.WithAPIKey(apiKey),
 861		azure.WithUseResponsesAPI(),
 862	}
 863	if c.cfg.Config().Options.Debug {
 864		httpClient := log.NewHTTPClient()
 865		opts = append(opts, azure.WithHTTPClient(httpClient))
 866	}
 867	if options == nil {
 868		options = make(map[string]string)
 869	}
 870	if apiVersion, ok := options["apiVersion"]; ok {
 871		opts = append(opts, azure.WithAPIVersion(apiVersion))
 872	}
 873	if len(headers) > 0 {
 874		opts = append(opts, azure.WithHeaders(headers))
 875	}
 876
 877	return azure.New(opts...)
 878}
 879
 880func (c *coordinator) buildBedrockProvider(apiKey string, headers map[string]string, providerID string) (fantasy.Provider, error) {
 881	var opts []bedrock.Option
 882	if c.cfg.Config().Options.Debug {
 883		httpClient := log.NewHTTPClient()
 884		opts = append(opts, bedrock.WithHTTPClient(httpClient))
 885	}
 886	if len(headers) > 0 {
 887		opts = append(opts, bedrock.WithHeaders(headers))
 888	}
 889
 890	switch {
 891	case apiKey != "":
 892		opts = append(opts, bedrock.WithAPIKey(apiKey))
 893	case os.Getenv("AWS_BEARER_TOKEN_BEDROCK") != "":
 894		opts = append(opts, bedrock.WithAPIKey(os.Getenv("AWS_BEARER_TOKEN_BEDROCK")))
 895	default:
 896		// Skip, let the SDK do authentication.
 897	}
 898
 899	switch providerID {
 900	case string(catwalk.InferenceProviderBedrockEurope):
 901		opts = append(opts, bedrock.WithRegion("eu-west-1"))
 902	default:
 903		opts = append(opts, bedrock.WithRegion("us-east-1"))
 904	}
 905
 906	return bedrock.New(opts...)
 907}
 908
 909func (c *coordinator) buildGoogleProvider(baseURL, apiKey string, headers map[string]string) (fantasy.Provider, error) {
 910	opts := []google.Option{
 911		google.WithBaseURL(baseURL),
 912		google.WithGeminiAPIKey(apiKey),
 913	}
 914	if c.cfg.Config().Options.Debug {
 915		httpClient := log.NewHTTPClient()
 916		opts = append(opts, google.WithHTTPClient(httpClient))
 917	}
 918	if len(headers) > 0 {
 919		opts = append(opts, google.WithHeaders(headers))
 920	}
 921	return google.New(opts...)
 922}
 923
 924func (c *coordinator) buildGoogleVertexProvider(headers map[string]string, options map[string]string) (fantasy.Provider, error) {
 925	opts := []google.Option{}
 926	if c.cfg.Config().Options.Debug {
 927		httpClient := log.NewHTTPClient()
 928		opts = append(opts, google.WithHTTPClient(httpClient))
 929	}
 930	if len(headers) > 0 {
 931		opts = append(opts, google.WithHeaders(headers))
 932	}
 933
 934	project := options["project"]
 935	location := options["location"]
 936
 937	opts = append(opts, google.WithVertex(project, location))
 938
 939	return google.New(opts...)
 940}
 941
 942func (c *coordinator) isAnthropicThinking(model config.SelectedModel) bool {
 943	if model.Think {
 944		return true
 945	}
 946	opts, err := anthropic.ParseOptions(model.ProviderOptions)
 947	return err == nil && opts.Thinking != nil
 948}
 949
 950func (c *coordinator) buildProvider(providerCfg config.ProviderConfig, model config.SelectedModel, isSubAgent bool) (fantasy.Provider, error) {
 951	headers := maps.Clone(providerCfg.ExtraHeaders)
 952	if headers == nil {
 953		headers = make(map[string]string)
 954	}
 955
 956	// handle special headers for anthropic
 957	if providerCfg.Type == anthropic.Name && c.isAnthropicThinking(model) {
 958		if v, ok := headers["anthropic-beta"]; ok {
 959			headers["anthropic-beta"] = v + ",interleaved-thinking-2025-05-14"
 960		} else {
 961			headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
 962		}
 963	}
 964
 965	apiKey, _ := c.cfg.Resolve(providerCfg.APIKey)
 966	baseURL, _ := c.cfg.Resolve(providerCfg.BaseURL)
 967
 968	switch providerCfg.ID {
 969	case string(catwalk.InferenceProviderOpenCodeGo), string(catwalk.InferenceProviderOpenCodeZen):
 970		if opencodeMessagesModels[model.Model] {
 971			baseURL = strings.TrimSuffix(baseURL, "/v1")
 972			return c.buildAnthropicProvider(baseURL, apiKey, headers, providerCfg.ID)
 973		}
 974	}
 975
 976	switch providerCfg.Type {
 977	case openai.Name:
 978		return c.buildOpenaiProvider(baseURL, apiKey, headers)
 979	case anthropic.Name:
 980		return c.buildAnthropicProvider(baseURL, apiKey, headers, providerCfg.ID)
 981	case openrouter.Name:
 982		return c.buildOpenrouterProvider(baseURL, apiKey, headers)
 983	case vercel.Name:
 984		return c.buildVercelProvider(baseURL, apiKey, headers)
 985	case azure.Name:
 986		return c.buildAzureProvider(baseURL, apiKey, headers, providerCfg.ExtraParams)
 987	case bedrock.Name:
 988		return c.buildBedrockProvider(apiKey, headers, providerCfg.ID)
 989	case google.Name:
 990		return c.buildGoogleProvider(baseURL, apiKey, headers)
 991	case "google-vertex":
 992		return c.buildGoogleVertexProvider(headers, providerCfg.ExtraParams)
 993	case openaicompat.Name, hyper.Name:
 994		switch providerCfg.ID {
 995		case hyper.Name:
 996			baseURL = hyper.BaseURL() + "/v1"
 997			headers["x-crush-id"] = event.GetID()
 998		case string(catwalk.InferenceProviderZAI):
 999			if providerCfg.ExtraBody == nil {
1000				providerCfg.ExtraBody = map[string]any{}
1001			}
1002			providerCfg.ExtraBody["tool_stream"] = true
1003		}
1004		return c.buildOpenaiCompatProvider(baseURL, apiKey, headers, providerCfg.ExtraBody, providerCfg.ID, isSubAgent)
1005	default:
1006		return nil, fmt.Errorf("provider type not supported: %q", providerCfg.Type)
1007	}
1008}
1009
// isExactoSupported reports whether modelID exactly matches one of the
// hard-coded model IDs listed below. The comparison is case-sensitive.
func isExactoSupported(modelID string) bool {
	exactoModels := [...]string{
		"moonshotai/kimi-k2-0905",
		"deepseek/deepseek-v3.1-terminus",
		"z-ai/glm-4.6",
		"openai/gpt-oss-120b",
		"qwen/qwen3-coder",
	}
	return slices.Index(exactoModels[:], modelID) != -1
}
1020
// BeginAccepted reserves an accept slot for sessionID on the active
// agent and returns the ownership handle. It is the fire-and-forget
// dispatch path's only way to mark a run as accepted-but-not-yet-active
// so a cancel arriving before the run registers in activeRequests is not
// lost.
//
// The coordinator itself holds no state for this; the call is forwarded
// unchanged to the current agent's BeginAccepted.
func (c *coordinator) BeginAccepted(sessionID string) *AcceptedRun {
	return c.currentAgent.BeginAccepted(sessionID)
}
1029
// Cancel calls Cancel on the current agent for sessionID.
func (c *coordinator) Cancel(sessionID string) {
	c.currentAgent.Cancel(sessionID)
}
1033
// CancelAll calls CancelAll on the current agent.
func (c *coordinator) CancelAll() {
	c.currentAgent.CancelAll()
}
1037
// ClearQueue calls ClearQueue on the current agent for sessionID.
func (c *coordinator) ClearQueue(sessionID string) {
	c.currentAgent.ClearQueue(sessionID)
}
1041
// IsBusy returns the current agent's IsBusy result.
func (c *coordinator) IsBusy() bool {
	return c.currentAgent.IsBusy()
}
1045
// IsSessionBusy returns the current agent's IsSessionBusy result for
// sessionID.
func (c *coordinator) IsSessionBusy(sessionID string) bool {
	return c.currentAgent.IsSessionBusy(sessionID)
}
1049
// Model returns the Model reported by the current agent.
func (c *coordinator) Model() Model {
	return c.currentAgent.Model()
}
1053
1054func (c *coordinator) UpdateModels(ctx context.Context) error {
1055	// build the models again so we make sure we get the latest config
1056	large, small, err := c.buildAgentModels(ctx, false)
1057	if err != nil {
1058		return err
1059	}
1060	c.currentAgent.SetModels(large, small)
1061
1062	agentCfg, ok := c.cfg.Config().Agents[config.AgentCoder]
1063	if !ok {
1064		return errCoderAgentNotConfigured
1065	}
1066
1067	tools, err := c.buildTools(ctx, agentCfg, false)
1068	if err != nil {
1069		return err
1070	}
1071	c.currentAgent.SetTools(tools)
1072	return nil
1073}
1074
// QueuedPrompts returns the current agent's QueuedPrompts count for
// sessionID.
func (c *coordinator) QueuedPrompts(sessionID string) int {
	return c.currentAgent.QueuedPrompts(sessionID)
}
1078
// QueuedPromptsList returns the current agent's QueuedPromptsList result for
// sessionID.
func (c *coordinator) QueuedPromptsList(sessionID string) []string {
	return c.currentAgent.QueuedPromptsList(sessionID)
}
1082
1083func (c *coordinator) Summarize(ctx context.Context, sessionID string) error {
1084	providerCfg, ok := c.cfg.Config().Providers.Get(c.currentAgent.Model().ModelCfg.Provider)
1085	if !ok {
1086		return errModelProviderNotConfigured
1087	}
1088
1089	if err := c.refreshTokenIfExpired(ctx, providerCfg); err != nil {
1090		slog.Error("Failed to refresh OAuth2 token before summarize. Proceeding with existing token.", "error", err)
1091	}
1092
1093	summarize := func() error {
1094		return c.currentAgent.Summarize(ctx, sessionID, getProviderOptions(c.currentAgent.Model(), providerCfg))
1095	}
1096
1097	err := summarize()
1098	if err != nil && c.isUnauthorized(err) {
1099		if retryErr := c.retryAfterUnauthorized(ctx, providerCfg); retryErr == nil {
1100			return summarize()
1101		}
1102	}
1103
1104	return err
1105}
1106
1107// refreshTokenIfExpired proactively refreshes the OAuth token if it has expired.
1108func (c *coordinator) refreshTokenIfExpired(ctx context.Context, providerCfg config.ProviderConfig) error {
1109	if providerCfg.OAuthToken == nil || !providerCfg.OAuthToken.IsExpired() {
1110		return nil
1111	}
1112	slog.Debug("Token needs to be refreshed", "provider", providerCfg.ID)
1113	return c.refreshOAuth2Token(ctx, providerCfg)
1114}
1115
1116// retryAfterUnauthorized attempts to refresh credentials after receiving a 401
1117// and returns nil if retry should be attempted.
1118func (c *coordinator) retryAfterUnauthorized(ctx context.Context, providerCfg config.ProviderConfig) error {
1119	switch {
1120	case providerCfg.OAuthToken != nil:
1121		slog.Debug("Received 401. Refreshing token and retrying", "provider", providerCfg.ID)
1122		return c.refreshOAuth2Token(ctx, providerCfg)
1123	case strings.Contains(providerCfg.APIKeyTemplate, "$"):
1124		slog.Debug("Received 401. Refreshing API Key template and retrying", "provider", providerCfg.ID)
1125		return c.refreshApiKeyTemplate(ctx, providerCfg)
1126	default:
1127		return nil
1128	}
1129}
1130
1131func (c *coordinator) isUnauthorized(err error) bool {
1132	var providerErr *fantasy.ProviderError
1133	return errors.As(err, &providerErr) && providerErr.StatusCode == http.StatusUnauthorized
1134}
1135
1136func (c *coordinator) refreshOAuth2Token(ctx context.Context, providerCfg config.ProviderConfig) error {
1137	if err := c.cfg.RefreshOAuthToken(ctx, config.ScopeGlobal, providerCfg.ID); err != nil {
1138		slog.Error("Failed to refresh OAuth token after 401 error", "provider", providerCfg.ID, "error", err)
1139		return err
1140	}
1141	if err := c.UpdateModels(ctx); err != nil {
1142		return err
1143	}
1144	return nil
1145}
1146
1147func (c *coordinator) refreshApiKeyTemplate(ctx context.Context, providerCfg config.ProviderConfig) error {
1148	newAPIKey, err := c.cfg.Resolve(providerCfg.APIKeyTemplate)
1149	if err != nil {
1150		slog.Error("Failed to re-resolve API key after 401 error", "provider", providerCfg.ID, "error", err)
1151		return err
1152	}
1153
1154	providerCfg.APIKey = newAPIKey
1155	c.cfg.Config().Providers.Set(providerCfg.ID, providerCfg)
1156
1157	if err := c.UpdateModels(ctx); err != nil {
1158		return err
1159	}
1160	return nil
1161}
1162
// subAgentParams holds the parameters for running a sub-agent.
//
// As consumed by runSubAgent:
//   - Agent is the agent that is run; its Model supplies the call settings.
//   - SessionID is the parent session: the sub-session is created under it
//     and the sub-session's cost is added to it afterwards.
//   - AgentMessageID and ToolCallID are combined to derive the sub-session ID.
//   - Prompt is the prompt passed to the agent.
//   - SessionTitle is the title given to the sub-session.
type subAgentParams struct {
	Agent          SessionAgent
	SessionID      string
	AgentMessageID string
	ToolCallID     string
	Prompt         string
	SessionTitle   string
	// SessionSetup is an optional callback invoked after session creation
	// but before agent execution, for custom session configuration.
	SessionSetup func(sessionID string)
}
1175
1176// runSubAgent runs a sub-agent and handles session management and cost accumulation.
1177// It creates a sub-session, runs the agent with the given prompt, and propagates
1178// the cost to the parent session.
1179func (c *coordinator) runSubAgent(ctx context.Context, params subAgentParams) (fantasy.ToolResponse, error) {
1180	// Create sub-session
1181	agentToolSessionID := c.sessions.CreateAgentToolSessionID(params.AgentMessageID, params.ToolCallID)
1182	session, err := c.sessions.CreateTaskSession(ctx, agentToolSessionID, params.SessionID, params.SessionTitle)
1183	if err != nil {
1184		return fantasy.ToolResponse{}, fmt.Errorf("create session: %w", err)
1185	}
1186
1187	// Call session setup function if provided
1188	if params.SessionSetup != nil {
1189		params.SessionSetup(session.ID)
1190	}
1191
1192	// Get model configuration
1193	model := params.Agent.Model()
1194	maxTokens := model.CatwalkCfg.DefaultMaxTokens
1195	if model.ModelCfg.MaxTokens != 0 {
1196		maxTokens = model.ModelCfg.MaxTokens
1197	}
1198
1199	providerCfg, ok := c.cfg.Config().Providers.Get(model.ModelCfg.Provider)
1200	if !ok {
1201		return fantasy.ToolResponse{}, errModelProviderNotConfigured
1202	}
1203
1204	// Run the agent
1205	result, err := params.Agent.Run(ctx, SessionAgentCall{
1206		SessionID:        session.ID,
1207		Prompt:           params.Prompt,
1208		MaxOutputTokens:  maxTokens,
1209		ProviderOptions:  getProviderOptions(model, providerCfg),
1210		Temperature:      model.ModelCfg.Temperature,
1211		TopP:             model.ModelCfg.TopP,
1212		TopK:             model.ModelCfg.TopK,
1213		FrequencyPenalty: model.ModelCfg.FrequencyPenalty,
1214		PresencePenalty:  model.ModelCfg.PresencePenalty,
1215		NonInteractive:   true,
1216	})
1217	if err != nil {
1218		return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to generate response: %s", err)), nil
1219	}
1220
1221	// Update parent session cost
1222	if err := c.updateParentSessionCost(ctx, session.ID, params.SessionID); err != nil {
1223		return fantasy.ToolResponse{}, err
1224	}
1225
1226	return fantasy.NewTextResponse(result.Response.Content.Text()), nil
1227}
1228
1229// updateParentSessionCost accumulates the cost from a child session to its parent session.
1230func (c *coordinator) updateParentSessionCost(ctx context.Context, childSessionID, parentSessionID string) error {
1231	childSession, err := c.sessions.Get(ctx, childSessionID)
1232	if err != nil {
1233		return fmt.Errorf("get child session: %w", err)
1234	}
1235
1236	parentSession, err := c.sessions.Get(ctx, parentSessionID)
1237	if err != nil {
1238		return fmt.Errorf("get parent session: %w", err)
1239	}
1240
1241	parentSession.Cost += childSession.Cost
1242
1243	if _, err := c.sessions.Save(ctx, parentSession); err != nil {
1244		return fmt.Errorf("save parent session: %w", err)
1245	}
1246
1247	return nil
1248}
1249
1250// discoverSkills is a thin fallback wrapper used only when no
1251// skills.Manager has been threaded through to the coordinator. All
1252// production call sites (backend.CreateWorkspace, setupLocalWorkspace)
1253// run discovery in advance and pass the results via the manager;
1254// reaching this path means a caller bypassed both. It deliberately does
1255// NOT publish to the package-level broker — there are no subscribers in
1256// that case, so doing so would be misleading without delivering the
1257// snapshot anywhere useful.
1258func discoverSkills(cfg *config.ConfigStore) (allSkills, activeSkills []*skills.Skill) {
1259	opts := cfg.Config().Options
1260	var paths, disabled []string
1261	if opts != nil {
1262		paths = opts.SkillsPaths
1263		disabled = opts.DisabledSkills
1264	}
1265	var resolver func(string) (string, error)
1266	if r := cfg.Resolver(); r != nil {
1267		resolver = r.ResolveValue
1268	}
1269	allSkills, activeSkills, states := skills.DiscoverFromConfig(skills.DiscoveryConfig{
1270		SkillsPaths:    paths,
1271		DisabledSkills: disabled,
1272		Resolver:       resolver,
1273	})
1274	logDiscoveryStats(states, paths, allSkills, activeSkills, disabled)
1275	return allSkills, activeSkills
1276}
1277
1278// logTurnSkillUsage emits a per-turn diagnostic line showing which skills
1279// (if any) were loaded during this turn and which looked relevant based on
1280// a cheap keyword match against the user prompt. The goal is to surface
1281// "should-have-loaded but didn't" situations for later analysis.
1282//
1283// Logged at Info level under component=skills; heavy fields are elided when
1284// there is nothing interesting to report.
1285func logTurnSkillUsage(
1286	sessionID string,
1287	prompt string,
1288	activeSkills []*skills.Skill,
1289	tracker *skills.Tracker,
1290	before []string,
1291) {
1292	if tracker == nil || len(activeSkills) == 0 {
1293		return
1294	}
1295
1296	after := tracker.LoadedNames()
1297
1298	beforeSet := make(map[string]bool, len(before))
1299	for _, n := range before {
1300		beforeSet[n] = true
1301	}
1302	var loadedThisTurn []string
1303	for _, n := range after {
1304		if !beforeSet[n] {
1305			loadedThisTurn = append(loadedThisTurn, n)
1306		}
1307	}
1308
1309	slog.Info(
1310		"Skill turn summary",
1311		"component", "skills",
1312		"session_id", sessionID,
1313		"prompt_len", len(prompt),
1314		"active_total", len(activeSkills),
1315		"loaded_total", len(after),
1316		"loaded_this_turn", loadedThisTurn,
1317	)
1318}
1319
1320// logDiscoveryStats emits a single structured log line summarising skill
1321// discovery for the current session. It is intentionally low-volume: one
1322// line per session start. Builtin vs user counts are derived from the
1323// SkillState.Path — builtin states use the "builtin/" embed prefix.
1324func logDiscoveryStats(
1325	states []*skills.SkillState,
1326	userPaths []string,
1327	allSkills, activeSkills []*skills.Skill,
1328	disabled []string,
1329) {
1330	var builtinOK, builtinErr, userOK, userErr int
1331	for _, s := range states {
1332		isBuiltin := strings.HasPrefix(s.Path, "builtin/")
1333		switch {
1334		case isBuiltin && s.State == skills.StateNormal:
1335			builtinOK++
1336		case isBuiltin && s.State == skills.StateError:
1337			builtinErr++
1338		case !isBuiltin && s.State == skills.StateNormal:
1339			userOK++
1340		case !isBuiltin && s.State == skills.StateError:
1341			userErr++
1342		}
1343	}
1344
1345	activeNames := make([]string, 0, len(activeSkills))
1346	for _, s := range activeSkills {
1347		activeNames = append(activeNames, s.Name)
1348	}
1349
1350	xml := skills.ToPromptXML(activeSkills)
1351
1352	slog.Info(
1353		"Skill discovery complete",
1354		"component", "skills",
1355		"builtin_ok", builtinOK,
1356		"builtin_errors", builtinErr,
1357		"user_ok", userOK,
1358		"user_errors", userErr,
1359		"user_paths", len(userPaths),
1360		"deduped_total", len(allSkills),
1361		"active", len(activeSkills),
1362		"disabled", len(disabled),
1363		"prompt_bytes", len(xml),
1364		"prompt_tok_est", skills.ApproxTokenCount(xml),
1365		"active_names", activeNames,
1366	)
1367}