diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index c7c96cd42cfce58eab918e9ae2e267ac3f78ad8d..b2074b066b49038703a05dbe190dbe8e18a835f9 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -249,12 +249,17 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 	var currentAssistant *message.Message
 	var shouldSummarize bool
 
+	// Don't send MaxOutputTokens if 0 — some providers (e.g. LM Studio) reject it
+	var maxOutputTokens *int64
+	if call.MaxOutputTokens > 0 {
+		maxOutputTokens = &call.MaxOutputTokens
+	}
 	result, err := agent.Stream(genCtx, fantasy.AgentStreamCall{
 		Prompt:          message.PromptWithTextAttachments(call.Prompt, call.Attachments),
 		Files:           files,
 		Messages:        history,
 		ProviderOptions: call.ProviderOptions,
-		MaxOutputTokens: &call.MaxOutputTokens,
+		MaxOutputTokens: maxOutputTokens,
 		TopP:            call.TopP,
 		Temperature:     call.Temperature,
 		PresencePenalty: call.PresencePenalty,
@@ -425,6 +430,11 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 			StopWhen: []fantasy.StopCondition{
 				func(_ []fantasy.StepResult) bool {
 					cw := int64(largeModel.CatwalkCfg.ContextWindow)
+					// If context window is unknown (0), skip auto-summarize
+					// to avoid immediately truncating custom/local models.
+					if cw == 0 {
+						return false
+					}
 					tokens := currentSession.CompletionTokens + currentSession.PromptTokens
 					remaining := cw - tokens
 					var threshold int64