From 741c18a21121fde99dbcadc7eadafca037ba790e Mon Sep 17 00:00:00 2001
From: Kartik33
Date: Mon, 6 Apr 2026 06:23:01 -0700
Subject: [PATCH] fix: support local models with unknown max_tokens and context
 window (#2554)

Two fixes for local/custom model compatibility (LM Studio, Ollama,
llama.cpp):

1. Don't send MaxOutputTokens when it's 0. Custom models not in the
   catwalk providers list have DefaultMaxTokens=0, which gets sent as
   max_tokens:0 in the API request. LM Studio rejects this with
   "maxPredictedTokens does not satisfy the schema". Fix: only send the
   field when the value is positive.

2. Skip auto-summarize when ContextWindow is 0. Custom models have
   ContextWindow=0, making remaining tokens negative, which immediately
   triggers summarize after the first response. The session resets with
   "previous session was interrupted because it got too long" even for
   short conversations. Fix: skip the check when context window is
   unknown.

Fixes #1218 (regression), relates to #1583, #1591
---
 internal/agent/agent.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/internal/agent/agent.go b/internal/agent/agent.go
index c7c96cd42cfce58eab918e9ae2e267ac3f78ad8d..b2074b066b49038703a05dbe190dbe8e18a835f9 100644
--- a/internal/agent/agent.go
+++ b/internal/agent/agent.go
@@ -249,12 +249,17 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 	var currentAssistant *message.Message
 	var shouldSummarize bool
 
+	// Don't send MaxOutputTokens if 0 — some providers (e.g. LM Studio) reject it
+	var maxOutputTokens *int64
+	if call.MaxOutputTokens > 0 {
+		maxOutputTokens = &call.MaxOutputTokens
+	}
 	result, err := agent.Stream(genCtx, fantasy.AgentStreamCall{
 		Prompt:          message.PromptWithTextAttachments(call.Prompt, call.Attachments),
 		Files:           files,
 		Messages:        history,
 		ProviderOptions: call.ProviderOptions,
-		MaxOutputTokens: &call.MaxOutputTokens,
+		MaxOutputTokens: maxOutputTokens,
 		TopP:            call.TopP,
 		Temperature:     call.Temperature,
 		PresencePenalty: call.PresencePenalty,
@@ -425,6 +430,11 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 		StopWhen: []fantasy.StopCondition{
 			func(_ []fantasy.StepResult) bool {
 				cw := int64(largeModel.CatwalkCfg.ContextWindow)
+				// If context window is unknown (0), skip auto-summarize
+				// to avoid immediately truncating custom/local models.
+				if cw == 0 {
+					return false
+				}
 				tokens := currentSession.CompletionTokens + currentSession.PromptTokens
 				remaining := cw - tokens
 				var threshold int64