From 0180f3e72ab95cd2be1d96927a67faa616dcd137 Mon Sep 17 00:00:00 2001 From: Ben Brandt Date: Thu, 18 Dec 2025 13:47:34 +0100 Subject: [PATCH] deepseek: Fix for max output tokens blocking completions (#45236) The DeepSeek API counts the requested max_output_tokens against the prompt total. This seems like a bug on their end, as most other providers don't do this, so we now default to None for the main models and let the API use its default behavior, which works just fine. Closes: #45134 Release Notes: - deepseek: Fix issue with the DeepSeek API that was causing the token limit to be reached sooner than necessary --- crates/deepseek/src/deepseek.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/deepseek/src/deepseek.rs b/crates/deepseek/src/deepseek.rs index e978aa08048bfa4c7b7b203ce6b405ba8a0a7d0c..636258a5a132ce79cb5d15b1aaa25d6e4d3af643 100644 --- a/crates/deepseek/src/deepseek.rs +++ b/crates/deepseek/src/deepseek.rs @@ -103,8 +103,9 @@ impl Model { pub fn max_output_tokens(&self) -> Option { match self { - Self::Chat => Some(8_192), - Self::Reasoner => Some(64_000), + // Their API treats this max against the context window, which means we hit the limit a lot + // Using the default value of None in the API instead + Self::Chat | Self::Reasoner => None, Self::Custom { max_output_tokens, .. } => *max_output_tokens,