From 7be57baef03dd8b8a4cb4ae2b643f2b553f2e3fa Mon Sep 17 00:00:00 2001 From: Bennet Bo Fenner Date: Tue, 24 Jun 2025 18:23:59 +0200 Subject: [PATCH] agent: Fix issue with Anthropic thinking models (#33317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cc @osyvokon We were seeing a bunch of errors in our backend when people were using Claude models with thinking enabled. In the logs we would see > an error occurred while interacting with the Anthropic API: invalid_request_error: messages.x.content.0.type: Expected `thinking` or `redacted_thinking`, but found `text`. When `thinking` is enabled, a final `assistant` message must start with a thinking block (preceeding the lastmost set of `tool_use` and `tool_result` blocks). We recommend you include thinking blocks from previous turns. To avoid this requirement, disable `thinking`. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking However, this issue did not occur frequently and was not easily reproducible. Turns out it was triggered by us not correctly handling [Redacted Thinking Blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#thinking-redaction). I could consistently reproduce this issue by including this magic string: `ANTHROPIC_MAGIC_STRING_TRIGGER_REDACTED_THINKING_46C9A13E193C177646C7398A98432ECCCE4C1253D5E2D82641AC0E52CC2876CB ` in the request, which forces `claude-3-7-sonnet` to emit redacted thinking blocks (confusingly the magic string does not seem to be working for `claude-sonnet-4`). As soon as we hit a tool call Anthropic would return an error. Thanks to @osyvokon for pointing me in the right direction 😄! 
Release Notes: - agent: Fixed an issue where Anthropic models would sometimes return an error when thinking was enabled --- crates/agent/src/thread.rs | 25 ++++++++++++++++++- crates/agent/src/thread_store.rs | 2 +- .../src/assistant_context.rs | 1 + crates/eval/src/instance.rs | 2 ++ crates/language_model/src/language_model.rs | 4 +++ crates/language_model/src/request.rs | 2 +- .../language_models/src/provider/anthropic.rs | 10 +++----- 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 7a08de7a0b54dccf792ce42b20666d1e19ca840a..a46aa9381ea45002495a8fc3d2ee408173d8b3d4 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -145,6 +145,10 @@ impl Message { } } + pub fn push_redacted_thinking(&mut self, data: String) { + self.segments.push(MessageSegment::RedactedThinking(data)); + } + pub fn push_text(&mut self, text: &str) { if let Some(MessageSegment::Text(segment)) = self.segments.last_mut() { segment.push_str(text); @@ -183,7 +187,7 @@ pub enum MessageSegment { text: String, signature: Option, }, - RedactedThinking(Vec), + RedactedThinking(String), } impl MessageSegment { @@ -1643,6 +1647,25 @@ impl Thread { }; } } + LanguageModelCompletionEvent::RedactedThinking { + data + } => { + thread.received_chunk(); + + if let Some(last_message) = thread.messages.last_mut() { + if last_message.role == Role::Assistant + && !thread.tool_use.has_tool_results(last_message.id) + { + last_message.push_redacted_thinking(data); + } else { + request_assistant_message_id = + Some(thread.insert_assistant_message( + vec![MessageSegment::RedactedThinking(data)], + cx, + )); + }; + } + } LanguageModelCompletionEvent::ToolUse(tool_use) => { let last_assistant_message_id = request_assistant_message_id .unwrap_or_else(|| { diff --git a/crates/agent/src/thread_store.rs b/crates/agent/src/thread_store.rs index 0582e67a5c4bb13c91a63877b9f17dccd3b18031..3c9150ff75f53241120b45c3418288e5033489e2 
100644 --- a/crates/agent/src/thread_store.rs +++ b/crates/agent/src/thread_store.rs @@ -731,7 +731,7 @@ pub enum SerializedMessageSegment { signature: Option, }, RedactedThinking { - data: Vec, + data: String, }, } diff --git a/crates/assistant_context/src/assistant_context.rs b/crates/assistant_context/src/assistant_context.rs index 1444701aac98e048e67468f420d0fa6512013824..a692502a9c390ec168aad2a6448c020428c0f5b1 100644 --- a/crates/assistant_context/src/assistant_context.rs +++ b/crates/assistant_context/src/assistant_context.rs @@ -2117,6 +2117,7 @@ impl AssistantContext { ); } } + LanguageModelCompletionEvent::RedactedThinking { .. } => {}, LanguageModelCompletionEvent::Text(mut chunk) => { if let Some(start) = thought_process_stack.pop() { let end = buffer.anchor_before(message_old_end_offset); diff --git a/crates/eval/src/instance.rs b/crates/eval/src/instance.rs index b6802537c65974cd7284159cdb3a7a379a2e2ce0..bb66a04e1f07f1f070d9c4c6536f260a05a11bb6 100644 --- a/crates/eval/src/instance.rs +++ b/crates/eval/src/instance.rs @@ -1030,6 +1030,7 @@ pub fn response_events_to_markdown( Ok(LanguageModelCompletionEvent::Thinking { text, .. }) => { thinking_buffer.push_str(text); } + Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => {} Ok(LanguageModelCompletionEvent::Stop(reason)) => { flush_buffers(&mut response, &mut text_buffer, &mut thinking_buffer); response.push_str(&format!("**Stop**: {:?}\n\n", reason)); @@ -1126,6 +1127,7 @@ impl ThreadDialog { // Skip these Ok(LanguageModelCompletionEvent::UsageUpdate(_)) + | Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) | Ok(LanguageModelCompletionEvent::StatusUpdate { .. }) | Ok(LanguageModelCompletionEvent::StartMessage { .. 
}) | Ok(LanguageModelCompletionEvent::Stop(_)) => {} diff --git a/crates/language_model/src/language_model.rs b/crates/language_model/src/language_model.rs index 9f165df301d2a378c678da2e3b8c6a5c3ffdb03e..f84357bd98936e478a826df9a4d0563f2c857e10 100644 --- a/crates/language_model/src/language_model.rs +++ b/crates/language_model/src/language_model.rs @@ -67,6 +67,9 @@ pub enum LanguageModelCompletionEvent { text: String, signature: Option, }, + RedactedThinking { + data: String, + }, ToolUse(LanguageModelToolUse), StartMessage { message_id: String, @@ -359,6 +362,7 @@ pub trait LanguageModel: Send + Sync { Ok(LanguageModelCompletionEvent::StartMessage { .. }) => None, Ok(LanguageModelCompletionEvent::Text(text)) => Some(Ok(text)), Ok(LanguageModelCompletionEvent::Thinking { .. }) => None, + Ok(LanguageModelCompletionEvent::RedactedThinking { .. }) => None, Ok(LanguageModelCompletionEvent::Stop(_)) => None, Ok(LanguageModelCompletionEvent::ToolUse(_)) => None, Ok(LanguageModelCompletionEvent::UsageUpdate(token_usage)) => { diff --git a/crates/language_model/src/request.rs b/crates/language_model/src/request.rs index 559d8e9111405cef4c1b039a7c8ffa945de1d950..451a62775e6331b139ef5c4da57e4d7d930af6f8 100644 --- a/crates/language_model/src/request.rs +++ b/crates/language_model/src/request.rs @@ -303,7 +303,7 @@ pub enum MessageContent { text: String, signature: Option, }, - RedactedThinking(Vec), + RedactedThinking(String), Image(LanguageModelImage), ToolUse(LanguageModelToolUse), ToolResult(LanguageModelToolResult), diff --git a/crates/language_models/src/provider/anthropic.rs b/crates/language_models/src/provider/anthropic.rs index 719975c1d5ef51976a8d592c89d0a887892b9849..d19348eed6dcf8c65c06c20bfe5cdab4a2b41ddd 100644 --- a/crates/language_models/src/provider/anthropic.rs +++ b/crates/language_models/src/provider/anthropic.rs @@ -554,9 +554,7 @@ pub fn into_anthropic( } MessageContent::RedactedThinking(data) => { if !data.is_empty() { - 
Some(anthropic::RequestContent::RedactedThinking { - data: String::from_utf8(data).ok()?, - }) + Some(anthropic::RequestContent::RedactedThinking { data }) } else { None } @@ -730,10 +728,8 @@ impl AnthropicEventMapper { signature: None, })] } - ResponseContent::RedactedThinking { .. } => { - // Redacted thinking is encrypted and not accessible to the user, see: - // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#suggestions-for-handling-redacted-thinking-in-production - Vec::new() + ResponseContent::RedactedThinking { data } => { + vec![Ok(LanguageModelCompletionEvent::RedactedThinking { data })] } ResponseContent::ToolUse { id, name, .. } => { self.tool_uses_by_index.insert(