diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 600956c379144d1fcf8d101bfc8b9f85b5e6d4e1..3b6e014d25900931a02926ec69d64f211590c99e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -460,8 +460,10 @@ jobs: RET_CODE=0 # Always check style [[ "${{ needs.style.result }}" != 'success' ]] && { RET_CODE=1; echo "style tests failed"; } - [[ "${{ needs.check_docs.result }}" != 'success' ]] && { RET_CODE=1; echo "docs checks failed"; } + if [[ "${{ needs.job_spec.outputs.run_docs }}" == "true" ]]; then + [[ "${{ needs.check_docs.result }}" != 'success' ]] && { RET_CODE=1; echo "docs checks failed"; } + fi # Only check test jobs if they were supposed to run if [[ "${{ needs.job_spec.outputs.run_tests }}" == "true" ]]; then [[ "${{ needs.workspace_hack.result }}" != 'success' ]] && { RET_CODE=1; echo "Workspace Hack failed"; } diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml index 6bc76fe2ded9f019c9f8c3feca9cb3b8f6d140bc..6eefdfea954c58919850baabe013d3d8676b54f9 100644 --- a/.github/workflows/eval.yml +++ b/.github/workflows/eval.yml @@ -7,7 +7,7 @@ on: pull_request: branches: - "**" - types: [opened, synchronize, reopened, labeled] + types: [synchronize, reopened, labeled] workflow_dispatch: @@ -25,16 +25,6 @@ env: ZED_EVAL_TELEMETRY: 1 jobs: - # This is a no-op job that we run to prevent GitHub from marking the workflow - # as failed for PRs that don't have the `run-eval` label. - noop: - name: No-op - runs-on: ubuntu-latest - if: github.repository_owner == 'zed-industries' - steps: - - name: No-op - run: echo "Nothing to do" - run_eval: timeout-minutes: 60 name: Run Agent Eval diff --git a/Cargo.lock b/Cargo.lock index dd0ff583c7fd2373dc24c0d28252b9e3a0e13f89..6047a8c25f2d497ce766512407fb851b29e54ecb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,7 @@ dependencies = [ "language", "language_model", "log", + "parking_lot", "paths", "postage", "pretty_assertions", diff --git a/assets/keymaps/default-linux.json b/assets/keymaps/default-linux.json index 0c4de0e0532f09f01aaf420a4e2803067c9e25b1..e21005816b84aca4e0c8e8c4e4bea2cb2003d4a6 100644 --- a/assets/keymaps/default-linux.json +++ b/assets/keymaps/default-linux.json @@ -244,6 +244,7 @@ "ctrl-alt-e": "agent::RemoveAllContext", "ctrl-shift-e": "project_panel::ToggleFocus", "ctrl-shift-enter": "agent::ContinueThread", + "super-ctrl-b": "agent::ToggleBurnMode", "alt-enter": "agent::ContinueWithBurnMode" } }, diff --git a/assets/keymaps/default-macos.json b/assets/keymaps/default-macos.json index 5bd99963bdb7f22ea1a63d0daa60a55b8d8baccd..51f4ffe23f255f31becbec25e15d20955ec058f9 100644 --- a/assets/keymaps/default-macos.json +++ b/assets/keymaps/default-macos.json @@ -283,6 +283,7 @@ "cmd->": "assistant::QuoteSelection", "cmd-alt-e": "agent::RemoveAllContext", "cmd-shift-e": "project_panel::ToggleFocus", + "cmd-ctrl-b": "agent::ToggleBurnMode", "cmd-shift-enter": "agent::ContinueThread", "alt-enter": "agent::ContinueWithBurnMode" } @@ -587,6 +588,7 @@ "alt-cmd-o": ["projects::OpenRecent", { "create_new_window": false }], "ctrl-cmd-o": ["projects::OpenRemote", { "from_existing_connection": false, "create_new_window": false }], "ctrl-cmd-shift-o": ["projects::OpenRemote", { "from_existing_connection": true, "create_new_window": false }], + "cmd-ctrl-b": "branches::OpenRecent", "ctrl-~": "workspace::NewTerminal", "cmd-s": "workspace::Save", "cmd-k s": "workspace::SaveWithoutFormat", @@ -604,6 +606,7 @@ "cmd-8": ["workspace::ActivatePane", 7], "cmd-9": ["workspace::ActivatePane", 8], 
"cmd-b": "workspace::ToggleLeftDock", + "cmd-alt-b": "workspace::ToggleRightDock", "cmd-r": "workspace::ToggleRightDock", "cmd-j": "workspace::ToggleBottomDock", "alt-cmd-y": "workspace::CloseAllDocks", diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index f320e58d002f219186082ea3375225b10059b806..135363ab6552a9b6737dfce0e0c95ced3237ae5c 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -72,6 +72,7 @@ gpui = { workspace = true, "features" = ["test-support"] } indoc.workspace = true language = { workspace = true, "features" = ["test-support"] } language_model = { workspace = true, "features" = ["test-support"] } +parking_lot.workspace = true pretty_assertions.workspace = true project = { workspace = true, features = ["test-support"] } workspace = { workspace = true, features = ["test-support"] } diff --git a/crates/agent/src/thread.rs b/crates/agent/src/thread.rs index 33b9209f0ccd199d28d7aad4f19b81286eb7dfac..68624d7c3b9d304c5b27c79f1bcdb072117b7dcd 100644 --- a/crates/agent/src/thread.rs +++ b/crates/agent/src/thread.rs @@ -39,12 +39,20 @@ use proto::Plan; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use settings::Settings; -use std::{io::Write, ops::Range, sync::Arc, time::Instant}; +use std::{ + io::Write, + ops::Range, + sync::Arc, + time::{Duration, Instant}, +}; use thiserror::Error; use util::{ResultExt as _, post_inc}; use uuid::Uuid; use zed_llm_client::{CompletionIntent, CompletionRequestStatus, UsageLimit}; +const MAX_RETRY_ATTEMPTS: u8 = 3; +const BASE_RETRY_DELAY_SECS: u64 = 5; + #[derive( Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Serialize, Deserialize, JsonSchema, )] @@ -118,6 +126,7 @@ pub struct Message { pub loaded_context: LoadedContext, pub creases: Vec, pub is_hidden: bool, + pub ui_only: bool, } impl Message { @@ -364,6 +373,7 @@ pub struct Thread { exceeded_window_error: Option, tool_use_limit_reached: bool, feedback: Option, + retry_state: Option, message_feedback: HashMap, last_auto_capture_at: Option, last_received_chunk_at: Option, @@ -375,6 +385,13 @@ pub struct Thread { profile: AgentProfile, } +#[derive(Clone, Debug)] +struct RetryState { + attempt: u8, + max_attempts: u8, + intent: CompletionIntent, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub enum ThreadSummary { Pending, @@ -456,6 +473,7 @@ impl Thread { exceeded_window_error: None, tool_use_limit_reached: false, feedback: None, + retry_state: None, message_feedback: HashMap::default(), last_auto_capture_at: None, last_received_chunk_at: None, @@ -522,6 +540,7 @@ impl Thread { detailed_summary_tx, detailed_summary_rx, completion_mode, + retry_state: None, messages: serialized .messages .into_iter() @@ -557,6 +576,7 @@ impl Thread { }) .collect(), is_hidden: message.is_hidden, + ui_only: false, // UI-only messages are not persisted }) .collect(), next_message_id, @@ -1041,6 +1061,7 @@ impl Thread { loaded_context, creases, is_hidden, + ui_only: false, }); self.touch_updated_at(); cx.emit(ThreadEvent::MessageAdded(id)); @@ -1130,6 +1151,7 @@ impl Thread { updated_at: this.updated_at(), messages: this .messages() + .filter(|message| !message.ui_only) .map(|message| SerializedMessage { id: message.id, role: message.role, @@ -1228,7 +1250,7 @@ impl Thread { let request = self.to_completion_request(model.clone(), intent, cx); - self.stream_completion(request, model, window, cx); + self.stream_completion(request, model, intent, window, cx); } pub fn used_tools_since_last_user_message(&self) -> bool { @@ -1303,6 +1325,11 @@ impl Thread { let mut 
message_ix_to_cache = None; for message in &self.messages { + // ui_only messages are for the UI only, not for the model + if message.ui_only { + continue; + } + let mut request_message = LanguageModelRequestMessage { role: message.role, content: Vec::new(), @@ -1457,6 +1484,7 @@ impl Thread { &mut self, request: LanguageModelRequest, model: Arc, + intent: CompletionIntent, window: Option, cx: &mut Context, ) { @@ -1770,58 +1798,64 @@ impl Thread { }; let result = stream_completion.await; + let mut retry_scheduled = false; thread .update(cx, |thread, cx| { thread.finalize_pending_checkpoint(cx); match result.as_ref() { - Ok(stop_reason) => match stop_reason { - StopReason::ToolUse => { - let tool_uses = thread.use_pending_tools(window, model.clone(), cx); - cx.emit(ThreadEvent::UsePendingTools { tool_uses }); - } - StopReason::EndTurn | StopReason::MaxTokens => { - thread.project.update(cx, |project, cx| { - project.set_agent_location(None, cx); - }); - } - StopReason::Refusal => { - thread.project.update(cx, |project, cx| { - project.set_agent_location(None, cx); - }); - - // Remove the turn that was refused. - // - // https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/handle-streaming-refusals#reset-context-after-refusal - { - let mut messages_to_remove = Vec::new(); + Ok(stop_reason) => { + match stop_reason { + StopReason::ToolUse => { + let tool_uses = thread.use_pending_tools(window, model.clone(), cx); + cx.emit(ThreadEvent::UsePendingTools { tool_uses }); + } + StopReason::EndTurn | StopReason::MaxTokens => { + thread.project.update(cx, |project, cx| { + project.set_agent_location(None, cx); + }); + } + StopReason::Refusal => { + thread.project.update(cx, |project, cx| { + project.set_agent_location(None, cx); + }); - for (ix, message) in thread.messages.iter().enumerate().rev() { - messages_to_remove.push(message.id); + // Remove the turn that was refused. + // + // https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/handle-streaming-refusals#reset-context-after-refusal + { + let mut messages_to_remove = Vec::new(); - if message.role == Role::User { - if ix == 0 { - break; - } + for (ix, message) in thread.messages.iter().enumerate().rev() { + messages_to_remove.push(message.id); - if let Some(prev_message) = thread.messages.get(ix - 1) { - if prev_message.role == Role::Assistant { + if message.role == Role::User { + if ix == 0 { break; } + + if let Some(prev_message) = thread.messages.get(ix - 1) { + if prev_message.role == Role::Assistant { + break; + } + } } } - } - for message_id in messages_to_remove { - thread.delete_message(message_id, cx); + for message_id in messages_to_remove { + thread.delete_message(message_id, cx); + } } - } - cx.emit(ThreadEvent::ShowError(ThreadError::Message { - header: "Language model refusal".into(), - message: "Model refused to generate content for safety reasons.".into(), - })); + cx.emit(ThreadEvent::ShowError(ThreadError::Message { + header: "Language model refusal".into(), + message: "Model refused to generate content for safety reasons.".into(), + })); + } } + + // We successfully completed, so cancel any remaining retries. + thread.retry_state = None; }, Err(error) => { thread.project.update(cx, |project, cx| { @@ -1859,17 +1893,58 @@ impl Thread { }); cx.notify(); } - LanguageModelKnownError::RateLimitExceeded { .. } => { - // In the future we will report the error to the user, wait retry_after, and then retry. 
- emit_generic_error(error, cx); + LanguageModelKnownError::RateLimitExceeded { retry_after } => { + let provider_name = model.provider_name(); + let error_message = format!( + "{}'s API rate limit exceeded", + provider_name.0.as_ref() + ); + + thread.handle_rate_limit_error( + &error_message, + *retry_after, + model.clone(), + intent, + window, + cx, + ); + retry_scheduled = true; } LanguageModelKnownError::Overloaded => { - // In the future we will wait and then retry, up to N times. - emit_generic_error(error, cx); + let provider_name = model.provider_name(); + let error_message = format!( + "{}'s API servers are overloaded right now", + provider_name.0.as_ref() + ); + + retry_scheduled = thread.handle_retryable_error( + &error_message, + model.clone(), + intent, + window, + cx, + ); + if !retry_scheduled { + emit_generic_error(error, cx); + } } LanguageModelKnownError::ApiInternalServerError => { - // In the future we will retry the request, but only once. - emit_generic_error(error, cx); + let provider_name = model.provider_name(); + let error_message = format!( + "{}'s API server reported an internal server error", + provider_name.0.as_ref() + ); + + retry_scheduled = thread.handle_retryable_error( + &error_message, + model.clone(), + intent, + window, + cx, + ); + if !retry_scheduled { + emit_generic_error(error, cx); + } } LanguageModelKnownError::ReadResponseError(_) | LanguageModelKnownError::DeserializeResponse(_) | @@ -1882,11 +1957,15 @@ impl Thread { emit_generic_error(error, cx); } - thread.cancel_last_completion(window, cx); + if !retry_scheduled { + thread.cancel_last_completion(window, cx); + } } } - cx.emit(ThreadEvent::Stopped(result.map_err(Arc::new))); + if !retry_scheduled { + cx.emit(ThreadEvent::Stopped(result.map_err(Arc::new))); + } if let Some((request_callback, (request, response_events))) = thread .request_callback @@ -2002,6 +2081,146 @@ impl Thread { }); } + fn handle_rate_limit_error( + &mut self, + error_message: &str, + retry_after: Duration, + model: Arc, + intent: CompletionIntent, + window: Option, + cx: &mut Context, + ) { + // For rate limit errors, we only retry once with the specified duration + let retry_message = format!( + "{error_message}. 
Retrying in {} seconds…", + retry_after.as_secs() + ); + + // Add a UI-only message instead of a regular message + let id = self.next_message_id.post_inc(); + self.messages.push(Message { + id, + role: Role::System, + segments: vec![MessageSegment::Text(retry_message)], + loaded_context: LoadedContext::default(), + creases: Vec::new(), + is_hidden: false, + ui_only: true, + }); + cx.emit(ThreadEvent::MessageAdded(id)); + // Schedule the retry + let thread_handle = cx.entity().downgrade(); + + cx.spawn(async move |_thread, cx| { + cx.background_executor().timer(retry_after).await; + + thread_handle + .update(cx, |thread, cx| { + // Retry the completion + thread.send_to_model(model, intent, window, cx); + }) + .log_err(); + }) + .detach(); + } + + fn handle_retryable_error( + &mut self, + error_message: &str, + model: Arc, + intent: CompletionIntent, + window: Option, + cx: &mut Context, + ) -> bool { + self.handle_retryable_error_with_delay(error_message, None, model, intent, window, cx) + } + + fn handle_retryable_error_with_delay( + &mut self, + error_message: &str, + custom_delay: Option, + model: Arc, + intent: CompletionIntent, + window: Option, + cx: &mut Context, + ) -> bool { + let retry_state = self.retry_state.get_or_insert(RetryState { + attempt: 0, + max_attempts: MAX_RETRY_ATTEMPTS, + intent, + }); + + retry_state.attempt += 1; + let attempt = retry_state.attempt; + let max_attempts = retry_state.max_attempts; + let intent = retry_state.intent; + + if attempt <= max_attempts { + // Use custom delay if provided (e.g., from rate limit), otherwise exponential backoff + let delay = if let Some(custom_delay) = custom_delay { + custom_delay + } else { + let delay_secs = BASE_RETRY_DELAY_SECS * 2u64.pow((attempt - 1) as u32); + Duration::from_secs(delay_secs) + }; + + // Add a transient message to inform the user + let delay_secs = delay.as_secs(); + let retry_message = format!( + "{}. Retrying (attempt {} of {}) in {} seconds...", + error_message, attempt, max_attempts, delay_secs + ); + + // Add a UI-only message instead of a regular message + let id = self.next_message_id.post_inc(); + self.messages.push(Message { + id, + role: Role::System, + segments: vec![MessageSegment::Text(retry_message)], + loaded_context: LoadedContext::default(), + creases: Vec::new(), + is_hidden: false, + ui_only: true, + }); + cx.emit(ThreadEvent::MessageAdded(id)); + + // Schedule the retry + let thread_handle = cx.entity().downgrade(); + + cx.spawn(async move |_thread, cx| { + cx.background_executor().timer(delay).await; + + thread_handle + .update(cx, |thread, cx| { + // Retry the completion + thread.send_to_model(model, intent, window, cx); + }) + .log_err(); + }) + .detach(); + + true + } else { + // Max retries exceeded + self.retry_state = None; + + let notification_text = if max_attempts == 1 { + "Failed after retrying.".into() + } else { + format!("Failed after retrying {} times.", max_attempts).into() + }; + + // Stop generating since we're giving up on retrying. 
+ self.pending_completions.clear(); + + cx.emit(ThreadEvent::RetriesFailed { + message: notification_text, + }); + + false + } + } + pub fn start_generating_detailed_summary_if_needed( &mut self, thread_store: WeakEntity, @@ -2354,7 +2573,9 @@ impl Thread { window: Option, cx: &mut Context, ) -> bool { - let mut canceled = self.pending_completions.pop().is_some(); + let mut canceled = self.pending_completions.pop().is_some() || self.retry_state.is_some(); + + self.retry_state = None; for pending_tool_use in self.tool_use.cancel_pending() { canceled = true; @@ -2943,6 +3164,9 @@ pub enum ThreadEvent { CancelEditing, CompletionCanceled, ProfileChanged, + RetriesFailed { + message: SharedString, + }, } impl EventEmitter for Thread {} @@ -3038,16 +3262,28 @@ mod tests { use crate::{ context::load_context, context_store::ContextStore, thread_store, thread_store::ThreadStore, }; + + // Test-specific constants + const TEST_RATE_LIMIT_RETRY_SECS: u64 = 30; use agent_settings::{AgentProfileId, AgentSettings, LanguageModelParameters}; use assistant_tool::ToolRegistry; + use futures::StreamExt; + use futures::future::BoxFuture; + use futures::stream::BoxStream; use gpui::TestAppContext; use icons::IconName; use language_model::fake_provider::{FakeLanguageModel, FakeLanguageModelProvider}; + use language_model::{ + LanguageModelCompletionError, LanguageModelName, LanguageModelProviderId, + LanguageModelProviderName, LanguageModelToolChoice, + }; + use parking_lot::Mutex; use project::{FakeFs, Project}; use prompt_store::PromptBuilder; use serde_json::json; use settings::{Settings, SettingsStore}; use std::sync::Arc; + use std::time::Duration; use theme::ThemeSettings; use util::path; use workspace::Workspace; @@ -3822,6 +4058,1183 @@ fn main() {{ } } + // Helper to create a model that returns errors + enum TestError { + Overloaded, + InternalServerError, + } + + struct ErrorInjector { + inner: Arc, + error_type: TestError, + } + + impl ErrorInjector { + fn new(error_type: TestError) -> Self { + Self { + inner: Arc::new(FakeLanguageModel::default()), + error_type, + } + } + } + + impl LanguageModel for ErrorInjector { + fn id(&self) -> LanguageModelId { + self.inner.id() + } + + fn name(&self) -> LanguageModelName { + self.inner.name() + } + + fn provider_id(&self) -> LanguageModelProviderId { + self.inner.provider_id() + } + + fn provider_name(&self) -> LanguageModelProviderName { + self.inner.provider_name() + } + + fn supports_tools(&self) -> bool { + self.inner.supports_tools() + } + + fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool { + self.inner.supports_tool_choice(choice) + } + + fn supports_images(&self) -> bool { + self.inner.supports_images() + } + + fn telemetry_id(&self) -> String { + self.inner.telemetry_id() + } + + fn max_token_count(&self) -> u64 { + self.inner.max_token_count() + } + + fn count_tokens( + &self, + request: LanguageModelRequest, + cx: &App, + ) -> BoxFuture<'static, Result> { + self.inner.count_tokens(request, cx) + } + + fn stream_completion( + &self, + _request: LanguageModelRequest, + _cx: &AsyncApp, + ) -> BoxFuture< + 'static, + Result< + BoxStream< + 'static, + Result, + >, + LanguageModelCompletionError, + >, + > { + let error = match self.error_type { + TestError::Overloaded => LanguageModelCompletionError::Overloaded, + TestError::InternalServerError => { + LanguageModelCompletionError::ApiInternalServerError + } + }; + async move { + let stream = futures::stream::once(async move { Err(error) }); + Ok(stream.boxed()) + } + .boxed() + } + 
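+    // A hedged aside, not in the original diff: the retryable-error path above
+    // backs off exponentially from BASE_RETRY_DELAY_SECS, so with
+    // MAX_RETRY_ATTEMPTS = 3 the scheduled waits are 5s, 10s, and 20s. The same
+    // schedule, extracted into a hypothetical helper purely for clarity:
+    //
+    //     fn retry_delay(attempt: u8) -> std::time::Duration {
+    //         // `attempt` is 1-based, matching RetryState::attempt.
+    //         std::time::Duration::from_secs(BASE_RETRY_DELAY_SECS * 2u64.pow(u32::from(attempt) - 1))
+    //     }
+    //
+    // The tests below advance the fake executor's clock by exactly these amounts.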
+ fn as_fake(&self) -> &FakeLanguageModel { + &self.inner + } + } + + #[gpui::test] + async fn test_retry_on_overloaded_error(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create model that returns overloaded error + let model = Arc::new(ErrorInjector::new(TestError::Overloaded)); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Start completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + + cx.run_until_parked(); + + thread.read_with(cx, |thread, _| { + assert!(thread.retry_state.is_some(), "Should have retry state"); + let retry_state = thread.retry_state.as_ref().unwrap(); + assert_eq!(retry_state.attempt, 1, "Should be first retry attempt"); + assert_eq!( + retry_state.max_attempts, MAX_RETRY_ATTEMPTS, + "Should have default max attempts" + ); + }); + + // Check that a retry message was added + thread.read_with(cx, |thread, _| { + let mut messages = thread.messages(); + assert!( + messages.any(|msg| { + msg.role == Role::System + && msg.ui_only + && msg.segments.iter().any(|seg| { + if let MessageSegment::Text(text) = seg { + text.contains("overloaded") + && text + .contains(&format!("attempt 1 of {}", MAX_RETRY_ATTEMPTS)) + } else { + false + } + }) + }), + "Should have added a system retry message" + ); + }); + + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + + assert_eq!(retry_count, 1, "Should have one retry message"); + } + + #[gpui::test] + async fn test_retry_on_internal_server_error(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create model that returns internal server error + let model = Arc::new(ErrorInjector::new(TestError::InternalServerError)); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Start completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + + cx.run_until_parked(); + + // Check retry state on thread + thread.read_with(cx, |thread, _| { + assert!(thread.retry_state.is_some(), "Should have retry state"); + let retry_state = thread.retry_state.as_ref().unwrap(); + assert_eq!(retry_state.attempt, 1, "Should be first retry attempt"); + assert_eq!( + retry_state.max_attempts, MAX_RETRY_ATTEMPTS, + "Should have correct max attempts" + ); + }); + + // Check that a retry message was added with provider name + thread.read_with(cx, |thread, _| { + let mut messages = thread.messages(); + assert!( + messages.any(|msg| { + msg.role == Role::System + && msg.ui_only + && msg.segments.iter().any(|seg| { + if let MessageSegment::Text(text) = seg { + text.contains("internal") + && text.contains("Fake") + && text + .contains(&format!("attempt 1 of {}", MAX_RETRY_ATTEMPTS)) + } else { + false + } + }) + }), + "Should have added a system 
retry message with provider name" + ); + }); + + // Count retry messages + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + + assert_eq!(retry_count, 1, "Should have one retry message"); + } + + #[gpui::test] + async fn test_exponential_backoff_on_retries(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create model that returns overloaded error + let model = Arc::new(ErrorInjector::new(TestError::Overloaded)); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Track retry events and completion count + // Track completion events + let completion_count = Arc::new(Mutex::new(0)); + let completion_count_clone = completion_count.clone(); + + let _subscription = thread.update(cx, |_, cx| { + cx.subscribe(&thread, move |_, _, event: &ThreadEvent, _| { + if let ThreadEvent::NewRequest = event { + *completion_count_clone.lock() += 1; + } + }) + }); + + // First attempt + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + cx.run_until_parked(); + + // Should have scheduled first retry - count retry messages + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + assert_eq!(retry_count, 1, "Should have scheduled first retry"); + + // Check retry state + thread.read_with(cx, |thread, _| { + assert!(thread.retry_state.is_some(), "Should have retry state"); + let retry_state = thread.retry_state.as_ref().unwrap(); + assert_eq!(retry_state.attempt, 1, "Should be first retry attempt"); + }); + + // Advance clock for first retry + cx.executor() + .advance_clock(Duration::from_secs(BASE_RETRY_DELAY_SECS)); + cx.run_until_parked(); + + // Should have scheduled second retry - count retry messages + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + assert_eq!(retry_count, 2, "Should have scheduled second retry"); + + // Check retry state updated + thread.read_with(cx, |thread, _| { + assert!(thread.retry_state.is_some(), "Should have retry state"); + let retry_state = thread.retry_state.as_ref().unwrap(); + assert_eq!(retry_state.attempt, 2, "Should be second retry attempt"); + assert_eq!( + retry_state.max_attempts, MAX_RETRY_ATTEMPTS, + "Should have correct max attempts" + ); + }); + + // Advance clock for second retry (exponential backoff) + cx.executor() + .advance_clock(Duration::from_secs(BASE_RETRY_DELAY_SECS * 2)); + cx.run_until_parked(); + + // Should have scheduled third retry + // Count all retry messages now + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { 
+ if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + assert_eq!( + retry_count, MAX_RETRY_ATTEMPTS as usize, + "Should have scheduled third retry" + ); + + // Check retry state updated + thread.read_with(cx, |thread, _| { + assert!(thread.retry_state.is_some(), "Should have retry state"); + let retry_state = thread.retry_state.as_ref().unwrap(); + assert_eq!( + retry_state.attempt, MAX_RETRY_ATTEMPTS, + "Should be at max retry attempt" + ); + assert_eq!( + retry_state.max_attempts, MAX_RETRY_ATTEMPTS, + "Should have correct max attempts" + ); + }); + + // Advance clock for third retry (exponential backoff) + cx.executor() + .advance_clock(Duration::from_secs(BASE_RETRY_DELAY_SECS * 4)); + cx.run_until_parked(); + + // No more retries should be scheduled after clock was advanced. + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + assert_eq!( + retry_count, MAX_RETRY_ATTEMPTS as usize, + "Should not exceed max retries" + ); + + // Final completion count should be initial + max retries + assert_eq!( + *completion_count.lock(), + (MAX_RETRY_ATTEMPTS + 1) as usize, + "Should have made initial + max retry attempts" + ); + } + + #[gpui::test] + async fn test_max_retries_exceeded(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create model that returns overloaded error + let model = Arc::new(ErrorInjector::new(TestError::Overloaded)); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Track events + let retries_failed = Arc::new(Mutex::new(false)); + let retries_failed_clone = retries_failed.clone(); + + let _subscription = thread.update(cx, |_, cx| { + cx.subscribe(&thread, move |_, _, event: &ThreadEvent, _| { + if let ThreadEvent::RetriesFailed { .. 
} = event { + *retries_failed_clone.lock() = true; + } + }) + }); + + // Start initial completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + cx.run_until_parked(); + + // Advance through all retries + for i in 0..MAX_RETRY_ATTEMPTS { + let delay = if i == 0 { + BASE_RETRY_DELAY_SECS + } else { + BASE_RETRY_DELAY_SECS * 2u64.pow(i as u32 - 1) + }; + cx.executor().advance_clock(Duration::from_secs(delay)); + cx.run_until_parked(); + } + + // After the 3rd retry is scheduled, we need to wait for it to execute and fail + // The 3rd retry has a delay of BASE_RETRY_DELAY_SECS * 4 (20 seconds) + let final_delay = BASE_RETRY_DELAY_SECS * 2u64.pow((MAX_RETRY_ATTEMPTS - 1) as u32); + cx.executor() + .advance_clock(Duration::from_secs(final_delay)); + cx.run_until_parked(); + + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + .count() + }); + + // After max retries, should emit RetriesFailed event + assert_eq!( + retry_count, MAX_RETRY_ATTEMPTS as usize, + "Should have attempted max retries" + ); + assert!( + *retries_failed.lock(), + "Should emit RetriesFailed event after max retries exceeded" + ); + + // Retry state should be cleared + thread.read_with(cx, |thread, _| { + assert!( + thread.retry_state.is_none(), + "Retry state should be cleared after max retries" + ); + + // Verify we have the expected number of retry messages + let retry_messages = thread + .messages + .iter() + .filter(|msg| msg.ui_only && msg.role == Role::System) + .count(); + assert_eq!( + retry_messages, MAX_RETRY_ATTEMPTS as usize, + "Should have one retry message per attempt" + ); + }); + } + + #[gpui::test] + async fn test_retry_message_removed_on_retry(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // We'll use a wrapper to switch behavior after first failure + struct RetryTestModel { + inner: Arc, + failed_once: Arc>, + } + + impl LanguageModel for RetryTestModel { + fn id(&self) -> LanguageModelId { + self.inner.id() + } + + fn name(&self) -> LanguageModelName { + self.inner.name() + } + + fn provider_id(&self) -> LanguageModelProviderId { + self.inner.provider_id() + } + + fn provider_name(&self) -> LanguageModelProviderName { + self.inner.provider_name() + } + + fn supports_tools(&self) -> bool { + self.inner.supports_tools() + } + + fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool { + self.inner.supports_tool_choice(choice) + } + + fn supports_images(&self) -> bool { + self.inner.supports_images() + } + + fn telemetry_id(&self) -> String { + self.inner.telemetry_id() + } + + fn max_token_count(&self) -> u64 { + self.inner.max_token_count() + } + + fn count_tokens( + &self, + request: LanguageModelRequest, + cx: &App, + ) -> BoxFuture<'static, Result> { + self.inner.count_tokens(request, cx) + } + + fn stream_completion( + &self, + request: LanguageModelRequest, + cx: &AsyncApp, + ) -> BoxFuture< + 'static, + Result< + BoxStream< + 'static, + Result, + >, + LanguageModelCompletionError, + >, + > { + if !*self.failed_once.lock() { + *self.failed_once.lock() = true; + // Return error on first attempt + let stream = 
futures::stream::once(async move { + Err(LanguageModelCompletionError::Overloaded) + }); + async move { Ok(stream.boxed()) }.boxed() + } else { + // Succeed on retry + self.inner.stream_completion(request, cx) + } + } + + fn as_fake(&self) -> &FakeLanguageModel { + &self.inner + } + } + + let model = Arc::new(RetryTestModel { + inner: Arc::new(FakeLanguageModel::default()), + failed_once: Arc::new(Mutex::new(false)), + }); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Track message deletions + // Track when retry completes successfully + let retry_completed = Arc::new(Mutex::new(false)); + let retry_completed_clone = retry_completed.clone(); + + let _subscription = thread.update(cx, |_, cx| { + cx.subscribe(&thread, move |_, _, event: &ThreadEvent, _| { + if let ThreadEvent::StreamedCompletion = event { + *retry_completed_clone.lock() = true; + } + }) + }); + + // Start completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + cx.run_until_parked(); + + // Get the retry message ID + let retry_message_id = thread.read_with(cx, |thread, _| { + thread + .messages() + .find(|msg| msg.role == Role::System && msg.ui_only) + .map(|msg| msg.id) + .expect("Should have a retry message") + }); + + // Wait for retry + cx.executor() + .advance_clock(Duration::from_secs(BASE_RETRY_DELAY_SECS)); + cx.run_until_parked(); + + // Stream some successful content + let fake_model = model.as_fake(); + // After the retry, there should be a new pending completion + let pending = fake_model.pending_completions(); + assert!( + !pending.is_empty(), + "Should have a pending completion after retry" + ); + fake_model.stream_completion_response(&pending[0], "Success!"); + fake_model.end_completion_stream(&pending[0]); + cx.run_until_parked(); + + // Check that the retry completed successfully + assert!( + *retry_completed.lock(), + "Retry should have completed successfully" + ); + + // Retry message should still exist but be marked as ui_only + thread.read_with(cx, |thread, _| { + let retry_msg = thread + .message(retry_message_id) + .expect("Retry message should still exist"); + assert!(retry_msg.ui_only, "Retry message should be ui_only"); + assert_eq!( + retry_msg.role, + Role::System, + "Retry message should have System role" + ); + }); + } + + #[gpui::test] + async fn test_successful_completion_clears_retry_state(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create a model that fails once then succeeds + struct FailOnceModel { + inner: Arc, + failed_once: Arc>, + } + + impl LanguageModel for FailOnceModel { + fn id(&self) -> LanguageModelId { + self.inner.id() + } + + fn name(&self) -> LanguageModelName { + self.inner.name() + } + + fn provider_id(&self) -> LanguageModelProviderId { + self.inner.provider_id() + } + + fn provider_name(&self) -> LanguageModelProviderName { + self.inner.provider_name() + } + + fn supports_tools(&self) -> bool { + self.inner.supports_tools() + } + + fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool { + self.inner.supports_tool_choice(choice) + } + + fn supports_images(&self) -> bool { + self.inner.supports_images() + } + + fn telemetry_id(&self) -> String { + self.inner.telemetry_id() + } + + fn 
max_token_count(&self) -> u64 { + self.inner.max_token_count() + } + + fn count_tokens( + &self, + request: LanguageModelRequest, + cx: &App, + ) -> BoxFuture<'static, Result> { + self.inner.count_tokens(request, cx) + } + + fn stream_completion( + &self, + request: LanguageModelRequest, + cx: &AsyncApp, + ) -> BoxFuture< + 'static, + Result< + BoxStream< + 'static, + Result, + >, + LanguageModelCompletionError, + >, + > { + if !*self.failed_once.lock() { + *self.failed_once.lock() = true; + // Return error on first attempt + let stream = futures::stream::once(async move { + Err(LanguageModelCompletionError::Overloaded) + }); + async move { Ok(stream.boxed()) }.boxed() + } else { + // Succeed on retry + self.inner.stream_completion(request, cx) + } + } + } + + let fail_once_model = Arc::new(FailOnceModel { + inner: Arc::new(FakeLanguageModel::default()), + failed_once: Arc::new(Mutex::new(false)), + }); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message( + "Test message", + ContextLoadResult::default(), + None, + vec![], + cx, + ); + }); + + // Start completion with fail-once model + thread.update(cx, |thread, cx| { + thread.send_to_model( + fail_once_model.clone(), + CompletionIntent::UserPrompt, + None, + cx, + ); + }); + + cx.run_until_parked(); + + // Verify retry state exists after first failure + thread.read_with(cx, |thread, _| { + assert!( + thread.retry_state.is_some(), + "Should have retry state after failure" + ); + }); + + // Wait for retry delay + cx.executor() + .advance_clock(Duration::from_secs(BASE_RETRY_DELAY_SECS)); + cx.run_until_parked(); + + // The retry should now use our FailOnceModel which should succeed + // We need to help the FakeLanguageModel complete the stream + let inner_fake = fail_once_model.inner.clone(); + + // Wait a bit for the retry to start + cx.run_until_parked(); + + // Check for pending completions and complete them + if let Some(pending) = inner_fake.pending_completions().first() { + inner_fake.stream_completion_response(pending, "Success!"); + inner_fake.end_completion_stream(pending); + } + cx.run_until_parked(); + + thread.read_with(cx, |thread, _| { + assert!( + thread.retry_state.is_none(), + "Retry state should be cleared after successful completion" + ); + + let has_assistant_message = thread + .messages + .iter() + .any(|msg| msg.role == Role::Assistant && !msg.ui_only); + assert!( + has_assistant_message, + "Should have an assistant message after successful retry" + ); + }); + } + + #[gpui::test] + async fn test_rate_limit_retry_single_attempt(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create a model that returns rate limit error with retry_after + struct RateLimitModel { + inner: Arc, + } + + impl LanguageModel for RateLimitModel { + fn id(&self) -> LanguageModelId { + self.inner.id() + } + + fn name(&self) -> LanguageModelName { + self.inner.name() + } + + fn provider_id(&self) -> LanguageModelProviderId { + self.inner.provider_id() + } + + fn provider_name(&self) -> LanguageModelProviderName { + self.inner.provider_name() + } + + fn supports_tools(&self) -> bool { + self.inner.supports_tools() + } + + fn supports_tool_choice(&self, choice: LanguageModelToolChoice) -> bool { + self.inner.supports_tool_choice(choice) + } + + fn supports_images(&self) -> bool { + self.inner.supports_images() + } + + fn telemetry_id(&self) -> String 
{ + self.inner.telemetry_id() + } + + fn max_token_count(&self) -> u64 { + self.inner.max_token_count() + } + + fn count_tokens( + &self, + request: LanguageModelRequest, + cx: &App, + ) -> BoxFuture<'static, Result> { + self.inner.count_tokens(request, cx) + } + + fn stream_completion( + &self, + _request: LanguageModelRequest, + _cx: &AsyncApp, + ) -> BoxFuture< + 'static, + Result< + BoxStream< + 'static, + Result, + >, + LanguageModelCompletionError, + >, + > { + async move { + let stream = futures::stream::once(async move { + Err(LanguageModelCompletionError::RateLimitExceeded { + retry_after: Duration::from_secs(TEST_RATE_LIMIT_RETRY_SECS), + }) + }); + Ok(stream.boxed()) + } + .boxed() + } + + fn as_fake(&self) -> &FakeLanguageModel { + &self.inner + } + } + + let model = Arc::new(RateLimitModel { + inner: Arc::new(FakeLanguageModel::default()), + }); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Start completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + + cx.run_until_parked(); + + let retry_count = thread.update(cx, |thread, _| { + thread + .messages + .iter() + .filter(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("rate limit exceeded") + } else { + false + } + }) + }) + .count() + }); + assert_eq!(retry_count, 1, "Should have scheduled one retry"); + + thread.read_with(cx, |thread, _| { + assert!( + thread.retry_state.is_none(), + "Rate limit errors should not set retry_state" + ); + }); + + // Verify we have one retry message + thread.read_with(cx, |thread, _| { + let retry_messages = thread + .messages + .iter() + .filter(|msg| { + msg.ui_only + && msg.segments.iter().any(|seg| { + if let MessageSegment::Text(text) = seg { + text.contains("rate limit exceeded") + } else { + false + } + }) + }) + .count(); + assert_eq!( + retry_messages, 1, + "Should have one rate limit retry message" + ); + }); + + // Check that retry message doesn't include attempt count + thread.read_with(cx, |thread, _| { + let retry_message = thread + .messages + .iter() + .find(|msg| msg.role == Role::System && msg.ui_only) + .expect("Should have a retry message"); + + // Check that the message doesn't contain attempt count + if let Some(MessageSegment::Text(text)) = retry_message.segments.first() { + assert!( + !text.contains("attempt"), + "Rate limit retry message should not contain attempt count" + ); + assert!( + text.contains(&format!( + "Retrying in {} seconds", + TEST_RATE_LIMIT_RETRY_SECS + )), + "Rate limit retry message should contain retry delay" + ); + } + }); + } + + #[gpui::test] + async fn test_ui_only_messages_not_sent_to_model(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, model) = setup_test_environment(cx, project.clone()).await; + + // Insert a regular user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Insert a UI-only message (like our retry notifications) + thread.update(cx, |thread, cx| { + let id = thread.next_message_id.post_inc(); + thread.messages.push(Message { + id, + role: Role::System, + segments: vec![MessageSegment::Text( + "This is a UI-only message that should not be sent to the model".to_string(), + )], + loaded_context: 
LoadedContext::default(), + creases: Vec::new(), + is_hidden: true, + ui_only: true, + }); + cx.emit(ThreadEvent::MessageAdded(id)); + }); + + // Insert another regular message + thread.update(cx, |thread, cx| { + thread.insert_user_message( + "How are you?", + ContextLoadResult::default(), + None, + vec![], + cx, + ); + }); + + // Generate the completion request + let request = thread.update(cx, |thread, cx| { + thread.to_completion_request(model.clone(), CompletionIntent::UserPrompt, cx) + }); + + // Verify that the request only contains non-UI-only messages + // Should have system prompt + 2 user messages, but not the UI-only message + let user_messages: Vec<_> = request + .messages + .iter() + .filter(|msg| msg.role == Role::User) + .collect(); + assert_eq!( + user_messages.len(), + 2, + "Should have exactly 2 user messages" + ); + + // Verify the UI-only content is not present anywhere in the request + let request_text = request + .messages + .iter() + .flat_map(|msg| &msg.content) + .filter_map(|content| match content { + MessageContent::Text(text) => Some(text.as_str()), + _ => None, + }) + .collect::(); + + assert!( + !request_text.contains("UI-only message"), + "UI-only message content should not be in the request" + ); + + // Verify the thread still has all 3 messages (including UI-only) + thread.read_with(cx, |thread, _| { + assert_eq!( + thread.messages().count(), + 3, + "Thread should have 3 messages" + ); + assert_eq!( + thread.messages().filter(|m| m.ui_only).count(), + 1, + "Thread should have 1 UI-only message" + ); + }); + + // Verify that UI-only messages are not serialized + let serialized = thread + .update(cx, |thread, cx| thread.serialize(cx)) + .await + .unwrap(); + assert_eq!( + serialized.messages.len(), + 2, + "Serialized thread should only have 2 messages (no UI-only)" + ); + } + + #[gpui::test] + async fn test_retry_cancelled_on_stop(cx: &mut TestAppContext) { + init_test_settings(cx); + + let project = create_test_project(cx, json!({})).await; + let (_, _, thread, _, _base_model) = setup_test_environment(cx, project.clone()).await; + + // Create model that returns overloaded error + let model = Arc::new(ErrorInjector::new(TestError::Overloaded)); + + // Insert a user message + thread.update(cx, |thread, cx| { + thread.insert_user_message("Hello!", ContextLoadResult::default(), None, vec![], cx); + }); + + // Start completion + thread.update(cx, |thread, cx| { + thread.send_to_model(model.clone(), CompletionIntent::UserPrompt, None, cx); + }); + + cx.run_until_parked(); + + // Verify retry was scheduled by checking for retry message + let has_retry_message = thread.read_with(cx, |thread, _| { + thread.messages.iter().any(|m| { + m.ui_only + && m.segments.iter().any(|s| { + if let MessageSegment::Text(text) = s { + text.contains("Retrying") && text.contains("seconds") + } else { + false + } + }) + }) + }); + assert!(has_retry_message, "Should have scheduled a retry"); + + // Cancel the completion before the retry happens + thread.update(cx, |thread, cx| { + thread.cancel_last_completion(None, cx); + }); + + cx.run_until_parked(); + + // The retry should not have happened - no pending completions + let fake_model = model.as_fake(); + assert_eq!( + fake_model.pending_completions().len(), + 0, + "Should have no pending completions after cancellation" + ); + + // Verify the retry was cancelled by checking retry state + thread.read_with(cx, |thread, _| { + if let Some(retry_state) = &thread.retry_state { + panic!( + "retry_state should be cleared after cancellation, 
but found: attempt={}, max_attempts={}, intent={:?}", + retry_state.attempt, retry_state.max_attempts, retry_state.intent + ); + } + }); + } + fn test_summarize_error( model: &Arc, thread: &Entity, diff --git a/crates/agent_ui/src/active_thread.rs b/crates/agent_ui/src/active_thread.rs index 0e7ca9aa897d1962742e660d2d29e43f8dfe6593..4da959d36e9f77ba06e71d722400a60d9f5be25b 100644 --- a/crates/agent_ui/src/active_thread.rs +++ b/crates/agent_ui/src/active_thread.rs @@ -1140,6 +1140,9 @@ impl ActiveThread { self.save_thread(cx); cx.notify(); } + ThreadEvent::RetriesFailed { message } => { + self.show_notification(message, ui::IconName::Warning, window, cx); + } } } @@ -1835,9 +1838,10 @@ impl ActiveThread { .filter(|(id, _)| *id == message_id) .map(|(_, state)| state); - let colors = cx.theme().colors(); - let editor_bg_color = colors.editor_background; - let panel_bg = colors.panel_background; + let (editor_bg_color, panel_bg) = { + let colors = cx.theme().colors(); + (colors.editor_background, colors.panel_background) + }; let open_as_markdown = IconButton::new(("open-as-markdown", ix), IconName::DocumentText) .icon_size(IconSize::XSmall) @@ -2025,152 +2029,162 @@ impl ActiveThread { } }); - let styled_message = match message.role { - Role::User => v_flex() - .id(("message-container", ix)) - .pt_2() - .pl_2() - .pr_2p5() - .pb_4() - .child( + let styled_message = if message.ui_only { + self.render_ui_notification(message_content, ix, cx) + } else { + match message.role { + Role::User => { + let colors = cx.theme().colors(); v_flex() - .id(("user-message", ix)) - .bg(editor_bg_color) - .rounded_lg() - .shadow_md() - .border_1() - .border_color(colors.border) - .hover(|hover| hover.border_color(colors.text_accent.opacity(0.5))) + .id(("message-container", ix)) + .pt_2() + .pl_2() + .pr_2p5() + .pb_4() .child( v_flex() - .p_2p5() - .gap_1() - .children(message_content) - .when_some(editing_message_state, |this, state| { - let focus_handle = state.editor.focus_handle(cx).clone(); - - this.child( - h_flex() - .w_full() - .gap_1() - .justify_between() - .flex_wrap() - .child( + .id(("user-message", ix)) + .bg(editor_bg_color) + .rounded_lg() + .shadow_md() + .border_1() + .border_color(colors.border) + .hover(|hover| hover.border_color(colors.text_accent.opacity(0.5))) + .child( + v_flex() + .p_2p5() + .gap_1() + .children(message_content) + .when_some(editing_message_state, |this, state| { + let focus_handle = state.editor.focus_handle(cx).clone(); + + this.child( h_flex() - .gap_1p5() + .w_full() + .gap_1() + .justify_between() + .flex_wrap() .child( - div() - .opacity(0.8) + h_flex() + .gap_1p5() + .child( + div() + .opacity(0.8) + .child( + Icon::new(IconName::Warning) + .size(IconSize::Indicator) + .color(Color::Warning) + ), + ) .child( - Icon::new(IconName::Warning) - .size(IconSize::Indicator) - .color(Color::Warning) + Label::new("Editing will restart the thread from this point.") + .color(Color::Muted) + .size(LabelSize::XSmall), ), ) .child( - Label::new("Editing will restart the thread from this point.") - .color(Color::Muted) - .size(LabelSize::XSmall), - ), - ) - .child( - h_flex() - .gap_0p5() - .child( - IconButton::new( - "cancel-edit-message", - IconName::Close, - ) - .shape(ui::IconButtonShape::Square) - .icon_color(Color::Error) - .icon_size(IconSize::Small) - .tooltip({ - let focus_handle = focus_handle.clone(); - move |window, cx| { - Tooltip::for_action_in( - "Cancel Edit", - &menu::Cancel, - &focus_handle, - window, - cx, + h_flex() + .gap_0p5() + .child( + 
IconButton::new( + "cancel-edit-message", + IconName::Close, ) - } - }) - .on_click(cx.listener(Self::handle_cancel_click)), - ) - .child( - IconButton::new( - "confirm-edit-message", - IconName::Return, - ) - .disabled(state.editor.read(cx).is_empty(cx)) - .shape(ui::IconButtonShape::Square) - .icon_color(Color::Muted) - .icon_size(IconSize::Small) - .tooltip({ - let focus_handle = focus_handle.clone(); - move |window, cx| { - Tooltip::for_action_in( - "Regenerate", - &menu::Confirm, - &focus_handle, - window, - cx, + .shape(ui::IconButtonShape::Square) + .icon_color(Color::Error) + .icon_size(IconSize::Small) + .tooltip({ + let focus_handle = focus_handle.clone(); + move |window, cx| { + Tooltip::for_action_in( + "Cancel Edit", + &menu::Cancel, + &focus_handle, + window, + cx, + ) + } + }) + .on_click(cx.listener(Self::handle_cancel_click)), + ) + .child( + IconButton::new( + "confirm-edit-message", + IconName::Return, ) - } - }) - .on_click( - cx.listener(Self::handle_regenerate_click), - ), - ), + .disabled(state.editor.read(cx).is_empty(cx)) + .shape(ui::IconButtonShape::Square) + .icon_color(Color::Muted) + .icon_size(IconSize::Small) + .tooltip({ + let focus_handle = focus_handle.clone(); + move |window, cx| { + Tooltip::for_action_in( + "Regenerate", + &menu::Confirm, + &focus_handle, + window, + cx, + ) + } + }) + .on_click( + cx.listener(Self::handle_regenerate_click), + ), + ), + ) ) - ) - }), + }), + ) + .on_click(cx.listener({ + let message_creases = message.creases.clone(); + move |this, _, window, cx| { + if let Some(message_text) = + this.thread.read(cx).message(message_id).and_then(|message| { + message.segments.first().and_then(|segment| { + match segment { + MessageSegment::Text(message_text) => { + Some(Into::>::into(message_text.as_str())) + } + _ => { + None + } + } + }) + }) + { + this.start_editing_message( + message_id, + message_text, + &message_creases, + window, + cx, + ); + } + } + })), ) - .on_click(cx.listener({ - let message_creases = message.creases.clone(); - move |this, _, window, cx| { - if let Some(message_text) = - this.thread.read(cx).message(message_id).and_then(|message| { - message.segments.first().and_then(|segment| { - match segment { - MessageSegment::Text(message_text) => { - Some(Into::>::into(message_text.as_str())) - } - _ => { - None - } - } - }) - }) - { - this.start_editing_message( - message_id, - message_text, - &message_creases, - window, - cx, - ); - } - } - })), - ), - Role::Assistant => v_flex() - .id(("message-container", ix)) - .px(RESPONSE_PADDING_X) - .gap_2() - .children(message_content) - .when(has_tool_uses, |parent| { - parent.children(tool_uses.into_iter().map(|tool_use| { - self.render_tool_use(tool_use, window, workspace.clone(), cx) - })) - }), - Role::System => div().id(("message-container", ix)).py_1().px_2().child( - v_flex() - .bg(colors.editor_background) - .rounded_sm() - .child(div().p_4().children(message_content)), - ), + } + Role::Assistant => v_flex() + .id(("message-container", ix)) + .px(RESPONSE_PADDING_X) + .gap_2() + .children(message_content) + .when(has_tool_uses, |parent| { + parent.children(tool_uses.into_iter().map(|tool_use| { + self.render_tool_use(tool_use, window, workspace.clone(), cx) + })) + }), + Role::System => { + let colors = cx.theme().colors(); + div().id(("message-container", ix)).py_1().px_2().child( + v_flex() + .bg(colors.editor_background) + .rounded_sm() + .child(div().p_4().children(message_content)), + ) + } + } }; let after_editing_message = self @@ -2509,6 +2523,42 @@ impl 
ActiveThread { .blend(cx.theme().colors().editor_foreground.opacity(0.025)) } + fn render_ui_notification( + &self, + message_content: impl IntoIterator<Item = AnyElement>, + ix: usize, + cx: &mut Context<Self>, + ) -> Stateful<Div>
{ + let colors = cx.theme().colors(); + div().id(("message-container", ix)).py_1().px_2().child( + v_flex() + .w_full() + .bg(colors.editor_background) + .rounded_sm() + .child( + h_flex() + .w_full() + .p_2() + .gap_2() + .child( + div().flex_none().child( + Icon::new(IconName::Warning) + .size(IconSize::Small) + .color(Color::Warning), + ), + ) + .child( + v_flex() + .flex_1() + .min_w_0() + .text_size(TextSize::Small.rems(cx)) + .text_color(cx.theme().colors().text_muted) + .children(message_content), + ), + ), + ) + } + fn render_message_thinking_segment( &self, message_id: MessageId, @@ -3763,9 +3813,9 @@ mod tests { // Stream response to user message thread.update(cx, |thread, cx| { - let request = - thread.to_completion_request(model.clone(), CompletionIntent::UserPrompt, cx); - thread.stream_completion(request, model, cx.active_window(), cx) + let intent = CompletionIntent::UserPrompt; + let request = thread.to_completion_request(model.clone(), intent, cx); + thread.stream_completion(request, model, intent, cx.active_window(), cx) }); // Follow the agent cx.update(|window, cx| { diff --git a/crates/agent_ui/src/agent_diff.rs b/crates/agent_ui/src/agent_diff.rs index aa5e49551b4e1c31b111823318af2bf649146e02..b8e67512e2b069f2a4f19c4903512f385c4eeab7 100644 --- a/crates/agent_ui/src/agent_diff.rs +++ b/crates/agent_ui/src/agent_diff.rs @@ -1380,6 +1380,7 @@ impl AgentDiff { | ThreadEvent::ToolConfirmationNeeded | ThreadEvent::ToolUseLimitReached | ThreadEvent::CancelEditing + | ThreadEvent::RetriesFailed { .. } | ThreadEvent::ProfileChanged => {} } } diff --git a/crates/agent_ui/src/agent_panel.rs b/crates/agent_ui/src/agent_panel.rs index f9b934b2382c76cf9d475fc65310560227832a21..2ae39aba848a069fa597960af587b7405412f05f 100644 --- a/crates/agent_ui/src/agent_panel.rs +++ b/crates/agent_ui/src/agent_panel.rs @@ -144,14 +144,17 @@ pub fn init(cx: &mut App) { workspace.follow(CollaboratorId::Agent, window, cx); }) .register_action(|workspace, _: &ExpandMessageEditor, window, cx| { - if let Some(panel) = workspace.panel::(cx) { - workspace.focus_panel::(window, cx); - panel.update(cx, |panel, cx| { - panel.message_editor.update(cx, |editor, cx| { + let Some(panel) = workspace.panel::(cx) else { + return; + }; + workspace.focus_panel::(window, cx); + panel.update(cx, |panel, cx| { + if let Some(message_editor) = panel.active_message_editor() { + message_editor.update(cx, |editor, cx| { editor.expand_message_editor(&ExpandMessageEditor, window, cx); }); - }); - } + } + }); }) .register_action(|workspace, _: &ToggleNavigationMenu, window, cx| { if let Some(panel) = workspace.panel::(cx) { @@ -191,6 +194,7 @@ enum ActiveView { Thread { thread: Entity, change_title_editor: Entity, + message_editor: Entity, _subscriptions: Vec, }, Agent2Thread { @@ -225,6 +229,7 @@ impl ActiveView { pub fn thread( active_thread: Entity, + message_editor: Entity, window: &mut Window, cx: &mut Context, ) -> Self { @@ -237,6 +242,24 @@ impl ActiveView { }); let subscriptions = vec![ + cx.subscribe(&message_editor, |this, _, event, cx| match event { + MessageEditorEvent::Changed | MessageEditorEvent::EstimatedTokenCount => { + cx.notify(); + } + MessageEditorEvent::ScrollThreadToBottom => match &this.active_view { + ActiveView::Thread { thread, .. } => { + thread.update(cx, |thread, cx| { + thread.scroll_to_bottom(cx); + }); + } + ActiveView::Agent2Thread { .. } => { + // todo! + } + ActiveView::TextThread { .. 
+                    | ActiveView::History
+                    | ActiveView::Configuration => {}
+                },
+            }),
             window.subscribe(&editor, cx, {
                 {
                     let thread = active_thread.clone();
@@ -289,6 +312,7 @@ impl ActiveView {
         Self::Thread {
             change_title_editor: editor,
             thread: active_thread,
+            message_editor: message_editor,
             _subscriptions: subscriptions,
         }
     }
@@ -401,9 +425,6 @@ pub struct AgentPanel {
     fs: Arc<dyn Fs>,
     language_registry: Arc<LanguageRegistry>,
     thread_store: Entity<ThreadStore>,
-    // todo! move to active view?
-    message_editor: Entity<MessageEditor>,
-    _message_editor_subscription: Subscription,
     _default_model_subscription: Subscription,
     context_store: Entity,
     prompt_store: Option<Entity<PromptStore>>,
@@ -548,24 +569,6 @@ impl AgentPanel {
             )
         });
 
-        let message_editor_subscription =
-            cx.subscribe(&message_editor, |this, _, event, cx| match event {
-                MessageEditorEvent::Changed | MessageEditorEvent::EstimatedTokenCount => {
-                    cx.notify();
-                }
-                MessageEditorEvent::ScrollThreadToBottom => match &this.active_view {
-                    ActiveView::Thread { thread, .. } => {
-                        thread.update(cx, |thread, cx| {
-                            thread.scroll_to_bottom(cx);
-                        });
-                    }
-                    ActiveView::Agent2Thread { .. } => todo!(),
-                    ActiveView::TextThread { .. }
-                    | ActiveView::History
-                    | ActiveView::Configuration => {}
-                },
-            });
-
         let thread_id = thread.read(cx).id().clone();
         let history_store = cx.new(|cx| {
             HistoryStore::new(
@@ -593,7 +596,7 @@ impl AgentPanel {
 
         let panel_type = AgentSettings::get_global(cx).default_view;
         let active_view = match panel_type {
-            DefaultView::Thread => ActiveView::thread(active_thread, window, cx),
+            DefaultView::Thread => ActiveView::thread(active_thread, message_editor, window, cx),
             DefaultView::TextThread => {
                 let context =
                     context_store.update(cx, |context_store, cx| context_store.create(cx));
@@ -684,8 +687,6 @@ impl AgentPanel {
             fs: fs.clone(),
             language_registry,
             thread_store: thread_store.clone(),
-            message_editor,
-            _message_editor_subscription: message_editor_subscription,
             _default_model_subscription,
             context_store,
             prompt_store,
@@ -757,12 +758,27 @@ impl AgentPanel {
         }
     }
 
+    fn active_message_editor(&self) -> Option<&Entity<MessageEditor>> {
+        match &self.active_view {
+            ActiveView::Thread { message_editor, .. } => Some(message_editor),
+            ActiveView::Agent2Thread { .. } => {
+                // todo!
+                None
+            }
+            ActiveView::TextThread { .. }
+            | ActiveView::History
+            | ActiveView::Configuration => None,
+        }
+    }
+
     fn new_thread(&mut self, action: &NewThread, window: &mut Window, cx: &mut Context<Self>) {
         // Preserve chat box text when using creating new thread from summary'
-        let preserved_text = action
-            .from_thread_id
-            .is_some()
-            .then(|| self.message_editor.read(cx).get_text(cx).trim().to_string());
+        let preserved_text = if action.from_thread_id.is_some() {
+            self.active_message_editor()
+                .map(|editor| editor.read(cx).get_text(cx).trim().to_string())
+        } else {
+            None
+        };
 
         let thread = self
             .thread_store
@@ -775,22 +791,6 @@ impl AgentPanel {
             )
         });
 
-        let active_thread = cx.new(|cx| {
-            ActiveThread::new(
-                thread.clone(),
-                self.thread_store.clone(),
-                self.context_store.clone(),
-                context_store.clone(),
-                self.language_registry.clone(),
-                self.workspace.clone(),
-                window,
-                cx,
-            )
-        });
-
-        let thread_view = ActiveView::thread(active_thread.clone(), window, cx);
-        self.set_active_view(thread_view, window, cx);
-
         if let Some(other_thread_id) = action.from_thread_id.clone() {
             let other_thread_task = self.thread_store.update(cx, |this, cx| {
                 this.open_thread(&other_thread_id, window, cx)
@@ -810,48 +810,47 @@ impl AgentPanel {
                 })
                 .detach_and_log_err(cx);
         }
+
+        let active_thread = cx.new(|cx| {
+            ActiveThread::new(
+                thread.clone(),
+                self.thread_store.clone(),
+                self.context_store.clone(),
+                context_store.clone(),
+                self.language_registry.clone(),
+                self.workspace.clone(),
+                window,
+                cx,
+            )
+        });
 
-        AgentDiff::set_active_thread(&self.workspace, &thread, window, cx);
-
-        // todo! move message editor to active view?
-        self.message_editor = cx.new(|cx| {
+        let message_editor = cx.new(|cx| {
             MessageEditor::new(
                 self.fs.clone(),
                 self.workspace.clone(),
                 self.user_store.clone(),
-                context_store,
+                context_store.clone(),
                 self.prompt_store.clone(),
                 self.thread_store.downgrade(),
                 self.context_store.downgrade(),
-                thread,
+                thread.clone(),
                 window,
                 cx,
             )
         });
 
         if let Some(text) = preserved_text {
-            self.message_editor.update(cx, |editor, cx| {
+            message_editor.update(cx, |editor, cx| {
                 editor.set_text(text, window, cx);
            });
         }
 
-        self.message_editor.focus_handle(cx).focus(window);
+        message_editor.focus_handle(cx).focus(window);
 
-        let message_editor_subscription = cx.subscribe(&self.message_editor, {
-            let active_thread = active_thread.clone();
-            move |_, _, event, cx| match event {
-                MessageEditorEvent::Changed | MessageEditorEvent::EstimatedTokenCount => {
-                    cx.notify();
-                }
-                MessageEditorEvent::ScrollThreadToBottom => {
-                    active_thread.update(cx, |thread, cx| {
-                        thread.scroll_to_bottom(cx);
-                    });
-                }
-            }
-        });
+        let thread_view = ActiveView::thread(active_thread.clone(), message_editor, window, cx);
+        self.set_active_view(thread_view, window, cx);
 
-        self._message_editor_subscription = message_editor_subscription;
+        AgentDiff::set_active_thread(&self.workspace, &thread, window, cx);
     }
 
     fn new_prompt_editor(&mut self, window: &mut Window, cx: &mut Context<Self>) {
@@ -1058,11 +1057,8 @@ impl AgentPanel {
                 cx,
             )
         });
-        let thread_view = ActiveView::thread(active_thread.clone(), window, cx);
-        self.set_active_view(thread_view, window, cx);
-        AgentDiff::set_active_thread(&self.workspace, &thread, window, cx);
-        self.message_editor = cx.new(|cx| {
+        let message_editor = cx.new(|cx| {
             MessageEditor::new(
                 self.fs.clone(),
                 self.workspace.clone(),
@@ -1071,28 +1067,16 @@ impl AgentPanel {
                 self.prompt_store.clone(),
                 self.thread_store.downgrade(),
                 self.context_store.downgrade(),
-                thread,
+                thread.clone(),
                 window,
                 cx,
             )
         });
-        self.message_editor.focus_handle(cx).focus(window);
-
-        let message_editor_subscription = cx.subscribe(&self.message_editor, {
-            let active_thread = active_thread.clone();
-            move |_, _, event, cx| match event {
-                MessageEditorEvent::Changed | MessageEditorEvent::EstimatedTokenCount => {
-                    cx.notify();
-                }
-                MessageEditorEvent::ScrollThreadToBottom => {
-                    active_thread.update(cx, |thread, cx| {
-                        thread.scroll_to_bottom(cx);
-                    });
-                }
-            }
-        });
+        message_editor.focus_handle(cx).focus(window);
 
-        self._message_editor_subscription = message_editor_subscription;
+        let thread_view = ActiveView::thread(active_thread.clone(), message_editor, window, cx);
+        self.set_active_view(thread_view, window, cx);
+        AgentDiff::set_active_thread(&self.workspace, &thread, window, cx);
     }
 
     pub fn go_back(&mut self, _: &workspace::GoBack, window: &mut Window, cx: &mut Context<Self>) {
@@ -1103,13 +1087,16 @@ impl AgentPanel {
             self.active_view = previous_view;
 
             match &self.active_view {
-                ActiveView::Thread { .. } => {
-                    self.message_editor.focus_handle(cx).focus(window);
+                ActiveView::Thread { message_editor, .. } => {
+                    message_editor.focus_handle(cx).focus(window);
+                }
+                ActiveView::Agent2Thread { .. } => {
+                    todo!()
                 }
                 ActiveView::TextThread { context_editor, .. } => {
                     context_editor.focus_handle(cx).focus(window);
                 }
-                _ => {}
+                ActiveView::History | ActiveView::Configuration => {}
             }
         }
         cx.notify();
@@ -1333,10 +1320,6 @@ impl AgentPanel {
             .update(cx, |this, cx| this.delete_thread(thread_id, cx))
     }
 
-    pub(crate) fn has_active_thread(&self) -> bool {
-        matches!(self.active_view, ActiveView::Thread { .. })
-    }
-
     fn continue_conversation(&mut self, window: &mut Window, cx: &mut Context<Self>) {
         let ActiveView::Thread { thread, .. } = &self.active_view else {
             return;
@@ -1531,8 +1514,11 @@ impl AgentPanel {
 impl Focusable for AgentPanel {
     fn focus_handle(&self, cx: &App) -> FocusHandle {
         match &self.active_view {
-            ActiveView::Thread { .. } => self.message_editor.focus_handle(cx),
-            ActiveView::Agent2Thread { .. } => self.message_editor.focus_handle(cx),
+            ActiveView::Thread { message_editor, .. } => message_editor.focus_handle(cx),
+            ActiveView::Agent2Thread { .. } => {
+                // todo! add own message editor to agent2
+                cx.focus_handle()
+            }
             ActiveView::History => self.history.focus_handle(cx),
             ActiveView::TextThread { context_editor, .. } => context_editor.focus_handle(cx),
             ActiveView::Configuration => {
@@ -1991,8 +1977,12 @@ impl AgentPanel {
     }
 
     fn render_token_count(&self, cx: &App) -> Option {
-        let active_thread = match &self.active_view {
-            ActiveView::Thread { thread, .. } => thread.read(cx),
+        let (active_thread, message_editor) = match &self.active_view {
+            ActiveView::Thread {
+                thread,
+                message_editor,
+                ..
+            } => (thread.read(cx), message_editor.read(cx)),
             ActiveView::Agent2Thread { .. } => {
                 todo!();
             }
@@ -2001,17 +1991,14 @@ impl AgentPanel {
             }
         };
 
-        let editor_empty = self.message_editor.read(cx).is_editor_fully_empty(cx);
+        let editor_empty = message_editor.is_editor_fully_empty(cx);
 
         if active_thread.is_empty() && editor_empty {
             return None;
         }
 
         let thread = active_thread.thread().read(cx);
-        let is_generating = thread.is_generating();
-        let message_editor = self.message_editor.read(cx);
-
         let conversation_token_usage = thread.total_token_usage()?;
 
         let (total_token_usage, is_estimating) =
@@ -3198,7 +3185,11 @@ impl Render for AgentPanel {
             .children(self.render_upsell(window, cx))
             .children(self.render_trial_end_upsell(window, cx))
             .map(|parent| match &self.active_view {
-                ActiveView::Thread { thread, .. } => parent
+                ActiveView::Thread {
+                    thread,
+                    message_editor,
+                    ..
+                } => parent
                     .relative()
                     .child(if thread.read(cx).is_empty() {
                         self.render_thread_empty_state(window, cx)
@@ -3207,7 +3198,7 @@ impl Render for AgentPanel {
                     } else {
                         thread.clone().into_any_element()
                     })
                     .children(self.render_tool_use_limit_reached(window, cx))
-                    .child(h_flex().child(self.message_editor.clone()))
+                    .child(h_flex().child(message_editor.clone()))
                     .when_some(thread.read(cx).last_error(), |this, last_error| {
                         this.child(
                             div()
@@ -3236,7 +3227,8 @@ impl Render for AgentPanel {
                 ActiveView::Agent2Thread { .. } => parent
                     .relative()
                     .child("todo!")
-                    .child(h_flex().child(self.message_editor.clone()))
+                    // todo!
+                    // .child(h_flex().child(self.message_editor.clone()))
                     .child(self.render_drag_target(cx)),
                 ActiveView::History => parent.child(self.history.clone()),
                 ActiveView::TextThread {
@@ -3378,8 +3370,8 @@ impl AgentPanelDelegate for ConcreteAssistantPanelDelegate {
         // Wait to create a new context until the workspace is no longer
         // being updated.
         cx.defer_in(window, move |panel, window, cx| {
-            if panel.has_active_thread() {
-                panel.message_editor.update(cx, |message_editor, cx| {
+            if let Some(message_editor) = panel.active_message_editor() {
+                message_editor.update(cx, |message_editor, cx| {
                     message_editor.context_store().update(cx, |store, cx| {
                         let buffer = buffer.read(cx);
                         let selection_ranges = selection_ranges
diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs
index 09770364cb6b460a4ce8d61d76bcc833cb466129..904eca83e609dc8766fb3a5a69ed9040c82f0168 100644
--- a/crates/eval/src/example.rs
+++ b/crates/eval/src/example.rs
@@ -221,6 +221,9 @@ impl ExampleContext {
                     ThreadEvent::ShowError(thread_error) => {
                         tx.try_send(Err(anyhow!(thread_error.clone()))).ok();
                     }
+                    ThreadEvent::RetriesFailed { .. } => {
+                        // Ignore retries failed events
+                    }
                     ThreadEvent::Stopped(reason) => match reason {
                         Ok(StopReason::EndTurn) => {
                             tx.close_channel();
diff --git a/crates/extensions_ui/src/extension_suggest.rs b/crates/extensions_ui/src/extension_suggest.rs
index 9fdf67ea531ddb3f0bebd0498c231c4713b06750..9b1d1f8cdfc5e3f9201e6513d632c1ec96f15058 100644
--- a/crates/extensions_ui/src/extension_suggest.rs
+++ b/crates/extensions_ui/src/extension_suggest.rs
@@ -60,6 +60,7 @@ const SUGGESTIONS_BY_EXTENSION_ID: &[(&str, &[&str])] = &[
     ("r", &["r", "R"]),
     ("racket", &["rkt"]),
     ("rescript", &["res", "resi"]),
+    ("rst", &["rst"]),
     ("ruby", &["rb", "erb"]),
     ("scheme", &["scm"]),
     ("scss", &["scss"]),
diff --git a/docs/src/extensions/developing-extensions.md b/docs/src/extensions/developing-extensions.md
index a523218b43c022f5c1166d0e431ed193fdd3ef2e..97af1f2673ef7f8d8995e9cefbf0351d5b490734 100644
--- a/docs/src/extensions/developing-extensions.md
+++ b/docs/src/extensions/developing-extensions.md
@@ -120,6 +120,8 @@ git submodule add https://github.com/your-username/foobar-zed.git extensions/foo
 git add extensions/foobar
 ```
 
+> All extension submodules must use HTTPS URLs and not SSH URLs (`git@github.com`).
+
 2. Add a new entry to the top-level `extensions.toml` file containing your extension:
 
 ```toml