agent: Show an error when the model requests limit has been reached (#28868)

Marshall Bowers and Oleksiy Syvokon created

This PR adds an error message that is shown when the model request limit
has been reached.

Release Notes:

- N/A

Co-authored-by: Oleksiy Syvokon <oleksiy.syvokon@gmail.com>

Change summary

Cargo.lock                                     | 78 +++++++++++++------
Cargo.toml                                     |  2 
crates/agent/src/assistant_panel.rs            | 65 ++++++++++++++++
crates/agent/src/thread.rs                     | 13 ++
crates/language_model/src/model/cloud_model.rs | 18 ++++
crates/language_models/Cargo.toml              |  1 
crates/language_models/src/provider/cloud.rs   | 32 +++++++
7 files changed, 178 insertions(+), 31 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -324,7 +324,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "strum",
+ "strum 0.26.3",
  "thiserror 2.0.12",
  "workspace-hack",
 ]
@@ -567,7 +567,7 @@ dependencies = [
  "settings",
  "smallvec",
  "smol",
- "strum",
+ "strum 0.26.3",
  "telemetry_events",
  "text",
  "theme",
@@ -1881,7 +1881,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "strum",
+ "strum 0.26.3",
  "thiserror 2.0.12",
  "tokio",
  "workspace-hack",
@@ -3028,7 +3028,7 @@ dependencies = [
  "settings",
  "sha2",
  "sqlx",
- "strum",
+ "strum 0.26.3",
  "subtle",
  "supermaven_api",
  "telemetry_events",
@@ -3360,7 +3360,7 @@ dependencies = [
  "serde",
  "serde_json",
  "settings",
- "strum",
+ "strum 0.26.3",
  "task",
  "theme",
  "ui",
@@ -4477,7 +4477,7 @@ dependencies = [
  "optfield",
  "proc-macro2",
  "quote",
- "strum",
+ "strum 0.26.3",
  "syn 2.0.100",
 ]
 
@@ -5122,7 +5122,7 @@ dependencies = [
  "serde",
  "settings",
  "smallvec",
- "strum",
+ "strum 0.26.3",
  "telemetry",
  "theme",
  "ui",
@@ -5973,7 +5973,7 @@ dependencies = [
  "serde_derive",
  "serde_json",
  "settings",
- "strum",
+ "strum 0.26.3",
  "telemetry",
  "theme",
  "time",
@@ -6066,7 +6066,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "strum",
+ "strum 0.26.3",
  "workspace-hack",
 ]
 
@@ -6172,7 +6172,7 @@ dependencies = [
  "slotmap",
  "smallvec",
  "smol",
- "strum",
+ "strum 0.26.3",
  "sum_tree",
  "taffy",
  "thiserror 2.0.12",
@@ -6820,7 +6820,7 @@ name = "icons"
 version = "0.1.0"
 dependencies = [
  "serde",
- "strum",
+ "strum 0.26.3",
  "workspace-hack",
 ]
 
@@ -7088,7 +7088,7 @@ dependencies = [
  "paths",
  "pretty_assertions",
  "serde",
- "strum",
+ "strum 0.26.3",
  "util",
  "workspace-hack",
 ]
@@ -7674,7 +7674,7 @@ dependencies = [
  "serde",
  "serde_json",
  "smol",
- "strum",
+ "strum 0.26.3",
  "telemetry_events",
  "thiserror 2.0.12",
  "util",
@@ -7734,7 +7734,7 @@ dependencies = [
  "serde_json",
  "settings",
  "smol",
- "strum",
+ "strum 0.26.3",
  "theme",
  "thiserror 2.0.12",
  "tiktoken-rs",
@@ -7742,6 +7742,7 @@ dependencies = [
  "ui",
  "util",
  "workspace-hack",
+ "zed_llm_client",
 ]
 
 [[package]]
@@ -8706,7 +8707,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "strum",
+ "strum 0.26.3",
  "workspace-hack",
 ]
 
@@ -9553,7 +9554,7 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "strum",
+ "strum 0.26.3",
  "workspace-hack",
 ]
 
@@ -12132,7 +12133,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sha2",
- "strum",
+ "strum 0.26.3",
  "tracing",
  "util",
  "workspace-hack",
@@ -12660,7 +12661,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sqlx",
- "strum",
+ "strum 0.26.3",
  "thiserror 2.0.12",
  "time",
  "tracing",
@@ -13705,7 +13706,7 @@ dependencies = [
  "settings",
  "simplelog",
  "story",
- "strum",
+ "strum 0.26.3",
  "theme",
  "title_bar",
  "ui",
@@ -13787,7 +13788,16 @@ version = "0.26.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
 dependencies = [
- "strum_macros",
+ "strum_macros 0.26.4",
+]
+
+[[package]]
+name = "strum"
+version = "0.27.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
+dependencies = [
+ "strum_macros 0.27.1",
 ]
 
 [[package]]
@@ -13803,6 +13813,19 @@ dependencies = [
  "syn 2.0.100",
 ]
 
+[[package]]
+name = "strum_macros"
+version = "0.27.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "quote",
+ "rustversion",
+ "syn 2.0.100",
+]
+
 [[package]]
 name = "subtle"
 version = "2.6.1"
@@ -14418,7 +14441,7 @@ dependencies = [
  "serde_json_lenient",
  "serde_repr",
  "settings",
- "strum",
+ "strum 0.26.3",
  "thiserror 2.0.12",
  "util",
  "uuid",
@@ -14452,7 +14475,7 @@ dependencies = [
  "serde_json",
  "serde_json_lenient",
  "simplelog",
- "strum",
+ "strum 0.26.3",
  "theme",
  "vscode_theme",
  "workspace-hack",
@@ -15453,7 +15476,7 @@ dependencies = [
  "settings",
  "smallvec",
  "story",
- "strum",
+ "strum 0.26.3",
  "theme",
  "ui_macros",
  "util",
@@ -17624,7 +17647,7 @@ dependencies = [
  "settings",
  "smallvec",
  "sqlez",
- "strum",
+ "strum 0.26.3",
  "task",
  "telemetry",
  "tempfile",
@@ -17769,7 +17792,7 @@ dependencies = [
  "sqlx-macros-core",
  "sqlx-postgres",
  "sqlx-sqlite",
- "strum",
+ "strum 0.26.3",
  "subtle",
  "syn 1.0.109",
  "syn 2.0.100",
@@ -18328,12 +18351,13 @@ dependencies = [
 
 [[package]]
 name = "zed_llm_client"
-version = "0.4.1"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1bf21350eced858d129840589158a8f6895c4fa4327ae56dd8c7d6a98495bed4"
+checksum = "1d28a5d6bdb0f40acf5261c39cabbf65a13b55ba4b86d9beb5b8b1c484373f1a"
 dependencies = [
  "serde",
  "serde_json",
+ "strum 0.27.1",
  "uuid",
 ]
 

Cargo.toml 🔗

@@ -601,7 +601,7 @@ wasmtime-wasi = "29"
 which = "6.0.0"
 wit-component = "0.221"
 workspace-hack = "0.1.0"
-zed_llm_client = "0.4"
+zed_llm_client = "0.4.2"
 zstd = "0.11"
 metal = "0.29"
 

crates/agent/src/assistant_panel.rs 🔗

@@ -25,6 +25,7 @@ use language_model_selector::ToggleModelSelector;
 use project::Project;
 use prompt_library::{PromptLibrary, open_prompt_library};
 use prompt_store::PromptBuilder;
+use proto::Plan;
 use settings::{Settings, update_settings_file};
 use time::UtcOffset;
 use ui::{
@@ -1449,6 +1450,9 @@ impl AssistantPanel {
                     ThreadError::MaxMonthlySpendReached => {
                         self.render_max_monthly_spend_reached_error(cx)
                     }
+                    ThreadError::ModelRequestLimitReached { plan } => {
+                        self.render_model_request_limit_reached_error(plan, cx)
+                    }
                     ThreadError::Message { header, message } => {
                         self.render_error_message(header, message, cx)
                     }
@@ -1551,6 +1555,67 @@ impl AssistantPanel {
             .into_any()
     }
 
+    fn render_model_request_limit_reached_error(
+        &self,
+        plan: Plan,
+        cx: &mut Context<Self>,
+    ) -> AnyElement {
+        let error_message = match plan {
+            Plan::Free => "Model request limit reached. Upgrade to Zed Pro for more requests.",
+            Plan::ZedPro => {
+                "Model request limit reached. Upgrade to usage-based billing for more requests."
+            }
+        };
+        let call_to_action = match plan {
+            Plan::Free => "Upgrade to Zed Pro",
+            Plan::ZedPro => "Upgrade to usage-based billing",
+        };
+
+        v_flex()
+            .gap_0p5()
+            .child(
+                h_flex()
+                    .gap_1p5()
+                    .items_center()
+                    .child(Icon::new(IconName::XCircle).color(Color::Error))
+                    .child(Label::new("Model Request Limit Reached").weight(FontWeight::MEDIUM)),
+            )
+            .child(
+                div()
+                    .id("error-message")
+                    .max_h_24()
+                    .overflow_y_scroll()
+                    .child(Label::new(error_message)),
+            )
+            .child(
+                h_flex()
+                    .justify_end()
+                    .mt_1()
+                    .child(
+                        Button::new("subscribe", call_to_action).on_click(cx.listener(
+                            |this, _, _, cx| {
+                                this.thread.update(cx, |this, _cx| {
+                                    this.clear_last_error();
+                                });
+
+                                cx.open_url(&zed_urls::account_url(cx));
+                                cx.notify();
+                            },
+                        )),
+                    )
+                    .child(Button::new("dismiss", "Dismiss").on_click(cx.listener(
+                        |this, _, _, cx| {
+                            this.thread.update(cx, |this, _cx| {
+                                this.clear_last_error();
+                            });
+
+                            cx.notify();
+                        },
+                    ))),
+            )
+            .into_any()
+    }
+
     fn render_error_message(
         &self,
         header: SharedString,

crates/agent/src/thread.rs 🔗

@@ -18,12 +18,13 @@ use language_model::{
     ConfiguredModel, LanguageModel, LanguageModelCompletionEvent, LanguageModelId,
     LanguageModelKnownError, LanguageModelRegistry, LanguageModelRequest,
     LanguageModelRequestMessage, LanguageModelRequestTool, LanguageModelToolResult,
-    LanguageModelToolUseId, MaxMonthlySpendReachedError, MessageContent, PaymentRequiredError,
-    Role, StopReason, TokenUsage,
+    LanguageModelToolUseId, MaxMonthlySpendReachedError, MessageContent,
+    ModelRequestLimitReachedError, PaymentRequiredError, Role, StopReason, TokenUsage,
 };
 use project::Project;
 use project::git_store::{GitStore, GitStoreCheckpoint, RepositoryState};
 use prompt_store::PromptBuilder;
+use proto::Plan;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 use settings::Settings;
@@ -1150,6 +1151,12 @@ impl Thread {
                                 cx.emit(ThreadEvent::ShowError(
                                     ThreadError::MaxMonthlySpendReached,
                                 ));
+                            } else if let Some(error) =
+                                error.downcast_ref::<ModelRequestLimitReachedError>()
+                            {
+                                cx.emit(ThreadEvent::ShowError(
+                                    ThreadError::ModelRequestLimitReached { plan: error.plan },
+                                ));
                             } else if let Some(known_error) =
                                 error.downcast_ref::<LanguageModelKnownError>()
                             {
@@ -1929,6 +1936,8 @@ pub enum ThreadError {
     PaymentRequired,
     #[error("Max monthly spend reached")]
     MaxMonthlySpendReached,
+    #[error("Model request limit reached")]
+    ModelRequestLimitReached { plan: Plan },
     #[error("Message {header}: {message}")]
     Message {
         header: SharedString,

crates/language_model/src/model/cloud_model.rs 🔗

@@ -142,6 +142,24 @@ impl fmt::Display for MaxMonthlySpendReachedError {
     }
 }
 
+#[derive(Error, Debug)]
+pub struct ModelRequestLimitReachedError {
+    pub plan: Plan,
+}
+
+impl fmt::Display for ModelRequestLimitReachedError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let message = match self.plan {
+            Plan::Free => "Model request limit reached. Upgrade to Zed Pro for more requests.",
+            Plan::ZedPro => {
+                "Model request limit reached. Upgrade to usage-based billing for more requests."
+            }
+        };
+
+        write!(f, "{message}")
+    }
+}
+
 #[derive(Clone, Default)]
 pub struct LlmApiToken(Arc<RwLock<Option<String>>>);
 

crates/language_models/Cargo.toml 🔗

@@ -53,6 +53,7 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 ui.workspace = true
 util.workspace = true
 workspace-hack.workspace = true
+zed_llm_client.workspace = true
 
 [dev-dependencies]
 editor = { workspace = true, features = ["test-support"] }

crates/language_models/src/provider/cloud.rs 🔗

@@ -16,18 +16,21 @@ use language_model::{
     AuthenticateError, CloudModel, LanguageModel, LanguageModelCacheConfiguration, LanguageModelId,
     LanguageModelKnownError, LanguageModelName, LanguageModelProviderId, LanguageModelProviderName,
     LanguageModelProviderState, LanguageModelProviderTosView, LanguageModelRequest,
-    LanguageModelToolSchemaFormat, RateLimiter, ZED_CLOUD_PROVIDER_ID,
+    LanguageModelToolSchemaFormat, ModelRequestLimitReachedError, RateLimiter,
+    ZED_CLOUD_PROVIDER_ID,
 };
 use language_model::{
     LanguageModelAvailability, LanguageModelCompletionEvent, LanguageModelProvider, LlmApiToken,
     MaxMonthlySpendReachedError, PaymentRequiredError, RefreshLlmTokenListener,
 };
+use proto::Plan;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize, de::DeserializeOwned};
 use serde_json::value::RawValue;
 use settings::{Settings, SettingsStore};
 use smol::Timer;
 use smol::io::{AsyncReadExt, BufReader};
+use std::str::FromStr as _;
 use std::{
     sync::{Arc, LazyLock},
     time::Duration,
@@ -35,6 +38,7 @@ use std::{
 use strum::IntoEnumIterator;
 use thiserror::Error;
 use ui::{TintColor, prelude::*};
+use zed_llm_client::{CURRENT_PLAN_HEADER_NAME, SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME};
 
 use crate::AllLanguageModelSettings;
 use crate::provider::anthropic::{count_anthropic_tokens, into_anthropic};
@@ -551,6 +555,32 @@ impl CloudLanguageModel {
                     .is_some()
             {
                 return Err(anyhow!(MaxMonthlySpendReachedError));
+            } else if status == StatusCode::FORBIDDEN
+                && response
+                    .headers()
+                    .get(SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME)
+                    .is_some()
+            {
+                if let Some("model_requests") = response
+                    .headers()
+                    .get(SUBSCRIPTION_LIMIT_RESOURCE_HEADER_NAME)
+                    .and_then(|resource| resource.to_str().ok())
+                {
+                    if let Some(plan) = response
+                        .headers()
+                        .get(CURRENT_PLAN_HEADER_NAME)
+                        .and_then(|plan| plan.to_str().ok())
+                        .and_then(|plan| zed_llm_client::Plan::from_str(plan).ok())
+                    {
+                        let plan = match plan {
+                            zed_llm_client::Plan::Free => Plan::Free,
+                            zed_llm_client::Plan::ZedPro => Plan::ZedPro,
+                        };
+                        return Err(anyhow!(ModelRequestLimitReachedError { plan }));
+                    }
+                }
+
+                return Err(anyhow!("Forbidden"));
             } else if status.as_u16() >= 500 && status.as_u16() < 600 {
                 // If we encounter an error in the 500 range, retry after a delay.
                 // We've seen at least these in the wild from API providers: