Report telemetry events for rate limit errors (#16130)

Max Brunsfeld and Marshall created

ClickHouse telemetry schema:

```
CREATE TABLE default.llm_rate_limit_events
(
    `time` DateTime64(3),
    `user_id` Int32,
    `is_staff` Bool,
    `plan` LowCardinality(String),
    `model` String,
    `provider` LowCardinality(String),
    `usage_measure` LowCardinality(String),
    `requests_this_minute` UInt64,
    `tokens_this_minute` UInt64,
    `tokens_this_day` UInt64,
    `max_requests_per_minute` UInt64,
    `max_tokens_per_minute` UInt64,
    `max_tokens_per_day` UInt64,
    `users_in_recent_minutes` UInt64,
    `users_in_recent_days` UInt64
)
ORDER BY tuple()
```

Release Notes:

- N/A

Co-authored-by: Marshall <marshall@zed.dev>

Change summary

crates/collab/src/llm.rs           | 59 ++++++++++++++++++++++++++-----
crates/collab/src/llm/telemetry.rs | 29 +++++++++++++++
2 files changed, 77 insertions(+), 11 deletions(-)

Detailed changes

crates/collab/src/llm.rs 🔗

@@ -18,7 +18,7 @@ use axum::{
     Extension, Json, Router, TypedHeader,
 };
 use chrono::{DateTime, Duration, Utc};
-use db::{ActiveUserCount, LlmDatabase};
+use db::{usage_measure::UsageMeasure, ActiveUserCount, LlmDatabase};
 use futures::{Stream, StreamExt as _};
 use http_client::IsahcHttpClient;
 use rpc::{
@@ -29,7 +29,7 @@ use std::{
     sync::Arc,
     task::{Context, Poll},
 };
-use telemetry::{report_llm_usage, LlmUsageEventRow};
+use telemetry::{report_llm_rate_limit, report_llm_usage, LlmRateLimitEventRow, LlmUsageEventRow};
 use tokio::sync::RwLock;
 use util::ResultExt;
 
@@ -401,38 +401,75 @@ async fn check_usage_limit(
 
     let active_users = state.get_active_user_count().await?;
 
+    let users_in_recent_minutes = active_users.users_in_recent_minutes.max(1);
+    let users_in_recent_days = active_users.users_in_recent_days.max(1);
+
     let per_user_max_requests_per_minute =
-        model.max_requests_per_minute as usize / active_users.users_in_recent_minutes.max(1);
+        model.max_requests_per_minute as usize / users_in_recent_minutes;
     let per_user_max_tokens_per_minute =
-        model.max_tokens_per_minute as usize / active_users.users_in_recent_minutes.max(1);
-    let per_user_max_tokens_per_day =
-        model.max_tokens_per_day as usize / active_users.users_in_recent_days.max(1);
+        model.max_tokens_per_minute as usize / users_in_recent_minutes;
+    let per_user_max_tokens_per_day = model.max_tokens_per_day as usize / users_in_recent_days;
 
     let checks = [
         (
             usage.requests_this_minute,
             per_user_max_requests_per_minute,
-            "requests per minute",
+            UsageMeasure::RequestsPerMinute,
         ),
         (
             usage.tokens_this_minute,
             per_user_max_tokens_per_minute,
-            "tokens per minute",
+            UsageMeasure::TokensPerMinute,
         ),
         (
             usage.tokens_this_day,
             per_user_max_tokens_per_day,
-            "tokens per day",
+            UsageMeasure::TokensPerDay,
         ),
     ];
 
-    for (usage, limit, resource) in checks {
+    for (used, limit, usage_measure) in checks {
         // Temporarily bypass rate-limiting for staff members.
         if claims.is_staff {
             continue;
         }
 
-        if usage > limit {
+        if used > limit {
+            let resource = match usage_measure {
+                UsageMeasure::RequestsPerMinute => "requests_per_minute",
+                UsageMeasure::TokensPerMinute => "tokens_per_minute",
+                UsageMeasure::TokensPerDay => "tokens_per_day",
+                _ => "",
+            };
+
+            if let Some(client) = state.clickhouse_client.as_ref() {
+                report_llm_rate_limit(
+                    client,
+                    LlmRateLimitEventRow {
+                        time: Utc::now().timestamp_millis(),
+                        user_id: claims.user_id as i32,
+                        is_staff: claims.is_staff,
+                        plan: match claims.plan {
+                            Plan::Free => "free".to_string(),
+                            Plan::ZedPro => "zed_pro".to_string(),
+                        },
+                        model: model.name.clone(),
+                        provider: provider.to_string(),
+                        usage_measure: resource.to_string(),
+                        requests_this_minute: usage.requests_this_minute as u64,
+                        tokens_this_minute: usage.tokens_this_minute as u64,
+                        tokens_this_day: usage.tokens_this_day as u64,
+                        users_in_recent_minutes: users_in_recent_minutes as u64,
+                        users_in_recent_days: users_in_recent_days as u64,
+                        max_requests_per_minute: per_user_max_requests_per_minute as u64,
+                        max_tokens_per_minute: per_user_max_tokens_per_minute as u64,
+                        max_tokens_per_day: per_user_max_tokens_per_day as u64,
+                    },
+                )
+                .await
+                .log_err();
+            }
+
             return Err(Error::http(
                 StatusCode::TOO_MANY_REQUESTS,
                 format!("Rate limit exceeded. Maximum {} reached.", resource),

crates/collab/src/llm/telemetry.rs 🔗

@@ -19,9 +19,38 @@ pub struct LlmUsageEventRow {
     pub spending_this_month: u64,
 }
 
+#[derive(Serialize, Debug, clickhouse::Row)] // One row of the ClickHouse rate-limit events table; field names mirror the schema's column names.
+pub struct LlmRateLimitEventRow { // Built in `check_usage_limit` when a usage check fails.
+    pub time: i64, // `Utc::now().timestamp_millis()` at the call site, i.e. Unix time in milliseconds (column type DateTime64(3)).
+    pub user_id: i32, // `claims.user_id` cast with `as i32` at the call site.
+    pub is_staff: bool, // `claims.is_staff`; NOTE(review): the visible call site skips the check for staff, so this looks always-false there.
+    pub plan: String, // "free" or "zed_pro", derived from `claims.plan`.
+    pub model: String, // `model.name` of the model that was requested.
+    pub provider: String, // `provider.to_string()` at the call site.
+    pub usage_measure: String, // Which limit was exceeded: "requests_per_minute", "tokens_per_minute" or "tokens_per_day".
+    pub requests_this_minute: u64, // The user's current usage counters at the time of the rejection.
+    pub tokens_this_minute: u64,
+    pub tokens_this_day: u64,
+    pub users_in_recent_minutes: u64, // Active-user counts, clamped to at least 1 by the caller before use as divisors.
+    pub users_in_recent_days: u64,
+    pub max_requests_per_minute: u64, // Per-user limits: the model-wide limit divided by the matching active-user count.
+    pub max_tokens_per_minute: u64,
+    pub max_tokens_per_day: u64,
+}
+
 pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> { // Writes one usage event row to ClickHouse.
     let mut insert = client.insert("llm_usage_events")?; // Target table is `llm_usage_events`.
     insert.write(&row).await?; // Any failure is returned to the caller via `?`.
     insert.end().await?; // Complete the insert before reporting success.
     Ok(())
 }
+
+/// Writes a single rate-limit event row to ClickHouse.
+///
+/// The row is inserted into `llm_rate_limit_events`, the table created by the
+/// `CREATE TABLE default.llm_rate_limit_events` schema that accompanies this change.
+///
+/// # Errors
+///
+/// Returns an error if the insert cannot be started, written, or completed.
+pub async fn report_llm_rate_limit(
+    client: &clickhouse::Client,
+    row: LlmRateLimitEventRow,
+) -> Result<()> {
+    // The table name must match the schema exactly. The caller only logs a
+    // failed insert, so a wrong name would silently drop every event.
+    let mut insert = client.insert("llm_rate_limit_events")?;
+    insert.write(&row).await?;
+    insert.end().await?;
+    Ok(())
+}