Capture telemetry data on per-user monthly LLM spending (#16050)

Created by Max Brunsfeld and Marshall

Release Notes:

- N/A

---------

Co-authored-by: Marshall <marshall@zed.dev>

Change summary

crates/anthropic/src/anthropic.rs                                             |  2 
crates/collab/migrations_llm/20240809160000_add_pricing_columns_to_models.sql |  3 
crates/collab/src/llm.rs                                                      |  7 
crates/collab/src/llm/db/queries/providers.rs                                 | 29 
crates/collab/src/llm/db/queries/usages.rs                                    | 64 
crates/collab/src/llm/db/seed.rs                                              | 74 
crates/collab/src/llm/db/tables/model.rs                                      |  2 
crates/collab/src/llm/db/tables/usage_measure.rs                              |  3 
crates/collab/src/llm/db/tests/usage_tests.rs                                 | 50 
crates/collab/src/llm/telemetry.rs                                            |  4 
10 files changed, 153 insertions(+), 85 deletions(-)

Detailed changes

crates/anthropic/src/anthropic.rs 🔗

@@ -53,7 +53,7 @@ impl Model {
             Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620",
             Model::Claude3Opus => "claude-3-opus-20240229",
             Model::Claude3Sonnet => "claude-3-sonnet-20240229",
-            Model::Claude3Haiku => "claude-3-opus-20240307",
+            Model::Claude3Haiku => "claude-3-haiku-20240307",
             Self::Custom { name, .. } => name,
         }
     }

crates/collab/src/llm.rs 🔗

@@ -457,7 +457,8 @@ impl<S> Drop for TokenCountingStream<S> {
                     claims.user_id as i32,
                     provider,
                     &model,
-                    input_token_count + output_token_count,
+                    input_token_count,
+                    output_token_count,
                     Utc::now(),
                 )
                 .await
@@ -481,7 +482,9 @@ impl<S> Drop for TokenCountingStream<S> {
                         requests_this_minute: usage.requests_this_minute as u64,
                         tokens_this_minute: usage.tokens_this_minute as u64,
                         tokens_this_day: usage.tokens_this_day as u64,
-                        tokens_this_month: usage.tokens_this_month as u64,
+                        input_tokens_this_month: usage.input_tokens_this_month as u64,
+                        output_tokens_this_month: usage.output_tokens_this_month as u64,
+                        spending_this_month: usage.spending_this_month as u64,
                     },
                 )
                 .await

crates/collab/src/llm/db/queries/providers.rs 🔗

@@ -3,10 +3,14 @@ use sea_orm::QueryOrder;
 use std::str::FromStr;
 use strum::IntoEnumIterator as _;
 
-pub struct ModelRateLimits {
+pub struct ModelParams {
+    pub provider: LanguageModelProvider,
+    pub name: String,
     pub max_requests_per_minute: i64,
     pub max_tokens_per_minute: i64,
     pub max_tokens_per_day: i64,
+    pub price_per_million_input_tokens: i32,
+    pub price_per_million_output_tokens: i32,
 }
 
 impl LlmDatabase {
@@ -75,20 +79,23 @@ impl LlmDatabase {
         Ok(())
     }
 
-    pub async fn insert_models(
-        &mut self,
-        models: &[(LanguageModelProvider, String, ModelRateLimits)],
-    ) -> Result<()> {
+    pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> {
         let all_provider_ids = &self.provider_ids;
         self.transaction(|tx| async move {
-            model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| {
-                let provider_id = all_provider_ids[&provider];
+            model::Entity::insert_many(models.into_iter().map(|model_params| {
+                let provider_id = all_provider_ids[&model_params.provider];
                 model::ActiveModel {
                     provider_id: ActiveValue::set(provider_id),
-                    name: ActiveValue::set(name.clone()),
-                    max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute),
-                    max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute),
-                    max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day),
+                    name: ActiveValue::set(model_params.name.clone()),
+                    max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute),
+                    max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute),
+                    max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day),
+                    price_per_million_input_tokens: ActiveValue::set(
+                        model_params.price_per_million_input_tokens,
+                    ),
+                    price_per_million_output_tokens: ActiveValue::set(
+                        model_params.price_per_million_output_tokens,
+                    ),
                     ..Default::default()
                 }
             }))

crates/collab/src/llm/db/queries/usages.rs 🔗

@@ -11,7 +11,9 @@ pub struct Usage {
     pub requests_this_minute: usize,
     pub tokens_this_minute: usize,
     pub tokens_this_day: usize,
-    pub tokens_this_month: usize,
+    pub input_tokens_this_month: usize,
+    pub output_tokens_this_month: usize,
+    pub spending_this_month: usize,
 }
 
 #[derive(Clone, Copy, Debug, Default)]
@@ -87,14 +89,20 @@ impl LlmDatabase {
                 self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?;
             let tokens_this_day =
                 self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
-            let tokens_this_month =
-                self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?;
+            let input_tokens_this_month =
+                self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
+            let output_tokens_this_month =
+                self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
+            let spending_this_month =
+                calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
 
             Ok(Usage {
                 requests_this_minute,
                 tokens_this_minute,
                 tokens_this_day,
-                tokens_this_month,
+                input_tokens_this_month,
+                output_tokens_this_month,
+                spending_this_month,
             })
         })
         .await
@@ -105,7 +113,8 @@ impl LlmDatabase {
         user_id: i32,
         provider: LanguageModelProvider,
         model_name: &str,
-        token_count: usize,
+        input_token_count: usize,
+        output_token_count: usize,
         now: DateTimeUtc,
     ) -> Result<Usage> {
         self.transaction(|tx| async move {
@@ -138,7 +147,7 @@ impl LlmDatabase {
                     &usages,
                     UsageMeasure::TokensPerMinute,
                     now,
-                    token_count,
+                    input_token_count + output_token_count,
                     &tx,
                 )
                 .await?;
@@ -149,27 +158,42 @@ impl LlmDatabase {
                     &usages,
                     UsageMeasure::TokensPerDay,
                     now,
-                    token_count,
+                    input_token_count + output_token_count,
                     &tx,
                 )
                 .await?;
-            let tokens_this_month = self
+            let input_tokens_this_month = self
                 .update_usage_for_measure(
                     user_id,
                     model.id,
                     &usages,
-                    UsageMeasure::TokensPerMonth,
+                    UsageMeasure::InputTokensPerMonth,
                     now,
-                    token_count,
+                    input_token_count,
                     &tx,
                 )
                 .await?;
+            let output_tokens_this_month = self
+                .update_usage_for_measure(
+                    user_id,
+                    model.id,
+                    &usages,
+                    UsageMeasure::OutputTokensPerMonth,
+                    now,
+                    output_token_count,
+                    &tx,
+                )
+                .await?;
+            let spending_this_month =
+                calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
 
             Ok(Usage {
                 requests_this_minute,
                 tokens_this_minute,
                 tokens_this_day,
-                tokens_this_month,
+                input_tokens_this_month,
+                output_tokens_this_month,
+                spending_this_month,
             })
         })
         .await
@@ -303,6 +327,18 @@ impl LlmDatabase {
     }
 }
 
+fn calculate_spending(
+    model: &model::Model,
+    input_tokens_this_month: usize,
+    output_tokens_this_month: usize,
+) -> usize {
+    let input_token_cost =
+        input_tokens_this_month * model.price_per_million_input_tokens as usize / 1_000_000;
+    let output_token_cost =
+        output_tokens_this_month * model.price_per_million_output_tokens as usize / 1_000_000;
+    input_token_cost + output_token_cost
+}
+
 const MINUTE_BUCKET_COUNT: usize = 12;
 const DAY_BUCKET_COUNT: usize = 48;
 const MONTH_BUCKET_COUNT: usize = 30;
@@ -313,7 +349,8 @@ impl UsageMeasure {
             UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT,
             UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
             UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
-            UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT,
+            UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
+            UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
         }
     }
 
@@ -322,7 +359,8 @@ impl UsageMeasure {
             UsageMeasure::RequestsPerMinute => Duration::minutes(1),
             UsageMeasure::TokensPerMinute => Duration::minutes(1),
             UsageMeasure::TokensPerDay => Duration::hours(24),
-            UsageMeasure::TokensPerMonth => Duration::days(30),
+            UsageMeasure::InputTokensPerMonth => Duration::days(30),
+            UsageMeasure::OutputTokensPerMonth => Duration::days(30),
         }
     }
 

crates/collab/src/llm/db/seed.rs 🔗

@@ -1,45 +1,45 @@
 use super::*;
 use crate::{Config, Result};
-use queries::providers::ModelRateLimits;
+use queries::providers::ModelParams;
 
 pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> {
     db.insert_models(&[
-        (
-            LanguageModelProvider::Anthropic,
-            "claude-3-5-sonnet".into(),
-            ModelRateLimits {
-                max_requests_per_minute: 5,
-                max_tokens_per_minute: 20_000,
-                max_tokens_per_day: 300_000,
-            },
-        ),
-        (
-            LanguageModelProvider::Anthropic,
-            "claude-3-opus".into(),
-            ModelRateLimits {
-                max_requests_per_minute: 5,
-                max_tokens_per_minute: 10_000,
-                max_tokens_per_day: 300_000,
-            },
-        ),
-        (
-            LanguageModelProvider::Anthropic,
-            "claude-3-sonnet".into(),
-            ModelRateLimits {
-                max_requests_per_minute: 5,
-                max_tokens_per_minute: 20_000,
-                max_tokens_per_day: 300_000,
-            },
-        ),
-        (
-            LanguageModelProvider::Anthropic,
-            "claude-3-haiku".into(),
-            ModelRateLimits {
-                max_requests_per_minute: 5,
-                max_tokens_per_minute: 25_000,
-                max_tokens_per_day: 300_000,
-            },
-        ),
+        ModelParams {
+            provider: LanguageModelProvider::Anthropic,
+            name: "claude-3-5-sonnet".into(),
+            max_requests_per_minute: 5,
+            max_tokens_per_minute: 20_000,
+            max_tokens_per_day: 300_000,
+            price_per_million_input_tokens: 300,   // $3.00/MTok
+            price_per_million_output_tokens: 1500, // $15.00/MTok
+        },
+        ModelParams {
+            provider: LanguageModelProvider::Anthropic,
+            name: "claude-3-opus".into(),
+            max_requests_per_minute: 5,
+            max_tokens_per_minute: 10_000,
+            max_tokens_per_day: 300_000,
+            price_per_million_input_tokens: 1500,  // $15.00/MTok
+            price_per_million_output_tokens: 7500, // $75.00/MTok
+        },
+        ModelParams {
+            provider: LanguageModelProvider::Anthropic,
+            name: "claude-3-sonnet".into(),
+            max_requests_per_minute: 5,
+            max_tokens_per_minute: 20_000,
+            max_tokens_per_day: 300_000,
+            price_per_million_input_tokens: 300,   // $3.00/MTok
+            price_per_million_output_tokens: 1500, // $15.00/MTok
+        },
+        ModelParams {
+            provider: LanguageModelProvider::Anthropic,
+            name: "claude-3-haiku".into(),
+            max_requests_per_minute: 5,
+            max_tokens_per_minute: 25_000,
+            max_tokens_per_day: 300_000,
+            price_per_million_input_tokens: 25,   // $0.25/MTok
+            price_per_million_output_tokens: 125, // $1.25/MTok
+        },
     ])
     .await
 }

crates/collab/src/llm/db/tables/model.rs 🔗

@@ -13,6 +13,8 @@ pub struct Model {
     pub max_requests_per_minute: i64,
     pub max_tokens_per_minute: i64,
     pub max_tokens_per_day: i64,
+    pub price_per_million_input_tokens: i32,
+    pub price_per_million_output_tokens: i32,
 }
 
 #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]

crates/collab/src/llm/db/tables/usage_measure.rs 🔗

@@ -9,7 +9,8 @@ pub enum UsageMeasure {
     RequestsPerMinute,
     TokensPerMinute,
     TokensPerDay,
-    TokensPerMonth,
+    InputTokensPerMonth,
+    OutputTokensPerMonth,
 }
 
 #[derive(Clone, Debug, PartialEq, DeriveEntityModel)]

crates/collab/src/llm/db/tests/usage_tests.rs 🔗

@@ -1,5 +1,5 @@
 use crate::{
-    llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase},
+    llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase},
     test_llm_db,
 };
 use chrono::{Duration, Utc};
@@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
     let model = "claude-3-5-sonnet";
 
     db.initialize().await.unwrap();
-    db.insert_models(&[(
+    db.insert_models(&[ModelParams {
         provider,
-        model.to_string(),
-        ModelRateLimits {
-            max_requests_per_minute: 5,
-            max_tokens_per_minute: 10_000,
-            max_tokens_per_day: 50_000,
-        },
-    )])
+        name: model.to_string(),
+        max_requests_per_minute: 5,
+        max_tokens_per_minute: 10_000,
+        max_tokens_per_day: 50_000,
+        price_per_million_input_tokens: 50,
+        price_per_million_output_tokens: 50,
+    }])
     .await
     .unwrap();
 
@@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
     let user_id = 123;
 
     let now = t0;
-    db.record_usage(user_id, provider, model, 1000, now)
+    db.record_usage(user_id, provider, model, 1000, 0, now)
         .await
         .unwrap();
 
     let now = t0 + Duration::seconds(10);
-    db.record_usage(user_id, provider, model, 2000, now)
+    db.record_usage(user_id, provider, model, 2000, 0, now)
         .await
         .unwrap();
 
@@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 3000,
             tokens_this_day: 3000,
-            tokens_this_month: 3000,
+            input_tokens_this_month: 3000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 
@@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 2000,
             tokens_this_day: 3000,
-            tokens_this_month: 3000,
+            input_tokens_this_month: 3000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 
     let now = t0 + Duration::seconds(60);
-    db.record_usage(user_id, provider, model, 3000, now)
+    db.record_usage(user_id, provider, model, 3000, 0, now)
         .await
         .unwrap();
 
@@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 2,
             tokens_this_minute: 5000,
             tokens_this_day: 6000,
-            tokens_this_month: 6000,
+            input_tokens_this_month: 6000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 
@@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 0,
             tokens_this_minute: 0,
             tokens_this_day: 5000,
-            tokens_this_month: 6000,
+            input_tokens_this_month: 6000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 
-    db.record_usage(user_id, provider, model, 4000, now)
+    db.record_usage(user_id, provider, model, 4000, 0, now)
         .await
         .unwrap();
 
@@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 1,
             tokens_this_minute: 4000,
             tokens_this_day: 9000,
-            tokens_this_month: 10000,
+            input_tokens_this_month: 10000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 
@@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
             requests_this_minute: 0,
             tokens_this_minute: 0,
             tokens_this_day: 0,
-            tokens_this_month: 9000,
+            input_tokens_this_month: 9000,
+            output_tokens_this_month: 0,
+            spending_this_month: 0,
         }
     );
 }

crates/collab/src/llm/telemetry.rs 🔗

@@ -14,7 +14,9 @@ pub struct LlmUsageEventRow {
     pub requests_this_minute: u64,
     pub tokens_this_minute: u64,
     pub tokens_this_day: u64,
-    pub tokens_this_month: u64,
+    pub input_tokens_this_month: u64,
+    pub output_tokens_this_month: u64,
+    pub spending_this_month: u64,
 }
 
 pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {