Detailed changes
@@ -53,7 +53,7 @@ impl Model {
Model::Claude3_5Sonnet => "claude-3-5-sonnet-20240620",
Model::Claude3Opus => "claude-3-opus-20240229",
Model::Claude3Sonnet => "claude-3-sonnet-20240229",
- Model::Claude3Haiku => "claude-3-opus-20240307",
+ Model::Claude3Haiku => "claude-3-haiku-20240307",
Self::Custom { name, .. } => name,
}
}
@@ -0,0 +1,3 @@
+-- Per-model token prices, read when computing a user's monthly spending.
+-- Values are integers per million tokens; the seed data stores them in cents
+-- (e.g. 300 = $3.00/MTok). Rows that already exist get a price of 0.
+ALTER TABLE models
+ ADD COLUMN price_per_million_input_tokens integer NOT NULL DEFAULT 0,
+ ADD COLUMN price_per_million_output_tokens integer NOT NULL DEFAULT 0;
@@ -457,7 +457,8 @@ impl<S> Drop for TokenCountingStream<S> {
claims.user_id as i32,
provider,
&model,
- input_token_count + output_token_count,
+ input_token_count,
+ output_token_count,
Utc::now(),
)
.await
@@ -481,7 +482,9 @@ impl<S> Drop for TokenCountingStream<S> {
requests_this_minute: usage.requests_this_minute as u64,
tokens_this_minute: usage.tokens_this_minute as u64,
tokens_this_day: usage.tokens_this_day as u64,
- tokens_this_month: usage.tokens_this_month as u64,
+ input_tokens_this_month: usage.input_tokens_this_month as u64,
+ output_tokens_this_month: usage.output_tokens_this_month as u64,
+ spending_this_month: usage.spending_this_month as u64,
},
)
.await
@@ -3,10 +3,14 @@ use sea_orm::QueryOrder;
use std::str::FromStr;
use strum::IntoEnumIterator as _;
+/// Parameters describing one language model row passed to `insert_models`:
+/// which provider it belongs to, its name, its rate limits, and its token prices.
-pub struct ModelRateLimits {
+pub struct ModelParams {
+ /// Provider the model belongs to; used to look up the provider id on insert.
+ pub provider: LanguageModelProvider,
+ /// Model name as stored in the `name` column (e.g. "claude-3-5-sonnet").
+ pub name: String,
+ /// Request limit per minute (presumably enforced per user; verify against the rate limiter).
pub max_requests_per_minute: i64,
+ /// Token limit per minute (input and output tokens are summed when usage is recorded).
pub max_tokens_per_minute: i64,
+ /// Token limit per day (input and output tokens are summed when usage is recorded).
pub max_tokens_per_day: i64,
+ /// Price of one million input tokens; the seed data expresses this in cents
+ /// (e.g. `300` for $3.00/MTok).
+ pub price_per_million_input_tokens: i32,
+ /// Price of one million output tokens, in the same unit as the input price.
+ pub price_per_million_output_tokens: i32,
}
impl LlmDatabase {
@@ -75,20 +79,23 @@ impl LlmDatabase {
Ok(())
}
- pub async fn insert_models(
- &mut self,
- models: &[(LanguageModelProvider, String, ModelRateLimits)],
- ) -> Result<()> {
+ pub async fn insert_models(&mut self, models: &[ModelParams]) -> Result<()> {
let all_provider_ids = &self.provider_ids;
self.transaction(|tx| async move {
- model::Entity::insert_many(models.into_iter().map(|(provider, name, rate_limits)| {
- let provider_id = all_provider_ids[&provider];
+ model::Entity::insert_many(models.into_iter().map(|model_params| {
+ let provider_id = all_provider_ids[&model_params.provider];
model::ActiveModel {
provider_id: ActiveValue::set(provider_id),
- name: ActiveValue::set(name.clone()),
- max_requests_per_minute: ActiveValue::set(rate_limits.max_requests_per_minute),
- max_tokens_per_minute: ActiveValue::set(rate_limits.max_tokens_per_minute),
- max_tokens_per_day: ActiveValue::set(rate_limits.max_tokens_per_day),
+ name: ActiveValue::set(model_params.name.clone()),
+ max_requests_per_minute: ActiveValue::set(model_params.max_requests_per_minute),
+ max_tokens_per_minute: ActiveValue::set(model_params.max_tokens_per_minute),
+ max_tokens_per_day: ActiveValue::set(model_params.max_tokens_per_day),
+ price_per_million_input_tokens: ActiveValue::set(
+ model_params.price_per_million_input_tokens,
+ ),
+ price_per_million_output_tokens: ActiveValue::set(
+ model_params.price_per_million_output_tokens,
+ ),
..Default::default()
}
}))
@@ -11,7 +11,9 @@ pub struct Usage {
pub requests_this_minute: usize,
pub tokens_this_minute: usize,
pub tokens_this_day: usize,
- pub tokens_this_month: usize,
+ pub input_tokens_this_month: usize,
+ pub output_tokens_this_month: usize,
+ pub spending_this_month: usize,
}
#[derive(Clone, Copy, Debug, Default)]
@@ -87,14 +89,20 @@ impl LlmDatabase {
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMinute)?;
let tokens_this_day =
self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerDay)?;
- let tokens_this_month =
- self.get_usage_for_measure(&usages, now, UsageMeasure::TokensPerMonth)?;
+ let input_tokens_this_month =
+ self.get_usage_for_measure(&usages, now, UsageMeasure::InputTokensPerMonth)?;
+ let output_tokens_this_month =
+ self.get_usage_for_measure(&usages, now, UsageMeasure::OutputTokensPerMonth)?;
+ let spending_this_month =
+ calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
- tokens_this_month,
+ input_tokens_this_month,
+ output_tokens_this_month,
+ spending_this_month,
})
})
.await
@@ -105,7 +113,8 @@ impl LlmDatabase {
user_id: i32,
provider: LanguageModelProvider,
model_name: &str,
- token_count: usize,
+ input_token_count: usize,
+ output_token_count: usize,
now: DateTimeUtc,
) -> Result<Usage> {
self.transaction(|tx| async move {
@@ -138,7 +147,7 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerMinute,
now,
- token_count,
+ input_token_count + output_token_count,
&tx,
)
.await?;
@@ -149,27 +158,42 @@ impl LlmDatabase {
&usages,
UsageMeasure::TokensPerDay,
now,
- token_count,
+ input_token_count + output_token_count,
&tx,
)
.await?;
- let tokens_this_month = self
+ let input_tokens_this_month = self
.update_usage_for_measure(
user_id,
model.id,
&usages,
- UsageMeasure::TokensPerMonth,
+ UsageMeasure::InputTokensPerMonth,
now,
- token_count,
+ input_token_count,
&tx,
)
.await?;
+ let output_tokens_this_month = self
+ .update_usage_for_measure(
+ user_id,
+ model.id,
+ &usages,
+ UsageMeasure::OutputTokensPerMonth,
+ now,
+ output_token_count,
+ &tx,
+ )
+ .await?;
+ let spending_this_month =
+ calculate_spending(model, input_tokens_this_month, output_tokens_this_month);
Ok(Usage {
requests_this_minute,
tokens_this_minute,
tokens_this_day,
- tokens_this_month,
+ input_tokens_this_month,
+ output_tokens_this_month,
+ spending_this_month,
})
})
.await
@@ -303,6 +327,18 @@ impl LlmDatabase {
}
}
+/// Computes the month-to-date spending for `model` from its monthly token usage.
+///
+/// Prices come from `model.price_per_million_input_tokens` and
+/// `model.price_per_million_output_tokens`; the result is in the same unit as
+/// those prices (the seed data expresses them in cents), rounded down.
+///
+/// The input and output costs are summed *before* dividing by one million, so
+/// two fractional amounts are not each truncated to zero separately. The
+/// arithmetic is done in `u128` so the products cannot overflow on any target,
+/// a negative price is treated as zero instead of wrapping through an `as`
+/// cast, and the result saturates at `usize::MAX`.
+fn calculate_spending(
+    model: &model::Model,
+    input_tokens_this_month: usize,
+    output_tokens_this_month: usize,
+) -> usize {
+    const TOKENS_PER_PRICE_UNIT: u128 = 1_000_000;
+
+    // A negative price is nonsensical; clamp it to zero rather than letting
+    // `i32 as usize` turn it into an enormous multiplier.
+    let price = |raw: i32| u128::try_from(raw).unwrap_or(0);
+
+    let input_cost =
+        input_tokens_this_month as u128 * price(model.price_per_million_input_tokens);
+    let output_cost =
+        output_tokens_this_month as u128 * price(model.price_per_million_output_tokens);
+
+    let total = (input_cost + output_cost) / TOKENS_PER_PRICE_UNIT;
+    usize::try_from(total).unwrap_or(usize::MAX)
+}
+
const MINUTE_BUCKET_COUNT: usize = 12;
const DAY_BUCKET_COUNT: usize = 48;
const MONTH_BUCKET_COUNT: usize = 30;
@@ -313,7 +349,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerMinute => MINUTE_BUCKET_COUNT,
UsageMeasure::TokensPerDay => DAY_BUCKET_COUNT,
- UsageMeasure::TokensPerMonth => MONTH_BUCKET_COUNT,
+ UsageMeasure::InputTokensPerMonth => MONTH_BUCKET_COUNT,
+ UsageMeasure::OutputTokensPerMonth => MONTH_BUCKET_COUNT,
}
}
@@ -322,7 +359,8 @@ impl UsageMeasure {
UsageMeasure::RequestsPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerMinute => Duration::minutes(1),
UsageMeasure::TokensPerDay => Duration::hours(24),
- UsageMeasure::TokensPerMonth => Duration::days(30),
+ UsageMeasure::InputTokensPerMonth => Duration::days(30),
+ UsageMeasure::OutputTokensPerMonth => Duration::days(30),
}
}
@@ -1,45 +1,45 @@
use super::*;
use crate::{Config, Result};
-use queries::providers::ModelRateLimits;
+use queries::providers::ModelParams;
pub async fn seed_database(_config: &Config, db: &mut LlmDatabase, _force: bool) -> Result<()> {
db.insert_models(&[
- (
- LanguageModelProvider::Anthropic,
- "claude-3-5-sonnet".into(),
- ModelRateLimits {
- max_requests_per_minute: 5,
- max_tokens_per_minute: 20_000,
- max_tokens_per_day: 300_000,
- },
- ),
- (
- LanguageModelProvider::Anthropic,
- "claude-3-opus".into(),
- ModelRateLimits {
- max_requests_per_minute: 5,
- max_tokens_per_minute: 10_000,
- max_tokens_per_day: 300_000,
- },
- ),
- (
- LanguageModelProvider::Anthropic,
- "claude-3-sonnet".into(),
- ModelRateLimits {
- max_requests_per_minute: 5,
- max_tokens_per_minute: 20_000,
- max_tokens_per_day: 300_000,
- },
- ),
- (
- LanguageModelProvider::Anthropic,
- "claude-3-haiku".into(),
- ModelRateLimits {
- max_requests_per_minute: 5,
- max_tokens_per_minute: 25_000,
- max_tokens_per_day: 300_000,
- },
- ),
+ ModelParams {
+ provider: LanguageModelProvider::Anthropic,
+ name: "claude-3-5-sonnet".into(),
+ max_requests_per_minute: 5,
+ max_tokens_per_minute: 20_000,
+ max_tokens_per_day: 300_000,
+ price_per_million_input_tokens: 300, // $3.00/MTok
+ price_per_million_output_tokens: 1500, // $15.00/MTok
+ },
+ ModelParams {
+ provider: LanguageModelProvider::Anthropic,
+ name: "claude-3-opus".into(),
+ max_requests_per_minute: 5,
+ max_tokens_per_minute: 10_000,
+ max_tokens_per_day: 300_000,
+ price_per_million_input_tokens: 1500, // $15.00/MTok
+ price_per_million_output_tokens: 7500, // $75.00/MTok
+ },
+ ModelParams {
+ provider: LanguageModelProvider::Anthropic,
+ name: "claude-3-sonnet".into(),
+ max_requests_per_minute: 5,
+ max_tokens_per_minute: 20_000,
+ max_tokens_per_day: 300_000,
+ price_per_million_input_tokens: 1500, // $15.00/MTok
+ price_per_million_output_tokens: 7500, // $75.00/MTok
+ },
+ ModelParams {
+ provider: LanguageModelProvider::Anthropic,
+ name: "claude-3-haiku".into(),
+ max_requests_per_minute: 5,
+ max_tokens_per_minute: 25_000,
+ max_tokens_per_day: 300_000,
+ price_per_million_input_tokens: 25, // $0.25/MTok
+ price_per_million_output_tokens: 125, // $1.25/MTok
+ },
])
.await
}
@@ -13,6 +13,8 @@ pub struct Model {
pub max_requests_per_minute: i64,
pub max_tokens_per_minute: i64,
pub max_tokens_per_day: i64,
+ pub price_per_million_input_tokens: i32,
+ pub price_per_million_output_tokens: i32,
}
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
@@ -9,7 +9,8 @@ pub enum UsageMeasure {
RequestsPerMinute,
TokensPerMinute,
TokensPerDay,
- TokensPerMonth,
+ InputTokensPerMonth,
+ OutputTokensPerMonth,
}
#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
@@ -1,5 +1,5 @@
use crate::{
- llm::db::{queries::providers::ModelRateLimits, queries::usages::Usage, LlmDatabase},
+ llm::db::{queries::providers::ModelParams, queries::usages::Usage, LlmDatabase},
test_llm_db,
};
use chrono::{Duration, Utc};
@@ -13,15 +13,15 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let model = "claude-3-5-sonnet";
db.initialize().await.unwrap();
- db.insert_models(&[(
+ db.insert_models(&[ModelParams {
provider,
- model.to_string(),
- ModelRateLimits {
- max_requests_per_minute: 5,
- max_tokens_per_minute: 10_000,
- max_tokens_per_day: 50_000,
- },
- )])
+ name: model.to_string(),
+ max_requests_per_minute: 5,
+ max_tokens_per_minute: 10_000,
+ max_tokens_per_day: 50_000,
+ price_per_million_input_tokens: 50,
+ price_per_million_output_tokens: 50,
+ }])
.await
.unwrap();
@@ -29,12 +29,12 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
let user_id = 123;
let now = t0;
- db.record_usage(user_id, provider, model, 1000, now)
+ db.record_usage(user_id, provider, model, 1000, 0, now)
.await
.unwrap();
let now = t0 + Duration::seconds(10);
- db.record_usage(user_id, provider, model, 2000, now)
+ db.record_usage(user_id, provider, model, 2000, 0, now)
.await
.unwrap();
@@ -45,7 +45,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 3000,
tokens_this_day: 3000,
- tokens_this_month: 3000,
+ input_tokens_this_month: 3000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
@@ -57,12 +59,14 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 2000,
tokens_this_day: 3000,
- tokens_this_month: 3000,
+ input_tokens_this_month: 3000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
let now = t0 + Duration::seconds(60);
- db.record_usage(user_id, provider, model, 3000, now)
+ db.record_usage(user_id, provider, model, 3000, 0, now)
.await
.unwrap();
@@ -73,7 +77,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 2,
tokens_this_minute: 5000,
tokens_this_day: 6000,
- tokens_this_month: 6000,
+ input_tokens_this_month: 6000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
@@ -86,11 +92,13 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 5000,
- tokens_this_month: 6000,
+ input_tokens_this_month: 6000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
- db.record_usage(user_id, provider, model, 4000, now)
+ db.record_usage(user_id, provider, model, 4000, 0, now)
.await
.unwrap();
@@ -101,7 +109,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 1,
tokens_this_minute: 4000,
tokens_this_day: 9000,
- tokens_this_month: 10000,
+ input_tokens_this_month: 10000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
@@ -114,7 +124,9 @@ async fn test_tracking_usage(db: &mut LlmDatabase) {
requests_this_minute: 0,
tokens_this_minute: 0,
tokens_this_day: 0,
- tokens_this_month: 9000,
+ input_tokens_this_month: 9000,
+ output_tokens_this_month: 0,
+ spending_this_month: 0,
}
);
}
@@ -14,7 +14,9 @@ pub struct LlmUsageEventRow {
pub requests_this_minute: u64,
pub tokens_this_minute: u64,
pub tokens_this_day: u64,
- pub tokens_this_month: u64,
+ pub input_tokens_this_month: u64,
+ pub output_tokens_this_month: u64,
+ pub spending_this_month: u64,
}
pub async fn report_llm_usage(client: &clickhouse::Client, row: LlmUsageEventRow) -> Result<()> {