collab: Adjust rate-limiting measures for Claude 3.7 Sonnet (#28111)

Marshall Bowers created

This PR updates the usage measures used to rate-limit requests to
Claude 3.7 Sonnet.

Instead of using the combined `tokens_per_minute` measure, we now
rate-limit individually on `input_tokens_per_minute` (which excludes
cache reads) and `output_tokens_per_minute`.

Release Notes:

- N/A

Change summary

crates/collab/src/llm.rs | 58 +++++++++++++++++++++++++++++------------
1 file changed, 41 insertions(+), 17 deletions(-)

Detailed changes

crates/collab/src/llm.rs 🔗

@@ -514,23 +514,47 @@ async fn check_usage_limit(
         .get_usage(user_id, provider, model_name, Utc::now())
         .await?;
 
-    let checks = [
-        (
-            usage.requests_this_minute,
-            per_user_max_requests_per_minute,
-            UsageMeasure::RequestsPerMinute,
-        ),
-        (
-            usage.tokens_this_minute,
-            per_user_max_tokens_per_minute,
-            UsageMeasure::TokensPerMinute,
-        ),
-        (
-            usage.tokens_this_day,
-            per_user_max_tokens_per_day,
-            UsageMeasure::TokensPerDay,
-        ),
-    ];
+    let checks = match (provider, model_name) {
+        (LanguageModelProvider::Anthropic, "claude-3-7-sonnet") => vec![
+            (
+                usage.requests_this_minute,
+                per_user_max_requests_per_minute,
+                UsageMeasure::RequestsPerMinute,
+            ),
+            (
+                usage.input_tokens_this_minute,
+                per_user_max_tokens_per_minute,
+                UsageMeasure::InputTokensPerMinute,
+            ),
+            (
+                usage.output_tokens_this_minute,
+                per_user_max_tokens_per_minute,
+                UsageMeasure::OutputTokensPerMinute,
+            ),
+            (
+                usage.tokens_this_day,
+                per_user_max_tokens_per_day,
+                UsageMeasure::TokensPerDay,
+            ),
+        ],
+        _ => vec![
+            (
+                usage.requests_this_minute,
+                per_user_max_requests_per_minute,
+                UsageMeasure::RequestsPerMinute,
+            ),
+            (
+                usage.tokens_this_minute,
+                per_user_max_tokens_per_minute,
+                UsageMeasure::TokensPerMinute,
+            ),
+            (
+                usage.tokens_this_day,
+                per_user_max_tokens_per_day,
+                UsageMeasure::TokensPerDay,
+            ),
+        ],
+    };
 
     for (used, limit, usage_measure) in checks {
         if used > limit {