Increase rate limits for computing embeddings (#11326)

Nathan Sobo created

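- Raise the default hourly rate limit for computing embeddings from 120 to 5000 (still overridable via `EMBED_TEXTS_RATE_LIMIT_PER_HOUR`).
- Also, remove the rate limit for getting cached embeddings entirely.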

Release Notes:

- N/A

Change summary

crates/collab/src/rpc.rs | 26 +-------------------------
1 file changed, 1 insertion(+), 25 deletions(-)

Detailed changes

crates/collab/src/rpc.rs 🔗

@@ -4501,7 +4501,7 @@ impl RateLimit for ComputeEmbeddingsRateLimit {
         std::env::var("EMBED_TEXTS_RATE_LIMIT_PER_HOUR")
             .ok()
             .and_then(|v| v.parse().ok())
-            .unwrap_or(120) // Picked arbitrarily
+            .unwrap_or(5000) // Picked arbitrarily
     }
 
     fn refill_duration() -> chrono::Duration {
@@ -4573,25 +4573,6 @@ async fn compute_embeddings(
     Ok(())
 }
 
-struct GetCachedEmbeddingsRateLimit;
-
-impl RateLimit for GetCachedEmbeddingsRateLimit {
-    fn capacity() -> usize {
-        std::env::var("EMBED_TEXTS_RATE_LIMIT_PER_HOUR")
-            .ok()
-            .and_then(|v| v.parse().ok())
-            .unwrap_or(120) // Picked arbitrarily
-    }
-
-    fn refill_duration() -> chrono::Duration {
-        chrono::Duration::hours(1)
-    }
-
-    fn db_name() -> &'static str {
-        "get-cached-embeddings"
-    }
-}
-
 async fn get_cached_embeddings(
     request: proto::GetCachedEmbeddings,
     response: Response<proto::GetCachedEmbeddings>,
@@ -4599,11 +4580,6 @@ async fn get_cached_embeddings(
 ) -> Result<()> {
     authorize_access_to_language_models(&session).await?;
 
-    session
-        .rate_limiter
-        .check::<GetCachedEmbeddingsRateLimit>(session.user_id())
-        .await?;
-
     let db = session.db().await;
     let embeddings = db.get_embeddings(&request.model, &request.digests).await?;