From 03acbb7de35afc27ae7eb32b43b1c4121875da8f Mon Sep 17 00:00:00 2001 From: Marshall Bowers Date: Tue, 11 Nov 2025 11:13:59 -0500 Subject: [PATCH] collab: Remove unused embeddings queries and model (#42463) This PR removes the queries and database model for embeddings, as they're no longer used. Release Notes: - N/A --- crates/collab/src/db/queries.rs | 1 - crates/collab/src/db/queries/embeddings.rs | 94 ------------------- crates/collab/src/db/tables.rs | 1 - crates/collab/src/db/tables/embedding.rs | 18 ---- crates/collab/src/db/tests.rs | 3 - crates/collab/src/db/tests/embedding_tests.rs | 87 ----------------- crates/collab/src/main.rs | 4 +- 7 files changed, 1 insertion(+), 207 deletions(-) delete mode 100644 crates/collab/src/db/queries/embeddings.rs delete mode 100644 crates/collab/src/db/tables/embedding.rs delete mode 100644 crates/collab/src/db/tests/embedding_tests.rs diff --git a/crates/collab/src/db/queries.rs b/crates/collab/src/db/queries.rs index 7b457a5da438e0a9ab7c6cd79368b2845e962318..db91021c22b35b7b7159bd5cd54e28f8fa1a14e2 100644 --- a/crates/collab/src/db/queries.rs +++ b/crates/collab/src/db/queries.rs @@ -5,7 +5,6 @@ pub mod buffers; pub mod channels; pub mod contacts; pub mod contributors; -pub mod embeddings; pub mod extensions; pub mod notifications; pub mod projects; diff --git a/crates/collab/src/db/queries/embeddings.rs b/crates/collab/src/db/queries/embeddings.rs deleted file mode 100644 index 6ae8013284f4652d5cb0d4a19214c3a5c1a42df0..0000000000000000000000000000000000000000 --- a/crates/collab/src/db/queries/embeddings.rs +++ /dev/null @@ -1,94 +0,0 @@ -use super::*; -use time::Duration; -use time::OffsetDateTime; - -impl Database { - pub async fn get_embeddings( - &self, - model: &str, - digests: &[Vec], - ) -> Result, Vec>> { - self.transaction(|tx| async move { - let embeddings = { - let mut db_embeddings = embedding::Entity::find() - .filter( - embedding::Column::Model.eq(model).and( - embedding::Column::Digest - .is_in(digests.iter().map(|digest| digest.as_slice())), - ), - ) - .stream(&*tx) - .await?; - - let mut embeddings = HashMap::default(); - while let Some(db_embedding) = db_embeddings.next().await { - let db_embedding = db_embedding?; - embeddings.insert(db_embedding.digest, db_embedding.dimensions); - } - embeddings - }; - - if !embeddings.is_empty() { - let now = OffsetDateTime::now_utc(); - let retrieved_at = PrimitiveDateTime::new(now.date(), now.time()); - - embedding::Entity::update_many() - .filter( - embedding::Column::Digest - .is_in(embeddings.keys().map(|digest| digest.as_slice())), - ) - .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at)) - .exec(&*tx) - .await?; - } - - Ok(embeddings) - }) - .await - } - - pub async fn save_embeddings( - &self, - model: &str, - embeddings: &HashMap, Vec>, - ) -> Result<()> { - self.transaction(|tx| async move { - embedding::Entity::insert_many(embeddings.iter().map(|(digest, dimensions)| { - let now_offset_datetime = OffsetDateTime::now_utc(); - let retrieved_at = - PrimitiveDateTime::new(now_offset_datetime.date(), now_offset_datetime.time()); - - embedding::ActiveModel { - model: ActiveValue::set(model.to_string()), - digest: ActiveValue::set(digest.clone()), - dimensions: ActiveValue::set(dimensions.clone()), - retrieved_at: ActiveValue::set(retrieved_at), - } - })) - .on_conflict( - OnConflict::columns([embedding::Column::Model, embedding::Column::Digest]) - .do_nothing() - .to_owned(), - ) - .exec_without_returning(&*tx) - .await?; - Ok(()) - }) - .await - } - - pub async fn purge_old_embeddings(&self) -> Result<()> { - self.transaction(|tx| async move { - embedding::Entity::delete_many() - .filter( - embedding::Column::RetrievedAt - .lte(OffsetDateTime::now_utc() - Duration::days(60)), - ) - .exec(&*tx) - .await?; - - Ok(()) - }) - .await - } -} diff --git a/crates/collab/src/db/tables.rs b/crates/collab/src/db/tables.rs index e619acaaf2bc237caac67dedcb5c738114d260d5..0220955824af30f489afe32f9695af3dbb52cdc9 100644 --- a/crates/collab/src/db/tables.rs +++ b/crates/collab/src/db/tables.rs @@ -8,7 +8,6 @@ pub mod channel_chat_participant; pub mod channel_member; pub mod contact; pub mod contributor; -pub mod embedding; pub mod extension; pub mod extension_version; pub mod follower; diff --git a/crates/collab/src/db/tables/embedding.rs b/crates/collab/src/db/tables/embedding.rs deleted file mode 100644 index 8743b4b9e65751bf350bff1db532de38ce73f368..0000000000000000000000000000000000000000 --- a/crates/collab/src/db/tables/embedding.rs +++ /dev/null @@ -1,18 +0,0 @@ -use sea_orm::entity::prelude::*; -use time::PrimitiveDateTime; - -#[derive(Clone, Debug, PartialEq, DeriveEntityModel)] -#[sea_orm(table_name = "embeddings")] -pub struct Model { - #[sea_orm(primary_key)] - pub model: String, - #[sea_orm(primary_key)] - pub digest: Vec, - pub dimensions: Vec, - pub retrieved_at: PrimitiveDateTime, -} - -#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] -pub enum Relation {} - -impl ActiveModelBehavior for ActiveModel {} diff --git a/crates/collab/src/db/tests.rs b/crates/collab/src/db/tests.rs index 67c36576aac0f938bbc040202d7fa83e35af2d3b..7aed2ebc2dd16f31cde4116a70377b40b1cb8b2f 100644 --- a/crates/collab/src/db/tests.rs +++ b/crates/collab/src/db/tests.rs @@ -2,9 +2,6 @@ mod buffer_tests; mod channel_tests; mod contributor_tests; mod db_tests; -// we only run postgres tests on macos right now -#[cfg(target_os = "macos")] -mod embedding_tests; mod extension_tests; use crate::migrations::run_database_migrations; diff --git a/crates/collab/src/db/tests/embedding_tests.rs b/crates/collab/src/db/tests/embedding_tests.rs deleted file mode 100644 index 5d8d69c0304d3a16b55e9d7b1477fe62cc22024a..0000000000000000000000000000000000000000 --- a/crates/collab/src/db/tests/embedding_tests.rs +++ /dev/null @@ -1,87 +0,0 @@ -use super::TestDb; -use crate::db::embedding; -use collections::HashMap; -use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, sea_query::Expr}; -use std::ops::Sub; -use time::{Duration, OffsetDateTime, PrimitiveDateTime}; - -// SQLite does not support array arguments, so we only test this against a real postgres instance -#[gpui::test] -async fn test_get_embeddings_postgres(cx: &mut gpui::TestAppContext) { - let test_db = TestDb::postgres(cx.executor()); - let db = test_db.db(); - - let provider = "test_model"; - let digest1 = vec![1, 2, 3]; - let digest2 = vec![4, 5, 6]; - let embeddings = HashMap::from_iter([ - (digest1.clone(), vec![0.1, 0.2, 0.3]), - (digest2.clone(), vec![0.4, 0.5, 0.6]), - ]); - - // Save embeddings - db.save_embeddings(provider, &embeddings).await.unwrap(); - - // Retrieve embeddings - let retrieved_embeddings = db - .get_embeddings(provider, &[digest1.clone(), digest2.clone()]) - .await - .unwrap(); - assert_eq!(retrieved_embeddings.len(), 2); - assert!(retrieved_embeddings.contains_key(&digest1)); - assert!(retrieved_embeddings.contains_key(&digest2)); - - // Check if the retrieved embeddings are correct - assert_eq!(retrieved_embeddings[&digest1], vec![0.1, 0.2, 0.3]); - assert_eq!(retrieved_embeddings[&digest2], vec![0.4, 0.5, 0.6]); -} - -#[gpui::test] -async fn test_purge_old_embeddings(cx: &mut gpui::TestAppContext) { - let test_db = TestDb::postgres(cx.executor()); - let db = test_db.db(); - - let model = "test_model"; - let digest = vec![7, 8, 9]; - let embeddings = HashMap::from_iter([(digest.clone(), vec![0.7, 0.8, 0.9])]); - - // Save old embeddings - db.save_embeddings(model, &embeddings).await.unwrap(); - - // Reach into the DB and change the retrieved at to be > 60 days - db.transaction(|tx| { - let digest = digest.clone(); - async move { - let sixty_days_ago = OffsetDateTime::now_utc().sub(Duration::days(61)); - let retrieved_at = PrimitiveDateTime::new(sixty_days_ago.date(), sixty_days_ago.time()); - - embedding::Entity::update_many() - .filter( - embedding::Column::Model - .eq(model) - .and(embedding::Column::Digest.eq(digest)), - ) - .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at)) - .exec(&*tx) - .await - .unwrap(); - - Ok(()) - } - }) - .await - .unwrap(); - - // Purge old embeddings - db.purge_old_embeddings().await.unwrap(); - - // Try to retrieve the purged embeddings - let retrieved_embeddings = db - .get_embeddings(model, std::slice::from_ref(&digest)) - .await - .unwrap(); - assert!( - retrieved_embeddings.is_empty(), - "Old embeddings should have been purged" - ); -} diff --git a/crates/collab/src/main.rs b/crates/collab/src/main.rs index 6b94459910647c1e48ee69f2b0dd38afd3723821..08047c56e55c016f3fd2b34d0935fb33a61b5dad 100644 --- a/crates/collab/src/main.rs +++ b/crates/collab/src/main.rs @@ -13,7 +13,7 @@ use collab::llm::db::LlmDatabase; use collab::migrations::run_database_migrations; use collab::{ AppState, Config, Result, api::fetch_extensions_from_blob_store_periodically, db, env, - executor::Executor, rpc::ResultExt, + executor::Executor, }; use db::Database; use std::{ @@ -95,8 +95,6 @@ async fn main() -> Result<()> { let state = AppState::new(config, Executor::Production).await?; if mode.is_collab() { - state.db.purge_old_embeddings().await.trace_err(); - let epoch = state .db .create_server(&state.config.zed_environment)