collab: Remove unused embeddings queries and model (#42463)

Marshall Bowers created

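This PR removes the queries and database model for embeddings, as
they're no longer used. It also removes the associated database tests
and the `purge_old_embeddings` call made when collab starts up.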

Release Notes:

- N/A

Change summary

crates/collab/src/db/queries.rs               |  1 
crates/collab/src/db/queries/embeddings.rs    | 94 ---------------------
crates/collab/src/db/tables.rs                |  1 
crates/collab/src/db/tables/embedding.rs      | 18 ----
crates/collab/src/db/tests.rs                 |  3 
crates/collab/src/db/tests/embedding_tests.rs | 87 -------------------
crates/collab/src/main.rs                     |  4 
7 files changed, 1 insertion(+), 207 deletions(-)

Detailed changes

crates/collab/src/db/queries.rs 🔗

@@ -5,7 +5,6 @@ pub mod buffers;
 pub mod channels;
 pub mod contacts;
 pub mod contributors;
-pub mod embeddings;
 pub mod extensions;
 pub mod notifications;
 pub mod projects;

crates/collab/src/db/queries/embeddings.rs 🔗

@@ -1,94 +0,0 @@
-use super::*;
-use time::Duration;
-use time::OffsetDateTime;
-
-impl Database {
-    pub async fn get_embeddings(
-        &self,
-        model: &str,
-        digests: &[Vec<u8>],
-    ) -> Result<HashMap<Vec<u8>, Vec<f32>>> {
-        self.transaction(|tx| async move {
-            let embeddings = {
-                let mut db_embeddings = embedding::Entity::find()
-                    .filter(
-                        embedding::Column::Model.eq(model).and(
-                            embedding::Column::Digest
-                                .is_in(digests.iter().map(|digest| digest.as_slice())),
-                        ),
-                    )
-                    .stream(&*tx)
-                    .await?;
-
-                let mut embeddings = HashMap::default();
-                while let Some(db_embedding) = db_embeddings.next().await {
-                    let db_embedding = db_embedding?;
-                    embeddings.insert(db_embedding.digest, db_embedding.dimensions);
-                }
-                embeddings
-            };
-
-            if !embeddings.is_empty() {
-                let now = OffsetDateTime::now_utc();
-                let retrieved_at = PrimitiveDateTime::new(now.date(), now.time());
-
-                embedding::Entity::update_many()
-                    .filter(
-                        embedding::Column::Digest
-                            .is_in(embeddings.keys().map(|digest| digest.as_slice())),
-                    )
-                    .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at))
-                    .exec(&*tx)
-                    .await?;
-            }
-
-            Ok(embeddings)
-        })
-        .await
-    }
-
-    pub async fn save_embeddings(
-        &self,
-        model: &str,
-        embeddings: &HashMap<Vec<u8>, Vec<f32>>,
-    ) -> Result<()> {
-        self.transaction(|tx| async move {
-            embedding::Entity::insert_many(embeddings.iter().map(|(digest, dimensions)| {
-                let now_offset_datetime = OffsetDateTime::now_utc();
-                let retrieved_at =
-                    PrimitiveDateTime::new(now_offset_datetime.date(), now_offset_datetime.time());
-
-                embedding::ActiveModel {
-                    model: ActiveValue::set(model.to_string()),
-                    digest: ActiveValue::set(digest.clone()),
-                    dimensions: ActiveValue::set(dimensions.clone()),
-                    retrieved_at: ActiveValue::set(retrieved_at),
-                }
-            }))
-            .on_conflict(
-                OnConflict::columns([embedding::Column::Model, embedding::Column::Digest])
-                    .do_nothing()
-                    .to_owned(),
-            )
-            .exec_without_returning(&*tx)
-            .await?;
-            Ok(())
-        })
-        .await
-    }
-
-    pub async fn purge_old_embeddings(&self) -> Result<()> {
-        self.transaction(|tx| async move {
-            embedding::Entity::delete_many()
-                .filter(
-                    embedding::Column::RetrievedAt
-                        .lte(OffsetDateTime::now_utc() - Duration::days(60)),
-                )
-                .exec(&*tx)
-                .await?;
-
-            Ok(())
-        })
-        .await
-    }
-}

crates/collab/src/db/tables.rs 🔗

@@ -8,7 +8,6 @@ pub mod channel_chat_participant;
 pub mod channel_member;
 pub mod contact;
 pub mod contributor;
-pub mod embedding;
 pub mod extension;
 pub mod extension_version;
 pub mod follower;

crates/collab/src/db/tables/embedding.rs 🔗

@@ -1,18 +0,0 @@
-use sea_orm::entity::prelude::*;
-use time::PrimitiveDateTime;
-
-#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
-#[sea_orm(table_name = "embeddings")]
-pub struct Model {
-    #[sea_orm(primary_key)]
-    pub model: String,
-    #[sea_orm(primary_key)]
-    pub digest: Vec<u8>,
-    pub dimensions: Vec<f32>,
-    pub retrieved_at: PrimitiveDateTime,
-}
-
-#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
-pub enum Relation {}
-
-impl ActiveModelBehavior for ActiveModel {}

crates/collab/src/db/tests.rs 🔗

@@ -2,9 +2,6 @@ mod buffer_tests;
 mod channel_tests;
 mod contributor_tests;
 mod db_tests;
-// we only run postgres tests on macos right now
-#[cfg(target_os = "macos")]
-mod embedding_tests;
 mod extension_tests;
 
 use crate::migrations::run_database_migrations;

crates/collab/src/db/tests/embedding_tests.rs 🔗

@@ -1,87 +0,0 @@
-use super::TestDb;
-use crate::db::embedding;
-use collections::HashMap;
-use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, sea_query::Expr};
-use std::ops::Sub;
-use time::{Duration, OffsetDateTime, PrimitiveDateTime};
-
-// SQLite does not support array arguments, so we only test this against a real postgres instance
-#[gpui::test]
-async fn test_get_embeddings_postgres(cx: &mut gpui::TestAppContext) {
-    let test_db = TestDb::postgres(cx.executor());
-    let db = test_db.db();
-
-    let provider = "test_model";
-    let digest1 = vec![1, 2, 3];
-    let digest2 = vec![4, 5, 6];
-    let embeddings = HashMap::from_iter([
-        (digest1.clone(), vec![0.1, 0.2, 0.3]),
-        (digest2.clone(), vec![0.4, 0.5, 0.6]),
-    ]);
-
-    // Save embeddings
-    db.save_embeddings(provider, &embeddings).await.unwrap();
-
-    // Retrieve embeddings
-    let retrieved_embeddings = db
-        .get_embeddings(provider, &[digest1.clone(), digest2.clone()])
-        .await
-        .unwrap();
-    assert_eq!(retrieved_embeddings.len(), 2);
-    assert!(retrieved_embeddings.contains_key(&digest1));
-    assert!(retrieved_embeddings.contains_key(&digest2));
-
-    // Check if the retrieved embeddings are correct
-    assert_eq!(retrieved_embeddings[&digest1], vec![0.1, 0.2, 0.3]);
-    assert_eq!(retrieved_embeddings[&digest2], vec![0.4, 0.5, 0.6]);
-}
-
-#[gpui::test]
-async fn test_purge_old_embeddings(cx: &mut gpui::TestAppContext) {
-    let test_db = TestDb::postgres(cx.executor());
-    let db = test_db.db();
-
-    let model = "test_model";
-    let digest = vec![7, 8, 9];
-    let embeddings = HashMap::from_iter([(digest.clone(), vec![0.7, 0.8, 0.9])]);
-
-    // Save old embeddings
-    db.save_embeddings(model, &embeddings).await.unwrap();
-
-    // Reach into the DB and change the retrieved at to be > 60 days
-    db.transaction(|tx| {
-        let digest = digest.clone();
-        async move {
-            let sixty_days_ago = OffsetDateTime::now_utc().sub(Duration::days(61));
-            let retrieved_at = PrimitiveDateTime::new(sixty_days_ago.date(), sixty_days_ago.time());
-
-            embedding::Entity::update_many()
-                .filter(
-                    embedding::Column::Model
-                        .eq(model)
-                        .and(embedding::Column::Digest.eq(digest)),
-                )
-                .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at))
-                .exec(&*tx)
-                .await
-                .unwrap();
-
-            Ok(())
-        }
-    })
-    .await
-    .unwrap();
-
-    // Purge old embeddings
-    db.purge_old_embeddings().await.unwrap();
-
-    // Try to retrieve the purged embeddings
-    let retrieved_embeddings = db
-        .get_embeddings(model, std::slice::from_ref(&digest))
-        .await
-        .unwrap();
-    assert!(
-        retrieved_embeddings.is_empty(),
-        "Old embeddings should have been purged"
-    );
-}

crates/collab/src/main.rs 🔗

@@ -13,7 +13,7 @@ use collab::llm::db::LlmDatabase;
 use collab::migrations::run_database_migrations;
 use collab::{
     AppState, Config, Result, api::fetch_extensions_from_blob_store_periodically, db, env,
-    executor::Executor, rpc::ResultExt,
+    executor::Executor,
 };
 use db::Database;
 use std::{
@@ -95,8 +95,6 @@ async fn main() -> Result<()> {
                 let state = AppState::new(config, Executor::Production).await?;
 
                 if mode.is_collab() {
-                    state.db.purge_old_embeddings().await.trace_err();
-
                     let epoch = state
                         .db
                         .create_server(&state.config.zed_environment)