crates/collab/src/db/queries.rs 🔗
@@ -5,7 +5,6 @@ pub mod buffers;
pub mod channels;
pub mod contacts;
pub mod contributors;
-pub mod embeddings;
pub mod extensions;
pub mod notifications;
pub mod projects;
Marshall Bowers created
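This PR removes the queries and database model for embeddings, as
they're no longer used. It also removes the associated Postgres-only
tests and the startup call to `purge_old_embeddings` in `main.rs`.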
Release Notes:
- N/A
crates/collab/src/db/queries.rs | 1
crates/collab/src/db/queries/embeddings.rs | 94 ---------------------
crates/collab/src/db/tables.rs | 1
crates/collab/src/db/tables/embedding.rs | 18 ----
crates/collab/src/db/tests.rs | 3
crates/collab/src/db/tests/embedding_tests.rs | 87 -------------------
crates/collab/src/main.rs | 4
7 files changed, 1 insertion(+), 207 deletions(-)
@@ -5,7 +5,6 @@ pub mod buffers;
pub mod channels;
pub mod contacts;
pub mod contributors;
-pub mod embeddings;
pub mod extensions;
pub mod notifications;
pub mod projects;
@@ -1,94 +0,0 @@
-use super::*;
-use time::Duration;
-use time::OffsetDateTime;
-
-impl Database {
- pub async fn get_embeddings(
- &self,
- model: &str,
- digests: &[Vec<u8>],
- ) -> Result<HashMap<Vec<u8>, Vec<f32>>> {
- self.transaction(|tx| async move {
- let embeddings = {
- let mut db_embeddings = embedding::Entity::find()
- .filter(
- embedding::Column::Model.eq(model).and(
- embedding::Column::Digest
- .is_in(digests.iter().map(|digest| digest.as_slice())),
- ),
- )
- .stream(&*tx)
- .await?;
-
- let mut embeddings = HashMap::default();
- while let Some(db_embedding) = db_embeddings.next().await {
- let db_embedding = db_embedding?;
- embeddings.insert(db_embedding.digest, db_embedding.dimensions);
- }
- embeddings
- };
-
- if !embeddings.is_empty() {
- let now = OffsetDateTime::now_utc();
- let retrieved_at = PrimitiveDateTime::new(now.date(), now.time());
-
- embedding::Entity::update_many()
- .filter(
- embedding::Column::Digest
- .is_in(embeddings.keys().map(|digest| digest.as_slice())),
- )
- .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at))
- .exec(&*tx)
- .await?;
- }
-
- Ok(embeddings)
- })
- .await
- }
-
- pub async fn save_embeddings(
- &self,
- model: &str,
- embeddings: &HashMap<Vec<u8>, Vec<f32>>,
- ) -> Result<()> {
- self.transaction(|tx| async move {
- embedding::Entity::insert_many(embeddings.iter().map(|(digest, dimensions)| {
- let now_offset_datetime = OffsetDateTime::now_utc();
- let retrieved_at =
- PrimitiveDateTime::new(now_offset_datetime.date(), now_offset_datetime.time());
-
- embedding::ActiveModel {
- model: ActiveValue::set(model.to_string()),
- digest: ActiveValue::set(digest.clone()),
- dimensions: ActiveValue::set(dimensions.clone()),
- retrieved_at: ActiveValue::set(retrieved_at),
- }
- }))
- .on_conflict(
- OnConflict::columns([embedding::Column::Model, embedding::Column::Digest])
- .do_nothing()
- .to_owned(),
- )
- .exec_without_returning(&*tx)
- .await?;
- Ok(())
- })
- .await
- }
-
- pub async fn purge_old_embeddings(&self) -> Result<()> {
- self.transaction(|tx| async move {
- embedding::Entity::delete_many()
- .filter(
- embedding::Column::RetrievedAt
- .lte(OffsetDateTime::now_utc() - Duration::days(60)),
- )
- .exec(&*tx)
- .await?;
-
- Ok(())
- })
- .await
- }
-}
@@ -8,7 +8,6 @@ pub mod channel_chat_participant;
pub mod channel_member;
pub mod contact;
pub mod contributor;
-pub mod embedding;
pub mod extension;
pub mod extension_version;
pub mod follower;
@@ -1,18 +0,0 @@
-use sea_orm::entity::prelude::*;
-use time::PrimitiveDateTime;
-
-#[derive(Clone, Debug, PartialEq, DeriveEntityModel)]
-#[sea_orm(table_name = "embeddings")]
-pub struct Model {
- #[sea_orm(primary_key)]
- pub model: String,
- #[sea_orm(primary_key)]
- pub digest: Vec<u8>,
- pub dimensions: Vec<f32>,
- pub retrieved_at: PrimitiveDateTime,
-}
-
-#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
-pub enum Relation {}
-
-impl ActiveModelBehavior for ActiveModel {}
@@ -2,9 +2,6 @@ mod buffer_tests;
mod channel_tests;
mod contributor_tests;
mod db_tests;
-// we only run postgres tests on macos right now
-#[cfg(target_os = "macos")]
-mod embedding_tests;
mod extension_tests;
use crate::migrations::run_database_migrations;
@@ -1,87 +0,0 @@
-use super::TestDb;
-use crate::db::embedding;
-use collections::HashMap;
-use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, sea_query::Expr};
-use std::ops::Sub;
-use time::{Duration, OffsetDateTime, PrimitiveDateTime};
-
-// SQLite does not support array arguments, so we only test this against a real postgres instance
-#[gpui::test]
-async fn test_get_embeddings_postgres(cx: &mut gpui::TestAppContext) {
- let test_db = TestDb::postgres(cx.executor());
- let db = test_db.db();
-
- let provider = "test_model";
- let digest1 = vec![1, 2, 3];
- let digest2 = vec![4, 5, 6];
- let embeddings = HashMap::from_iter([
- (digest1.clone(), vec![0.1, 0.2, 0.3]),
- (digest2.clone(), vec![0.4, 0.5, 0.6]),
- ]);
-
- // Save embeddings
- db.save_embeddings(provider, &embeddings).await.unwrap();
-
- // Retrieve embeddings
- let retrieved_embeddings = db
- .get_embeddings(provider, &[digest1.clone(), digest2.clone()])
- .await
- .unwrap();
- assert_eq!(retrieved_embeddings.len(), 2);
- assert!(retrieved_embeddings.contains_key(&digest1));
- assert!(retrieved_embeddings.contains_key(&digest2));
-
- // Check if the retrieved embeddings are correct
- assert_eq!(retrieved_embeddings[&digest1], vec![0.1, 0.2, 0.3]);
- assert_eq!(retrieved_embeddings[&digest2], vec![0.4, 0.5, 0.6]);
-}
-
-#[gpui::test]
-async fn test_purge_old_embeddings(cx: &mut gpui::TestAppContext) {
- let test_db = TestDb::postgres(cx.executor());
- let db = test_db.db();
-
- let model = "test_model";
- let digest = vec![7, 8, 9];
- let embeddings = HashMap::from_iter([(digest.clone(), vec![0.7, 0.8, 0.9])]);
-
- // Save old embeddings
- db.save_embeddings(model, &embeddings).await.unwrap();
-
- // Reach into the DB and change the retrieved at to be > 60 days
- db.transaction(|tx| {
- let digest = digest.clone();
- async move {
- let sixty_days_ago = OffsetDateTime::now_utc().sub(Duration::days(61));
- let retrieved_at = PrimitiveDateTime::new(sixty_days_ago.date(), sixty_days_ago.time());
-
- embedding::Entity::update_many()
- .filter(
- embedding::Column::Model
- .eq(model)
- .and(embedding::Column::Digest.eq(digest)),
- )
- .col_expr(embedding::Column::RetrievedAt, Expr::value(retrieved_at))
- .exec(&*tx)
- .await
- .unwrap();
-
- Ok(())
- }
- })
- .await
- .unwrap();
-
- // Purge old embeddings
- db.purge_old_embeddings().await.unwrap();
-
- // Try to retrieve the purged embeddings
- let retrieved_embeddings = db
- .get_embeddings(model, std::slice::from_ref(&digest))
- .await
- .unwrap();
- assert!(
- retrieved_embeddings.is_empty(),
- "Old embeddings should have been purged"
- );
-}
@@ -13,7 +13,7 @@ use collab::llm::db::LlmDatabase;
use collab::migrations::run_database_migrations;
use collab::{
AppState, Config, Result, api::fetch_extensions_from_blob_store_periodically, db, env,
- executor::Executor, rpc::ResultExt,
+ executor::Executor,
};
use db::Database;
use std::{
@@ -95,8 +95,6 @@ async fn main() -> Result<()> {
let state = AppState::new(config, Executor::Production).await?;
if mode.is_collab() {
- state.db.purge_old_embeddings().await.trace_err();
-
let epoch = state
.db
.create_server(&state.config.zed_environment)