added opt-in default settings for vector store

Created by KCaverly

Change summary

Cargo.lock                                       |  2 +
assets/settings/default.json                     |  6 +++
crates/vector_store/Cargo.toml                   |  3 +
crates/vector_store/src/db.rs                    |  7 ---
crates/vector_store/src/vector_store.rs          | 29 +++++++++------
crates/vector_store/src/vector_store_settings.rs | 32 ++++++++++++++++++
crates/vector_store/src/vector_store_tests.rs    | 10 +++++
7 files changed, 70 insertions(+), 19 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -8503,8 +8503,10 @@ dependencies = [
  "rand 0.8.5",
  "rpc",
  "rusqlite",
+ "schemars",
  "serde",
  "serde_json",
+ "settings",
  "smol",
  "tempdir",
  "theme",

assets/settings/default.json 🔗

@@ -291,6 +291,12 @@
     // the terminal will default to matching the buffer's font family.
     // "font_family": "Zed Mono"
   },
+  // Different settings for the vector store
+  "vector_store": {
+    "enable": false,
+    "reindexing_delay_seconds": 600,
+    "embedding_batch_size": 150
+  },
   // Different settings for specific languages.
   "languages": {
     "Plain Text": {

crates/vector_store/Cargo.toml 🔗

@@ -18,6 +18,7 @@ picker = { path = "../picker" }
 theme = { path = "../theme" }
 editor = { path = "../editor" }
 rpc = { path = "../rpc" }
+settings = { path = "../settings" }
 anyhow.workspace = true
 futures.workspace = true
 smol.workspace = true
@@ -33,6 +34,7 @@ bincode = "1.3.3"
 matrixmultiply = "0.3.7"
 tiktoken-rs = "0.5.0"
 rand.workspace = true
+schemars.workspace = true
 
 [dev-dependencies]
 gpui = { path = "../gpui", features = ["test-support"] }
@@ -40,6 +42,7 @@ language = { path = "../language", features = ["test-support"] }
 project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
+settings = { path = "../settings", features = ["test-support"]}
 tree-sitter-rust = "*"
 rand.workspace = true
 unindent.workspace = true

crates/vector_store/src/db.rs 🔗

@@ -204,8 +204,6 @@ impl VectorDatabase {
     ) -> Result<Vec<(i64, PathBuf, usize, String)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
         self.for_each_document(&worktree_ids, |id, embedding| {
-            eprintln!("document {id} {embedding:?}");
-
             let similarity = dot(&embedding, &query_embedding);
             let ix = match results
                 .binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal))
@@ -243,10 +241,7 @@ impl VectorDatabase {
                 Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
             })?
             .filter_map(|row| row.ok())
-            .for_each(|(id, embedding)| {
-                dbg!("id");
-                f(id, embedding.0)
-            });
+            .for_each(|(id, embedding)| f(id, embedding.0));
         Ok(())
     }
 

crates/vector_store/src/vector_store.rs 🔗

@@ -2,22 +2,25 @@ mod db;
 mod embedding;
 mod modal;
 mod parsing;
+mod vector_store_settings;
 
 #[cfg(test)]
 mod vector_store_tests;
 
+use crate::vector_store_settings::VectorStoreSettings;
 use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
 use gpui::{
-    AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext,
-    WeakModelHandle,
+    AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Subscription, Task,
+    ViewContext, WeakModelHandle,
 };
 use language::{Language, LanguageRegistry};
 use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
 use parsing::{CodeContextRetriever, ParsedFile};
 use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId};
+use settings::SettingsStore;
 use smol::channel;
 use std::{
     collections::HashMap,
@@ -34,9 +37,6 @@ use util::{
 };
 use workspace::{Workspace, WorkspaceCreated};
 
-const REINDEXING_DELAY_SECONDS: u64 = 3;
-const EMBEDDINGS_BATCH_SIZE: usize = 150;
-
 pub fn init(
     fs: Arc<dyn Fs>,
     http_client: Arc<dyn HttpClient>,
@@ -47,6 +47,12 @@ pub fn init(
         return;
     }
 
+    settings::register::<VectorStoreSettings>(cx);
+
+    if !settings::get::<VectorStoreSettings>(cx).enable {
+        return;
+    }
+
     let db_file_path = EMBEDDINGS_DIR
         .join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
         .join("embeddings_db");
@@ -83,6 +89,7 @@ pub fn init(
             .detach();
 
             cx.add_action({
+                // "semantic search: Toggle"
                 move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext<Workspace>| {
                     let vector_store = vector_store.clone();
                     workspace.toggle_modal(cx, |workspace, cx| {
@@ -274,7 +281,6 @@ impl VectorStore {
                             worktree_id,
                             indexed_file,
                         } => {
-                            log::info!("Inserting Data for {:?}", &indexed_file.path);
                             db.insert_file(worktree_id, indexed_file).log_err();
                         }
                         DbOperation::Delete { worktree_id, path } => {
@@ -347,6 +353,7 @@ impl VectorStore {
             });
 
             // batch_tx/rx: Batch Files to Send for Embeddings
+            let batch_size = settings::get::<VectorStoreSettings>(cx).embedding_batch_size;
             let (batch_files_tx, batch_files_rx) = channel::unbounded::<EmbeddingJob>();
             let _batch_files_task = cx.background().spawn(async move {
                 let mut queue_len = 0;
@@ -361,7 +368,7 @@ impl VectorStore {
                         } => {
                             queue_len += &document_spans.len();
                             embeddings_queue.push((worktree_id, parsed_file, document_spans));
-                            queue_len >= EMBEDDINGS_BATCH_SIZE
+                            queue_len >= batch_size
                         }
                         EmbeddingJob::Flush => true,
                     };
@@ -387,8 +394,6 @@ impl VectorStore {
                     let cursor = QueryCursor::new();
                     let mut retriever = CodeContextRetriever { parser, cursor, fs };
                     while let Ok(pending_file) = parsing_files_rx.recv().await {
-                        log::info!("Parsing File: {:?}", &pending_file.relative_path);
-
                         if let Some((indexed_file, document_spans)) =
                             retriever.parse_file(pending_file.clone()).await.log_err()
                         {
@@ -476,11 +481,9 @@ impl VectorStore {
         let parsing_files_tx = self.parsing_files_tx.clone();
 
         cx.spawn(|this, mut cx| async move {
-            let t0 = Instant::now();
             futures::future::join_all(worktree_scans_complete).await;
 
             let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
-            log::info!("Worktree Scanning Done in {:?}", t0.elapsed().as_millis());
 
             if let Some(db_directory) = database_url.parent() {
                 fs.create_dir(db_directory).await.log_err();
@@ -665,6 +668,8 @@ impl VectorStore {
         cx: &mut ModelContext<'_, VectorStore>,
         worktree_id: &WorktreeId,
     ) -> Option<()> {
+        let reindexing_delay = settings::get::<VectorStoreSettings>(cx).reindexing_delay_seconds;
+
         let worktree = project
             .read(cx)
             .worktree_for_id(worktree_id.clone(), cx)?
@@ -725,7 +730,7 @@ impl VectorStore {
                             if !already_stored {
                                 this.update(&mut cx, |this, _| {
                                     let reindex_time = modified_time
-                                        + Duration::from_secs(REINDEXING_DELAY_SECONDS);
+                                        + Duration::from_secs(reindexing_delay as u64);
 
                                     let project_state =
                                         this.projects.get_mut(&project.downgrade())?;

crates/vector_store/src/vector_store_settings.rs 🔗

@@ -0,0 +1,32 @@
+use anyhow;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use settings::Setting;
+
+#[derive(Deserialize, Debug)]
+pub struct VectorStoreSettings {
+    pub enable: bool,
+    pub reindexing_delay_seconds: usize,
+    pub embedding_batch_size: usize,
+}
+
+#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)]
+pub struct VectorStoreSettingsContent {
+    pub enable: Option<bool>,
+    pub reindexing_delay_seconds: Option<usize>,
+    pub embedding_batch_size: Option<usize>,
+}
+
+impl Setting for VectorStoreSettings {
+    const KEY: Option<&'static str> = Some("vector_store");
+
+    type FileContent = VectorStoreSettingsContent;
+
+    fn load(
+        default_value: &Self::FileContent,
+        user_values: &[&Self::FileContent],
+        _: &gpui::AppContext,
+    ) -> anyhow::Result<Self> {
+        Self::load_via_json_merge(default_value, user_values)
+    }
+}

crates/vector_store/src/vector_store_tests.rs 🔗

@@ -1,4 +1,6 @@
-use crate::{db::dot, embedding::EmbeddingProvider, VectorStore};
+use crate::{
+    db::dot, embedding::EmbeddingProvider, vector_store_settings::VectorStoreSettings, VectorStore,
+};
 use anyhow::Result;
 use async_trait::async_trait;
 use gpui::{Task, TestAppContext};
@@ -6,11 +8,17 @@ use language::{Language, LanguageConfig, LanguageRegistry};
 use project::{FakeFs, Project};
 use rand::Rng;
 use serde_json::json;
+use settings::SettingsStore;
 use std::sync::Arc;
 use unindent::Unindent;
 
 #[gpui::test]
 async fn test_vector_store(cx: &mut TestAppContext) {
+    cx.update(|cx| {
+        cx.set_global(SettingsStore::test(cx));
+        settings::register::<VectorStoreSettings>(cx);
+    });
+
     let fs = FakeFs::new(cx.background());
     fs.insert_tree(
         "/the-root",