Cargo.lock 🔗
@@ -8503,8 +8503,10 @@ dependencies = [
"rand 0.8.5",
"rpc",
"rusqlite",
+ "schemars",
"serde",
"serde_json",
+ "settings",
"smol",
"tempdir",
"theme",
KCaverly created
Cargo.lock | 2 +
assets/settings/default.json | 6 +++
crates/vector_store/Cargo.toml | 3 +
crates/vector_store/src/db.rs | 7 ---
crates/vector_store/src/vector_store.rs | 29 +++++++++------
crates/vector_store/src/vector_store_settings.rs | 32 ++++++++++++++++++
crates/vector_store/src/vector_store_tests.rs | 10 +++++
7 files changed, 70 insertions(+), 19 deletions(-)
@@ -8503,8 +8503,10 @@ dependencies = [
"rand 0.8.5",
"rpc",
"rusqlite",
+ "schemars",
"serde",
"serde_json",
+ "settings",
"smol",
"tempdir",
"theme",
@@ -291,6 +291,12 @@
// the terminal will default to matching the buffer's font family.
// "font_family": "Zed Mono"
},
+ // Settings for vector_store
+ "vector_store": {
+ "enable": false,
+ "reindexing_delay_seconds": 600,
+ "embedding_batch_size": 150
+ },
// Different settings for specific languages.
"languages": {
"Plain Text": {
@@ -18,6 +18,7 @@ picker = { path = "../picker" }
theme = { path = "../theme" }
editor = { path = "../editor" }
rpc = { path = "../rpc" }
+settings = { path = "../settings" }
anyhow.workspace = true
futures.workspace = true
smol.workspace = true
@@ -33,6 +34,7 @@ bincode = "1.3.3"
matrixmultiply = "0.3.7"
tiktoken-rs = "0.5.0"
rand.workspace = true
+schemars.workspace = true
[dev-dependencies]
gpui = { path = "../gpui", features = ["test-support"] }
@@ -40,6 +42,7 @@ language = { path = "../language", features = ["test-support"] }
project = { path = "../project", features = ["test-support"] }
rpc = { path = "../rpc", features = ["test-support"] }
workspace = { path = "../workspace", features = ["test-support"] }
+settings = { path = "../settings", features = ["test-support"]}
tree-sitter-rust = "*"
rand.workspace = true
unindent.workspace = true
@@ -204,8 +204,6 @@ impl VectorDatabase {
) -> Result<Vec<(i64, PathBuf, usize, String)>> {
let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
self.for_each_document(&worktree_ids, |id, embedding| {
- eprintln!("document {id} {embedding:?}");
-
let similarity = dot(&embedding, &query_embedding);
let ix = match results
.binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal))
@@ -243,10 +241,7 @@ impl VectorDatabase {
Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
})?
.filter_map(|row| row.ok())
- .for_each(|(id, embedding)| {
- dbg!("id");
- f(id, embedding.0)
- });
+ .for_each(|(id, embedding)| f(id, embedding.0));
Ok(())
}
@@ -2,22 +2,25 @@ mod db;
mod embedding;
mod modal;
mod parsing;
+mod vector_store_settings;
#[cfg(test)]
mod vector_store_tests;
+use crate::vector_store_settings::VectorStoreSettings;
use anyhow::{anyhow, Result};
use db::VectorDatabase;
use embedding::{EmbeddingProvider, OpenAIEmbeddings};
use futures::{channel::oneshot, Future};
use gpui::{
- AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext,
- WeakModelHandle,
+ AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Subscription, Task,
+ ViewContext, WeakModelHandle,
};
use language::{Language, LanguageRegistry};
use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
use parsing::{CodeContextRetriever, ParsedFile};
use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId};
+use settings::SettingsStore;
use smol::channel;
use std::{
collections::HashMap,
@@ -34,9 +37,6 @@ use util::{
};
use workspace::{Workspace, WorkspaceCreated};
-const REINDEXING_DELAY_SECONDS: u64 = 3;
-const EMBEDDINGS_BATCH_SIZE: usize = 150;
-
pub fn init(
fs: Arc<dyn Fs>,
http_client: Arc<dyn HttpClient>,
@@ -47,6 +47,12 @@ pub fn init(
return;
}
+ settings::register::<VectorStoreSettings>(cx);
+
+ if !settings::get::<VectorStoreSettings>(cx).enable {
+ return;
+ }
+
let db_file_path = EMBEDDINGS_DIR
.join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
.join("embeddings_db");
@@ -83,6 +89,7 @@ pub fn init(
.detach();
cx.add_action({
+ // "semantic search: Toggle"
move |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext<Workspace>| {
let vector_store = vector_store.clone();
workspace.toggle_modal(cx, |workspace, cx| {
@@ -274,7 +281,6 @@ impl VectorStore {
worktree_id,
indexed_file,
} => {
- log::info!("Inserting Data for {:?}", &indexed_file.path);
db.insert_file(worktree_id, indexed_file).log_err();
}
DbOperation::Delete { worktree_id, path } => {
@@ -347,6 +353,7 @@ impl VectorStore {
});
// batch_tx/rx: Batch Files to Send for Embeddings
+ let batch_size = settings::get::<VectorStoreSettings>(cx).embedding_batch_size;
let (batch_files_tx, batch_files_rx) = channel::unbounded::<EmbeddingJob>();
let _batch_files_task = cx.background().spawn(async move {
let mut queue_len = 0;
@@ -361,7 +368,7 @@ impl VectorStore {
} => {
queue_len += &document_spans.len();
embeddings_queue.push((worktree_id, parsed_file, document_spans));
- queue_len >= EMBEDDINGS_BATCH_SIZE
+ queue_len >= batch_size
}
EmbeddingJob::Flush => true,
};
@@ -387,8 +394,6 @@ impl VectorStore {
let cursor = QueryCursor::new();
let mut retriever = CodeContextRetriever { parser, cursor, fs };
while let Ok(pending_file) = parsing_files_rx.recv().await {
- log::info!("Parsing File: {:?}", &pending_file.relative_path);
-
if let Some((indexed_file, document_spans)) =
retriever.parse_file(pending_file.clone()).await.log_err()
{
@@ -476,11 +481,9 @@ impl VectorStore {
let parsing_files_tx = self.parsing_files_tx.clone();
cx.spawn(|this, mut cx| async move {
- let t0 = Instant::now();
futures::future::join_all(worktree_scans_complete).await;
let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
- log::info!("Worktree Scanning Done in {:?}", t0.elapsed().as_millis());
if let Some(db_directory) = database_url.parent() {
fs.create_dir(db_directory).await.log_err();
@@ -665,6 +668,8 @@ impl VectorStore {
cx: &mut ModelContext<'_, VectorStore>,
worktree_id: &WorktreeId,
) -> Option<()> {
+ let reindexing_delay = settings::get::<VectorStoreSettings>(cx).reindexing_delay_seconds;
+
let worktree = project
.read(cx)
.worktree_for_id(worktree_id.clone(), cx)?
@@ -725,7 +730,7 @@ impl VectorStore {
if !already_stored {
this.update(&mut cx, |this, _| {
let reindex_time = modified_time
- + Duration::from_secs(REINDEXING_DELAY_SECONDS);
+ + Duration::from_secs(reindexing_delay as u64);
let project_state =
this.projects.get_mut(&project.downgrade())?;
@@ -0,0 +1,32 @@
+use anyhow;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use settings::Setting;
+
+#[derive(Deserialize, Debug)]
+pub struct VectorStoreSettings { // Resolved values of the "vector_store" settings; every field is required here.
+ pub enable: bool, // `init` returns early, right after registering these settings, when this is false.
+ pub reindexing_delay_seconds: usize, // Seconds added to a file's modified time to compute its reindex time.
+ pub embedding_batch_size: usize, // Compared against the running count of queued document spans in the batching task.
+}
+
+#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)]
+pub struct VectorStoreSettingsContent { // Settings-file form of `VectorStoreSettings` (its `Setting::FileContent`).
+ pub enable: Option<bool>, // Every field is optional in this form; same names as in `VectorStoreSettings`.
+ pub reindexing_delay_seconds: Option<usize>,
+ pub embedding_batch_size: Option<usize>,
+}
+
+impl Setting for VectorStoreSettings { // Lets callers use `settings::register` / `settings::get` with this type.
+ const KEY: Option<&'static str> = Some("vector_store"); // Same name as the "vector_store" object in the default settings JSON.
+
+ type FileContent = VectorStoreSettingsContent; // All-optional counterpart declared above.
+
+ fn load( // Produces the resolved settings from the default content and the user-provided contents.
+ default_value: &Self::FileContent,
+ user_values: &[&Self::FileContent],
+ _: &gpui::AppContext, // The app context is not used.
+ ) -> anyhow::Result<Self> {
+ Self::load_via_json_merge(default_value, user_values) // Delegates entirely to the trait helper; any error is returned as-is.
+ }
+}
@@ -1,4 +1,6 @@
-use crate::{db::dot, embedding::EmbeddingProvider, VectorStore};
+use crate::{
+ db::dot, embedding::EmbeddingProvider, vector_store_settings::VectorStoreSettings, VectorStore,
+};
use anyhow::Result;
use async_trait::async_trait;
use gpui::{Task, TestAppContext};
@@ -6,11 +8,17 @@ use language::{Language, LanguageConfig, LanguageRegistry};
use project::{FakeFs, Project};
use rand::Rng;
use serde_json::json;
+use settings::SettingsStore;
use std::sync::Arc;
use unindent::Unindent;
#[gpui::test]
async fn test_vector_store(cx: &mut TestAppContext) {
+ cx.update(|cx| {
+ cx.set_global(SettingsStore::test(cx));
+ settings::register::<VectorStoreSettings>(cx);
+ });
+
let fs = FakeFs::new(cx.background());
fs.insert_tree(
"/the-root",