WIP: started work on vector store db by walking project worktrees.

Co-Authored-By: Max <max@zed.dev>

KCaverly created

Change summary

Cargo.lock                              |  15 +++
Cargo.toml                              |   1 
crates/vector_store/Cargo.toml          |  25 +++++
crates/vector_store/README.md           |  31 ++++++
crates/vector_store/src/vector_store.rs | 134 +++++++++++++++++++++++++++
crates/zed/Cargo.toml                   |   1 
crates/zed/src/main.rs                  |   1 
7 files changed, 208 insertions(+)

Detailed changes

Cargo.lock 🔗

@@ -7877,6 +7877,20 @@ version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
 
+[[package]]
+name = "vector_store"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "futures 0.3.28",
+ "gpui",
+ "language",
+ "project",
+ "smol",
+ "util",
+ "workspace",
+]
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -8917,6 +8931,7 @@ dependencies = [
  "urlencoding",
  "util",
  "uuid 1.3.2",
+ "vector_store",
  "vim",
  "welcome",
  "workspace",

Cargo.toml 🔗

@@ -63,6 +63,7 @@ members = [
     "crates/theme_selector",
     "crates/theme_testbench",
     "crates/util",
+    "crates/vector_store",
     "crates/vim",
     "crates/workspace",
     "crates/welcome",

crates/vector_store/Cargo.toml 🔗

@@ -0,0 +1,25 @@
+[package]
+name = "vector_store"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[lib]
+path = "src/vector_store.rs"
+doctest = false
+
+[dependencies]
+gpui = { path = "../gpui" }
+language = { path = "../language" }
+project = { path = "../project" }
+workspace = { path = "../workspace" }
+util = { path = "../util" }
+anyhow.workspace = true
+futures.workspace = true
+smol.workspace = true
+
+[dev-dependencies]
+gpui = { path = "../gpui", features = ["test-support"] }
+language = { path = "../language", features = ["test-support"] }
+project = { path = "../project", features = ["test-support"] }
+workspace = { path = "../workspace", features = ["test-support"] }

crates/vector_store/README.md 🔗

@@ -0,0 +1,31 @@
+
+WIP: Sample SQL Queries
+/*
+
+create table "files" (
+"id" INTEGER PRIMARY KEY,
+"path" VARCHAR,
+"sha1" VARCHAR
+);
+
+create table symbols (
+"file_id" INTEGER REFERENCES "files"("id") ON DELETE CASCADE,
+"offset" INTEGER,
+"embedding" VECTOR
+);
+
+insert into "files" ("path", "sha1") values ('src/main.rs', 'sha1') returning id;
+insert into symbols (
+"file_id",
+"start",
+"end",
+"embedding"
+) values (
+(id,),
+(id,),
+(id,),
+(id,),
+)
+
+
+*/

crates/vector_store/src/vector_store.rs 🔗

@@ -0,0 +1,134 @@
+use anyhow::{anyhow, Result};
+use gpui::{AppContext, Entity, ModelContext, ModelHandle};
+use language::LanguageRegistry;
+use project::{Fs, Project};
+use smol::channel;
+use std::{path::PathBuf, sync::Arc};
+use util::ResultExt;
+use workspace::WorkspaceCreated;
+
+/// Creates the global `VectorStore` model and hooks it up to workspace creation.
+///
+/// Every time a `WorkspaceCreated` event fires, the workspace handle is
+/// upgraded; if it is still alive and its project is local, the project is
+/// handed to `VectorStore::add_project`. The subscription is detached.
+pub fn init(fs: Arc<dyn Fs>, language_registry: Arc<LanguageRegistry>, cx: &mut AppContext) {
+    let vector_store = cx.add_model(|_| VectorStore::new(fs, language_registry));
+
+    let subscription = cx.subscribe_global::<WorkspaceCreated, _>(move |event, cx| {
+        // The event only carries a weak handle; bail out if the workspace is gone.
+        let workspace = match event.0.upgrade(cx) {
+            Some(workspace) => workspace,
+            None => return,
+        };
+
+        let project = workspace.read(cx).project().clone();
+        if !project.read(cx).is_local() {
+            return;
+        }
+
+        vector_store.update(cx, |store, cx| store.add_project(project, cx));
+    });
+    subscription.detach();
+}
+
+struct Document { // NOTE(review): nothing in this file constructs it yet; presumably one embedded item of a file — confirm
+    offset: usize, // presumably the item's position within its file (units not shown here) — TODO confirm
+    name: String, // presumably the item's display name — TODO confirm
+    embedding: Vec<f32>, // embedding vector; how it is computed is not shown in this file
+}
+
+struct IndexedFile { // success value of `VectorStore::index_file`; sent over the `indexed_files` channel in `add_project`
+    path: PathBuf, // presumably the path of the file that was indexed — TODO confirm (never populated yet)
+    sha1: String, // presumably a SHA-1 digest of the file contents — TODO confirm
+    documents: Vec<Document>, // the `Document`s belonging to this file
+}
+
+struct SearchResult { // NOTE(review): not referenced anywhere else in this file; presumably one search hit — confirm
+    path: PathBuf, // presumably the file containing the hit — TODO confirm
+    offset: usize, // presumably mirrors `Document::offset` — TODO confirm
+    name: String, // presumably mirrors `Document::name` — TODO confirm
+    distance: f32, // presumably the distance between query and hit embeddings; metric not shown here
+}
+
+struct VectorStore { // model created in `init`; `add_project` uses it to index a project's files
+    fs: Arc<dyn Fs>, // filesystem handle; cloned in `add_project` and passed to `index_file`
+    language_registry: Arc<LanguageRegistry>, // language registry; cloned in `add_project` and passed to `index_file`
+}
+
+impl VectorStore {
+    /// Builds a store that keeps the given filesystem and language registry
+    /// handles so they can be passed to `index_file` later.
+    fn new(fs: Arc<dyn Fs>, language_registry: Arc<LanguageRegistry>) -> Self {
+        Self {
+            fs,
+            language_registry,
+        }
+    }
+
+    /// Indexes a single file.
+    ///
+    /// This is currently a stub: it prints the path to stderr and always
+    /// returns a "not implemented" error, so no `IndexedFile` is produced yet.
+    async fn index_file(
+        fs: &Arc<dyn Fs>,
+        language_registry: &Arc<LanguageRegistry>,
+        file_path: PathBuf,
+    ) -> Result<IndexedFile> {
+        eprintln!("indexing file {file_path:?}");
+        Err(anyhow!("not implemented"))
+    }
+
+    /// Kicks off indexing of every file in `project`'s worktrees.
+    ///
+    /// A detached task first waits for all worktree scans to complete, then
+    /// runs a pipeline over two unbounded channels:
+    ///
+    /// 1. a background task enqueues the absolute path of every file,
+    /// 2. `num_cpus()` scoped workers run `index_file` on those paths,
+    /// 3. a background task drains the indexed files (persisting them is
+    ///    still to be done).
+    ///
+    /// # Panics
+    ///
+    /// Panics if any worktree of `project` is not local; `init` only calls
+    /// this for projects where `is_local()` is true.
+    fn add_project(&mut self, project: ModelHandle<Project>, cx: &mut ModelContext<Self>) {
+        let worktree_scans_complete = project
+            .read(cx)
+            .worktrees(cx)
+            .map(|worktree| worktree.read(cx).as_local().unwrap().scan_complete())
+            .collect::<Vec<_>>();
+
+        let fs = self.fs.clone();
+        let language_registry = self.language_registry.clone();
+
+        cx.spawn(|_this, cx| async move {
+            futures::future::join_all(worktree_scans_complete).await;
+
+            let worktrees = project.read_with(&cx, |project, cx| {
+                project
+                    .worktrees(cx)
+                    .map(|worktree| worktree.read(cx).snapshot())
+                    .collect::<Vec<_>>()
+            });
+
+            let (paths_tx, paths_rx) = channel::unbounded::<PathBuf>();
+            let (indexed_files_tx, indexed_files_rx) = channel::unbounded::<IndexedFile>();
+            cx.background()
+                .spawn(async move {
+                    for worktree in worktrees {
+                        for file in worktree.files(false, 0) {
+                            // An unbounded channel is never full, so a failed send means
+                            // the workers are gone; stop enqueueing instead of panicking.
+                            if paths_tx
+                                .try_send(worktree.absolutize(&file.path))
+                                .is_err()
+                            {
+                                return;
+                            }
+                        }
+                    }
+                })
+                .detach();
+            cx.background()
+                .spawn(async move {
+                    while let Ok(_indexed_file) = indexed_files_rx.recv().await {
+                        // TODO: write document to database
+                    }
+                })
+                .detach();
+            cx.background()
+                .scoped(|scope| {
+                    for _ in 0..cx.background().num_cpus() {
+                        scope.spawn(async {
+                            // The loop ends once the path producer has finished and
+                            // dropped its sender.
+                            while let Ok(file_path) = paths_rx.recv().await {
+                                if let Some(indexed_file) =
+                                    Self::index_file(&fs, &language_registry, file_path)
+                                        .await
+                                        .log_err()
+                                {
+                                    // The consumer has gone away; further indexing
+                                    // would be wasted work.
+                                    if indexed_files_tx.try_send(indexed_file).is_err() {
+                                        break;
+                                    }
+                                }
+                            }
+                        });
+                    }
+                })
+                .await;
+        })
+        .detach();
+    }
+}
+
+impl Entity for VectorStore { // gpui `Entity` impl for the model that `init` creates via `cx.add_model`
+    type Event = (); // unit event type: this model defines no event payload
+}

crates/zed/Cargo.toml 🔗

@@ -64,6 +64,7 @@ theme = { path = "../theme" }
 theme_selector = { path = "../theme_selector" }
 theme_testbench = { path = "../theme_testbench" }
 util = { path = "../util" }
+vector_store = { path = "../vector_store" }
 vim = { path = "../vim" }
 workspace = { path = "../workspace" }
 welcome = { path = "../welcome" }

crates/zed/src/main.rs 🔗

@@ -152,6 +152,7 @@ fn main() {
         project_panel::init(cx);
         diagnostics::init(cx);
         search::init(cx);
+        vector_store::init(fs.clone(), languages.clone(), cx);
         vim::init(cx);
         terminal_view::init(cx);
         theme_testbench::init(cx);