From a56d454a0760420899d8c6582bde60e851efcf27 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 10:10:24 -0400
Subject: [PATCH 01/34] added semantic search support for c

---
 crates/zed/src/languages/c/embedding.scm | 39 ++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 crates/zed/src/languages/c/embedding.scm
diff --git a/crates/zed/src/languages/c/embedding.scm b/crates/zed/src/languages/c/embedding.scm
new file mode 100644
index 0000000000000000000000000000000000000000..cd1915f62bb5c27f7617bde91327a78129564511
--- /dev/null
+++ b/crates/zed/src/languages/c/embedding.scm
@@ -0,0 +1,39 @@
+(declaration
+    (type_qualifier)? @context
+    type: (_)? @context
+    declarator: [
+        (function_declarator
+            declarator: (_) @name)
+        (pointer_declarator
+            "*" @context
+            declarator: (function_declarator
+                declarator: (_) @name))
+        (pointer_declarator
+            "*" @context
+            declarator: (pointer_declarator
+                "*" @context
+                declarator: (function_declarator
+                    declarator: (_) @name)))
+    ]
+) @item
+
+(function_definition
+    (type_qualifier)? @context
+    type: (_)? @context
+    declarator: [
+        (function_declarator
+            declarator: (_) @name
+                )
+        (pointer_declarator
+            "*" @context
+            declarator: (function_declarator
+                declarator: (_) @name
+                    ))
+        (pointer_declarator
+            "*" @context
+            declarator: (pointer_declarator
+                "*" @context
+                declarator: (function_declarator
+                    declarator: (_) @name)))
+    ]
+) @item

From 5eab62858004493879172ff4576ca32ced8e6bea Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 14:33:31 -0400
Subject: [PATCH 02/34] Added go parsing for semantic search, and added
 preceding comments on go and rust.

Co-authored-by: Alex <alexviscreanu@gmail.com>
Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 crates/language/src/language.rs             |  4 --
 crates/vector_store/src/parsing.rs          | 23 ++++----
 crates/zed/src/languages/go/embedding.scm   | 24 +++++++++
 crates/zed/src/languages/rust/embedding.scm | 58 ++++++++-------------
 4 files changed, 55 insertions(+), 54 deletions(-)
 create mode 100644 crates/zed/src/languages/go/embedding.scm

diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs
index dbd35f0e87bc602ac91aaf8196b81a4a017fff93..4ec5e88a7edde55c90e975c4bd944e3f38c1bb8b 100644
--- a/crates/language/src/language.rs
+++ b/crates/language/src/language.rs
@@ -525,7 +525,6 @@ pub struct EmbeddingConfig {
     pub item_capture_ix: u32,
     pub name_capture_ix: u32,
     pub context_capture_ix: Option<u32>,
-    pub extra_context_capture_ix: Option<u32>,
 }
 
 struct InjectionConfig {
@@ -1246,14 +1245,12 @@ impl Language {
         let mut item_capture_ix = None;
         let mut name_capture_ix = None;
         let mut context_capture_ix = None;
-        let mut extra_context_capture_ix = None;
         get_capture_indices(
             &query,
             &mut [
                 ("item", &mut item_capture_ix),
                 ("name", &mut name_capture_ix),
                 ("context", &mut context_capture_ix),
-                ("context.extra", &mut extra_context_capture_ix),
             ],
         );
         if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
@@ -1262,7 +1259,6 @@ impl Language {
                 item_capture_ix,
                 name_capture_ix,
                 context_capture_ix,
-                extra_context_capture_ix,
             });
         }
         Ok(self)
diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 91dcf699f8c3add9088b2af4f0d4df59b7551ac2..3e697399b1fa5b6dc2a42a29ec97f1e490643613 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -53,7 +53,7 @@ impl CodeContextRetriever {
             .ok_or_else(|| anyhow!("parsing failed"))?;
 
         let mut documents = Vec::new();
-        let mut context_spans = Vec::new();
+        let mut document_texts = Vec::new();
 
         // Iterate through query matches
         for mat in self.cursor.matches(
@@ -61,11 +61,10 @@ impl CodeContextRetriever {
             tree.root_node(),
             content.as_bytes(),
         ) {
-            // log::info!("-----MATCH-----");
-
             let mut name: Vec<&str> = vec![];
             let mut item: Option<&str> = None;
             let mut offset: Option<usize> = None;
+            let mut context_spans: Vec<&str> = vec![];
             for capture in mat.captures {
                 if capture.index == embedding_config.item_capture_ix {
                     offset = Some(capture.node.byte_range().start);
@@ -79,25 +78,21 @@ impl CodeContextRetriever {
                 if let Some(context_capture_ix) = embedding_config.context_capture_ix {
                     if capture.index == context_capture_ix {
                         if let Some(context) = content.get(capture.node.byte_range()) {
-                            name.push(context);
+                            context_spans.push(context);
                         }
                     }
                 }
             }
 
             if item.is_some() && offset.is_some() && name.len() > 0 {
-                let context_span = CODE_CONTEXT_TEMPLATE
+                let item = format!("{}\n{}", context_spans.join("\n"), item.unwrap());
+
+                let document_text = CODE_CONTEXT_TEMPLATE
                     .replace("<path>", pending_file.relative_path.to_str().unwrap())
                     .replace("<language>", &pending_file.language.name().to_lowercase())
-                    .replace("<item>", item.unwrap());
-
-                let mut truncated_span = context_span.clone();
-                truncated_span.truncate(100);
-
-                // log::info!("Name:       {:?}", name);
-                // log::info!("Span:       {:?}", truncated_span);
+                    .replace("<item>", item.as_str());
 
-                context_spans.push(context_span);
+                document_texts.push(document_text);
                 documents.push(Document {
                     name: name.join(" "),
                     offset: offset.unwrap(),
@@ -112,7 +107,7 @@ impl CodeContextRetriever {
                 mtime: pending_file.modified_time,
                 documents,
             },
-            context_spans,
+            document_texts,
         ));
     }
 }
diff --git a/crates/zed/src/languages/go/embedding.scm b/crates/zed/src/languages/go/embedding.scm
new file mode 100644
index 0000000000000000000000000000000000000000..9d8700cdfb57d1008acc09c11013f2046e7bd157
--- /dev/null
+++ b/crates/zed/src/languages/go/embedding.scm
@@ -0,0 +1,24 @@
+(
+    (comment)* @context
+    .
+    (type_declaration
+        (type_spec
+            name: (_) @name)
+    ) @item
+)
+
+(
+    (comment)* @context
+    .
+    (function_declaration
+        name: (_) @name
+    ) @item
+)
+
+(
+    (comment)* @context
+    .
+    (method_declaration
+        name: (_) @name
+    ) @item
+)
diff --git a/crates/zed/src/languages/rust/embedding.scm b/crates/zed/src/languages/rust/embedding.scm
index ea8bab9f68113a9b725e094a4d31f3e572c4bed7..3aec101e9fbb5d63a49db52869f34757135b0ab2 100644
--- a/crates/zed/src/languages/rust/embedding.scm
+++ b/crates/zed/src/languages/rust/embedding.scm
@@ -1,36 +1,22 @@
-(struct_item
-    (visibility_modifier)? @context
-    "struct" @context
-    name: (_) @name) @item
-
-(enum_item
-    (visibility_modifier)? @context
-    "enum" @context
-    name: (_) @name) @item
-
-(impl_item
-    "impl" @context
-    trait: (_)? @name
-    "for"? @context
-    type: (_) @name) @item
-
-(trait_item
-    (visibility_modifier)? @context
-    "trait" @context
-    name: (_) @name) @item
-
-(function_item
-    (visibility_modifier)? @context
-    (function_modifiers)? @context
-    "fn" @context
-    name: (_) @name) @item
-
-(function_signature_item
-    (visibility_modifier)? @context
-    (function_modifiers)? @context
-    "fn" @context
-    name: (_) @name) @item
-
-(macro_definition
-    . "macro_rules!" @context
-    name: (_) @name) @item
+(
+    (line_comment)* @context
+    .
+    [
+        (enum_item
+            name: (_) @name) @item
+        (struct_item
+            name: (_) @name) @item
+        (impl_item
+            trait: (_)? @name
+            "for"? @name
+            type: (_) @name) @item
+        (trait_item
+            name: (_) @name) @item
+        (function_item
+            name: (_) @name) @item
+        (macro_definition
+            name: (_) @name) @item
+        (function_signature_item
+            name: (_) @name) @item
+    ]
+)

From 0a0e40fb246b3f1e0e8751f24bf008387f223c4b Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 16:34:32 -0400
Subject: [PATCH 03/34] refactored code context retrieval and standardized
 database migration

Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 Cargo.lock                                    |   2 +
 crates/vector_store/Cargo.toml                |   3 +
 crates/vector_store/src/db.rs                 | 132 +++++++++++------
 crates/vector_store/src/modal.rs              |   2 +-
 crates/vector_store/src/parsing.rs            |  82 +++++-----
 crates/vector_store/src/vector_store.rs       | 140 ++++++++++--------
 crates/vector_store/src/vector_store_tests.rs |  21 ++-
 7 files changed, 233 insertions(+), 149 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0ac6a2ee890418104d6851a961d321f1ef7e8f36..4359659a53bad7b2b33bca0fa9e41cd6ae09b11f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8483,7 +8483,9 @@ dependencies = [
  "anyhow",
  "async-trait",
  "bincode",
+ "ctor",
  "editor",
+ "env_logger 0.9.3",
  "futures 0.3.28",
  "gpui",
  "isahc",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index 40bff8b95c167e43c9f20d31e47871d52d5ff8b1..8e1dea59fd8c0fe890291388fccaa9ac7cd3443d 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -44,6 +44,9 @@ rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
 tree-sitter-rust = "*"
+
 rand.workspace = true
 unindent.workspace = true
 tempdir.workspace = true
+ctor.workspace = true
+env_logger.workspace = true
diff --git a/crates/vector_store/src/db.rs b/crates/vector_store/src/db.rs
index a91a1872b59774a1863ae2a9ff867cf1b7ad39b3..d3d05f8c62c9d5639e641094204caa112e96c54f 100644
--- a/crates/vector_store/src/db.rs
+++ b/crates/vector_store/src/db.rs
@@ -1,21 +1,21 @@
+use crate::{parsing::Document, VECTOR_STORE_VERSION};
+use anyhow::{anyhow, Result};
+use project::Fs;
+use rpc::proto::Timestamp;
+use rusqlite::{
+    params,
+    types::{FromSql, FromSqlResult, ValueRef},
+};
 use std::{
     cmp::Ordering,
     collections::HashMap,
+    ops::Range,
     path::{Path, PathBuf},
     rc::Rc,
+    sync::Arc,
     time::SystemTime,
 };
 
-use anyhow::{anyhow, Result};
-
-use crate::parsing::ParsedFile;
-use crate::VECTOR_STORE_VERSION;
-use rpc::proto::Timestamp;
-use rusqlite::{
-    params,
-    types::{FromSql, FromSqlResult, ValueRef},
-};
-
 #[derive(Debug)]
 pub struct FileRecord {
     pub id: usize,
@@ -42,48 +42,88 @@ pub struct VectorDatabase {
 }
 
 impl VectorDatabase {
-    pub fn new(path: String) -> Result<Self> {
+    pub async fn new(fs: Arc<dyn Fs>, path: Arc<PathBuf>) -> Result<Self> {
+        if let Some(db_directory) = path.parent() {
+            fs.create_dir(db_directory).await?;
+        }
+
         let this = Self {
-            db: rusqlite::Connection::open(path)?,
+            db: rusqlite::Connection::open(path.as_path())?,
         };
         this.initialize_database()?;
         Ok(this)
     }
 
+    fn get_existing_version(&self) -> Result<i64> {
+        let mut version_query = self.db.prepare("SELECT version from vector_store_config")?;
+        version_query
+            .query_row([], |row| Ok(row.get::<_, i64>(0)?))
+            .map_err(|err| anyhow!("version query failed: {err}"))
+    }
+
     fn initialize_database(&self) -> Result<()> {
         rusqlite::vtab::array::load_module(&self.db)?;
 
-        // This will create the database if it doesnt exist
+        if self
+            .get_existing_version()
+            .map_or(false, |version| version == VECTOR_STORE_VERSION as i64)
+        {
+            return Ok(());
+        }
+
+        self.db
+            .execute(
+                "
+                    DROP TABLE vector_store_config;
+                    DROP TABLE worktrees;
+                    DROP TABLE files;
+                    DROP TABLE documents;
+                ",
+                [],
+            )
+            .ok();
 
         // Initialize Vector Databasing Tables
         self.db.execute(
-            "CREATE TABLE IF NOT EXISTS worktrees (
+            "CREATE TABLE vector_store_config (
+                version INTEGER NOT NULL
+            )",
+            [],
+        )?;
+
+        self.db.execute(
+            "INSERT INTO vector_store_config (version) VALUES (?1)",
+            params![VECTOR_STORE_VERSION],
+        )?;
+
+        self.db.execute(
+            "CREATE TABLE worktrees (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 absolute_path VARCHAR NOT NULL
             );
-            CREATE UNIQUE INDEX IF NOT EXISTS worktrees_absolute_path ON worktrees (absolute_path);
+            CREATE UNIQUE INDEX worktrees_absolute_path ON worktrees (absolute_path);
             ",
             [],
         )?;
 
         self.db.execute(
-            "CREATE TABLE IF NOT EXISTS files (
+            "CREATE TABLE files (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 worktree_id INTEGER NOT NULL,
                 relative_path VARCHAR NOT NULL,
                 mtime_seconds INTEGER NOT NULL,
                 mtime_nanos INTEGER NOT NULL,
-                vector_store_version INTEGER NOT NULL,
                 FOREIGN KEY(worktree_id) REFERENCES worktrees(id) ON DELETE CASCADE
             )",
             [],
         )?;
 
         self.db.execute(
-            "CREATE TABLE IF NOT EXISTS documents (
+            "CREATE TABLE documents (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 file_id INTEGER NOT NULL,
-                offset INTEGER NOT NULL,
+                start_byte INTEGER NOT NULL,
+                end_byte INTEGER NOT NULL,
                 name VARCHAR NOT NULL,
                 embedding BLOB NOT NULL,
                 FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
@@ -102,43 +142,44 @@ impl VectorDatabase {
         Ok(())
     }
 
-    pub fn insert_file(&self, worktree_id: i64, indexed_file: ParsedFile) -> Result<()> {
+    pub fn insert_file(
+        &self,
+        worktree_id: i64,
+        path: PathBuf,
+        mtime: SystemTime,
+        documents: Vec<Document>,
+    ) -> Result<()> {
         // Write to files table, and return generated id.
         self.db.execute(
             "
             DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2;
             ",
-            params![worktree_id, indexed_file.path.to_str()],
+            params![worktree_id, path.to_str()],
         )?;
-        let mtime = Timestamp::from(indexed_file.mtime);
+        let mtime = Timestamp::from(mtime);
         self.db.execute(
             "
             INSERT INTO files
-            (worktree_id, relative_path, mtime_seconds, mtime_nanos, vector_store_version)
+            (worktree_id, relative_path, mtime_seconds, mtime_nanos)
             VALUES
-            (?1, ?2, $3, $4, $5);
+            (?1, ?2, $3, $4);
             ",
-            params![
-                worktree_id,
-                indexed_file.path.to_str(),
-                mtime.seconds,
-                mtime.nanos,
-                VECTOR_STORE_VERSION
-            ],
+            params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos],
         )?;
 
         let file_id = self.db.last_insert_rowid();
 
         // Currently inserting at approximately 3400 documents a second
         // I imagine we can speed this up with a bulk insert of some kind.
-        for document in indexed_file.documents {
+        for document in documents {
             let embedding_blob = bincode::serialize(&document.embedding)?;
 
             self.db.execute(
-                "INSERT INTO documents (file_id, offset, name, embedding) VALUES (?1, ?2, ?3, ?4)",
+                "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding) VALUES (?1, ?2, ?3, ?4, $5)",
                 params![
                     file_id,
-                    document.offset.to_string(),
+                    document.range.start.to_string(),
+                    document.range.end.to_string(),
                     document.name,
                     embedding_blob
                 ],
@@ -204,7 +245,7 @@ impl VectorDatabase {
         worktree_ids: &[i64],
         query_embedding: &Vec<f32>,
         limit: usize,
-    ) -> Result<Vec<(i64, PathBuf, usize, String)>> {
+    ) -> Result<Vec<(i64, PathBuf, Range<usize>, String)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
         self.for_each_document(&worktree_ids, |id, embedding| {
             let similarity = dot(&embedding, &query_embedding);
@@ -248,11 +289,18 @@ impl VectorDatabase {
         Ok(())
     }
 
-    fn get_documents_by_ids(&self, ids: &[i64]) -> Result<Vec<(i64, PathBuf, usize, String)>> {
+    fn get_documents_by_ids(
+        &self,
+        ids: &[i64],
+    ) -> Result<Vec<(i64, PathBuf, Range<usize>, String)>> {
         let mut statement = self.db.prepare(
             "
                 SELECT
-                    documents.id, files.worktree_id, files.relative_path, documents.offset, documents.name
+                    documents.id,
+                    files.worktree_id,
+                    files.relative_path,
+                    documents.start_byte,
+                    documents.end_byte, documents.name
                 FROM
                     documents, files
                 WHERE
@@ -266,15 +314,15 @@ impl VectorDatabase {
                 row.get::<_, i64>(0)?,
                 row.get::<_, i64>(1)?,
                 row.get::<_, String>(2)?.into(),
-                row.get(3)?,
-                row.get(4)?,
+                row.get(3)?..row.get(4)?,
+                row.get(5)?,
             ))
         })?;
 
-        let mut values_by_id = HashMap::<i64, (i64, PathBuf, usize, String)>::default();
+        let mut values_by_id = HashMap::<i64, (i64, PathBuf, Range<usize>, String)>::default();
         for row in result_iter {
-            let (id, worktree_id, path, offset, name) = row?;
-            values_by_id.insert(id, (worktree_id, path, offset, name));
+            let (id, worktree_id, path, range, name) = row?;
+            values_by_id.insert(id, (worktree_id, path, range, name));
         }
 
         let mut results = Vec::with_capacity(ids.len());
diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs
index 9225fe8786e9173a82b32a9aedf5a7e979ff6f88..b797a208062ee623db11fbbfe40948847639465f 100644
--- a/crates/vector_store/src/modal.rs
+++ b/crates/vector_store/src/modal.rs
@@ -66,7 +66,7 @@ impl PickerDelegate for SemanticSearchDelegate {
             });
 
             let workspace = self.workspace.clone();
-            let position = search_result.clone().offset;
+            let position = search_result.clone().byte_range.start;
             cx.spawn(|_, mut cx| async move {
                 let buffer = buffer.await?;
                 workspace.update(&mut cx, |workspace, cx| {
diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 3e697399b1fa5b6dc2a42a29ec97f1e490643613..23dcf505c92896b1eb18499d4b05b633d1c37bf7 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -1,41 +1,39 @@
-use std::{path::PathBuf, sync::Arc, time::SystemTime};
-
 use anyhow::{anyhow, Ok, Result};
-use project::Fs;
+use language::Language;
+use std::{ops::Range, path::Path, sync::Arc};
 use tree_sitter::{Parser, QueryCursor};
 
-use crate::PendingFile;
-
 #[derive(Debug, PartialEq, Clone)]
 pub struct Document {
-    pub offset: usize,
     pub name: String,
+    pub range: Range<usize>,
+    pub content: String,
     pub embedding: Vec<f32>,
 }
 
-#[derive(Debug, PartialEq, Clone)]
-pub struct ParsedFile {
-    pub path: PathBuf,
-    pub mtime: SystemTime,
-    pub documents: Vec<Document>,
-}
-
 const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
 
 pub struct CodeContextRetriever {
     pub parser: Parser,
     pub cursor: QueryCursor,
-    pub fs: Arc<dyn Fs>,
 }
 
 impl CodeContextRetriever {
-    pub async fn parse_file(
+    pub fn new() -> Self {
+        Self {
+            parser: Parser::new(),
+            cursor: QueryCursor::new(),
+        }
+    }
+
+    pub fn parse_file(
         &mut self,
-        pending_file: PendingFile,
-    ) -> Result<(ParsedFile, Vec<String>)> {
-        let grammar = pending_file
-            .language
+        relative_path: &Path,
+        content: &str,
+        language: Arc<Language>,
+    ) -> Result<Vec<Document>> {
+        let grammar = language
             .grammar()
             .ok_or_else(|| anyhow!("no grammar for language"))?;
         let embedding_config = grammar
@@ -43,8 +41,6 @@ impl CodeContextRetriever {
             .as_ref()
             .ok_or_else(|| anyhow!("no embedding queries"))?;
 
-        let content = self.fs.load(&pending_file.absolute_path).await?;
-
         self.parser.set_language(grammar.ts_language).unwrap();
 
         let tree = self
@@ -53,7 +49,6 @@ impl CodeContextRetriever {
             .ok_or_else(|| anyhow!("parsing failed"))?;
 
         let mut documents = Vec::new();
-        let mut document_texts = Vec::new();
 
         // Iterate through query matches
         for mat in self.cursor.matches(
@@ -63,11 +58,11 @@ impl CodeContextRetriever {
         ) {
             let mut name: Vec<&str> = vec![];
             let mut item: Option<&str> = None;
-            let mut offset: Option<usize> = None;
+            let mut byte_range: Option<Range<usize>> = None;
             let mut context_spans: Vec<&str> = vec![];
             for capture in mat.captures {
                 if capture.index == embedding_config.item_capture_ix {
-                    offset = Some(capture.node.byte_range().start);
+                    byte_range = Some(capture.node.byte_range());
                     item = content.get(capture.node.byte_range());
                 } else if capture.index == embedding_config.name_capture_ix {
                     if let Some(name_content) = content.get(capture.node.byte_range()) {
@@ -84,30 +79,25 @@ impl CodeContextRetriever {
                 }
             }
 
-            if item.is_some() && offset.is_some() && name.len() > 0 {
-                let item = format!("{}\n{}", context_spans.join("\n"), item.unwrap());
-
-                let document_text = CODE_CONTEXT_TEMPLATE
-                    .replace("<path>", pending_file.relative_path.to_str().unwrap())
-                    .replace("<language>", &pending_file.language.name().to_lowercase())
-                    .replace("<item>", item.as_str());
-
-                document_texts.push(document_text);
-                documents.push(Document {
-                    name: name.join(" "),
-                    offset: offset.unwrap(),
-                    embedding: Vec::new(),
-                })
+            if let Some((item, byte_range)) = item.zip(byte_range) {
+                if !name.is_empty() {
+                    let item = format!("{}\n{}", context_spans.join("\n"), item);
+
+                    let document_text = CODE_CONTEXT_TEMPLATE
+                        .replace("<path>", relative_path.to_str().unwrap())
+                        .replace("<language>", &language.name().to_lowercase())
+                        .replace("<item>", item.as_str());
+
+                    documents.push(Document {
+                        range: byte_range,
+                        content: document_text,
+                        embedding: Vec::new(),
+                        name: name.join(" ").to_string(),
+                    });
+                }
             }
         }
 
-        return Ok((
-            ParsedFile {
-                path: pending_file.relative_path,
-                mtime: pending_file.modified_time,
-                documents,
-            },
-            document_texts,
-        ));
+        return Ok(documents);
     }
 }
diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs
index 0a197bc40663034d4156cf025c731449cef725c7..3d9c32875eef17c6cc58b1bed1637c1b920c2b0f 100644
--- a/crates/vector_store/src/vector_store.rs
+++ b/crates/vector_store/src/vector_store.rs
@@ -18,16 +18,16 @@ use gpui::{
 };
 use language::{Language, LanguageRegistry};
 use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
-use parsing::{CodeContextRetriever, ParsedFile};
+use parsing::{CodeContextRetriever, Document};
 use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId};
 use smol::channel;
 use std::{
     collections::HashMap,
+    ops::Range,
     path::{Path, PathBuf},
     sync::Arc,
     time::{Duration, Instant, SystemTime},
 };
-use tree_sitter::{Parser, QueryCursor};
 use util::{
     channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME},
     http::HttpClient,
@@ -36,7 +36,7 @@ use util::{
 };
 use workspace::{Workspace, WorkspaceCreated};
 
-const VECTOR_STORE_VERSION: usize = 0;
+const VECTOR_STORE_VERSION: usize = 1;
 const EMBEDDINGS_BATCH_SIZE: usize = 150;
 
 pub fn init(
@@ -80,11 +80,11 @@ pub fn init(
         let vector_store = VectorStore::new(
             fs,
             db_file_path,
-            // Arc::new(embedding::DummyEmbeddings {}),
-            Arc::new(OpenAIEmbeddings {
-                client: http_client,
-                executor: cx.background(),
-            }),
+            Arc::new(embedding::DummyEmbeddings {}),
+            // Arc::new(OpenAIEmbeddings {
+            //     client: http_client,
+            //     executor: cx.background(),
+            // }),
             language_registry,
             cx.clone(),
         )
@@ -212,14 +212,16 @@ pub struct PendingFile {
 pub struct SearchResult {
     pub worktree_id: WorktreeId,
     pub name: String,
-    pub offset: usize,
+    pub byte_range: Range<usize>,
     pub file_path: PathBuf,
 }
 
 enum DbOperation {
     InsertFile {
         worktree_id: i64,
-        indexed_file: ParsedFile,
+        documents: Vec<Document>,
+        path: PathBuf,
+        mtime: SystemTime,
     },
     Delete {
         worktree_id: i64,
@@ -238,8 +240,9 @@ enum DbOperation {
 enum EmbeddingJob {
     Enqueue {
         worktree_id: i64,
-        parsed_file: ParsedFile,
-        document_spans: Vec<String>,
+        path: PathBuf,
+        mtime: SystemTime,
+        documents: Vec<Document>,
     },
     Flush,
 }
@@ -256,18 +259,7 @@ impl VectorStore {
 
         let db = cx
             .background()
-            .spawn({
-                let fs = fs.clone();
-                let database_url = database_url.clone();
-                async move {
-                    if let Some(db_directory) = database_url.parent() {
-                        fs.create_dir(db_directory).await.log_err();
-                    }
-
-                    let db = VectorDatabase::new(database_url.to_string_lossy().to_string())?;
-                    anyhow::Ok(db)
-                }
-            })
+            .spawn(VectorDatabase::new(fs.clone(), database_url.clone()))
             .await?;
 
         Ok(cx.add_model(|cx| {
@@ -280,9 +272,12 @@ impl VectorStore {
                     match job {
                         DbOperation::InsertFile {
                             worktree_id,
-                            indexed_file,
+                            documents,
+                            path,
+                            mtime,
                         } => {
-                            db.insert_file(worktree_id, indexed_file).log_err();
+                            db.insert_file(worktree_id, path, mtime, documents)
+                                .log_err();
                         }
                         DbOperation::Delete { worktree_id, path } => {
                             db.delete_file(worktree_id, path).log_err();
@@ -304,35 +299,45 @@ impl VectorStore {
 
             // embed_tx/rx: Embed Batch and Send to Database
             let (embed_batch_tx, embed_batch_rx) =
-                channel::unbounded::<Vec<(i64, ParsedFile, Vec<String>)>>();
+                channel::unbounded::<Vec<(i64, Vec<Document>, PathBuf, SystemTime)>>();
             let _embed_batch_task = cx.background().spawn({
                 let db_update_tx = db_update_tx.clone();
                 let embedding_provider = embedding_provider.clone();
                 async move {
                     while let Ok(mut embeddings_queue) = embed_batch_rx.recv().await {
                         // Construct Batch
-                        let mut document_spans = vec![];
-                        for (_, _, document_span) in embeddings_queue.iter() {
-                            document_spans.extend(document_span.iter().map(|s| s.as_str()));
+                        let mut batch_documents = vec![];
+                        for (_, documents, _, _) in embeddings_queue.iter() {
+                            batch_documents
+                                .extend(documents.iter().map(|document| document.content.as_str()));
                         }
 
-                        if let Ok(embeddings) = embedding_provider.embed_batch(document_spans).await
+                        if let Ok(embeddings) =
+                            embedding_provider.embed_batch(batch_documents).await
                         {
+                            log::trace!(
+                                "created {} embeddings for {} files",
+                                embeddings.len(),
+                                embeddings_queue.len(),
+                            );
+
                             let mut i = 0;
                             let mut j = 0;
 
                             for embedding in embeddings.iter() {
-                                while embeddings_queue[i].1.documents.len() == j {
+                                while embeddings_queue[i].1.len() == j {
                                     i += 1;
                                     j = 0;
                                 }
 
-                                embeddings_queue[i].1.documents[j].embedding = embedding.to_owned();
+                                embeddings_queue[i].1[j].embedding = embedding.to_owned();
                                 j += 1;
                             }
 
-                            for (worktree_id, indexed_file, _) in embeddings_queue.into_iter() {
-                                for document in indexed_file.documents.iter() {
+                            for (worktree_id, documents, path, mtime) in
+                                embeddings_queue.into_iter()
+                            {
+                                for document in documents.iter() {
                                     // TODO: Update this so it doesn't panic
                                     assert!(
                                         document.embedding.len() > 0,
@@ -343,7 +348,9 @@ impl VectorStore {
                                 db_update_tx
                                     .send(DbOperation::InsertFile {
                                         worktree_id,
-                                        indexed_file,
+                                        documents,
+                                        path,
+                                        mtime,
                                     })
                                     .await
                                     .unwrap();
@@ -362,12 +369,13 @@ impl VectorStore {
                 while let Ok(job) = batch_files_rx.recv().await {
                     let should_flush = match job {
                         EmbeddingJob::Enqueue {
-                            document_spans,
+                            documents,
                             worktree_id,
-                            parsed_file,
+                            path,
+                            mtime,
                         } => {
-                            queue_len += &document_spans.len();
-                            embeddings_queue.push((worktree_id, parsed_file, document_spans));
+                            queue_len += &documents.len();
+                            embeddings_queue.push((worktree_id, documents, path, mtime));
                             queue_len >= EMBEDDINGS_BATCH_SIZE
                         }
                         EmbeddingJob::Flush => true,
@@ -385,26 +393,38 @@ impl VectorStore {
             let (parsing_files_tx, parsing_files_rx) = channel::unbounded::<PendingFile>();
 
             let mut _parsing_files_tasks = Vec::new();
-            // for _ in 0..cx.background().num_cpus() {
-            for _ in 0..1 {
+            for _ in 0..cx.background().num_cpus() {
                 let fs = fs.clone();
                 let parsing_files_rx = parsing_files_rx.clone();
                 let batch_files_tx = batch_files_tx.clone();
                 _parsing_files_tasks.push(cx.background().spawn(async move {
-                    let parser = Parser::new();
-                    let cursor = QueryCursor::new();
-                    let mut retriever = CodeContextRetriever { parser, cursor, fs };
+                    let mut retriever = CodeContextRetriever::new();
                     while let Ok(pending_file) = parsing_files_rx.recv().await {
-                        if let Some((indexed_file, document_spans)) =
-                            retriever.parse_file(pending_file.clone()).await.log_err()
+                        if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err()
                         {
-                            batch_files_tx
-                                .try_send(EmbeddingJob::Enqueue {
-                                    worktree_id: pending_file.worktree_db_id,
-                                    parsed_file: indexed_file,
-                                    document_spans,
-                                })
-                                .unwrap();
+                            if let Some(documents) = retriever
+                                .parse_file(
+                                    &pending_file.relative_path,
+                                    &content,
+                                    pending_file.language,
+                                )
+                                .log_err()
+                            {
+                                log::trace!(
+                                    "parsed path {:?}: {} documents",
+                                    pending_file.relative_path,
+                                    documents.len()
+                                );
+
+                                batch_files_tx
+                                    .try_send(EmbeddingJob::Enqueue {
+                                        worktree_id: pending_file.worktree_db_id,
+                                        path: pending_file.relative_path,
+                                        mtime: pending_file.modified_time,
+                                        documents,
+                                    })
+                                    .unwrap();
+                            }
                         }
 
                         if parsing_files_rx.len() == 0 {
@@ -543,6 +563,7 @@ impl VectorStore {
                                         });
 
                                     if !already_stored {
+                                        log::trace!("sending for parsing: {:?}", path_buf);
                                         parsing_files_tx
                                             .try_send(PendingFile {
                                                 worktree_db_id: db_ids_by_worktree_id
@@ -565,8 +586,8 @@ impl VectorStore {
                                     .unwrap();
                             }
                         }
-                        log::info!(
-                            "Parsing Worktree Completed in {:?}",
+                        log::trace!(
+                            "parsing worktree completed in {:?}",
                             t0.elapsed().as_millis()
                         );
                     }
@@ -622,11 +643,12 @@ impl VectorStore {
 
         let embedding_provider = self.embedding_provider.clone();
         let database_url = self.database_url.clone();
+        let fs = self.fs.clone();
         cx.spawn(|this, cx| async move {
             let documents = cx
                 .background()
                 .spawn(async move {
-                    let database = VectorDatabase::new(database_url.to_string_lossy().into())?;
+                    let database = VectorDatabase::new(fs, database_url).await?;
 
                     let phrase_embedding = embedding_provider
                         .embed_batch(vec![&phrase])
@@ -648,12 +670,12 @@ impl VectorStore {
 
                 Ok(documents
                     .into_iter()
-                    .filter_map(|(worktree_db_id, file_path, offset, name)| {
+                    .filter_map(|(worktree_db_id, file_path, byte_range, name)| {
                         let worktree_id = project_state.worktree_id_for_db_id(worktree_db_id)?;
                         Some(SearchResult {
                             worktree_id,
                             name,
-                            offset,
+                            byte_range,
                             file_path,
                         })
                     })
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index b6e47e7a2341e71a790a749642770e81cd147aaf..c4349c72808a90ff2baeedd2485a11592a56d87c 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -12,6 +12,13 @@ use settings::SettingsStore;
 use std::sync::Arc;
 use unindent::Unindent;
 
+#[ctor::ctor]
+fn init_logger() {
+    if std::env::var("RUST_LOG").is_ok() {
+        env_logger::init();
+    }
+}
+
 #[gpui::test]
 async fn test_vector_store(cx: &mut TestAppContext) {
     cx.update(|cx| {
@@ -95,11 +102,23 @@ async fn test_vector_store(cx: &mut TestAppContext) {
         .await
         .unwrap();
 
-    assert_eq!(search_results[0].offset, 0);
+    assert_eq!(search_results[0].byte_range.start, 0);
     assert_eq!(search_results[0].name, "aaa");
     assert_eq!(search_results[0].worktree_id, worktree_id);
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval(cx: &mut TestAppContext) {
+    // let mut retriever = CodeContextRetriever::new(fs);
+
+    // retriever::parse_file(
+    //     "
+    //     //
+    // ",
+    // );
+    //
+}
+
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
     assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);

From 623cb9833c17aaac11d4a2d4bea03295ffa842c4 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 16:58:42 -0400
Subject: [PATCH 04/34] add tests for rust context parsing, and update rust
 embedding query

Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 crates/vector_store/src/parsing.rs            |   6 +-
 crates/vector_store/src/vector_store_tests.rs | 156 ++++++++++++++----
 crates/zed/src/languages/rust/embedding.scm   |  64 +++++--
 3 files changed, 179 insertions(+), 47 deletions(-)

diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 23dcf505c92896b1eb18499d4b05b633d1c37bf7..8d6e03d6eb29d524db23848adfa15a8ac6b4b164 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -81,7 +81,11 @@ impl CodeContextRetriever {
 
             if let Some((item, byte_range)) = item.zip(byte_range) {
                 if !name.is_empty() {
-                    let item = format!("{}\n{}", context_spans.join("\n"), item);
+                    let item = if context_spans.is_empty() {
+                        item.to_string()
+                    } else {
+                        format!("{}\n{}", context_spans.join("\n"), item)
+                    };
 
                     let document_text = CODE_CONTEXT_TEMPLATE
                         .replace("<path>", relative_path.to_str().unwrap())
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index c4349c72808a90ff2baeedd2485a11592a56d87c..ccdd9fdaf07605b36f025d3a4bad63a3a2f516c2 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -1,5 +1,9 @@
 use crate::{
-    db::dot, embedding::EmbeddingProvider, vector_store_settings::VectorStoreSettings, VectorStore,
+    db::dot,
+    embedding::EmbeddingProvider,
+    parsing::{CodeContextRetriever, Document},
+    vector_store_settings::VectorStoreSettings,
+    VectorStore,
 };
 use anyhow::Result;
 use async_trait::async_trait;
@@ -9,7 +13,7 @@ use project::{project_settings::ProjectSettings, FakeFs, Project};
 use rand::{rngs::StdRng, Rng};
 use serde_json::json;
 use settings::SettingsStore;
-use std::sync::Arc;
+use std::{path::Path, sync::Arc};
 use unindent::Unindent;
 
 #[ctor::ctor]
@@ -52,24 +56,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
     .await;
 
     let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
-    let rust_language = Arc::new(
-        Language::new(
-            LanguageConfig {
-                name: "Rust".into(),
-                path_suffixes: vec!["rs".into()],
-                ..Default::default()
-            },
-            Some(tree_sitter_rust::language()),
-        )
-        .with_embedding_query(
-            r#"
-            (function_item
-                name: (identifier) @name
-                body: (block)) @item
-            "#,
-        )
-        .unwrap(),
-    );
+    let rust_language = rust_lang();
     languages.add(rust_language);
 
     let db_dir = tempdir::TempDir::new("vector-store").unwrap();
@@ -109,14 +96,59 @@ async fn test_vector_store(cx: &mut TestAppContext) {
 
 #[gpui::test]
 async fn test_code_context_retrieval(cx: &mut TestAppContext) {
-    // let mut retriever = CodeContextRetriever::new(fs);
-
-    // retriever::parse_file(
-    //     "
-    //     //
-    // ",
-    // );
-    //
+    let language = rust_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = "
+        /// A doc comment
+        /// that spans multiple lines
+        fn a() {
+            b
+        }
+
+        impl C for D {
+        }
+    "
+    .unindent();
+
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.rs"), &text, language)
+        .unwrap();
+
+    assert_eq!(
+        parsed_files,
+        &[
+            Document {
+                name: "a".into(),
+                range: text.find("fn a").unwrap()..(text.find("}").unwrap() + 1),
+                content: "
+                    The below code snippet is from file 'foo.rs'
+
+                    ```rust
+                    /// A doc comment
+                    /// that spans multiple lines
+                    fn a() {
+                        b
+                    }
+                    ```"
+                .unindent(),
+                embedding: vec![],
+            },
+            Document {
+                name: "C for D".into(),
+                range: text.find("impl C").unwrap()..(text.rfind("}").unwrap() + 1),
+                content: "
+                    The below code snippet is from file 'foo.rs'
+
+                    ```rust
+                    impl C for D {
+                    }
+                    ```"
+                .unindent(),
+                embedding: vec![],
+            }
+        ]
+    );
 }
 
 #[gpui::test]
@@ -178,3 +210,71 @@ impl EmbeddingProvider for FakeEmbeddingProvider {
             .collect())
     }
 }
+
+fn rust_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "Rust".into(),
+                path_suffixes: vec!["rs".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_rust::language()),
+        )
+        .with_embedding_query(
+            r#"
+            (
+                (line_comment)* @context
+                .
+                (enum_item
+                    name: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (struct_item
+                    name: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (impl_item
+                    trait: (_)? @name
+                    "for"? @name
+                    type: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (trait_item
+                    name: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (function_item
+                    name: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (macro_definition
+                    name: (_) @name) @item
+            )
+
+            (
+                (line_comment)* @context
+                .
+                (function_signature_item
+                    name: (_) @name) @item
+            )
+            "#,
+        )
+        .unwrap(),
+    )
+}
diff --git a/crates/zed/src/languages/rust/embedding.scm b/crates/zed/src/languages/rust/embedding.scm
index 3aec101e9fbb5d63a49db52869f34757135b0ab2..66e4083de5f0fe8b1adfa2ea657668e4453e4b61 100644
--- a/crates/zed/src/languages/rust/embedding.scm
+++ b/crates/zed/src/languages/rust/embedding.scm
@@ -1,22 +1,50 @@
 (
     (line_comment)* @context
     .
-    [
-        (enum_item
-            name: (_) @name) @item
-        (struct_item
-            name: (_) @name) @item
-        (impl_item
-            trait: (_)? @name
-            "for"? @name
-            type: (_) @name) @item
-        (trait_item
-            name: (_) @name) @item
-        (function_item
-            name: (_) @name) @item
-        (macro_definition
-            name: (_) @name) @item
-        (function_signature_item
-            name: (_) @name) @item
-    ]
+    (enum_item
+        name: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (struct_item
+        name: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (impl_item
+        trait: (_)? @name
+        "for"? @name
+        type: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (trait_item
+        name: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (function_item
+        name: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (macro_definition
+        name: (_) @name) @item
+)
+
+(
+    (line_comment)* @context
+    .
+    (function_signature_item
+        name: (_) @name) @item
 )

From d8fd0be59832d52ef7e21784a43c697c53e789e9 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 17:01:56 -0400
Subject: [PATCH 05/34] update vector store to remove dummy embeddings

---
 crates/vector_store/src/vector_store.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs
index 3d9c32875eef17c6cc58b1bed1637c1b920c2b0f..d35798a58db607f4b979e34ac2c00ae1f7bef8bf 100644
--- a/crates/vector_store/src/vector_store.rs
+++ b/crates/vector_store/src/vector_store.rs
@@ -80,11 +80,10 @@ pub fn init(
         let vector_store = VectorStore::new(
             fs,
             db_file_path,
-            Arc::new(embedding::DummyEmbeddings {}),
-            // Arc::new(OpenAIEmbeddings {
-            //     client: http_client,
-            //     executor: cx.background(),
-            // }),
+            Arc::new(OpenAIEmbeddings {
+                client: http_client,
+                executor: cx.background(),
+            }),
             language_registry,
             cx.clone(),
         )

From b38e3b804c7e1124c8a41ac3fb471c305e522639 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 13 Jul 2023 18:14:44 -0400
Subject: [PATCH 06/34] remove reindexing subscription, and add status methods
 for vector store

Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 Cargo.lock                                    |   1 +
 crates/vector_store/Cargo.toml                |   1 +
 crates/vector_store/src/modal.rs              |   2 +-
 crates/vector_store/src/vector_store.rs       | 379 +++++++-----------
 crates/vector_store/src/vector_store_tests.rs |  78 +++-
 5 files changed, 208 insertions(+), 253 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 4359659a53bad7b2b33bca0fa9e41cd6ae09b11f..239aa6a302ded4391422e1c2d8752236f4019bb3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8493,6 +8493,7 @@ dependencies = [
  "lazy_static",
  "log",
  "matrixmultiply",
+ "parking_lot 0.11.2",
  "picker",
  "project",
  "rand 0.8.5",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index 8e1dea59fd8c0fe890291388fccaa9ac7cd3443d..bac9cdedfafc4567f24b7502e0f9ea9e4d0e71e3 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -33,6 +33,7 @@ async-trait.workspace = true
 bincode = "1.3.3"
 matrixmultiply = "0.3.7"
 tiktoken-rs = "0.5.0"
+parking_lot.workspace = true
 rand.workspace = true
 schemars.workspace = true
 
diff --git a/crates/vector_store/src/modal.rs b/crates/vector_store/src/modal.rs
index b797a208062ee623db11fbbfe40948847639465f..2981fa4e73ef77ce3b54b68da9b177452f6d245e 100644
--- a/crates/vector_store/src/modal.rs
+++ b/crates/vector_store/src/modal.rs
@@ -124,7 +124,7 @@ impl PickerDelegate for SemanticSearchDelegate {
             if let Some(retrieved) = retrieved_cached.log_err() {
                 if !retrieved {
                     let task = vector_store.update(&mut cx, |store, cx| {
-                        store.search(project.clone(), query.to_string(), 10, cx)
+                        store.search_project(project.clone(), query.to_string(), 10, cx)
                     });
 
                     if let Some(results) = task.await.log_err() {
diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs
index d35798a58db607f4b979e34ac2c00ae1f7bef8bf..3f7ab5c6cd1b2e0296ee560071377a84a6c527db 100644
--- a/crates/vector_store/src/vector_store.rs
+++ b/crates/vector_store/src/vector_store.rs
@@ -18,15 +18,19 @@ use gpui::{
 };
 use language::{Language, LanguageRegistry};
 use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
+use parking_lot::Mutex;
 use parsing::{CodeContextRetriever, Document};
-use project::{Fs, PathChange, Project, ProjectEntryId, WorktreeId};
+use project::{Fs, Project, WorktreeId};
 use smol::channel;
 use std::{
-    collections::HashMap,
+    collections::{HashMap, HashSet},
     ops::Range,
     path::{Path, PathBuf},
-    sync::Arc,
-    time::{Duration, Instant, SystemTime},
+    sync::{
+        atomic::{self, AtomicUsize},
+        Arc, Weak,
+    },
+    time::{Instant, SystemTime},
 };
 use util::{
     channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME},
@@ -99,7 +103,7 @@ pub fn init(
                         let project = workspace.read(cx).project().clone();
                         if project.read(cx).is_local() {
                             vector_store.update(cx, |store, cx| {
-                                store.add_project(project, cx).detach();
+                                store.index_project(project, cx).detach();
                             });
                         }
                     }
@@ -124,13 +128,20 @@ pub struct VectorStore {
     _embed_batch_task: Task<()>,
     _batch_files_task: Task<()>,
     _parsing_files_tasks: Vec<Task<()>>,
+    next_job_id: Arc<AtomicUsize>,
     projects: HashMap<WeakModelHandle<Project>, ProjectState>,
 }
 
 struct ProjectState {
     worktree_db_ids: Vec<(WorktreeId, i64)>,
-    pending_files: HashMap<PathBuf, (PendingFile, SystemTime)>,
-    _subscription: gpui::Subscription,
+    outstanding_jobs: Arc<Mutex<HashSet<JobId>>>,
+}
+
+type JobId = usize;
+
+struct JobHandle {
+    id: JobId,
+    set: Weak<Mutex<HashSet<JobId>>>,
 }
 
 impl ProjectState {
@@ -157,54 +168,15 @@ impl ProjectState {
                 }
             })
     }
-
-    fn update_pending_files(&mut self, pending_file: PendingFile, indexing_time: SystemTime) {
-        // If Pending File Already Exists, Replace it with the new one
-        // but keep the old indexing time
-        if let Some(old_file) = self
-            .pending_files
-            .remove(&pending_file.relative_path.clone())
-        {
-            self.pending_files.insert(
-                pending_file.relative_path.clone(),
-                (pending_file, old_file.1),
-            );
-        } else {
-            self.pending_files.insert(
-                pending_file.relative_path.clone(),
-                (pending_file, indexing_time),
-            );
-        };
-    }
-
-    fn get_outstanding_files(&mut self) -> Vec<PendingFile> {
-        let mut outstanding_files = vec![];
-        let mut remove_keys = vec![];
-        for key in self.pending_files.keys().into_iter() {
-            if let Some(pending_details) = self.pending_files.get(key) {
-                let (pending_file, index_time) = pending_details;
-                if index_time <= &SystemTime::now() {
-                    outstanding_files.push(pending_file.clone());
-                    remove_keys.push(key.clone());
-                }
-            }
-        }
-
-        for key in remove_keys.iter() {
-            self.pending_files.remove(key);
-        }
-
-        return outstanding_files;
-    }
 }
 
-#[derive(Clone, Debug)]
 pub struct PendingFile {
     worktree_db_id: i64,
     relative_path: PathBuf,
     absolute_path: PathBuf,
     language: Arc<Language>,
     modified_time: SystemTime,
+    job_handle: JobHandle,
 }
 
 #[derive(Debug, Clone)]
@@ -221,6 +193,7 @@ enum DbOperation {
         documents: Vec<Document>,
         path: PathBuf,
         mtime: SystemTime,
+        job_handle: JobHandle,
     },
     Delete {
         worktree_id: i64,
@@ -242,6 +215,7 @@ enum EmbeddingJob {
         path: PathBuf,
         mtime: SystemTime,
         documents: Vec<Document>,
+        job_handle: JobHandle,
     },
     Flush,
 }
@@ -274,9 +248,11 @@ impl VectorStore {
                             documents,
                             path,
                             mtime,
+                            job_handle,
                         } => {
                             db.insert_file(worktree_id, path, mtime, documents)
                                 .log_err();
+                            drop(job_handle)
                         }
                         DbOperation::Delete { worktree_id, path } => {
                             db.delete_file(worktree_id, path).log_err();
@@ -298,7 +274,7 @@ impl VectorStore {
 
             // embed_tx/rx: Embed Batch and Send to Database
             let (embed_batch_tx, embed_batch_rx) =
-                channel::unbounded::<Vec<(i64, Vec<Document>, PathBuf, SystemTime)>>();
+                channel::unbounded::<Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>>();
             let _embed_batch_task = cx.background().spawn({
                 let db_update_tx = db_update_tx.clone();
                 let embedding_provider = embedding_provider.clone();
@@ -306,7 +282,7 @@ impl VectorStore {
                     while let Ok(mut embeddings_queue) = embed_batch_rx.recv().await {
                         // Construct Batch
                         let mut batch_documents = vec![];
-                        for (_, documents, _, _) in embeddings_queue.iter() {
+                        for (_, documents, _, _, _) in embeddings_queue.iter() {
                             batch_documents
                                 .extend(documents.iter().map(|document| document.content.as_str()));
                         }
@@ -333,7 +309,7 @@ impl VectorStore {
                                 j += 1;
                             }
 
-                            for (worktree_id, documents, path, mtime) in
+                            for (worktree_id, documents, path, mtime, job_handle) in
                                 embeddings_queue.into_iter()
                             {
                                 for document in documents.iter() {
@@ -350,6 +326,7 @@ impl VectorStore {
                                         documents,
                                         path,
                                         mtime,
+                                        job_handle,
                                     })
                                     .await
                                     .unwrap();
@@ -372,9 +349,16 @@ impl VectorStore {
                             worktree_id,
                             path,
                             mtime,
+                            job_handle,
                         } => {
                             queue_len += &documents.len();
-                            embeddings_queue.push((worktree_id, documents, path, mtime));
+                            embeddings_queue.push((
+                                worktree_id,
+                                documents,
+                                path,
+                                mtime,
+                                job_handle,
+                            ));
                             queue_len >= EMBEDDINGS_BATCH_SIZE
                         }
                         EmbeddingJob::Flush => true,
@@ -420,6 +404,7 @@ impl VectorStore {
                                         worktree_id: pending_file.worktree_db_id,
                                         path: pending_file.relative_path,
                                         mtime: pending_file.modified_time,
+                                        job_handle: pending_file.job_handle,
                                         documents,
                                     })
                                     .unwrap();
@@ -439,6 +424,7 @@ impl VectorStore {
                 embedding_provider,
                 language_registry,
                 db_update_tx,
+                next_job_id: Default::default(),
                 parsing_files_tx,
                 _db_update_task,
                 _embed_batch_task,
@@ -471,11 +457,11 @@ impl VectorStore {
         async move { rx.await? }
     }
 
-    fn add_project(
+    fn index_project(
         &mut self,
         project: ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
-    ) -> Task<Result<()>> {
+    ) -> Task<Result<usize>> {
         let worktree_scans_complete = project
             .read(cx)
             .worktrees(cx)
@@ -494,21 +480,16 @@ impl VectorStore {
             })
             .collect::<Vec<_>>();
 
-        let fs = self.fs.clone();
         let language_registry = self.language_registry.clone();
-        let database_url = self.database_url.clone();
         let db_update_tx = self.db_update_tx.clone();
         let parsing_files_tx = self.parsing_files_tx.clone();
+        let next_job_id = self.next_job_id.clone();
 
         cx.spawn(|this, mut cx| async move {
             futures::future::join_all(worktree_scans_complete).await;
 
             let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
 
-            if let Some(db_directory) = database_url.parent() {
-                fs.create_dir(db_directory).await.log_err();
-            }
-
             let worktrees = project.read_with(&cx, |project, cx| {
                 project
                     .worktrees(cx)
@@ -516,109 +497,115 @@ impl VectorStore {
                     .collect::<Vec<_>>()
             });
 
-            let mut worktree_file_times = HashMap::new();
+            let mut worktree_file_mtimes = HashMap::new();
             let mut db_ids_by_worktree_id = HashMap::new();
             for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) {
                 let db_id = db_id?;
                 db_ids_by_worktree_id.insert(worktree.id(), db_id);
-                worktree_file_times.insert(
+                worktree_file_mtimes.insert(
                     worktree.id(),
                     this.read_with(&cx, |this, _| this.get_file_mtimes(db_id))
                         .await?,
                 );
             }
 
-            cx.background()
-                .spawn({
-                    let db_ids_by_worktree_id = db_ids_by_worktree_id.clone();
-                    let db_update_tx = db_update_tx.clone();
-                    let language_registry = language_registry.clone();
-                    let parsing_files_tx = parsing_files_tx.clone();
-                    async move {
-                        let t0 = Instant::now();
-                        for worktree in worktrees.into_iter() {
-                            let mut file_mtimes =
-                                worktree_file_times.remove(&worktree.id()).unwrap();
-                            for file in worktree.files(false, 0) {
-                                let absolute_path = worktree.absolutize(&file.path);
-
-                                if let Ok(language) = language_registry
-                                    .language_for_file(&absolute_path, None)
-                                    .await
-                                {
-                                    if language
-                                        .grammar()
-                                        .and_then(|grammar| grammar.embedding_config.as_ref())
-                                        .is_none()
-                                    {
-                                        continue;
-                                    }
-
-                                    let path_buf = file.path.to_path_buf();
-                                    let stored_mtime = file_mtimes.remove(&file.path.to_path_buf());
-                                    let already_stored = stored_mtime
-                                        .map_or(false, |existing_mtime| {
-                                            existing_mtime == file.mtime
-                                        });
-
-                                    if !already_stored {
-                                        log::trace!("sending for parsing: {:?}", path_buf);
-                                        parsing_files_tx
-                                            .try_send(PendingFile {
-                                                worktree_db_id: db_ids_by_worktree_id
-                                                    [&worktree.id()],
-                                                relative_path: path_buf,
-                                                absolute_path,
-                                                language,
-                                                modified_time: file.mtime,
-                                            })
-                                            .unwrap();
-                                    }
-                                }
-                            }
-                            for file in file_mtimes.keys() {
-                                db_update_tx
-                                    .try_send(DbOperation::Delete {
-                                        worktree_id: db_ids_by_worktree_id[&worktree.id()],
-                                        path: file.to_owned(),
-                                    })
-                                    .unwrap();
-                            }
-                        }
-                        log::trace!(
-                            "parsing worktree completed in {:?}",
-                            t0.elapsed().as_millis()
-                        );
-                    }
-                })
-                .detach();
-
             // let mut pending_files: Vec<(PathBuf, ((i64, PathBuf, Arc<Language>, SystemTime), SystemTime))> = vec![];
-            this.update(&mut cx, |this, cx| {
-                // The below is managing for updated on save
-                // Currently each time a file is saved, this code is run, and for all the files that were changed, if the current time is
-                // greater than the previous embedded time by the REINDEXING_DELAY variable, we will send the file off to be indexed.
-                let _subscription = cx.subscribe(&project, |this, project, event, cx| {
-                    if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event {
-                        this.project_entries_changed(project, changes.clone(), cx, worktree_id);
-                    }
-                });
-
+            let outstanding_jobs = Arc::new(Mutex::new(HashSet::new()));
+            this.update(&mut cx, |this, _| {
                 this.projects.insert(
                     project.downgrade(),
                     ProjectState {
-                        pending_files: HashMap::new(),
-                        worktree_db_ids: db_ids_by_worktree_id.into_iter().collect(),
-                        _subscription,
+                        worktree_db_ids: db_ids_by_worktree_id
+                            .iter()
+                            .map(|(a, b)| (*a, *b))
+                            .collect(),
+                        outstanding_jobs: outstanding_jobs.clone(),
                     },
                 );
             });
 
-            anyhow::Ok(())
+            cx.background()
+                .spawn(async move {
+                    let mut count = 0;
+                    let t0 = Instant::now();
+                    for worktree in worktrees.into_iter() {
+                        let mut file_mtimes = worktree_file_mtimes.remove(&worktree.id()).unwrap();
+                        for file in worktree.files(false, 0) {
+                            let absolute_path = worktree.absolutize(&file.path);
+
+                            if let Ok(language) = language_registry
+                                .language_for_file(&absolute_path, None)
+                                .await
+                            {
+                                if language
+                                    .grammar()
+                                    .and_then(|grammar| grammar.embedding_config.as_ref())
+                                    .is_none()
+                                {
+                                    continue;
+                                }
+
+                                let path_buf = file.path.to_path_buf();
+                                let stored_mtime = file_mtimes.remove(&file.path.to_path_buf());
+                                let already_stored = stored_mtime
+                                    .map_or(false, |existing_mtime| existing_mtime == file.mtime);
+
+                                if !already_stored {
+                                    log::trace!("sending for parsing: {:?}", path_buf);
+                                    count += 1;
+                                    let job_id = next_job_id.fetch_add(1, atomic::Ordering::SeqCst);
+                                    let job_handle = JobHandle {
+                                        id: job_id,
+                                        set: Arc::downgrade(&outstanding_jobs),
+                                    };
+                                    outstanding_jobs.lock().insert(job_id);
+                                    parsing_files_tx
+                                        .try_send(PendingFile {
+                                            worktree_db_id: db_ids_by_worktree_id[&worktree.id()],
+                                            relative_path: path_buf,
+                                            absolute_path,
+                                            language,
+                                            job_handle,
+                                            modified_time: file.mtime,
+                                        })
+                                        .unwrap();
+                                }
+                            }
+                        }
+                        for file in file_mtimes.keys() {
+                            db_update_tx
+                                .try_send(DbOperation::Delete {
+                                    worktree_id: db_ids_by_worktree_id[&worktree.id()],
+                                    path: file.to_owned(),
+                                })
+                                .unwrap();
+                        }
+                    }
+                    log::trace!(
+                        "parsing worktree completed in {:?}",
+                        t0.elapsed().as_millis()
+                    );
+
+                    Ok(count)
+                })
+                .await
         })
     }
 
-    pub fn search(
+    pub fn remaining_files_to_index_for_project(
+        &self,
+        project: &ModelHandle<Project>,
+    ) -> Option<usize> {
+        Some(
+            self.projects
+                .get(&project.downgrade())?
+                .outstanding_jobs
+                .lock()
+                .len(),
+        )
+    }
+
+    pub fn search_project(
         &mut self,
         project: ModelHandle<Project>,
         phrase: String,
@@ -682,110 +669,16 @@ impl VectorStore {
             })
         })
     }
-
-    fn project_entries_changed(
-        &mut self,
-        project: ModelHandle<Project>,
-        changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
-        cx: &mut ModelContext<'_, VectorStore>,
-        worktree_id: &WorktreeId,
-    ) -> Option<()> {
-        let reindexing_delay = settings::get::<VectorStoreSettings>(cx).reindexing_delay_seconds;
-
-        let worktree = project
-            .read(cx)
-            .worktree_for_id(worktree_id.clone(), cx)?
-            .read(cx)
-            .snapshot();
-
-        let worktree_db_id = self
-            .projects
-            .get(&project.downgrade())?
-            .db_id_for_worktree_id(worktree.id())?;
-        let file_mtimes = self.get_file_mtimes(worktree_db_id);
-
-        let language_registry = self.language_registry.clone();
-
-        cx.spawn(|this, mut cx| async move {
-            let file_mtimes = file_mtimes.await.log_err()?;
-
-            for change in changes.into_iter() {
-                let change_path = change.0.clone();
-                let absolute_path = worktree.absolutize(&change_path);
-
-                // Skip if git ignored or symlink
-                if let Some(entry) = worktree.entry_for_id(change.1) {
-                    if entry.is_ignored || entry.is_symlink || entry.is_external {
-                        continue;
-                    }
-                }
-
-                match change.2 {
-                    PathChange::Removed => this.update(&mut cx, |this, _| {
-                        this.db_update_tx
-                            .try_send(DbOperation::Delete {
-                                worktree_id: worktree_db_id,
-                                path: absolute_path,
-                            })
-                            .unwrap();
-                    }),
-                    _ => {
-                        if let Ok(language) = language_registry
-                            .language_for_file(&change_path.to_path_buf(), None)
-                            .await
-                        {
-                            if language
-                                .grammar()
-                                .and_then(|grammar| grammar.embedding_config.as_ref())
-                                .is_none()
-                            {
-                                continue;
-                            }
-
-                            let modified_time =
-                                change_path.metadata().log_err()?.modified().log_err()?;
-
-                            let existing_time = file_mtimes.get(&change_path.to_path_buf());
-                            let already_stored = existing_time
-                                .map_or(false, |existing_time| &modified_time != existing_time);
-
-                            if !already_stored {
-                                this.update(&mut cx, |this, _| {
-                                    let reindex_time = modified_time
-                                        + Duration::from_secs(reindexing_delay as u64);
-
-                                    let project_state =
-                                        this.projects.get_mut(&project.downgrade())?;
-                                    project_state.update_pending_files(
-                                        PendingFile {
-                                            relative_path: change_path.to_path_buf(),
-                                            absolute_path,
-                                            modified_time,
-                                            worktree_db_id,
-                                            language: language.clone(),
-                                        },
-                                        reindex_time,
-                                    );
-
-                                    for file in project_state.get_outstanding_files() {
-                                        this.parsing_files_tx.try_send(file).unwrap();
-                                    }
-                                    Some(())
-                                });
-                            }
-                        }
-                    }
-                }
-            }
-
-            Some(())
-        })
-        .detach();
-
-        Some(())
-    }
 }
 
 impl Entity for VectorStore {
     type Event = ();
 }
+
+impl Drop for JobHandle {
+    fn drop(&mut self) {
+        if let Some(set) = self.set.upgrade() {
+            set.lock().remove(&self.id);
+        }
+    }
+}
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index ccdd9fdaf07605b36f025d3a4bad63a3a2f516c2..de82bc2f482351166d4f57d37c0a82087dbaa662 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -9,11 +9,17 @@ use anyhow::Result;
 use async_trait::async_trait;
 use gpui::{Task, TestAppContext};
 use language::{Language, LanguageConfig, LanguageRegistry};
-use project::{project_settings::ProjectSettings, FakeFs, Project};
+use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
 use rand::{rngs::StdRng, Rng};
 use serde_json::json;
 use settings::SettingsStore;
-use std::{path::Path, sync::Arc};
+use std::{
+    path::Path,
+    sync::{
+        atomic::{self, AtomicUsize},
+        Arc,
+    },
+};
 use unindent::Unindent;
 
 #[ctor::ctor]
@@ -62,29 +68,37 @@ async fn test_vector_store(cx: &mut TestAppContext) {
     let db_dir = tempdir::TempDir::new("vector-store").unwrap();
     let db_path = db_dir.path().join("db.sqlite");
 
+    let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
     let store = VectorStore::new(
         fs.clone(),
         db_path,
-        Arc::new(FakeEmbeddingProvider),
+        embedding_provider.clone(),
         languages,
         cx.to_async(),
     )
     .await
     .unwrap();
 
-    let project = Project::test(fs, ["/the-root".as_ref()], cx).await;
+    let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
     let worktree_id = project.read_with(cx, |project, cx| {
         project.worktrees(cx).next().unwrap().read(cx).id()
     });
-    store
-        .update(cx, |store, cx| store.add_project(project.clone(), cx))
+    let file_count = store
+        .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await
         .unwrap();
+    assert_eq!(file_count, 2);
     cx.foreground().run_until_parked();
+    store.update(cx, |store, _cx| {
+        assert_eq!(
+            store.remaining_files_to_index_for_project(&project),
+            Some(0)
+        );
+    });
 
     let search_results = store
         .update(cx, |store, cx| {
-            store.search(project.clone(), "aaaa".to_string(), 5, cx)
+            store.search_project(project.clone(), "aaaa".to_string(), 5, cx)
         })
         .await
         .unwrap();
@@ -92,10 +106,45 @@ async fn test_vector_store(cx: &mut TestAppContext) {
     assert_eq!(search_results[0].byte_range.start, 0);
     assert_eq!(search_results[0].name, "aaa");
     assert_eq!(search_results[0].worktree_id, worktree_id);
+
+    fs.save(
+        "/the-root/src/file2.rs".as_ref(),
+        &"
+            fn dddd() { println!(\"ddddd!\"); }
+            struct pqpqpqp {}
+        "
+        .unindent()
+        .into(),
+        Default::default(),
+    )
+    .await
+    .unwrap();
+
+    cx.foreground().run_until_parked();
+
+    let prev_embedding_count = embedding_provider.embedding_count();
+    let file_count = store
+        .update(cx, |store, cx| store.index_project(project.clone(), cx))
+        .await
+        .unwrap();
+    assert_eq!(file_count, 1);
+
+    cx.foreground().run_until_parked();
+    store.update(cx, |store, _cx| {
+        assert_eq!(
+            store.remaining_files_to_index_for_project(&project),
+            Some(0)
+        );
+    });
+
+    assert_eq!(
+        embedding_provider.embedding_count() - prev_embedding_count,
+        2
+    );
 }
 
 #[gpui::test]
-async fn test_code_context_retrieval(cx: &mut TestAppContext) {
+async fn test_code_context_retrieval() {
     let language = rust_lang();
     let mut retriever = CodeContextRetriever::new();
 
@@ -181,11 +230,22 @@ fn test_dot_product(mut rng: StdRng) {
     }
 }
 
-struct FakeEmbeddingProvider;
+#[derive(Default)]
+struct FakeEmbeddingProvider {
+    embedding_count: AtomicUsize,
+}
+
+impl FakeEmbeddingProvider {
+    fn embedding_count(&self) -> usize {
+        self.embedding_count.load(atomic::Ordering::SeqCst)
+    }
+}
 
 #[async_trait]
 impl EmbeddingProvider for FakeEmbeddingProvider {
     async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
+        self.embedding_count
+            .fetch_add(spans.len(), atomic::Ordering::SeqCst);
         Ok(spans
             .iter()
             .map(|span| {

From 3a625d15d30ba26c4500f88de7a16dc980bc0019 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Fri, 14 Jul 2023 11:33:49 -0400
Subject: [PATCH 07/34] update c embedding query for preceding comments

---
 crates/zed/src/languages/c/embedding.scm | 74 +++++++++++++-----------
 1 file changed, 39 insertions(+), 35 deletions(-)

diff --git a/crates/zed/src/languages/c/embedding.scm b/crates/zed/src/languages/c/embedding.scm
index cd1915f62bb5c27f7617bde91327a78129564511..0178abeb18374771967c09c93b9fcfc504e1e556 100644
--- a/crates/zed/src/languages/c/embedding.scm
+++ b/crates/zed/src/languages/c/embedding.scm
@@ -1,39 +1,43 @@
-(declaration
-    (type_qualifier)? @context
-    type: (_)? @context
-    declarator: [
-        (function_declarator
-            declarator: (_) @name)
-        (pointer_declarator
-            "*" @context
-            declarator: (function_declarator
-                declarator: (_) @name))
-        (pointer_declarator
-            "*" @context
-            declarator: (pointer_declarator
-                "*" @context
+(
+    (comment)* @context
+    .
+    (declaration
+        declarator: [
+            (function_declarator
+                declarator: (_) @name)
+            (pointer_declarator
+                "*" @name
                 declarator: (function_declarator
-                    declarator: (_) @name)))
-    ]
-) @item
+                    declarator: (_) @name))
+            (pointer_declarator
+                "*" @name
+                declarator: (pointer_declarator
+                    "*" @name
+                    declarator: (function_declarator
+                        declarator: (_) @name)))
+            ]
+        ) @item
+    )
 
-(function_definition
-    (type_qualifier)? @context
-    type: (_)? @context
-    declarator: [
-        (function_declarator
-            declarator: (_) @name
-                )
-        (pointer_declarator
-            "*" @context
-            declarator: (function_declarator
+(
+    (comment)* @context
+    .
+    (function_definition
+        declarator: [
+            (function_declarator
                 declarator: (_) @name
-                    ))
-        (pointer_declarator
-            "*" @context
-            declarator: (pointer_declarator
-                "*" @context
+                )
+            (pointer_declarator
+                "*" @name
                 declarator: (function_declarator
-                    declarator: (_) @name)))
-    ]
-) @item
+                    declarator: (_) @name
+                    ))
+            (pointer_declarator
+                "*" @name
+                declarator: (pointer_declarator
+                    "*" @name
+                    declarator: (function_declarator
+                        declarator: (_) @name)))
+            ]
+        ) @item
+    )

From d4971e9eadebc9e629ca413a0df309230f2d14fc Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Fri, 14 Jul 2023 13:47:10 -0400
Subject: [PATCH 08/34] update typescript parsing to manage for leading tsdoc
 comments

---
 crates/vector_store/src/parsing.rs            |   8 +-
 .../src/languages/typescript/embedding.scm    | 113 +++++++++---------
 2 files changed, 62 insertions(+), 59 deletions(-)

diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 8d6e03d6eb29d524db23848adfa15a8ac6b4b164..4ce8b6763a33cae8253a1e6518cbc53b19eff030 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -51,6 +51,7 @@ impl CodeContextRetriever {
         let mut documents = Vec::new();
 
         // Iterate through query matches
+        let mut name_ranges: Vec<Range<usize>> = vec![];
         for mat in self.cursor.matches(
             &embedding_config.query,
             tree.root_node(),
@@ -65,7 +66,12 @@ impl CodeContextRetriever {
                     byte_range = Some(capture.node.byte_range());
                     item = content.get(capture.node.byte_range());
                 } else if capture.index == embedding_config.name_capture_ix {
-                    if let Some(name_content) = content.get(capture.node.byte_range()) {
+                    let name_range = capture.node.byte_range();
+                    if name_ranges.contains(&name_range) {
+                        continue;
+                    }
+                    name_ranges.push(name_range.clone());
+                    if let Some(name_content) = content.get(name_range.clone()) {
                         name.push(name_content);
                     }
                 }
diff --git a/crates/zed/src/languages/typescript/embedding.scm b/crates/zed/src/languages/typescript/embedding.scm
index f261a0a56577176108dc1ef2b5cf6de3569a0531..d850f9b82307fc9bd0560c866ca149cffe5a1f5e 100644
--- a/crates/zed/src/languages/typescript/embedding.scm
+++ b/crates/zed/src/languages/typescript/embedding.scm
@@ -1,59 +1,56 @@
-; (internal_module
-;     "namespace" @context
-;     name: (_) @name) @item
-
-(enum_declaration
-    "enum" @context
-    name: (_) @name) @item
-
-; (type_alias_declaration
-;     "type" @context
-;     name: (_) @name) @item
-
-(function_declaration
-    "async"? @context
-    "function" @context
-    name: (_) @name) @item
-
-(interface_declaration
-    "interface" @context
-    name: (_) @name) @item
-
-; (export_statement
-;     (lexical_declaration
-;         ["let" "const"] @context
-;         (variable_declarator
-;             name: (_) @name) @item))
-
-(program
-    (lexical_declaration
-        ["let" "const"] @context
-        (variable_declarator
-            name: (_) @name) @item))
-
-(class_declaration
-    "class" @context
-    name: (_) @name) @item
-
-(method_definition
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @context
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
     [
-        "get"
-        "set"
-        "async"
-        "*"
-        "readonly"
-        "static"
-        (override_modifier)
-        (accessibility_modifier)
-        ]* @context
-    name: (_) @name) @item
-
-; (public_field_definition
-;     [
-;         "declare"
-;         "readonly"
-;         "abstract"
-;         "static"
-;         (accessibility_modifier)
-;         ]* @context
-;     name: (_) @name) @item
+        (export_statement
+            (function_declaration
+                "async"? @name
+                "function" @name
+                name: (_) @name)
+            ) @item
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name) @item
+    ])
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (class_declaration
+        "class" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "readonly"
+            "static"
+            (override_modifier)
+            (accessibility_modifier)
+            ]* @name
+        name: (_) @name) @item
+    )

From 2dae42b1ba49b4e10fe13826674610774078454f Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Fri, 14 Jul 2023 14:25:08 -0400
Subject: [PATCH 09/34] update embedding query for tsx to accommodate for
 leading comments

---
 crates/zed/src/languages/tsx/embedding.scm | 83 ++++++++++++++--------
 1 file changed, 52 insertions(+), 31 deletions(-)

diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm
index 305f634e04ba245115907c1f113fe0c64cab1143..1c47a5a238ff9d944dc321b4eb10b0e56d8a6221 100644
--- a/crates/zed/src/languages/tsx/embedding.scm
+++ b/crates/zed/src/languages/tsx/embedding.scm
@@ -1,35 +1,56 @@
-(enum_declaration
-    "enum" @context
-    name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @context
+        name: (_) @name) @item
+    )
 
-(function_declaration
-    "async"? @context
-    "function" @context
-    name: (_) @name) @item
-
-(interface_declaration
-    "interface" @context
-    name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    [
+        (export_statement
+            (function_declaration
+                "async"? @name
+                "function" @name
+                name: (_) @name)
+            ) @item
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name) @item
+        ])
 
-(program
-    (lexical_declaration
-        ["let" "const"] @context
-        (variable_declarator
-            name: (_) @name) @item))
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
 
-(class_declaration
-    "class" @context
-    name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    (class_declaration
+        "class" @name
+        name: (_) @name) @item
+    )
 
-(method_definition
-    [
-        "get"
-        "set"
-        "async"
-        "*"
-        "readonly"
-        "static"
-        (override_modifier)
-        (accessibility_modifier)
-        ]* @context
-    name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "readonly"
+            "static"
+            (override_modifier)
+            (accessibility_modifier)
+            ]* @name
+        name: (_) @name) @item
+    )

From 4bece54655980ac9c2f6ec5266e9bfc9306cc422 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 09:22:37 -0400
Subject: [PATCH 10/34] update jsx family of languages for preceding comments
 and nested exports

---
 Cargo.lock                                    |   2 +
 crates/vector_store/Cargo.toml                |   5 +-
 crates/vector_store/src/vector_store_tests.rs | 242 +++++++++++++++++-
 .../src/languages/javascript/embedding.scm    | 139 ++++++----
 crates/zed/src/languages/tsx/embedding.scm    |  85 ++++--
 .../src/languages/typescript/embedding.scm    |  85 ++++--
 6 files changed, 458 insertions(+), 100 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 239aa6a302ded4391422e1c2d8752236f4019bb3..b6049e611ed0c72a0fc13d822545dc994d80af4e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8508,7 +8508,9 @@ dependencies = [
  "theme",
  "tiktoken-rs 0.5.0",
  "tree-sitter",
+ "tree-sitter-javascript",
  "tree-sitter-rust",
+ "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "unindent",
  "util",
  "workspace",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index bac9cdedfafc4567f24b7502e0f9ea9e4d0e71e3..6b2e77e904016eb9e27584e8fc4e9ede71134d0a 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -44,10 +44,13 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
-tree-sitter-rust = "*"
 
 rand.workspace = true
 unindent.workspace = true
 tempdir.workspace = true
 ctor.workspace = true
 env_logger.workspace = true
+
+tree-sitter-javascript = "*"
+tree-sitter-typescript = "*"
+tree-sitter-rust = "*"
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index de82bc2f482351166d4f57d37c0a82087dbaa662..76465b1aaf95ef98b2305e35dbead3628bc461ed 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -144,7 +144,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
 }
 
 #[gpui::test]
-async fn test_code_context_retrieval() {
+async fn test_code_context_retrieval_rust() {
     let language = rust_lang();
     let mut retriever = CodeContextRetriever::new();
 
@@ -200,6 +200,142 @@ async fn test_code_context_retrieval() {
     );
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_javascript() {
+    let language = js_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = "
+/* globals importScripts, backend */
+function _authorize() {}
+
+/**
+ * Sometimes the frontend build is way faster than backend.
+ */
+export async function authorizeBank() {
+    _authorize(pushModal, upgradingAccountId, {});
+}
+
+export class SettingsPage {
+    /* This is a test setting */
+    constructor(page) {
+        this.page = page;
+    }
+}
+
+/* This is a test comment */
+class TestClass {}
+
+/* Schema for editor_events in Clickhouse. */
+export interface ClickhouseEditorEvent {
+    installation_id: string
+    operation: string
+}
+";
+
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.js"), &text, language)
+        .unwrap();
+
+    let test_documents = &[
+        Document {
+            name: "function _authorize".into(),
+            range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* globals importScripts, backend */
+                    function _authorize() {}
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "async function authorizeBank".into(),
+            range: text.find("export async").unwrap()..224,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /**
+                     * Sometimes the frontend build is way faster than backend.
+                     */
+                    export async function authorizeBank() {
+                        _authorize(pushModal, upgradingAccountId, {});
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class SettingsPage".into(),
+            range: 226..344,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    export class SettingsPage {
+                        /* This is a test setting */
+                        constructor(page) {
+                            this.page = page;
+                        }
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "constructor".into(),
+            range: 291..342,
+            content: "
+                The below code snippet is from file 'foo.js'
+
+                ```javascript
+                /* This is a test setting */
+                constructor(page) {
+                        this.page = page;
+                    }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class TestClass".into(),
+            range: 375..393,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* This is a test comment */
+                    class TestClass {}
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "interface ClickhouseEditorEvent".into(),
+            range: 441..533,
+            content: "
+                    The below code snippet is from file 'foo.js'
+
+                    ```javascript
+                    /* Schema for editor_events in Clickhouse. */
+                    export interface ClickhouseEditorEvent {
+                        installation_id: string
+                        operation: string
+                    }
+                    ```"
+            .unindent(),
+            embedding: vec![],
+        },
+    ];
+
+    for idx in 0..test_documents.len() {
+        assert_eq!(test_documents[idx], parsed_files[idx]);
+    }
+}
+
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
     assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@@ -271,6 +407,110 @@ impl EmbeddingProvider for FakeEmbeddingProvider {
     }
 }
 
+fn js_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "Javascript".into(),
+                path_suffixes: vec!["js".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_typescript::language_tsx()),
+        )
+        .with_embedding_query(
+            &r#"
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (function_declaration
+                        "async"? @name
+                        "function" @name
+                        name: (_) @name)) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (function_declaration
+                    "async"? @name
+                    "function" @name
+                    name: (_) @name) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (class_declaration
+                        "class" @name
+                        name: (_) @name)) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (class_declaration
+                    "class" @name
+                    name: (_) @name) @item
+                    )
+
+            (
+                (comment)* @context
+                .
+                (method_definition
+                    [
+                        "get"
+                        "set"
+                        "async"
+                        "*"
+                        "static"
+                    ]* @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (interface_declaration
+                        "interface" @name
+                        name: (_) @name)) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (interface_declaration
+                    "interface" @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (export_statement
+                    (enum_declaration
+                        "enum" @name
+                        name: (_) @name)) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (enum_declaration
+                    "enum" @name
+                    name: (_) @name) @item
+                )
+
+                    "#
+            .unindent(),
+        )
+        .unwrap(),
+    )
+}
+
 fn rust_lang() -> Arc<Language> {
     Arc::new(
         Language::new(
diff --git a/crates/zed/src/languages/javascript/embedding.scm b/crates/zed/src/languages/javascript/embedding.scm
index ec6eb5ab1a8be481bc7a9987056ce2d1cb7d2474..a2140400318db95a8d29074402ab2d212561a79b 100644
--- a/crates/zed/src/languages/javascript/embedding.scm
+++ b/crates/zed/src/languages/javascript/embedding.scm
@@ -1,56 +1,83 @@
-; (internal_module
-;     "namespace" @context
-;     name: (_) @name) @item
-
-(enum_declaration
-    "enum" @context
-    name: (_) @name) @item
-
-(function_declaration
-    "async"? @context
-    "function" @context
-    name: (_) @name) @item
-
-(interface_declaration
-    "interface" @context
-    name: (_) @name) @item
-
-; (program
-;     (export_statement
-;         (lexical_declaration
-;             ["let" "const"] @context
-;             (variable_declarator
-;                 name: (_) @name) @item)))
-
-(program
-    (lexical_declaration
-        ["let" "const"] @context
-        (variable_declarator
-            name: (_) @name) @item))
-
-(class_declaration
-    "class" @context
-    name: (_) @name) @item
-
-(method_definition
-    [
-        "get"
-        "set"
-        "async"
-        "*"
-        "readonly"
-        "static"
-        (override_modifier)
-        (accessibility_modifier)
-        ]* @context
-    name: (_) @name) @item
-
-; (public_field_definition
-;     [
-;         "declare"
-;         "readonly"
-;         "abstract"
-;         "static"
-;         (accessibility_modifier)
-;         ]* @context
-;     name: (_) @name) @item
+(
+    (comment)* @context
+    .
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (class_declaration
+        "class" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "static"
+            ]* @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )
diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm
index 1c47a5a238ff9d944dc321b4eb10b0e56d8a6221..4bb4fea254d0cf86f2fbb9d5c8f657e06238971f 100644
--- a/crates/zed/src/languages/tsx/embedding.scm
+++ b/crates/zed/src/languages/tsx/embedding.scm
@@ -1,33 +1,29 @@
 (
     (comment)* @context
     .
-    (enum_declaration
-        "enum" @context
-        name: (_) @name) @item
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
     )
 
 (
     (comment)* @context
     .
-    [
-        (export_statement
-            (function_declaration
-                "async"? @name
-                "function" @name
-                name: (_) @name)
-            ) @item
-        (function_declaration
-            "async"? @name
-            "function" @name
-            name: (_) @name) @item
-        ])
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
 
 (
     (comment)* @context
     .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
     )
 
 (
@@ -47,10 +43,57 @@
             "set"
             "async"
             "*"
-            "readonly"
             "static"
-            (override_modifier)
-            (accessibility_modifier)
             ]* @name
         name: (_) @name) @item
     )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (type_alias_declaration
+            "type" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (type_alias_declaration
+        "type" @name
+        name: (_) @name) @item)
diff --git a/crates/zed/src/languages/typescript/embedding.scm b/crates/zed/src/languages/typescript/embedding.scm
index d850f9b82307fc9bd0560c866ca149cffe5a1f5e..4bb4fea254d0cf86f2fbb9d5c8f657e06238971f 100644
--- a/crates/zed/src/languages/typescript/embedding.scm
+++ b/crates/zed/src/languages/typescript/embedding.scm
@@ -1,33 +1,29 @@
 (
     (comment)* @context
     .
-    (enum_declaration
-        "enum" @context
-        name: (_) @name) @item
+    (export_statement
+        (function_declaration
+            "async"? @name
+            "function" @name
+            name: (_) @name)) @item
     )
 
 (
     (comment)* @context
     .
-    [
-        (export_statement
-            (function_declaration
-                "async"? @name
-                "function" @name
-                name: (_) @name)
-            ) @item
-        (function_declaration
-            "async"? @name
-            "function" @name
-            name: (_) @name) @item
-    ])
+    (function_declaration
+        "async"? @name
+        "function" @name
+        name: (_) @name) @item
+    )
 
 (
     (comment)* @context
     .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
+    (export_statement
+        (class_declaration
+            "class" @name
+            name: (_) @name)) @item
     )
 
 (
@@ -47,10 +43,57 @@
             "set"
             "async"
             "*"
-            "readonly"
             "static"
-            (override_modifier)
-            (accessibility_modifier)
             ]* @name
         name: (_) @name) @item
     )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (interface_declaration
+        "interface" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (enum_declaration
+        "enum" @name
+        name: (_) @name) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (export_statement
+        (type_alias_declaration
+            "type" @name
+            name: (_) @name)) @item
+    )
+
+(
+    (comment)* @context
+    .
+    (type_alias_declaration
+        "type" @name
+        name: (_) @name) @item)

From cf0dd09b5cdd9fd18c06d43a6774121cb86ce544 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 10:04:32 -0400
Subject: [PATCH 11/34] update vector_store to accommodate full file parsing
 for JSON, TOML and YAML files

---
 Cargo.lock                                    | 14 ++++++++--
 crates/vector_store/Cargo.toml                |  2 +-
 crates/vector_store/src/parsing.rs            | 26 +++++++++++++++++++
 crates/vector_store/src/vector_store.rs       | 11 ++++----
 crates/vector_store/src/vector_store_tests.rs | 18 ++++++++++++-
 5 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b6049e611ed0c72a0fc13d822545dc994d80af4e..afd40fd3081b0948d38afc37a6fd7e37066e625e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8134,6 +8134,16 @@ dependencies = [
  "tree-sitter",
 ]
 
+[[package]]
+name = "tree-sitter-toml"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca517f578a98b23d20780247cc2688407fa81effad5b627a5a364ec3339b53e8"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
 [[package]]
 name = "tree-sitter-typescript"
 version = "0.20.2"
@@ -8508,8 +8518,8 @@ dependencies = [
  "theme",
  "tiktoken-rs 0.5.0",
  "tree-sitter",
- "tree-sitter-javascript",
  "tree-sitter-rust",
+ "tree-sitter-toml 0.20.0",
  "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "unindent",
  "util",
@@ -9560,7 +9570,7 @@ dependencies = [
  "tree-sitter-ruby",
  "tree-sitter-rust",
  "tree-sitter-scheme",
- "tree-sitter-toml",
+ "tree-sitter-toml 0.5.1",
  "tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
  "tree-sitter-yaml",
  "unindent",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index 6b2e77e904016eb9e27584e8fc4e9ede71134d0a..31119a1ba65363721593d68fa705b75763bafd58 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -51,6 +51,6 @@ tempdir.workspace = true
 ctor.workspace = true
 env_logger.workspace = true
 
-tree-sitter-javascript = "*"
 tree-sitter-typescript = "*"
 tree-sitter-rust = "*"
+tree-sitter-toml = "*"
diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 4ce8b6763a33cae8253a1e6518cbc53b19eff030..216ef1b5e11697519900dd4445234ebacb5ade21 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -13,6 +13,9 @@ pub struct Document {
 
 const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
+const ENTIRE_FILE_TEMPLATE: &str =
+    "The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
+pub const PARSEABLE_ENTIRE_FILE_TYPES: [&str; 3] = ["TOML", "YAML", "JSON"];
 
 pub struct CodeContextRetriever {
     pub parser: Parser,
@@ -27,12 +30,35 @@ impl CodeContextRetriever {
         }
     }
 
+    /// Embeds the whole of `content` in `ENTIRE_FILE_TEMPLATE` as a single `Document`.
+    fn _parse_entire_file(
+        &self,
+        relative_path: &Path,
+        language_name: Arc<str>,
+        content: &str,
+    ) -> Result<Vec<Document>> {
+        let document_span = ENTIRE_FILE_TEMPLATE
+            .replace("<path>", relative_path.to_string_lossy().as_ref())
+            .replace("<language>", language_name.as_ref())
+            .replace("<item>", content); // full `<item>` token, not the bare word
+        Ok(vec![Document {
+            range: 0..content.len(),
+            content: document_span,
+            embedding: Vec::new(),
+            name: language_name.to_string(),
+        }])
+    }
+
     pub fn parse_file(
         &mut self,
         relative_path: &Path,
         content: &str,
         language: Arc<Language>,
     ) -> Result<Vec<Document>> {
+        if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) {
+            return self._parse_entire_file(relative_path, language.name(), &content);
+        }
+
         let grammar = language
             .grammar()
             .ok_or_else(|| anyhow!("no grammar for language"))?;
diff --git a/crates/vector_store/src/vector_store.rs b/crates/vector_store/src/vector_store.rs
index 3f7ab5c6cd1b2e0296ee560071377a84a6c527db..0f55bd9e63f3a95ce113478a406f485600348973 100644
--- a/crates/vector_store/src/vector_store.rs
+++ b/crates/vector_store/src/vector_store.rs
@@ -19,7 +19,7 @@ use gpui::{
 use language::{Language, LanguageRegistry};
 use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
 use parking_lot::Mutex;
-use parsing::{CodeContextRetriever, Document};
+use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES};
 use project::{Fs, Project, WorktreeId};
 use smol::channel;
 use std::{
@@ -537,10 +537,11 @@ impl VectorStore {
                                 .language_for_file(&absolute_path, None)
                                 .await
                             {
-                                if language
-                                    .grammar()
-                                    .and_then(|grammar| grammar.embedding_config.as_ref())
-                                    .is_none()
+                                if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+                                    && language
+                                        .grammar()
+                                        .and_then(|grammar| grammar.embedding_config.as_ref())
+                                        .is_none()
                                 {
                                     continue;
                                 }
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index 76465b1aaf95ef98b2305e35dbead3628bc461ed..84c9962493a78c4fe7a27fd74581863476440570 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -56,6 +56,9 @@ async fn test_vector_store(cx: &mut TestAppContext) {
                         println!(\"bbbb!\");
                     }
                 ".unindent(),
+                "file3.toml": "
+                    ZZZZZZZ = 5
+                    ".unindent(),
             }
         }),
     )
@@ -63,7 +66,9 @@ async fn test_vector_store(cx: &mut TestAppContext) {
 
     let languages = Arc::new(LanguageRegistry::new(Task::ready(())));
     let rust_language = rust_lang();
+    let toml_language = toml_lang();
     languages.add(rust_language);
+    languages.add(toml_language);
 
     let db_dir = tempdir::TempDir::new("vector-store").unwrap();
     let db_path = db_dir.path().join("db.sqlite");
@@ -87,7 +92,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await
         .unwrap();
-    assert_eq!(file_count, 2);
+    assert_eq!(file_count, 3);
     cx.foreground().run_until_parked();
     store.update(cx, |store, _cx| {
         assert_eq!(
@@ -578,3 +583,14 @@ fn rust_lang() -> Arc<Language> {
         .unwrap(),
     )
 }
+
+fn toml_lang() -> Arc<Language> {
+    Arc::new(Language::new(
+        LanguageConfig {
+            name: "TOML".into(),
+            path_suffixes: vec!["toml".into()],
+            ..Default::default()
+        },
+        Some(tree_sitter_toml::language()),
+    ))
+}

From 1362c5a3d9753702820bc615dcfd4a4b261f0a3f Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 14:43:29 -0400
Subject: [PATCH 12/34] add embedding treesitter query for cpp

---
 Cargo.lock                                    |   1 +
 crates/vector_store/Cargo.toml                |   1 +
 crates/vector_store/src/vector_store_tests.rs | 312 ++++++++++++++++--
 crates/zed/src/languages/cpp/embedding.scm    |  61 ++++
 4 files changed, 347 insertions(+), 28 deletions(-)
 create mode 100644 crates/zed/src/languages/cpp/embedding.scm

diff --git a/Cargo.lock b/Cargo.lock
index afd40fd3081b0948d38afc37a6fd7e37066e625e..28a0e76d143086ba6af22d8c8d01a69de47872b6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8518,6 +8518,7 @@ dependencies = [
  "theme",
  "tiktoken-rs 0.5.0",
  "tree-sitter",
+ "tree-sitter-cpp",
  "tree-sitter-rust",
  "tree-sitter-toml 0.20.0",
  "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index 31119a1ba65363721593d68fa705b75763bafd58..0009665e26a1b4e6b57c4aab061f6c457138fd2f 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -54,3 +54,4 @@ env_logger.workspace = true
 tree-sitter-typescript = "*"
 tree-sitter-rust = "*"
 tree-sitter-toml = "*"
+tree-sitter-cpp = "*"
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index 84c9962493a78c4fe7a27fd74581863476440570..3a9e1748c54fd576fb1d0b49dada41a39842dad0 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -211,32 +211,33 @@ async fn test_code_context_retrieval_javascript() {
     let mut retriever = CodeContextRetriever::new();
 
     let text = "
-/* globals importScripts, backend */
-function _authorize() {}
-
-/**
- * Sometimes the frontend build is way faster than backend.
- */
-export async function authorizeBank() {
-    _authorize(pushModal, upgradingAccountId, {});
-}
+        /* globals importScripts, backend */
+        function _authorize() {}
+
+        /**
+         * Sometimes the frontend build is way faster than backend.
+         */
+        export async function authorizeBank() {
+            _authorize(pushModal, upgradingAccountId, {});
+        }
 
-export class SettingsPage {
-    /* This is a test setting */
-    constructor(page) {
-        this.page = page;
-    }
-}
+        export class SettingsPage {
+            /* This is a test setting */
+            constructor(page) {
+                this.page = page;
+            }
+        }
 
-/* This is a test comment */
-class TestClass {}
+        /* This is a test comment */
+        class TestClass {}
 
-/* Schema for editor_events in Clickhouse. */
-export interface ClickhouseEditorEvent {
-    installation_id: string
-    operation: string
-}
-";
+        /* Schema for editor_events in Clickhouse. */
+        export interface ClickhouseEditorEvent {
+            installation_id: string
+            operation: string
+        }
+        "
+    .unindent();
 
     let parsed_files = retriever
         .parse_file(Path::new("foo.js"), &text, language)
@@ -258,7 +259,7 @@ export interface ClickhouseEditorEvent {
         },
         Document {
             name: "async function authorizeBank".into(),
-            range: text.find("export async").unwrap()..224,
+            range: text.find("export async").unwrap()..223,
             content: "
                     The below code snippet is from file 'foo.js'
 
@@ -275,7 +276,7 @@ export interface ClickhouseEditorEvent {
         },
         Document {
             name: "class SettingsPage".into(),
-            range: 226..344,
+            range: 225..343,
             content: "
                     The below code snippet is from file 'foo.js'
 
@@ -292,7 +293,7 @@ export interface ClickhouseEditorEvent {
         },
         Document {
             name: "constructor".into(),
-            range: 291..342,
+            range: 290..341,
             content: "
                 The below code snippet is from file 'foo.js'
 
@@ -307,7 +308,7 @@ export interface ClickhouseEditorEvent {
         },
         Document {
             name: "class TestClass".into(),
-            range: 375..393,
+            range: 374..392,
             content: "
                     The below code snippet is from file 'foo.js'
 
@@ -320,7 +321,7 @@ export interface ClickhouseEditorEvent {
         },
         Document {
             name: "interface ClickhouseEditorEvent".into(),
-            range: 441..533,
+            range: 440..532,
             content: "
                     The below code snippet is from file 'foo.js'
 
@@ -341,6 +342,181 @@ export interface ClickhouseEditorEvent {
     }
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_cpp() {
+    let language = cpp_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = "
+    /**
+     * @brief Main function
+     * @returns 0 on exit
+     */
+    int main() { return 0; }
+
+    /**
+    * This is a test comment
+    */
+    class MyClass {       // The class
+        public:             // Access specifier
+        int myNum;        // Attribute (int variable)
+        string myString;  // Attribute (string variable)
+    };
+
+    // This is a test comment
+    enum Color { red, green, blue };
+
+    /** This is a preceeding block comment
+     * This is the second line
+     */
+    struct {           // Structure declaration
+        int myNum;       // Member (int variable)
+        string myString; // Member (string variable)
+    } myStructure;
+
+    /**
+    * @brief Matrix class.
+    */
+    template <typename T,
+              typename = typename std::enable_if<
+                std::is_integral<T>::value || std::is_floating_point<T>::value,
+                bool>::type>
+    class Matrix2 {
+        std::vector<std::vector<T>> _mat;
+
+    public:
+        /**
+        * @brief Constructor
+        * @tparam Integer ensuring integers are being evaluated and not other
+        * data types.
+        * @param size denoting the size of Matrix as size x size
+        */
+        template <typename Integer,
+                  typename = typename std::enable_if<std::is_integral<Integer>::value,
+                  Integer>::type>
+        explicit Matrix(const Integer size) {
+            for (size_t i = 0; i < size; ++i) {
+                _mat.emplace_back(std::vector<T>(size, 0));
+            }
+        }
+    }"
+    .unindent();
+
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.cpp"), &text, language)
+        .unwrap();
+
+    let test_documents = &[
+        Document {
+            name: "int main".into(),
+            range: 54..78,
+            content: "
+                The below code snippet is from file 'foo.cpp'
+
+                ```cpp
+                /**
+                 * @brief Main function
+                 * @returns 0 on exit
+                 */
+                int main() { return 0; }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class MyClass".into(),
+            range: 112..295,
+            content: "
+                The below code snippet is from file 'foo.cpp'
+
+                ```cpp
+                /**
+                * This is a test comment
+                */
+                class MyClass {       // The class
+                    public:             // Access specifier
+                    int myNum;        // Attribute (int variable)
+                    string myString;  // Attribute (string variable)
+                }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "enum Color".into(),
+            range: 324..355,
+            content: "
+                The below code snippet is from file 'foo.cpp'
+
+                ```cpp
+                // This is a test comment
+                enum Color { red, green, blue }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "struct myStructure".into(),
+            range: 428..581,
+            content: "
+                The below code snippet is from file 'foo.cpp'
+
+                ```cpp
+                /** This is a preceeding block comment
+                 * This is the second line
+                 */
+                struct {           // Structure declaration
+                    int myNum;       // Member (int variable)
+                    string myString; // Member (string variable)
+                } myStructure;
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+        Document {
+            name: "class Matrix2".into(),
+            range: 613..1342,
+            content: "
+                The below code snippet is from file 'foo.cpp'
+
+                ```cpp
+                /**
+                * @brief Matrix class.
+                */
+                template <typename T,
+                          typename = typename std::enable_if<
+                            std::is_integral<T>::value || std::is_floating_point<T>::value,
+                            bool>::type>
+                class Matrix2 {
+                    std::vector<std::vector<T>> _mat;
+
+                public:
+                    /**
+                    * @brief Constructor
+                    * @tparam Integer ensuring integers are being evaluated and not other
+                    * data types.
+                    * @param size denoting the size of Matrix as size x size
+                    */
+                    template <typename Integer,
+                              typename = typename std::enable_if<std::is_integral<Integer>::value,
+                              Integer>::type>
+                    explicit Matrix(const Integer size) {
+                        for (size_t i = 0; i < size; ++i) {
+                            _mat.emplace_back(std::vector<T>(size, 0));
+                        }
+                    }
+                }
+                ```"
+            .unindent(),
+            embedding: vec![],
+        },
+    ];
+
+    for idx in 0..test_documents.len() {
+        assert_eq!(test_documents[idx], parsed_files[idx]);
+    }
+}
+
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
     assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@@ -594,3 +770,83 @@ fn toml_lang() -> Arc<Language> {
         Some(tree_sitter_toml::language()),
     ))
 }
+
+fn cpp_lang() -> Arc<Language> { // test fixture: a C++ `Language` carrying an inline embedding query
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "CPP".into(),
+                path_suffixes: vec!["cpp".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_cpp::language()),
+        )
+        .with_embedding_query( // patterns below: functions, class templates, classes, enums, struct-typed declarations
+            r#"
+            (
+                (comment)* @context
+                .
+                (function_definition
+                    (type_qualifier)? @name
+                    type: (_)? @name
+                    declarator: [
+                        (function_declarator
+                            declarator: (_) @name)
+                        (pointer_declarator
+                            "*" @name
+                            declarator: (function_declarator
+                            declarator: (_) @name))
+                        (pointer_declarator
+                            "*" @name
+                            declarator: (pointer_declarator
+                                "*" @name
+                            declarator: (function_declarator
+                                declarator: (_) @name)))
+                        (reference_declarator
+                            ["&" "&&"] @name
+                            (function_declarator
+                            declarator: (_) @name))
+                    ]
+                    (type_qualifier)? @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (template_declaration
+                    (class_specifier
+                        "class" @name
+                        name: (_) @name)
+                        ) @item
+            )
+
+            (
+                (comment)* @context
+                .
+                (class_specifier
+                    "class" @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (enum_specifier
+                    "enum" @name
+                    name: (_) @name) @item
+                )
+
+            (
+                (comment)* @context
+                .
+                (declaration
+                    type: (struct_specifier
+                    "struct" @name)
+                    declarator: (_) @name) @item
+            )
+
+            "#,
+        )
+        .unwrap(), // the query is a fixed literal; panics here if it cannot be built
+    )
+}
diff --git a/crates/zed/src/languages/cpp/embedding.scm b/crates/zed/src/languages/cpp/embedding.scm
new file mode 100644
index 0000000000000000000000000000000000000000..bbd93f20dbdf6eddd097f49b7603ec5e0dc9bc59
--- /dev/null
+++ b/crates/zed/src/languages/cpp/embedding.scm
@@ -0,0 +1,61 @@
+( ; function definitions, with any comments that directly precede them as @context
+    (comment)* @context
+    .
+    (function_definition
+        (type_qualifier)? @name
+        type: (_)? @name
+        declarator: [ ; plain, pointer, pointer-to-pointer and reference declarators
+            (function_declarator
+                declarator: (_) @name)
+            (pointer_declarator
+                "*" @name
+                declarator: (function_declarator
+                declarator: (_) @name))
+            (pointer_declarator
+                "*" @name
+                declarator: (pointer_declarator
+                    "*" @name
+                declarator: (function_declarator
+                    declarator: (_) @name)))
+            (reference_declarator
+                ["&" "&&"] @name
+                (function_declarator
+                declarator: (_) @name))
+        ]
+        (type_qualifier)? @name) @item
+    )
+
+( ; class templates: a template_declaration wrapping a class_specifier
+    (comment)* @context
+    .
+    (template_declaration
+        (class_specifier
+            "class" @name
+            name: (_) @name)
+            ) @item
+)
+
+( ; class specifiers: the "class" keyword and the class name are captured as @name
+    (comment)* @context
+    .
+    (class_specifier
+        "class" @name
+        name: (_) @name) @item
+    )
+
+( ; enum specifiers: the "enum" keyword and the enum name are captured as @name
+    (comment)* @context
+    .
+    (enum_specifier
+        "enum" @name
+        name: (_) @name) @item
+    )
+
+( ; declarations whose type is a struct specifier; the declarator supplies the name
+    (comment)* @context
+    .
+    (declaration
+        type: (struct_specifier
+        "struct" @name)
+        declarator: (_) @name) @item
+)

From f0bf60fdedc56ec594a5e60f4442e8eb5a998c0b Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 14:53:57 -0400
Subject: [PATCH 13/34] add css as an embeddable file type in which the entire
 file is embedded individually

---
 crates/vector_store/src/parsing.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/vector_store/src/parsing.rs b/crates/vector_store/src/parsing.rs
index 216ef1b5e11697519900dd4445234ebacb5ade21..663f0f473b63358496c8dcbc337aa7ccbe452c76 100644
--- a/crates/vector_store/src/parsing.rs
+++ b/crates/vector_store/src/parsing.rs
@@ -15,7 +15,7 @@ const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
 const ENTIRE_FILE_TEMPLATE: &str =
     "The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
-pub const PARSEABLE_ENTIRE_FILE_TYPES: [&str; 3] = ["TOML", "YAML", "JSON"];
+pub const PARSEABLE_ENTIRE_FILE_TYPES: [&str; 4] = ["TOML", "YAML", "JSON", "CSS"];
 
 pub struct CodeContextRetriever {
     pub parser: Parser,

From e630ff38c4f4099e4e9c8d926c6a75c3e364fc58 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 16:29:25 -0400
Subject: [PATCH 14/34] add embedding treesitter queries for elixir

---
 Cargo.lock                                    |  13 +-
 crates/vector_store/Cargo.toml                |   1 +
 crates/vector_store/src/vector_store_tests.rs | 182 ++++++++++++++++++
 crates/zed/src/languages/elixir/embedding.scm |  27 +++
 4 files changed, 222 insertions(+), 1 deletion(-)
 create mode 100644 crates/zed/src/languages/elixir/embedding.scm

diff --git a/Cargo.lock b/Cargo.lock
index 28a0e76d143086ba6af22d8c8d01a69de47872b6..8fcca507d1fa47309793843f2268a87ff59a2e49 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7982,6 +7982,16 @@ dependencies = [
  "tree-sitter",
 ]
 
+[[package]]
+name = "tree-sitter-elixir"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
+
 [[package]]
 name = "tree-sitter-elixir"
 version = "0.1.0"
@@ -8519,6 +8529,7 @@ dependencies = [
  "tiktoken-rs 0.5.0",
  "tree-sitter",
  "tree-sitter-cpp",
+ "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "tree-sitter-rust",
  "tree-sitter-toml 0.20.0",
  "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -9558,7 +9569,7 @@ dependencies = [
  "tree-sitter-c",
  "tree-sitter-cpp",
  "tree-sitter-css",
- "tree-sitter-elixir",
+ "tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
  "tree-sitter-embedded-template",
  "tree-sitter-go",
  "tree-sitter-heex",
diff --git a/crates/vector_store/Cargo.toml b/crates/vector_store/Cargo.toml
index 0009665e26a1b4e6b57c4aab061f6c457138fd2f..6808f6c630ca8dda97fce819765995b78f3d2a9a 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/vector_store/Cargo.toml
@@ -55,3 +55,4 @@ tree-sitter-typescript = "*"
 tree-sitter-rust = "*"
 tree-sitter-toml = "*"
 tree-sitter-cpp = "*"
+tree-sitter-elixir = "*"
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/vector_store/src/vector_store_tests.rs
index 3a9e1748c54fd576fb1d0b49dada41a39842dad0..d55dfcfc7151eb04efb5a33b119ae33b0875d86d 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/vector_store/src/vector_store_tests.rs
@@ -342,6 +342,143 @@ async fn test_code_context_retrieval_javascript() {
     }
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_elixir() { // parses an Elixir module and checks the documents produced for it
+    let language = elixir_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = r#"
+defmodule File.Stream do
+    @moduledoc """
+    Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+    The following fields are public:
+
+    * `path`          - the file path
+    * `modes`         - the file modes
+    * `raw`           - a boolean indicating if bin functions should be used
+    * `line_or_bytes` - if reading should read lines or a given number of bytes
+    * `node`          - the node the file belongs to
+
+    """
+
+    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+    @type t :: %__MODULE__{}
+
+    @doc false
+    def __build__(path, modes, line_or_bytes) do
+    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+    modes =
+        case raw do
+        true ->
+            case :lists.keyfind(:read_ahead, 1, modes) do
+            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+            {:read_ahead, _} -> [:raw | modes]
+            false -> [:raw, :read_ahead | modes]
+            end
+
+        false ->
+            modes
+        end
+
+    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+    end
+"#
+    .unindent();
+
+    let parsed_files = retriever
+        .parse_file(Path::new("foo.ex"), &text, language)
+        .unwrap();
+
+    let test_documents = &[ // expected documents, in the order they are compared against the parse result
+        Document{
+            name: "defmodule File.Stream".into(),
+            range: 0..1132,
+            content: r#"
+                The below code snippet is from file 'foo.ex'
+
+                ```elixir
+                defmodule File.Stream do
+                    @moduledoc """
+                    Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+                    The following fields are public:
+
+                    * `path`          - the file path
+                    * `modes`         - the file modes
+                    * `raw`           - a boolean indicating if bin functions should be used
+                    * `line_or_bytes` - if reading should read lines or a given number of bytes
+                    * `node`          - the node the file belongs to
+
+                    """
+
+                    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+                    @type t :: %__MODULE__{}
+
+                    @doc false
+                    def __build__(path, modes, line_or_bytes) do
+                    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+                    modes =
+                        case raw do
+                        true ->
+                            case :lists.keyfind(:read_ahead, 1, modes) do
+                            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+                            {:read_ahead, _} -> [:raw | modes]
+                            false -> [:raw, :read_ahead | modes]
+                            end
+
+                        false ->
+                            modes
+                        end
+
+                    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+                    end
+                ```"#.unindent(),
+            embedding: vec![],
+        },
+        Document {
+        name: "def __build__".into(),
+        range: 574..1132,
+        content: r#"
+The below code snippet is from file 'foo.ex'
+
+```elixir
+@doc false
+def __build__(path, modes, line_or_bytes) do
+    raw = :lists.keyfind(:encoding, 1, modes) == false
+
+    modes =
+        case raw do
+        true ->
+            case :lists.keyfind(:read_ahead, 1, modes) do
+            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+            {:read_ahead, _} -> [:raw | modes]
+            false -> [:raw, :read_ahead | modes]
+            end
+
+        false ->
+            modes
+        end
+
+    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+    end
+```"#
+            .unindent(),
+        embedding: vec![],
+    }];
+
+    for idx in 0..test_documents.len() { // index-wise comparison; parsed entries beyond the expected ones are not checked
+        assert_eq!(test_documents[idx], parsed_files[idx]);
+    }
+}
+
 #[gpui::test]
 async fn test_code_context_retrieval_cpp() {
     let language = cpp_lang();
@@ -850,3 +987,48 @@ fn cpp_lang() -> Arc<Language> {
         .unwrap(),
     )
 }
+
+fn elixir_lang() -> Arc<Language> { // test fixture: an Elixir `Language` carrying an inline embedding query
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "Elixir".into(),
+                path_suffixes: vec!["ex".into()], // Elixir sources (the test parses `foo.ex`), not Rust's "rs"
+                ..Default::default()
+            },
+            Some(tree_sitter_elixir::language()),
+        )
+        .with_embedding_query( // patterns below: `@doc`-annotated def-like calls, then defmodule/defprotocol
+            r#"
+            (
+                (unary_operator
+                    operator: "@"
+                    operand: (call
+                        target: (identifier) @unary
+                        (#match? @unary "^(doc)$"))
+                    ) @context
+                .
+                (call
+                target: (identifier) @name
+                (arguments
+                [
+                (identifier) @name
+                (call
+                target: (identifier) @name)
+                (binary_operator
+                left: (call
+                target: (identifier) @name)
+                operator: "when")
+                ])
+                (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
+                )
+
+            (call
+                target: (identifier) @name
+                (arguments (alias) @name)
+                (#match? @name "^(defmodule|defprotocol)$")) @item
+            "#,
+        )
+        .unwrap(), // the query is a fixed literal; panics here if it cannot be built
+    )
+}
diff --git a/crates/zed/src/languages/elixir/embedding.scm b/crates/zed/src/languages/elixir/embedding.scm
new file mode 100644
index 0000000000000000000000000000000000000000..16ad20746d4b0c8697ff126fcc5150636cb8b794
--- /dev/null
+++ b/crates/zed/src/languages/elixir/embedding.scm
@@ -0,0 +1,27 @@
+( ; def-like calls that directly follow an `@doc` attribute
+    (unary_operator
+        operator: "@"
+        operand: (call
+            target: (identifier) @unary
+            (#match? @unary "^(doc)$"))
+        ) @context
+    .
+    (call
+        target: (identifier) @name
+        (arguments
+            [ ; accepted shapes of the first argument (the definition head)
+            (identifier) @name
+            (call
+                target: (identifier) @name)
+                (binary_operator
+                    left: (call
+                    target: (identifier) @name)
+                    operator: "when")
+            ])
+        (#match? @name "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @item
+        )
+
+    (call ; defmodule / defprotocol calls whose argument is an alias
+        target: (identifier) @name
+        (arguments (alias) @name)
+        (#match? @name "^(defmodule|defprotocol)$")) @item

From 8b42f5b1b379e175a599067654724b8d6ea48f35 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 17:06:10 -0400
Subject: [PATCH 15/34] rename vector_store crate to semantic_index

---
 Cargo.lock                                    | 274 +++++++++---------
 Cargo.toml                                    |   2 +-
 assets/settings/default.json                  |   4 +-
 .../Cargo.toml                                |   4 +-
 .../README.md                                 |   0
 .../src/db.rs                                 |  16 +-
 .../src/embedding.rs                          |   0
 .../src/modal.rs                              |  12 +-
 .../src/parsing.rs                            |   0
 .../src/semantic_index.rs}                    |  33 ++-
 .../src/semantic_index_settings.rs}           |  10 +-
 .../src/semantic_index_tests.rs}              |  10 +-
 crates/zed/Cargo.toml                         |   2 +-
 crates/zed/src/main.rs                        |   2 +-
 14 files changed, 186 insertions(+), 183 deletions(-)
 rename crates/{vector_store => semantic_index}/Cargo.toml (96%)
 rename crates/{vector_store => semantic_index}/README.md (100%)
 rename crates/{vector_store => semantic_index}/src/db.rs (95%)
 rename crates/{vector_store => semantic_index}/src/embedding.rs (100%)
 rename crates/{vector_store => semantic_index}/src/modal.rs (95%)
 rename crates/{vector_store => semantic_index}/src/parsing.rs (100%)
 rename crates/{vector_store/src/vector_store.rs => semantic_index/src/semantic_index.rs} (96%)
 rename crates/{vector_store/src/vector_store_settings.rs => semantic_index/src/semantic_index_settings.rs} (71%)
 rename crates/{vector_store/src/vector_store_tests.rs => semantic_index/src/semantic_index_tests.rs} (99%)

diff --git a/Cargo.lock b/Cargo.lock
index 8fcca507d1fa47309793843f2268a87ff59a2e49..430a665f98b2a7f353855b9645c2e148dd02fb4b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -179,9 +179,9 @@ checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd"
 
 [[package]]
 name = "allocator-api2"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56fc6cf8dc8c4158eed8649f9b8b0ea1518eb62b544fe9490d66fa0b349eafe9"
+checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
 
 [[package]]
 name = "alsa"
@@ -277,9 +277,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.71"
+version = "1.0.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
+checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
 
 [[package]]
 name = "arrayref"
@@ -481,7 +481,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -529,7 +529,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -572,7 +572,7 @@ checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -680,7 +680,7 @@ dependencies = [
  "http",
  "http-body",
  "hyper",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "matchit",
  "memchr",
  "mime",
@@ -830,7 +830,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.25",
+ "syn 2.0.26",
  "which",
 ]
 
@@ -1243,20 +1243,20 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.3.11"
+version = "4.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d"
+checksum = "98330784c494e49850cb23b8e2afcca13587d2500b2e3f1f78ae20248059c9be"
 dependencies = [
  "clap_builder",
- "clap_derive 4.3.2",
+ "clap_derive 4.3.12",
  "once_cell",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.3.11"
+version = "4.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b"
+checksum = "e182eb5f2562a67dda37e2c57af64d720a9e010c5e860ed87c056586aeafa52e"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1279,14 +1279,14 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.3.2"
+version = "4.3.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f"
+checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
 dependencies = [
  "heck 0.4.1",
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -1357,7 +1357,7 @@ dependencies = [
  "tiny_http",
  "url",
  "util",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -2204,9 +2204,9 @@ dependencies = [
 
 [[package]]
 name = "dyn-clone"
-version = "1.0.11"
+version = "1.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30"
+checksum = "304e6508efa593091e97a9abbc10f90aa7ca635b6d2784feff3c89d41dd12272"
 
 [[package]]
 name = "editor"
@@ -2319,9 +2319,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "erased-serde"
-version = "0.3.27"
+version = "0.3.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f94c0e13118e7d7533271f754a168ae8400e6a1cc043f2bfd53cc7290f1a1de3"
+checksum = "da96524cc884f6558f1769b6c46686af2fe8e8b4cd253bd5a3cdba8181b8e070"
 dependencies = [
  "serde",
 ]
@@ -2789,7 +2789,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -3033,7 +3033,7 @@ dependencies = [
  "tiny-skia",
  "usvg",
  "util",
- "uuid 1.4.0",
+ "uuid 1.4.1",
  "waker-fn",
 ]
 
@@ -3235,7 +3235,7 @@ checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
 dependencies = [
  "bytes 1.4.0",
  "fnv",
- "itoa 1.0.8",
+ "itoa 1.0.9",
 ]
 
 [[package]]
@@ -3294,7 +3294,7 @@ dependencies = [
  "http-body",
  "httparse",
  "httpdate",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "pin-project-lite 0.2.10",
  "socket2",
  "tokio",
@@ -3499,7 +3499,7 @@ dependencies = [
  "rand 0.7.3",
  "serde",
  "tempfile",
- "uuid 1.4.0",
+ "uuid 1.4.1",
  "winapi 0.3.9",
 ]
 
@@ -3576,9 +3576,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
 
 [[package]]
 name = "itoa"
-version = "1.0.8"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
+checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
 
 [[package]]
 name = "ittapi-rs"
@@ -4722,7 +4722,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -4886,9 +4886,9 @@ dependencies = [
 
 [[package]]
 name = "paste"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35"
+checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
 
 [[package]]
 name = "pathfinder_color"
@@ -4952,9 +4952,9 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
 
 [[package]]
 name = "pest"
-version = "2.7.0"
+version = "2.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f73935e4d55e2abf7f130186537b19e7a4abc886a0252380b59248af473a3fc9"
+checksum = "0d2d1d55045829d65aad9d389139882ad623b33b904e7c9f1b10c5b8927298e5"
 dependencies = [
  "thiserror",
  "ucd-trie",
@@ -5010,7 +5010,7 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -5163,7 +5163,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92139198957b410250d43fad93e630d956499a625c527eda65175c8680f83387"
 dependencies = [
  "proc-macro2",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -5211,9 +5211,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.64"
+version = "1.0.66"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
 dependencies = [
  "unicode-ident",
 ]
@@ -5491,9 +5491,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.29"
+version = "1.0.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
+checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
 dependencies = [
  "proc-macro2",
 ]
@@ -5879,7 +5879,7 @@ dependencies = [
  "rkyv_derive",
  "seahash",
  "tinyvec",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -6034,7 +6034,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rust-embed-utils",
- "syn 2.0.25",
+ "syn 2.0.26",
  "walkdir",
 ]
 
@@ -6097,7 +6097,7 @@ dependencies = [
  "bitflags 1.3.2",
  "errno 0.2.8",
  "io-lifetimes 0.5.3",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "libc",
  "linux-raw-sys 0.0.42",
  "once_cell",
@@ -6167,9 +6167,9 @@ dependencies = [
 
 [[package]]
 name = "rustversion"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
+checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
 
 [[package]]
 name = "rustybuzz"
@@ -6189,9 +6189,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.14"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
+checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
 
 [[package]]
 name = "safe_arch"
@@ -6267,9 +6267,9 @@ checksum = "1d51f5df5af43ab3f1360b429fa5e0152ac5ce8c0bd6485cae490332e96846a8"
 
 [[package]]
 name = "scopeguard"
-version = "1.1.0"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
 [[package]]
 name = "scrypt"
@@ -6329,7 +6329,7 @@ dependencies = [
  "time 0.3.23",
  "tracing",
  "url",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -6355,7 +6355,7 @@ dependencies = [
  "sea-query-derive",
  "serde_json",
  "time 0.3.23",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -6370,7 +6370,7 @@ dependencies = [
  "serde_json",
  "sqlx",
  "time 0.3.23",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -6465,6 +6465,48 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "semantic_index"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "bincode",
+ "ctor",
+ "editor",
+ "env_logger 0.9.3",
+ "futures 0.3.28",
+ "gpui",
+ "isahc",
+ "language",
+ "lazy_static",
+ "log",
+ "matrixmultiply",
+ "parking_lot 0.11.2",
+ "picker",
+ "project",
+ "rand 0.8.5",
+ "rpc",
+ "rusqlite",
+ "schemars",
+ "serde",
+ "serde_json",
+ "settings",
+ "smol",
+ "tempdir",
+ "theme",
+ "tiktoken-rs 0.5.0",
+ "tree-sitter",
+ "tree-sitter-cpp",
+ "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tree-sitter-rust",
+ "tree-sitter-toml 0.20.0",
+ "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unindent",
+ "util",
+ "workspace",
+]
+
 [[package]]
 name = "semver"
 version = "0.11.0"
@@ -6506,7 +6548,7 @@ checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -6531,12 +6573,12 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.102"
+version = "1.0.103"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5062a995d481b2308b6064e9af76011f2921c35f97b0468811ed9f6cd91dfed"
+checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
 dependencies = [
  "indexmap 2.0.0",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "ryu",
  "serde",
 ]
@@ -6561,7 +6603,7 @@ checksum = "1d89a8107374290037607734c0b73a85db7ed80cae314b3c5791f192a496e731"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -6571,7 +6613,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
 dependencies = [
  "form_urlencoded",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "ryu",
  "serde",
 ]
@@ -6702,9 +6744,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
 
 [[package]]
 name = "signal-hook"
-version = "0.3.15"
+version = "0.3.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9"
+checksum = "b824b6e687aff278cdbf3b36f07aa52d4bd4099699324d5da86a2ebce3aa00b3"
 dependencies = [
  "libc",
  "signal-hook-registry",
@@ -6891,7 +6933,7 @@ dependencies = [
  "parking_lot 0.11.2",
  "smol",
  "thread_local",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -6957,7 +6999,7 @@ dependencies = [
  "hkdf",
  "hmac 0.12.1",
  "indexmap 1.9.3",
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "libc",
  "libsqlite3-sys",
  "log",
@@ -6983,7 +7025,7 @@ dependencies = [
  "time 0.3.23",
  "tokio-stream",
  "url",
- "uuid 1.4.0",
+ "uuid 1.4.1",
  "webpki-roots 0.22.6",
  "whoami",
 ]
@@ -7041,9 +7083,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
 [[package]]
 name = "stringprep"
-version = "0.1.2"
+version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1"
+checksum = "db3737bde7edce97102e0e2b15365bf7a20bfdb5f60f4f9e8d7004258a51a8da"
 dependencies = [
  "unicode-bidi",
  "unicode-normalization",
@@ -7103,7 +7145,7 @@ version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7dc09e9364c2045ab5fa38f7b04d077b3359d30c4c2b3ec4bae67a358bd64326"
 dependencies = [
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "ryu",
  "sval",
 ]
@@ -7114,7 +7156,7 @@ version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ada6f627e38cbb8860283649509d87bc4a5771141daa41c78fd31f2b9485888d"
 dependencies = [
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "ryu",
  "sval",
 ]
@@ -7229,9 +7271,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.25"
+version = "2.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15e3fc8c0c74267e2df136e5e5fb656a464158aa57624053375eb9c8c6e25ae2"
+checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -7485,7 +7527,7 @@ checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -7562,7 +7604,7 @@ version = "0.3.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "59e399c068f43a5d116fedaf73b203fa4f9c519f17e2b34f63221d3792f81446"
 dependencies = [
- "itoa 1.0.8",
+ "itoa 1.0.9",
  "serde",
  "time-core",
  "time-macros",
@@ -7674,7 +7716,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -7767,9 +7809,9 @@ checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b"
 
 [[package]]
 name = "toml_edit"
-version = "0.19.12"
+version = "0.19.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c500344a19072298cd05a7224b3c0c629348b78692bf48466c5238656e315a78"
+checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a"
 dependencies = [
  "indexmap 2.0.0",
  "toml_datetime",
@@ -7879,7 +7921,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
@@ -8285,9 +8327,9 @@ checksum = "7f9af028e052a610d99e066b33304625dea9613170a2563314490a4e6ec5cf7f"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.10"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
 
 [[package]]
 name = "unicode-normalization"
@@ -8427,9 +8469,9 @@ checksum = "bcc7e3b898aa6f6c08e5295b6c89258d1331e9ac578cc992fb818759951bdc22"
 
 [[package]]
 name = "uuid"
-version = "1.4.0"
+version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d023da39d1fde5a8a3fe1f3e01ca9632ada0a63e9797de55a879d6e2236277be"
+checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d"
 dependencies = [
  "getrandom 0.2.10",
  "serde",
@@ -8496,48 +8538,6 @@ dependencies = [
  "workspace",
 ]
 
-[[package]]
-name = "vector_store"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "async-trait",
- "bincode",
- "ctor",
- "editor",
- "env_logger 0.9.3",
- "futures 0.3.28",
- "gpui",
- "isahc",
- "language",
- "lazy_static",
- "log",
- "matrixmultiply",
- "parking_lot 0.11.2",
- "picker",
- "project",
- "rand 0.8.5",
- "rpc",
- "rusqlite",
- "schemars",
- "serde",
- "serde_json",
- "settings",
- "smol",
- "tempdir",
- "theme",
- "tiktoken-rs 0.5.0",
- "tree-sitter",
- "tree-sitter-cpp",
- "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "tree-sitter-rust",
- "tree-sitter-toml 0.20.0",
- "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "unindent",
- "util",
- "workspace",
-]
-
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -8698,7 +8698,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
  "wasm-bindgen-shared",
 ]
 
@@ -8732,7 +8732,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -8745,9 +8745,9 @@ checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
 
 [[package]]
 name = "wasm-encoder"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2f8e9778e04cbf44f58acc301372577375a666b966c50b03ef46144f80436a8"
+checksum = "06a3d1b4a575ffb873679402b2aedb3117555eb65c27b1b86c8a91e574bc2a2a"
 dependencies = [
  "leb128",
 ]
@@ -8969,9 +8969,9 @@ dependencies = [
 
 [[package]]
 name = "wast"
-version = "61.0.0"
+version = "62.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc6b347851b52fd500657d301155c79e8c67595501d179cef87b6f04ebd25ac4"
+checksum = "c7f7ee878019d69436895f019b65f62c33da63595d8e857cbdc87c13ecb29a32"
 dependencies = [
  "leb128",
  "memchr",
@@ -8981,11 +8981,11 @@ dependencies = [
 
 [[package]]
 name = "wat"
-version = "1.0.67"
+version = "1.0.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "459e764d27c3ab7beba1ebd617cc025c7e76dea6e7c5ce3189989a970aea3491"
+checksum = "295572bf24aa5b685a971a83ad3e8b6e684aaad8a9be24bc7bf59bed84cc1c08"
 dependencies = [
- "wast 61.0.0",
+ "wast 62.0.0",
 ]
 
 [[package]]
@@ -9315,9 +9315,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
 
 [[package]]
 name = "winnow"
-version = "0.4.9"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81a2094c43cc94775293eaa0e499fbc30048a6d824ac82c0351a8c0bf9112529"
+checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
 dependencies = [
  "memchr",
 ]
@@ -9399,7 +9399,7 @@ dependencies = [
  "terminal",
  "theme",
  "util",
- "uuid 1.4.0",
+ "uuid 1.4.1",
 ]
 
 [[package]]
@@ -9447,7 +9447,7 @@ name = "xtask"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.3.11",
+ "clap 4.3.14",
  "schemars",
  "serde_json",
  "theme",
@@ -9548,6 +9548,7 @@ dependencies = [
  "rsa",
  "rust-embed",
  "search",
+ "semantic_index",
  "serde",
  "serde_derive",
  "serde_json",
@@ -9589,8 +9590,7 @@ dependencies = [
  "url",
  "urlencoding",
  "util",
- "uuid 1.4.0",
- "vector_store",
+ "uuid 1.4.1",
  "vim",
  "welcome",
  "workspace",
@@ -9621,7 +9621,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.25",
+ "syn 2.0.26",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 529f297f700d5006df3c169f3e663144bc24f9d1..ce3dd9c46221db7f189a66843162fbb483e68aa4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -63,7 +63,7 @@ members = [
     "crates/theme",
     "crates/theme_selector",
     "crates/util",
-    "crates/vector_store",
+    "crates/semantic_index",
     "crates/vim",
     "crates/vcs_menu",
     "crates/workspace",
diff --git a/assets/settings/default.json b/assets/settings/default.json
index 1f8d12a3d9db82a87ca1788fe8369dbfb42b7596..b109b8d595a1e184a668ff2fa226d1ffa7ce2f70 100644
--- a/assets/settings/default.json
+++ b/assets/settings/default.json
@@ -291,8 +291,8 @@
     // the terminal will default to matching the buffer's font family.
     // "font_family": "Zed Mono"
   },
-  // Difference settings for vector_store
-  "vector_store": {
+  // Difference settings for semantic_index
+  "semantic_index": {
     "enabled": false,
     "reindexing_delay_seconds": 600
   },
diff --git a/crates/vector_store/Cargo.toml b/crates/semantic_index/Cargo.toml
similarity index 96%
rename from crates/vector_store/Cargo.toml
rename to crates/semantic_index/Cargo.toml
index 6808f6c630ca8dda97fce819765995b78f3d2a9a..5c5af072c8f614e8eb8111d31c72bf9bbf905ada 100644
--- a/crates/vector_store/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -1,11 +1,11 @@
 [package]
-name = "vector_store"
+name = "semantic_index"
 version = "0.1.0"
 edition = "2021"
 publish = false
 
 [lib]
-path = "src/vector_store.rs"
+path = "src/semantic_index.rs"
 doctest = false
 
 [dependencies]
diff --git a/crates/vector_store/README.md b/crates/semantic_index/README.md
similarity index 100%
rename from crates/vector_store/README.md
rename to crates/semantic_index/README.md
diff --git a/crates/vector_store/src/db.rs b/crates/semantic_index/src/db.rs
similarity index 95%
rename from crates/vector_store/src/db.rs
rename to crates/semantic_index/src/db.rs
index d3d05f8c62c9d5639e641094204caa112e96c54f..1d5a9a475ea826cdb7baa91406b83b2189f95587 100644
--- a/crates/vector_store/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -1,4 +1,4 @@
-use crate::{parsing::Document, VECTOR_STORE_VERSION};
+use crate::{parsing::Document, SEMANTIC_INDEX_VERSION};
 use anyhow::{anyhow, Result};
 use project::Fs;
 use rpc::proto::Timestamp;
@@ -55,7 +55,9 @@ impl VectorDatabase {
     }
 
     fn get_existing_version(&self) -> Result<i64> {
-        let mut version_query = self.db.prepare("SELECT version from vector_store_config")?;
+        let mut version_query = self
+            .db
+            .prepare("SELECT version from semantic_index_config")?;
         version_query
             .query_row([], |row| Ok(row.get::<_, i64>(0)?))
             .map_err(|err| anyhow!("version query failed: {err}"))
@@ -66,7 +68,7 @@ impl VectorDatabase {
 
         if self
             .get_existing_version()
-            .map_or(false, |version| version == VECTOR_STORE_VERSION as i64)
+            .map_or(false, |version| version == SEMANTIC_INDEX_VERSION as i64)
         {
             return Ok(());
         }
@@ -74,7 +76,7 @@ impl VectorDatabase {
         self.db
             .execute(
                 "
-                    DROP TABLE vector_store_config;
+                    DROP TABLE semantic_index_config;
                     DROP TABLE worktrees;
                     DROP TABLE files;
                     DROP TABLE documents;
@@ -85,15 +87,15 @@ impl VectorDatabase {
 
         // Initialize Vector Databasing Tables
         self.db.execute(
-            "CREATE TABLE vector_store_config (
+            "CREATE TABLE semantic_index_config (
                 version INTEGER NOT NULL
             )",
             [],
         )?;
 
         self.db.execute(
-            "INSERT INTO vector_store_config (version) VALUES (?1)",
-            params![VECTOR_STORE_VERSION],
+            "INSERT INTO semantic_index_config (version) VALUES (?1)",
+            params![SEMANTIC_INDEX_VERSION],
         )?;
 
         self.db.execute(
diff --git a/crates/vector_store/src/embedding.rs b/crates/semantic_index/src/embedding.rs
similarity index 100%
rename from crates/vector_store/src/embedding.rs
rename to crates/semantic_index/src/embedding.rs
diff --git a/crates/vector_store/src/modal.rs b/crates/semantic_index/src/modal.rs
similarity index 95%
rename from crates/vector_store/src/modal.rs
rename to crates/semantic_index/src/modal.rs
index 2981fa4e73ef77ce3b54b68da9b177452f6d245e..ffc64a195ccfb23009922f71878c17ea90b1e375 100644
--- a/crates/vector_store/src/modal.rs
+++ b/crates/semantic_index/src/modal.rs
@@ -1,4 +1,4 @@
-use crate::{SearchResult, VectorStore};
+use crate::{SearchResult, SemanticIndex};
 use editor::{scroll::autoscroll::Autoscroll, Editor};
 use gpui::{
     actions, elements::*, AnyElement, AppContext, ModelHandle, MouseState, Task, ViewContext,
@@ -20,7 +20,7 @@ pub type SemanticSearch = Picker<SemanticSearchDelegate>;
 pub struct SemanticSearchDelegate {
     workspace: WeakViewHandle<Workspace>,
     project: ModelHandle<Project>,
-    vector_store: ModelHandle<VectorStore>,
+    semantic_index: ModelHandle<SemanticIndex>,
     selected_match_index: usize,
     matches: Vec<SearchResult>,
     history: HashMap<String, Vec<SearchResult>>,
@@ -33,12 +33,12 @@ impl SemanticSearchDelegate {
     pub fn new(
         workspace: WeakViewHandle<Workspace>,
         project: ModelHandle<Project>,
-        vector_store: ModelHandle<VectorStore>,
+        semantic_index: ModelHandle<SemanticIndex>,
     ) -> Self {
         Self {
             workspace,
             project,
-            vector_store,
+            semantic_index,
             selected_match_index: 0,
             matches: vec![],
             history: HashMap::new(),
@@ -105,7 +105,7 @@ impl PickerDelegate for SemanticSearchDelegate {
             return Task::ready(());
         }
 
-        let vector_store = self.vector_store.clone();
+        let semantic_index = self.semantic_index.clone();
         let project = self.project.clone();
         cx.spawn(|this, mut cx| async move {
             cx.background().timer(EMBEDDING_DEBOUNCE_INTERVAL).await;
@@ -123,7 +123,7 @@ impl PickerDelegate for SemanticSearchDelegate {
 
             if let Some(retrieved) = retrieved_cached.log_err() {
                 if !retrieved {
-                    let task = vector_store.update(&mut cx, |store, cx| {
+                    let task = semantic_index.update(&mut cx, |store, cx| {
                         store.search_project(project.clone(), query.to_string(), 10, cx)
                     });
 
diff --git a/crates/vector_store/src/parsing.rs b/crates/semantic_index/src/parsing.rs
similarity index 100%
rename from crates/vector_store/src/parsing.rs
rename to crates/semantic_index/src/parsing.rs
diff --git a/crates/vector_store/src/vector_store.rs b/crates/semantic_index/src/semantic_index.rs
similarity index 96%
rename from crates/vector_store/src/vector_store.rs
rename to crates/semantic_index/src/semantic_index.rs
index 0f55bd9e63f3a95ce113478a406f485600348973..58ffa512ce6e6714cb2af2ea3593d1f7eb96c534 100644
--- a/crates/vector_store/src/vector_store.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -2,12 +2,12 @@ mod db;
 mod embedding;
 mod modal;
 mod parsing;
-mod vector_store_settings;
+mod semantic_index_settings;
 
 #[cfg(test)]
-mod vector_store_tests;
+mod semantic_index_tests;
 
-use crate::vector_store_settings::VectorStoreSettings;
+use crate::semantic_index_settings::SemanticIndexSettings;
 use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
@@ -40,7 +40,7 @@ use util::{
 };
 use workspace::{Workspace, WorkspaceCreated};
 
-const VECTOR_STORE_VERSION: usize = 1;
+const SEMANTIC_INDEX_VERSION: usize = 1;
 const EMBEDDINGS_BATCH_SIZE: usize = 150;
 
 pub fn init(
@@ -49,7 +49,7 @@ pub fn init(
     language_registry: Arc<LanguageRegistry>,
     cx: &mut AppContext,
 ) {
-    settings::register::<VectorStoreSettings>(cx);
+    settings::register::<SemanticIndexSettings>(cx);
 
     let db_file_path = EMBEDDINGS_DIR
         .join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
@@ -58,14 +58,14 @@ pub fn init(
     SemanticSearch::init(cx);
     cx.add_action(
         |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext<Workspace>| {
-            if cx.has_global::<ModelHandle<VectorStore>>() {
-                let vector_store = cx.global::<ModelHandle<VectorStore>>().clone();
+            if cx.has_global::<ModelHandle<SemanticIndex>>() {
+                let semantic_index = cx.global::<ModelHandle<SemanticIndex>>().clone();
                 workspace.toggle_modal(cx, |workspace, cx| {
                     let project = workspace.project().clone();
                     let workspace = cx.weak_handle();
                     cx.add_view(|cx| {
                         SemanticSearch::new(
-                            SemanticSearchDelegate::new(workspace, project, vector_store),
+                            SemanticSearchDelegate::new(workspace, project, semantic_index),
                             cx,
                         )
                     })
@@ -75,13 +75,14 @@ pub fn init(
     );
 
     if *RELEASE_CHANNEL == ReleaseChannel::Stable
-        || !settings::get::<VectorStoreSettings>(cx).enabled
+        || !settings::get::<SemanticIndexSettings>(cx).enabled
     {
+        log::info!("NOT ENABLED");
         return;
     }
 
     cx.spawn(move |mut cx| async move {
-        let vector_store = VectorStore::new(
+        let semantic_index = SemanticIndex::new(
             fs,
             db_file_path,
             Arc::new(OpenAIEmbeddings {
@@ -94,15 +95,15 @@ pub fn init(
         .await?;
 
         cx.update(|cx| {
-            cx.set_global(vector_store.clone());
+            cx.set_global(semantic_index.clone());
             cx.subscribe_global::<WorkspaceCreated, _>({
-                let vector_store = vector_store.clone();
+                let semantic_index = semantic_index.clone();
                 move |event, cx| {
                     let workspace = &event.0;
                     if let Some(workspace) = workspace.upgrade(cx) {
                         let project = workspace.read(cx).project().clone();
                         if project.read(cx).is_local() {
-                            vector_store.update(cx, |store, cx| {
+                            semantic_index.update(cx, |store, cx| {
                                 store.index_project(project, cx).detach();
                             });
                         }
@@ -117,7 +118,7 @@ pub fn init(
     .detach();
 }
 
-pub struct VectorStore {
+pub struct SemanticIndex {
     fs: Arc<dyn Fs>,
     database_url: Arc<PathBuf>,
     embedding_provider: Arc<dyn EmbeddingProvider>,
@@ -220,7 +221,7 @@ enum EmbeddingJob {
     Flush,
 }
 
-impl VectorStore {
+impl SemanticIndex {
     async fn new(
         fs: Arc<dyn Fs>,
         database_url: PathBuf,
@@ -672,7 +673,7 @@ impl VectorStore {
     }
 }
 
-impl Entity for VectorStore {
+impl Entity for SemanticIndex {
     type Event = ();
 }
 
diff --git a/crates/vector_store/src/vector_store_settings.rs b/crates/semantic_index/src/semantic_index_settings.rs
similarity index 71%
rename from crates/vector_store/src/vector_store_settings.rs
rename to crates/semantic_index/src/semantic_index_settings.rs
index e1fa7cc05a362829fae1a361097740d04b115b6c..86872457f841e1bfe1b601d1fb6d5d86a12911dc 100644
--- a/crates/vector_store/src/vector_store_settings.rs
+++ b/crates/semantic_index/src/semantic_index_settings.rs
@@ -4,21 +4,21 @@ use serde::{Deserialize, Serialize};
 use settings::Setting;
 
 #[derive(Deserialize, Debug)]
-pub struct VectorStoreSettings {
+pub struct SemanticIndexSettings {
     pub enabled: bool,
     pub reindexing_delay_seconds: usize,
 }
 
 #[derive(Clone, Default, Serialize, Deserialize, JsonSchema, Debug)]
-pub struct VectorStoreSettingsContent {
+pub struct SemanticIndexSettingsContent {
     pub enabled: Option<bool>,
     pub reindexing_delay_seconds: Option<usize>,
 }
 
-impl Setting for VectorStoreSettings {
-    const KEY: Option<&'static str> = Some("vector_store");
+impl Setting for SemanticIndexSettings {
+    const KEY: Option<&'static str> = Some("semantic_index");
 
-    type FileContent = VectorStoreSettingsContent;
+    type FileContent = SemanticIndexSettingsContent;
 
     fn load(
         default_value: &Self::FileContent,
diff --git a/crates/vector_store/src/vector_store_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
similarity index 99%
rename from crates/vector_store/src/vector_store_tests.rs
rename to crates/semantic_index/src/semantic_index_tests.rs
index d55dfcfc7151eb04efb5a33b119ae33b0875d86d..ed48cf256bed1bce335c942b2508d486acf82ce0 100644
--- a/crates/vector_store/src/vector_store_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -2,8 +2,8 @@ use crate::{
     db::dot,
     embedding::EmbeddingProvider,
     parsing::{CodeContextRetriever, Document},
-    vector_store_settings::VectorStoreSettings,
-    VectorStore,
+    semantic_index_settings::SemanticIndexSettings,
+    SemanticIndex,
 };
 use anyhow::Result;
 use async_trait::async_trait;
@@ -30,10 +30,10 @@ fn init_logger() {
 }
 
 #[gpui::test]
-async fn test_vector_store(cx: &mut TestAppContext) {
+async fn test_semantic_index(cx: &mut TestAppContext) {
     cx.update(|cx| {
         cx.set_global(SettingsStore::test(cx));
-        settings::register::<VectorStoreSettings>(cx);
+        settings::register::<SemanticIndexSettings>(cx);
         settings::register::<ProjectSettings>(cx);
     });
 
@@ -74,7 +74,7 @@ async fn test_vector_store(cx: &mut TestAppContext) {
     let db_path = db_dir.path().join("db.sqlite");
 
     let embedding_provider = Arc::new(FakeEmbeddingProvider::default());
-    let store = VectorStore::new(
+    let store = SemanticIndex::new(
         fs.clone(),
         db_path,
         embedding_provider.clone(),
diff --git a/crates/zed/Cargo.toml b/crates/zed/Cargo.toml
index 597e40161fb029eee16cf53208ce0e20d0c0a603..265312bc9a9de5de76465fc2e7e737bc4cb52a4f 100644
--- a/crates/zed/Cargo.toml
+++ b/crates/zed/Cargo.toml
@@ -64,7 +64,7 @@ terminal_view = { path = "../terminal_view" }
 theme = { path = "../theme" }
 theme_selector = { path = "../theme_selector" }
 util = { path = "../util" }
-vector_store = { path = "../vector_store" }
+semantic_index = { path = "../semantic_index" }
 vim = { path = "../vim" }
 workspace = { path = "../workspace" }
 welcome = { path = "../welcome" }
diff --git a/crates/zed/src/main.rs b/crates/zed/src/main.rs
index 4c75d370d517423e395119d2ceb4f3c47b61a21b..3598da5dee2eb9c96b18474bcef5e8e763cbae14 100644
--- a/crates/zed/src/main.rs
+++ b/crates/zed/src/main.rs
@@ -157,7 +157,7 @@ fn main() {
         project_panel::init(cx);
         diagnostics::init(cx);
         search::init(cx);
-        vector_store::init(fs.clone(), http.clone(), languages.clone(), cx);
+        semantic_index::init(fs.clone(), http.clone(), languages.clone(), cx);
         vim::init(cx);
         terminal_view::init(cx);
         copilot::init(http.clone(), node_runtime, cx);

From d83c4ffb072081d0b07f62f3c90f3bff5be48509 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Mon, 17 Jul 2023 17:09:51 -0400
Subject: [PATCH 16/34] remove debug logging for enabled settings

---
 crates/semantic_index/src/semantic_index.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 58ffa512ce6e6714cb2af2ea3593d1f7eb96c534..b59b20370aff967de5b2c805da5c693993e0c23e 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -77,7 +77,6 @@ pub fn init(
     if *RELEASE_CHANNEL == ReleaseChannel::Stable
         || !settings::get::<SemanticIndexSettings>(cx).enabled
     {
-        log::info!("NOT ENABLED");
         return;
     }
 

From afc4c10ec1162b151c33a9ffe051233dca10a5e5 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Mon, 17 Jul 2023 18:10:51 -0700
Subject: [PATCH 17/34] Start work on exposing semantic search via project
 search view

Co-authored-by: Kyle <kyle@zed.dev>
---
 Cargo.lock                                    |   2 +
 crates/search/Cargo.toml                      |   1 +
 crates/search/src/project_search.rs           | 156 +++++-
 crates/semantic_index/Cargo.toml              |   1 +
 crates/semantic_index/src/db.rs               |  12 +-
 crates/semantic_index/src/embedding.rs        |   7 +-
 crates/semantic_index/src/modal.rs            | 172 -------
 crates/semantic_index/src/semantic_index.rs   | 451 +++++++++---------
 .../src/semantic_index_tests.rs               |  18 +-
 9 files changed, 397 insertions(+), 423 deletions(-)
 delete mode 100644 crates/semantic_index/src/modal.rs

diff --git a/Cargo.lock b/Cargo.lock
index 430a665f98b2a7f353855b9645c2e148dd02fb4b..484ef3644b1cdddab26755c4eaf293154bbbcb3f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6430,6 +6430,7 @@ dependencies = [
  "menu",
  "postage",
  "project",
+ "semantic_index",
  "serde",
  "serde_derive",
  "serde_json",
@@ -6484,6 +6485,7 @@ dependencies = [
  "matrixmultiply",
  "parking_lot 0.11.2",
  "picker",
+ "postage",
  "project",
  "rand 0.8.5",
  "rpc",
diff --git a/crates/search/Cargo.toml b/crates/search/Cargo.toml
index 7ef388f7c087638c1ee3f5c2002ab3d2c3371dc7..f6ed6c3fef4bf3a566e27cdd46b7169405a72c97 100644
--- a/crates/search/Cargo.toml
+++ b/crates/search/Cargo.toml
@@ -19,6 +19,7 @@ settings = { path = "../settings" }
 theme = { path = "../theme" }
 util = { path = "../util" }
 workspace = { path = "../workspace" }
+semantic_index = { path = "../semantic_index" }
 anyhow.workspace = true
 futures.workspace = true
 log.workspace = true
diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index ebd504d02c2334aa6876a478937718cb1aa4d496..91d2b142ae27c3b1579d99b04fe6cb8b6f745705 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -2,7 +2,7 @@ use crate::{
     SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex,
     ToggleWholeWord,
 };
-use anyhow::Result;
+use anyhow::{Context, Result};
 use collections::HashMap;
 use editor::{
     items::active_match_index, scroll::autoscroll::Autoscroll, Anchor, Editor, MultiBuffer,
@@ -18,7 +18,9 @@ use gpui::{
     Task, View, ViewContext, ViewHandle, WeakModelHandle, WeakViewHandle,
 };
 use menu::Confirm;
+use postage::stream::Stream;
 use project::{search::SearchQuery, Project};
+use semantic_index::SemanticIndex;
 use smallvec::SmallVec;
 use std::{
     any::{Any, TypeId},
@@ -36,7 +38,10 @@ use workspace::{
     ItemNavHistory, Pane, ToolbarItemLocation, ToolbarItemView, Workspace, WorkspaceId,
 };
 
-actions!(project_search, [SearchInNew, ToggleFocus, NextField]);
+actions!(
+    project_search,
+    [SearchInNew, ToggleFocus, NextField, ToggleSemanticSearch]
+);
 
 #[derive(Default)]
 struct ActiveSearches(HashMap<WeakModelHandle<Project>, WeakViewHandle<ProjectSearchView>>);
@@ -92,6 +97,7 @@ pub struct ProjectSearchView {
     case_sensitive: bool,
     whole_word: bool,
     regex: bool,
+    semantic: Option<SemanticSearchState>,
     panels_with_errors: HashSet<InputPanel>,
     active_match_index: Option<usize>,
     search_id: usize,
@@ -100,6 +106,13 @@ pub struct ProjectSearchView {
     excluded_files_editor: ViewHandle<Editor>,
 }
 
+struct SemanticSearchState {
+    file_count: usize,
+    outstanding_file_count: usize,
+    _progress_task: Task<()>,
+    search_task: Option<Task<Result<()>>>,
+}
+
 pub struct ProjectSearchBar {
     active_project_search: Option<ViewHandle<ProjectSearchView>>,
     subscription: Option<Subscription>,
@@ -198,12 +211,25 @@ impl View for ProjectSearchView {
 
             let theme = theme::current(cx).clone();
             let text = if self.query_editor.read(cx).text(cx).is_empty() {
-                ""
+                Cow::Borrowed("")
+            } else if let Some(semantic) = &self.semantic {
+                if semantic.search_task.is_some() {
+                    Cow::Borrowed("Searching...")
+                } else if semantic.outstanding_file_count > 0 {
+                    Cow::Owned(format!(
+                        "Indexing. {} of {}...",
+                        semantic.file_count - semantic.outstanding_file_count,
+                        semantic.file_count
+                    ))
+                } else {
+                    Cow::Borrowed("Indexing complete")
+                }
             } else if model.pending_search.is_some() {
-                "Searching..."
+                Cow::Borrowed("Searching...")
             } else {
-                "No results"
+                Cow::Borrowed("No results")
             };
+
             MouseEventHandler::<Status, _>::new(0, cx, |_, _| {
                 Label::new(text, theme.search.results_status.clone())
                     .aligned()
@@ -499,6 +525,7 @@ impl ProjectSearchView {
             case_sensitive,
             whole_word,
             regex,
+            semantic: None,
             panels_with_errors: HashSet::new(),
             active_match_index: None,
             query_editor_was_focused: false,
@@ -563,6 +590,35 @@ impl ProjectSearchView {
     }
 
     fn search(&mut self, cx: &mut ViewContext<Self>) {
+        if let Some(semantic) = &mut self.semantic {
+            if semantic.outstanding_file_count > 0 {
+                return;
+            }
+
+            let search_phrase = self.query_editor.read(cx).text(cx);
+            let project = self.model.read(cx).project.clone();
+            if let Some(semantic_index) = SemanticIndex::global(cx) {
+                let search_task = semantic_index.update(cx, |semantic_index, cx| {
+                    semantic_index.search_project(project, search_phrase, 10, cx)
+                });
+                semantic.search_task = Some(cx.spawn(|this, mut cx| async move {
+                    let results = search_task.await.context("search task")?;
+
+                    this.update(&mut cx, |this, cx| {
+                        dbg!(&results);
+                        // TODO: Update results
+
+                        if let Some(semantic) = &mut this.semantic {
+                            semantic.search_task = None;
+                        }
+                    })?;
+
+                    anyhow::Ok(())
+                }));
+            }
+            return;
+        }
+
         if let Some(query) = self.build_search_query(cx) {
             self.model.update(cx, |model, cx| model.search(query, cx));
         }
@@ -876,6 +932,59 @@ impl ProjectSearchBar {
         }
     }
 
+    fn toggle_semantic_search(&mut self, cx: &mut ViewContext<Self>) -> bool {
+        if let Some(search_view) = self.active_project_search.as_ref() {
+            search_view.update(cx, |search_view, cx| {
+                if search_view.semantic.is_some() {
+                    search_view.semantic = None;
+                } else if let Some(semantic_index) = SemanticIndex::global(cx) {
+                    // TODO: confirm that it's ok to send this project
+
+                    let project = search_view.model.read(cx).project.clone();
+                    let index_task = semantic_index.update(cx, |semantic_index, cx| {
+                        semantic_index.index_project(project, cx)
+                    });
+
+                    cx.spawn(|search_view, mut cx| async move {
+                        let (files_to_index, mut files_remaining_rx) = index_task.await?;
+
+                        search_view.update(&mut cx, |search_view, cx| {
+                            search_view.semantic = Some(SemanticSearchState {
+                                file_count: files_to_index,
+                                outstanding_file_count: files_to_index,
+                                search_task: None,
+                                _progress_task: cx.spawn(|search_view, mut cx| async move {
+                                    while let Some(count) = files_remaining_rx.recv().await {
+                                        search_view
+                                            .update(&mut cx, |search_view, cx| {
+                                                if let Some(semantic_search_state) =
+                                                    &mut search_view.semantic
+                                                {
+                                                    semantic_search_state.outstanding_file_count =
+                                                        count;
+                                                    cx.notify();
+                                                    if count == 0 {
+                                                        return;
+                                                    }
+                                                }
+                                            })
+                                            .ok();
+                                    }
+                                }),
+                            });
+                        })?;
+                        anyhow::Ok(())
+                    })
+                    .detach_and_log_err(cx);
+                }
+            });
+            cx.notify();
+            true
+        } else {
+            false
+        }
+    }
+
     fn render_nav_button(
         &self,
         icon: &'static str,
@@ -953,6 +1062,42 @@ impl ProjectSearchBar {
         .into_any()
     }
 
+    fn render_semantic_search_button(&self, cx: &mut ViewContext<Self>) -> AnyElement<Self> {
+        let tooltip_style = theme::current(cx).tooltip.clone();
+        let is_active = if let Some(search) = self.active_project_search.as_ref() {
+            let search = search.read(cx);
+            search.semantic.is_some()
+        } else {
+            false
+        };
+
+        let region_id = 3;
+
+        MouseEventHandler::<Self, _>::new(region_id, cx, |state, cx| {
+            let theme = theme::current(cx);
+            let style = theme
+                .search
+                .option_button
+                .in_state(is_active)
+                .style_for(state);
+            Label::new("Semantic", style.text.clone())
+                .contained()
+                .with_style(style.container)
+        })
+        .on_click(MouseButton::Left, move |_, this, cx| {
+            this.toggle_semantic_search(cx);
+        })
+        .with_cursor_style(CursorStyle::PointingHand)
+        .with_tooltip::<Self>(
+            region_id,
+            format!("Toggle Semantic Search"),
+            Some(Box::new(ToggleSemanticSearch)),
+            tooltip_style,
+            cx,
+        )
+        .into_any()
+    }
+
     fn is_option_enabled(&self, option: SearchOption, cx: &AppContext) -> bool {
         if let Some(search) = self.active_project_search.as_ref() {
             let search = search.read(cx);
@@ -1049,6 +1194,7 @@ impl View for ProjectSearchBar {
                         )
                         .with_child(
                             Flex::row()
+                                .with_child(self.render_semantic_search_button(cx))
                                 .with_child(self.render_option_button(
                                     "Case",
                                     SearchOption::CaseSensitive,
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 5c5af072c8f614e8eb8111d31c72bf9bbf905ada..2d21ff6c1c42710e597101cd024fdde9183bcbc5 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -20,6 +20,7 @@ editor = { path = "../editor" }
 rpc = { path = "../rpc" }
 settings = { path = "../settings" }
 anyhow.workspace = true
+postage.workspace = true
 futures.workspace = true
 smol.workspace = true
 rusqlite = { version = "0.27.0", features = ["blob", "array", "modern_sqlite"] }
diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index 1d5a9a475ea826cdb7baa91406b83b2189f95587..a667ff877c2e02c65e669e10b7fdbc07e319653b 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -1,5 +1,5 @@
 use crate::{parsing::Document, SEMANTIC_INDEX_VERSION};
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, Context, Result};
 use project::Fs;
 use rpc::proto::Timestamp;
 use rusqlite::{
@@ -76,14 +76,14 @@ impl VectorDatabase {
         self.db
             .execute(
                 "
-                    DROP TABLE semantic_index_config;
-                    DROP TABLE worktrees;
-                    DROP TABLE files;
-                    DROP TABLE documents;
+                DROP TABLE IF EXISTS documents;
+                DROP TABLE IF EXISTS files;
+                DROP TABLE IF EXISTS worktrees;
+                DROP TABLE IF EXISTS semantic_index_config;
                 ",
                 [],
             )
-            .ok();
+            .context("failed to drop tables")?;
 
         // Initialize Vector Databasing Tables
         self.db.execute(
diff --git a/crates/semantic_index/src/embedding.rs b/crates/semantic_index/src/embedding.rs
index ea349c8afa4a8d908d60760f8ff1eb6839e3120b..4f49d66ce7eefeb70961fcaab936edf102b715b9 100644
--- a/crates/semantic_index/src/embedding.rs
+++ b/crates/semantic_index/src/embedding.rs
@@ -86,6 +86,7 @@ impl OpenAIEmbeddings {
     async fn send_request(&self, api_key: &str, spans: Vec<&str>) -> Result<Response<AsyncBody>> {
         let request = Request::post("https://api.openai.com/v1/embeddings")
             .redirect_policy(isahc::config::RedirectPolicy::Follow)
+            .timeout(Duration::from_secs(4))
             .header("Content-Type", "application/json")
             .header("Authorization", format!("Bearer {}", api_key))
             .body(
@@ -133,7 +134,11 @@ impl EmbeddingProvider for OpenAIEmbeddings {
                     self.executor.timer(delay).await;
                 }
                 StatusCode::BAD_REQUEST => {
-                    log::info!("BAD REQUEST: {:?}", &response.status());
+                    log::info!(
+                        "BAD REQUEST: {:?} {:?}",
+                        &response.status(),
+                        response.body()
+                    );
                     // Don't worry about delaying bad request, as we can assume
                     // we haven't been rate limited yet.
                     for span in spans.iter_mut() {
diff --git a/crates/semantic_index/src/modal.rs b/crates/semantic_index/src/modal.rs
deleted file mode 100644
index ffc64a195ccfb23009922f71878c17ea90b1e375..0000000000000000000000000000000000000000
--- a/crates/semantic_index/src/modal.rs
+++ /dev/null
@@ -1,172 +0,0 @@
-use crate::{SearchResult, SemanticIndex};
-use editor::{scroll::autoscroll::Autoscroll, Editor};
-use gpui::{
-    actions, elements::*, AnyElement, AppContext, ModelHandle, MouseState, Task, ViewContext,
-    WeakViewHandle,
-};
-use picker::{Picker, PickerDelegate, PickerEvent};
-use project::{Project, ProjectPath};
-use std::{collections::HashMap, sync::Arc, time::Duration};
-use util::ResultExt;
-use workspace::Workspace;
-
-const MIN_QUERY_LEN: usize = 5;
-const EMBEDDING_DEBOUNCE_INTERVAL: Duration = Duration::from_millis(500);
-
-actions!(semantic_search, [Toggle]);
-
-pub type SemanticSearch = Picker<SemanticSearchDelegate>;
-
-pub struct SemanticSearchDelegate {
-    workspace: WeakViewHandle<Workspace>,
-    project: ModelHandle<Project>,
-    semantic_index: ModelHandle<SemanticIndex>,
-    selected_match_index: usize,
-    matches: Vec<SearchResult>,
-    history: HashMap<String, Vec<SearchResult>>,
-}
-
-impl SemanticSearchDelegate {
-    // This is currently searching on every keystroke,
-    // This is wildly overkill, and has the potential to get expensive
-    // We will need to update this to throttle searching
-    pub fn new(
-        workspace: WeakViewHandle<Workspace>,
-        project: ModelHandle<Project>,
-        semantic_index: ModelHandle<SemanticIndex>,
-    ) -> Self {
-        Self {
-            workspace,
-            project,
-            semantic_index,
-            selected_match_index: 0,
-            matches: vec![],
-            history: HashMap::new(),
-        }
-    }
-}
-
-impl PickerDelegate for SemanticSearchDelegate {
-    fn placeholder_text(&self) -> Arc<str> {
-        "Search repository in natural language...".into()
-    }
-
-    fn confirm(&mut self, cx: &mut ViewContext<SemanticSearch>) {
-        if let Some(search_result) = self.matches.get(self.selected_match_index) {
-            // Open Buffer
-            let search_result = search_result.clone();
-            let buffer = self.project.update(cx, |project, cx| {
-                project.open_buffer(
-                    ProjectPath {
-                        worktree_id: search_result.worktree_id,
-                        path: search_result.file_path.clone().into(),
-                    },
-                    cx,
-                )
-            });
-
-            let workspace = self.workspace.clone();
-            let position = search_result.clone().byte_range.start;
-            cx.spawn(|_, mut cx| async move {
-                let buffer = buffer.await?;
-                workspace.update(&mut cx, |workspace, cx| {
-                    let editor = workspace.open_project_item::<Editor>(buffer, cx);
-                    editor.update(cx, |editor, cx| {
-                        editor.change_selections(Some(Autoscroll::center()), cx, |s| {
-                            s.select_ranges([position..position])
-                        });
-                    });
-                })?;
-                Ok::<_, anyhow::Error>(())
-            })
-            .detach_and_log_err(cx);
-            cx.emit(PickerEvent::Dismiss);
-        }
-    }
-
-    fn dismissed(&mut self, _cx: &mut ViewContext<SemanticSearch>) {}
-
-    fn match_count(&self) -> usize {
-        self.matches.len()
-    }
-
-    fn selected_index(&self) -> usize {
-        self.selected_match_index
-    }
-
-    fn set_selected_index(&mut self, ix: usize, _cx: &mut ViewContext<SemanticSearch>) {
-        self.selected_match_index = ix;
-    }
-
-    fn update_matches(&mut self, query: String, cx: &mut ViewContext<SemanticSearch>) -> Task<()> {
-        log::info!("Searching for {:?}...", query);
-        if query.len() < MIN_QUERY_LEN {
-            log::info!("Query below minimum length");
-            return Task::ready(());
-        }
-
-        let semantic_index = self.semantic_index.clone();
-        let project = self.project.clone();
-        cx.spawn(|this, mut cx| async move {
-            cx.background().timer(EMBEDDING_DEBOUNCE_INTERVAL).await;
-
-            let retrieved_cached = this.update(&mut cx, |this, _| {
-                let delegate = this.delegate_mut();
-                if delegate.history.contains_key(&query) {
-                    let historic_results = delegate.history.get(&query).unwrap().to_owned();
-                    delegate.matches = historic_results.clone();
-                    true
-                } else {
-                    false
-                }
-            });
-
-            if let Some(retrieved) = retrieved_cached.log_err() {
-                if !retrieved {
-                    let task = semantic_index.update(&mut cx, |store, cx| {
-                        store.search_project(project.clone(), query.to_string(), 10, cx)
-                    });
-
-                    if let Some(results) = task.await.log_err() {
-                        log::info!("Not queried previously, searching...");
-                        this.update(&mut cx, |this, _| {
-                            let delegate = this.delegate_mut();
-                            delegate.matches = results.clone();
-                            delegate.history.insert(query, results);
-                        })
-                        .ok();
-                    }
-                } else {
-                    log::info!("Already queried, retrieved directly from cached history");
-                }
-            }
-        })
-    }
-
-    fn render_match(
-        &self,
-        ix: usize,
-        mouse_state: &mut MouseState,
-        selected: bool,
-        cx: &AppContext,
-    ) -> AnyElement<Picker<Self>> {
-        let theme = theme::current(cx);
-        let style = &theme.picker.item;
-        let current_style = style.in_state(selected).style_for(mouse_state);
-
-        let search_result = &self.matches[ix];
-
-        let path = search_result.file_path.to_string_lossy();
-        let name = search_result.name.clone();
-
-        Flex::column()
-            .with_child(Text::new(name, current_style.label.text.clone()).with_soft_wrap(false))
-            .with_child(Label::new(
-                path.to_string(),
-                style.inactive_state().default.label.clone(),
-            ))
-            .contained()
-            .with_style(current_style.container)
-            .into_any()
-    }
-}
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index b59b20370aff967de5b2c805da5c693993e0c23e..e6443870aa5312b4a7ea4ecfe72c134841923c66 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -1,6 +1,5 @@
 mod db;
 mod embedding;
-mod modal;
 mod parsing;
 mod semantic_index_settings;
 
@@ -12,25 +11,20 @@ use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
-use gpui::{
-    AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, ViewContext,
-    WeakModelHandle,
-};
+use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
 use language::{Language, LanguageRegistry};
-use modal::{SemanticSearch, SemanticSearchDelegate, Toggle};
 use parking_lot::Mutex;
 use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES};
+use postage::watch;
 use project::{Fs, Project, WorktreeId};
 use smol::channel;
 use std::{
-    collections::{HashMap, HashSet},
+    collections::HashMap,
+    mem,
     ops::Range,
     path::{Path, PathBuf},
-    sync::{
-        atomic::{self, AtomicUsize},
-        Arc, Weak,
-    },
-    time::{Instant, SystemTime},
+    sync::{Arc, Weak},
+    time::SystemTime,
 };
 use util::{
     channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME},
@@ -38,9 +32,8 @@ use util::{
     paths::EMBEDDINGS_DIR,
     ResultExt,
 };
-use workspace::{Workspace, WorkspaceCreated};
 
-const SEMANTIC_INDEX_VERSION: usize = 1;
+const SEMANTIC_INDEX_VERSION: usize = 3;
 const EMBEDDINGS_BATCH_SIZE: usize = 150;
 
 pub fn init(
@@ -55,25 +48,6 @@ pub fn init(
         .join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
         .join("embeddings_db");
 
-    SemanticSearch::init(cx);
-    cx.add_action(
-        |workspace: &mut Workspace, _: &Toggle, cx: &mut ViewContext<Workspace>| {
-            if cx.has_global::<ModelHandle<SemanticIndex>>() {
-                let semantic_index = cx.global::<ModelHandle<SemanticIndex>>().clone();
-                workspace.toggle_modal(cx, |workspace, cx| {
-                    let project = workspace.project().clone();
-                    let workspace = cx.weak_handle();
-                    cx.add_view(|cx| {
-                        SemanticSearch::new(
-                            SemanticSearchDelegate::new(workspace, project, semantic_index),
-                            cx,
-                        )
-                    })
-                });
-            }
-        },
-    );
-
     if *RELEASE_CHANNEL == ReleaseChannel::Stable
         || !settings::get::<SemanticIndexSettings>(cx).enabled
     {
@@ -95,21 +69,6 @@ pub fn init(
 
         cx.update(|cx| {
             cx.set_global(semantic_index.clone());
-            cx.subscribe_global::<WorkspaceCreated, _>({
-                let semantic_index = semantic_index.clone();
-                move |event, cx| {
-                    let workspace = &event.0;
-                    if let Some(workspace) = workspace.upgrade(cx) {
-                        let project = workspace.read(cx).project().clone();
-                        if project.read(cx).is_local() {
-                            semantic_index.update(cx, |store, cx| {
-                                store.index_project(project, cx).detach();
-                            });
-                        }
-                    }
-                }
-            })
-            .detach();
         });
 
         anyhow::Ok(())
@@ -128,20 +87,17 @@ pub struct SemanticIndex {
     _embed_batch_task: Task<()>,
     _batch_files_task: Task<()>,
     _parsing_files_tasks: Vec<Task<()>>,
-    next_job_id: Arc<AtomicUsize>,
     projects: HashMap<WeakModelHandle<Project>, ProjectState>,
 }
 
 struct ProjectState {
     worktree_db_ids: Vec<(WorktreeId, i64)>,
-    outstanding_jobs: Arc<Mutex<HashSet<JobId>>>,
+    outstanding_job_count_rx: watch::Receiver<usize>,
+    outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>,
 }
 
-type JobId = usize;
-
 struct JobHandle {
-    id: JobId,
-    set: Weak<Mutex<HashSet<JobId>>>,
+    tx: Weak<Mutex<watch::Sender<usize>>>,
 }
 
 impl ProjectState {
@@ -221,6 +177,14 @@ enum EmbeddingJob {
 }
 
 impl SemanticIndex {
+    /// Returns a clone of the `ModelHandle<SemanticIndex>` stored as an app global,
+    /// or `None` when no such global is present.
+    pub fn global(cx: &AppContext) -> Option<ModelHandle<SemanticIndex>> {
+        // Check for the global before reading it.
+        let is_registered = cx.has_global::<ModelHandle<Self>>();
+        is_registered.then(|| cx.global::<ModelHandle<SemanticIndex>>().clone())
+    }
+
     async fn new(
         fs: Arc<dyn Fs>,
         database_url: PathBuf,
@@ -236,184 +200,69 @@ impl SemanticIndex {
             .await?;
 
         Ok(cx.add_model(|cx| {
-            // paths_tx -> embeddings_tx -> db_update_tx
-
-            //db_update_tx/rx: Updating Database
+            // Perform database operations
             let (db_update_tx, db_update_rx) = channel::unbounded();
-            let _db_update_task = cx.background().spawn(async move {
-                while let Ok(job) = db_update_rx.recv().await {
-                    match job {
-                        DbOperation::InsertFile {
-                            worktree_id,
-                            documents,
-                            path,
-                            mtime,
-                            job_handle,
-                        } => {
-                            db.insert_file(worktree_id, path, mtime, documents)
-                                .log_err();
-                            drop(job_handle)
-                        }
-                        DbOperation::Delete { worktree_id, path } => {
-                            db.delete_file(worktree_id, path).log_err();
-                        }
-                        DbOperation::FindOrCreateWorktree { path, sender } => {
-                            let id = db.find_or_create_worktree(&path);
-                            sender.send(id).ok();
-                        }
-                        DbOperation::FileMTimes {
-                            worktree_id: worktree_db_id,
-                            sender,
-                        } => {
-                            let file_mtimes = db.get_file_mtimes(worktree_db_id);
-                            sender.send(file_mtimes).ok();
-                        }
+            let _db_update_task = cx.background().spawn({
+                async move {
+                    while let Ok(job) = db_update_rx.recv().await {
+                        Self::run_db_operation(&db, job)
                     }
                 }
             });
 
-            // embed_tx/rx: Embed Batch and Send to Database
+            // Compute embeddings for each batch and send the results to the database task.
             let (embed_batch_tx, embed_batch_rx) =
                 channel::unbounded::<Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>>();
             let _embed_batch_task = cx.background().spawn({
                 let db_update_tx = db_update_tx.clone();
                 let embedding_provider = embedding_provider.clone();
                 async move {
-                    while let Ok(mut embeddings_queue) = embed_batch_rx.recv().await {
-                        // Construct Batch
-                        let mut batch_documents = vec![];
-                        for (_, documents, _, _, _) in embeddings_queue.iter() {
-                            batch_documents
-                                .extend(documents.iter().map(|document| document.content.as_str()));
-                        }
-
-                        if let Ok(embeddings) =
-                            embedding_provider.embed_batch(batch_documents).await
-                        {
-                            log::trace!(
-                                "created {} embeddings for {} files",
-                                embeddings.len(),
-                                embeddings_queue.len(),
-                            );
-
-                            let mut i = 0;
-                            let mut j = 0;
-
-                            for embedding in embeddings.iter() {
-                                while embeddings_queue[i].1.len() == j {
-                                    i += 1;
-                                    j = 0;
-                                }
-
-                                embeddings_queue[i].1[j].embedding = embedding.to_owned();
-                                j += 1;
-                            }
-
-                            for (worktree_id, documents, path, mtime, job_handle) in
-                                embeddings_queue.into_iter()
-                            {
-                                for document in documents.iter() {
-                                    // TODO: Update this so it doesn't panic
-                                    assert!(
-                                        document.embedding.len() > 0,
-                                        "Document Embedding Not Complete"
-                                    );
-                                }
-
-                                db_update_tx
-                                    .send(DbOperation::InsertFile {
-                                        worktree_id,
-                                        documents,
-                                        path,
-                                        mtime,
-                                        job_handle,
-                                    })
-                                    .await
-                                    .unwrap();
-                            }
-                        }
+                    while let Ok(embeddings_queue) = embed_batch_rx.recv().await {
+                        Self::compute_embeddings_for_batch(
+                            embeddings_queue,
+                            &embedding_provider,
+                            &db_update_tx,
+                        )
+                        .await;
                     }
                 }
             });
 
-            // batch_tx/rx: Batch Files to Send for Embeddings
+            // Group documents into batches and send them to the embedding provider.
             let (batch_files_tx, batch_files_rx) = channel::unbounded::<EmbeddingJob>();
             let _batch_files_task = cx.background().spawn(async move {
                 let mut queue_len = 0;
                 let mut embeddings_queue = vec![];
-
                 while let Ok(job) = batch_files_rx.recv().await {
-                    let should_flush = match job {
-                        EmbeddingJob::Enqueue {
-                            documents,
-                            worktree_id,
-                            path,
-                            mtime,
-                            job_handle,
-                        } => {
-                            queue_len += &documents.len();
-                            embeddings_queue.push((
-                                worktree_id,
-                                documents,
-                                path,
-                                mtime,
-                                job_handle,
-                            ));
-                            queue_len >= EMBEDDINGS_BATCH_SIZE
-                        }
-                        EmbeddingJob::Flush => true,
-                    };
-
-                    if should_flush {
-                        embed_batch_tx.try_send(embeddings_queue).unwrap();
-                        embeddings_queue = vec![];
-                        queue_len = 0;
-                    }
+                    Self::enqueue_documents_to_embed(
+                        job,
+                        &mut queue_len,
+                        &mut embeddings_queue,
+                        &embed_batch_tx,
+                    );
                 }
             });
 
-            // parsing_files_tx/rx: Parsing Files to Embeddable Documents
+            // Parse files into embeddable documents.
             let (parsing_files_tx, parsing_files_rx) = channel::unbounded::<PendingFile>();
-
             let mut _parsing_files_tasks = Vec::new();
             for _ in 0..cx.background().num_cpus() {
                 let fs = fs.clone();
                 let parsing_files_rx = parsing_files_rx.clone();
                 let batch_files_tx = batch_files_tx.clone();
+                let db_update_tx = db_update_tx.clone();
                 _parsing_files_tasks.push(cx.background().spawn(async move {
                     let mut retriever = CodeContextRetriever::new();
                     while let Ok(pending_file) = parsing_files_rx.recv().await {
-                        if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err()
-                        {
-                            if let Some(documents) = retriever
-                                .parse_file(
-                                    &pending_file.relative_path,
-                                    &content,
-                                    pending_file.language,
-                                )
-                                .log_err()
-                            {
-                                log::trace!(
-                                    "parsed path {:?}: {} documents",
-                                    pending_file.relative_path,
-                                    documents.len()
-                                );
-
-                                batch_files_tx
-                                    .try_send(EmbeddingJob::Enqueue {
-                                        worktree_id: pending_file.worktree_db_id,
-                                        path: pending_file.relative_path,
-                                        mtime: pending_file.modified_time,
-                                        job_handle: pending_file.job_handle,
-                                        documents,
-                                    })
-                                    .unwrap();
-                            }
-                        }
-
-                        if parsing_files_rx.len() == 0 {
-                            batch_files_tx.try_send(EmbeddingJob::Flush).unwrap();
-                        }
+                        Self::parse_file(
+                            &fs,
+                            pending_file,
+                            &mut retriever,
+                            &batch_files_tx,
+                            &parsing_files_rx,
+                            &db_update_tx,
+                        )
+                        .await;
                     }
                 }));
             }
@@ -424,7 +273,6 @@ impl SemanticIndex {
                 embedding_provider,
                 language_registry,
                 db_update_tx,
-                next_job_id: Default::default(),
                 parsing_files_tx,
                 _db_update_task,
                 _embed_batch_task,
@@ -435,6 +283,167 @@ impl SemanticIndex {
         }))
     }
 
+    /// Applies a single queued `DbOperation` to `db`.
+    ///
+    /// Insert and delete failures are logged through `log_err`. Lookup results are sent
+    /// back over the operation's `sender`; a failed send is ignored.
+    fn run_db_operation(db: &VectorDatabase, job: DbOperation) {
+        match job {
+            DbOperation::Delete { worktree_id, path } => {
+                db.delete_file(worktree_id, path).log_err();
+            }
+            DbOperation::FindOrCreateWorktree { path, sender } => {
+                sender.send(db.find_or_create_worktree(&path)).ok();
+            }
+            DbOperation::FileMTimes { worktree_id, sender } => {
+                sender.send(db.get_file_mtimes(worktree_id)).ok();
+            }
+            DbOperation::InsertFile {
+                worktree_id,
+                documents,
+                path,
+                mtime,
+                job_handle,
+            } => {
+                let inserted = db.insert_file(worktree_id, path, mtime, documents);
+                inserted.log_err();
+                // The job handle is released only after the insert attempt.
+                drop(job_handle);
+            }
+        }
+    }
+
+    /// Embeds every document in `embeddings_queue` with one provider call, then queues a
+    /// `DbOperation::InsertFile` for each file on `db_update_tx`.
+    ///
+    /// When `embed_batch` fails, nothing is queued and the batch is dropped.
+    async fn compute_embeddings_for_batch(
+        mut embeddings_queue: Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>,
+        embedding_provider: &Arc<dyn EmbeddingProvider>,
+        db_update_tx: &channel::Sender<DbOperation>,
+    ) {
+        let mut batch_documents = vec![];
+        for (_, documents, _, _, _) in embeddings_queue.iter() {
+            batch_documents.extend(documents.iter().map(|document| document.content.as_str()));
+        }
+        let document_count = batch_documents.len();
+
+        if let Ok(embeddings) = embedding_provider.embed_batch(batch_documents).await {
+            log::trace!(
+                "created {} embeddings for {} files",
+                embeddings.len(),
+                embeddings_queue.len(),
+            );
+            if embeddings.len() != document_count {
+                log::warn!(
+                    "expected {} embeddings but received {}",
+                    document_count,
+                    embeddings.len(),
+                );
+            }
+
+            // Pair each document with its embedding in queue order. `zip` stops at the
+            // shorter side, so surplus embeddings cannot index past the end of the queue.
+            let all_documents = embeddings_queue
+                .iter_mut()
+                .flat_map(|(_, documents, _, _, _)| documents.iter_mut());
+            for (document, embedding) in all_documents.zip(embeddings.iter()) {
+                document.embedding = embedding.to_owned();
+            }
+
+            for (worktree_id, documents, path, mtime, job_handle) in embeddings_queue {
+                db_update_tx
+                    .send(DbOperation::InsertFile {
+                        worktree_id,
+                        documents,
+                        path,
+                        mtime,
+                        job_handle,
+                    })
+                    .await
+                    .unwrap();
+            }
+        }
+    }
+
+    /// Queues a job's documents, sending the queue as one batch when full or on `Flush`.
+    fn enqueue_documents_to_embed(
+        job: EmbeddingJob,
+        queue_len: &mut usize,
+        embeddings_queue: &mut Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>,
+        embed_batch_tx: &channel::Sender<Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>>,
+    ) {
+        let should_flush = match job {
+            EmbeddingJob::Flush => true,
+            EmbeddingJob::Enqueue {
+                documents,
+                worktree_id,
+                path,
+                mtime,
+                job_handle,
+            } => {
+                *queue_len += documents.len();
+                embeddings_queue.push((worktree_id, documents, path, mtime, job_handle));
+                *queue_len >= EMBEDDINGS_BATCH_SIZE
+            }
+        };
+        // An empty queue has nothing to embed, so a redundant `Flush` sends no batch.
+        if should_flush && !embeddings_queue.is_empty() {
+            let batch = mem::take(embeddings_queue);
+            embed_batch_tx.try_send(batch).unwrap();
+            *queue_len = 0;
+        }
+    }
+
+    /// Loads and parses `pending_file`. An empty result goes straight to `db_update_tx`;
+    /// otherwise the documents are enqueued. A `Flush` follows once the receiver is empty.
+    async fn parse_file(
+        fs: &Arc<dyn Fs>,
+        pending_file: PendingFile,
+        retriever: &mut CodeContextRetriever,
+        batch_files_tx: &channel::Sender<EmbeddingJob>,
+        parsing_files_rx: &channel::Receiver<PendingFile>,
+        db_update_tx: &channel::Sender<DbOperation>,
+    ) {
+        let parsed = match fs.load(&pending_file.absolute_path).await.log_err() {
+            Some(content) => retriever
+                .parse_file(&pending_file.relative_path, &content, pending_file.language)
+                .log_err(),
+            None => None,
+        };
+
+        if let Some(documents) = parsed {
+            log::trace!(
+                "parsed path {:?}: {} documents",
+                pending_file.relative_path,
+                documents.len()
+            );
+            if documents.is_empty() {
+                let insert = DbOperation::InsertFile {
+                    worktree_id: pending_file.worktree_db_id,
+                    documents,
+                    path: pending_file.relative_path,
+                    mtime: pending_file.modified_time,
+                    job_handle: pending_file.job_handle,
+                };
+                db_update_tx.send(insert).await.unwrap();
+            } else {
+                let enqueue = EmbeddingJob::Enqueue {
+                    worktree_id: pending_file.worktree_db_id,
+                    path: pending_file.relative_path,
+                    mtime: pending_file.modified_time,
+                    job_handle: pending_file.job_handle,
+                    documents,
+                };
+                batch_files_tx.try_send(enqueue).unwrap();
+            }
+        }
+
+        if parsing_files_rx.len() == 0 {
+            batch_files_tx.try_send(EmbeddingJob::Flush).unwrap();
+        }
+    }
+
     fn find_or_create_worktree(&self, path: PathBuf) -> impl Future<Output = Result<i64>> {
         let (tx, rx) = oneshot::channel();
         self.db_update_tx
@@ -457,11 +466,11 @@ impl SemanticIndex {
         async move { rx.await? }
     }
 
-    fn index_project(
+    pub fn index_project(
         &mut self,
         project: ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
-    ) -> Task<Result<usize>> {
+    ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
         let worktree_scans_complete = project
             .read(cx)
             .worktrees(cx)
@@ -483,7 +492,6 @@ impl SemanticIndex {
         let language_registry = self.language_registry.clone();
         let db_update_tx = self.db_update_tx.clone();
         let parsing_files_tx = self.parsing_files_tx.clone();
-        let next_job_id = self.next_job_id.clone();
 
         cx.spawn(|this, mut cx| async move {
             futures::future::join_all(worktree_scans_complete).await;
@@ -509,8 +517,8 @@ impl SemanticIndex {
                 );
             }
 
-            // let mut pending_files: Vec<(PathBuf, ((i64, PathBuf, Arc<Language>, SystemTime), SystemTime))> = vec![];
-            let outstanding_jobs = Arc::new(Mutex::new(HashSet::new()));
+            let (job_count_tx, job_count_rx) = watch::channel_with(0);
+            let job_count_tx = Arc::new(Mutex::new(job_count_tx));
             this.update(&mut cx, |this, _| {
                 this.projects.insert(
                     project.downgrade(),
@@ -519,7 +527,8 @@ impl SemanticIndex {
                             .iter()
                             .map(|(a, b)| (*a, *b))
                             .collect(),
-                        outstanding_jobs: outstanding_jobs.clone(),
+                        outstanding_job_count_rx: job_count_rx.clone(),
+                        outstanding_job_count_tx: job_count_tx.clone(),
                     },
                 );
             });
@@ -527,7 +536,6 @@ impl SemanticIndex {
             cx.background()
                 .spawn(async move {
                     let mut count = 0;
-                    let t0 = Instant::now();
                     for worktree in worktrees.into_iter() {
                         let mut file_mtimes = worktree_file_mtimes.remove(&worktree.id()).unwrap();
                         for file in worktree.files(false, 0) {
@@ -552,14 +560,11 @@ impl SemanticIndex {
                                     .map_or(false, |existing_mtime| existing_mtime == file.mtime);
 
                                 if !already_stored {
-                                    log::trace!("sending for parsing: {:?}", path_buf);
                                     count += 1;
-                                    let job_id = next_job_id.fetch_add(1, atomic::Ordering::SeqCst);
+                                    *job_count_tx.lock().borrow_mut() += 1;
                                     let job_handle = JobHandle {
-                                        id: job_id,
-                                        set: Arc::downgrade(&outstanding_jobs),
+                                        tx: Arc::downgrade(&job_count_tx),
                                     };
-                                    outstanding_jobs.lock().insert(job_id);
                                     parsing_files_tx
                                         .try_send(PendingFile {
                                             worktree_db_id: db_ids_by_worktree_id[&worktree.id()],
@@ -582,27 +587,22 @@ impl SemanticIndex {
                                 .unwrap();
                         }
                     }
-                    log::trace!(
-                        "parsing worktree completed in {:?}",
-                        t0.elapsed().as_millis()
-                    );
 
-                    Ok(count)
+                    anyhow::Ok((count, job_count_rx))
                 })
                 .await
         })
     }
 
-    pub fn remaining_files_to_index_for_project(
+    pub fn outstanding_job_count_rx(
         &self,
         project: &ModelHandle<Project>,
-    ) -> Option<usize> {
+    ) -> Option<watch::Receiver<usize>> {
         Some(
             self.projects
                 .get(&project.downgrade())?
-                .outstanding_jobs
-                .lock()
-                .len(),
+                .outstanding_job_count_rx
+                .clone(),
         )
     }
 
@@ -678,8 +678,9 @@ impl Entity for SemanticIndex {
 
 impl Drop for JobHandle {
     fn drop(&mut self) {
-        if let Some(set) = self.set.upgrade() {
-            set.lock().remove(&self.id);
+        if let Some(tx) = self.tx.upgrade() {
+            let mut tx = tx.lock();
+            *tx.borrow_mut() -= 1;
         }
     }
 }
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index ed48cf256bed1bce335c942b2508d486acf82ce0..2ccc52d64b598e56be41a0aae5284517c9f0b36b 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -88,18 +88,13 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
     let worktree_id = project.read_with(cx, |project, cx| {
         project.worktrees(cx).next().unwrap().read(cx).id()
     });
-    let file_count = store
+    let (file_count, outstanding_file_count) = store
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await
         .unwrap();
     assert_eq!(file_count, 3);
     cx.foreground().run_until_parked();
-    store.update(cx, |store, _cx| {
-        assert_eq!(
-            store.remaining_files_to_index_for_project(&project),
-            Some(0)
-        );
-    });
+    assert_eq!(*outstanding_file_count.borrow(), 0);
 
     let search_results = store
         .update(cx, |store, cx| {
@@ -128,19 +123,14 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
     cx.foreground().run_until_parked();
 
     let prev_embedding_count = embedding_provider.embedding_count();
-    let file_count = store
+    let (file_count, outstanding_file_count) = store
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await
         .unwrap();
     assert_eq!(file_count, 1);
 
     cx.foreground().run_until_parked();
-    store.update(cx, |store, _cx| {
-        assert_eq!(
-            store.remaining_files_to_index_for_project(&project),
-            Some(0)
-        );
-    });
+    assert_eq!(*outstanding_file_count.borrow(), 0);
 
     assert_eq!(
         embedding_provider.embedding_count() - prev_embedding_count,

From ed1b1a5ccd58610111eba38373b6ff42a1e05792 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 18 Jul 2023 11:00:21 -0400
Subject: [PATCH 18/34] update logging for open ai embedding and remove
 redundant truncation

---
 crates/semantic_index/src/embedding.rs | 34 ++++++++++++++++----------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/crates/semantic_index/src/embedding.rs b/crates/semantic_index/src/embedding.rs
index 5e2025b644c5be8e7089955a80819f7b71229f3e..d41350f321d3b13aea217b322163cf73cca07269 100644
--- a/crates/semantic_index/src/embedding.rs
+++ b/crates/semantic_index/src/embedding.rs
@@ -67,13 +67,17 @@ impl EmbeddingProvider for DummyEmbeddings {
     }
 }
 
-const INPUT_LIMIT: usize = 8190;
+const OPENAI_INPUT_LIMIT: usize = 8190;
 
 impl OpenAIEmbeddings {
+    pub fn new(client: Arc<dyn HttpClient>, executor: Arc<Background>) -> Self {
+        Self { client, executor }
+    }
+
     fn truncate(span: String) -> String {
         let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref());
-        if tokens.len() > INPUT_LIMIT {
-            tokens.truncate(INPUT_LIMIT);
+        if tokens.len() > OPENAI_INPUT_LIMIT {
+            tokens.truncate(OPENAI_INPUT_LIMIT);
             let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone());
             if result.is_ok() {
                 let transformed = result.unwrap();
@@ -115,6 +119,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
             .ok_or_else(|| anyhow!("no api key"))?;
 
         let mut request_number = 0;
+        let mut truncated = false;
         let mut response: Response<AsyncBody>;
         let mut spans: Vec<String> = spans.iter().map(|x| x.to_string()).collect();
         while request_number < MAX_RETRIES {
@@ -136,15 +141,18 @@ impl EmbeddingProvider for OpenAIEmbeddings {
                     self.executor.timer(delay).await;
                 }
                 StatusCode::BAD_REQUEST => {
-                    log::info!(
-                        "BAD REQUEST: {:?} {:?}",
-                        &response.status(),
-                        response.body()
-                    );
-                    // Don't worry about delaying bad request, as we can assume
-                    // we haven't been rate limited yet.
-                    for span in spans.iter_mut() {
-                        *span = Self::truncate(span.to_string());
+                    // Only truncate if it hasnt been truncated before
+                    if !truncated {
+                        for span in spans.iter_mut() {
+                            *span = Self::truncate(span.clone());
+                        }
+                        truncated = true;
+                    } else {
+                        // If failing once already truncated, log the error and break the loop
+                        let mut body = String::new();
+                        response.body_mut().read_to_string(&mut body).await?;
+                        log::trace!("open ai bad request: {:?} {:?}", &response.status(), body);
+                        break;
                     }
                 }
                 StatusCode::OK => {
@@ -152,7 +160,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
                     response.body_mut().read_to_string(&mut body).await?;
                     let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?;
 
-                    log::info!(
+                    log::trace!(
                         "openai embedding completed. tokens: {:?}",
                         response.usage.total_tokens
                     );

From 80ef92a3e158618d9dcc255fe8689b8597aacb4d Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 18 Jul 2023 11:14:13 -0400
Subject: [PATCH 19/34] fix db schema update process to ensure all tables are
 dropped

---
 crates/semantic_index/src/db.rs             | 25 ++++++++++++---------
 crates/semantic_index/src/semantic_index.rs | 10 +--------
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index a667ff877c2e02c65e669e10b7fdbc07e319653b..74e1021b152f4bdaf36e3b780d9288bed374466b 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -66,24 +66,28 @@ impl VectorDatabase {
     fn initialize_database(&self) -> Result<()> {
         rusqlite::vtab::array::load_module(&self.db)?;
 
+        // Delete existing tables, if SEMANTIC_INDEX_VERSION is bumped
         if self
             .get_existing_version()
             .map_or(false, |version| version == SEMANTIC_INDEX_VERSION as i64)
         {
+            log::trace!("vector database schema up to date");
             return Ok(());
         }
 
+        log::trace!("vector database schema out of date. updating...");
         self.db
-            .execute(
-                "
-                DROP TABLE IF EXISTS documents;
-                DROP TABLE IF EXISTS files;
-                DROP TABLE IF EXISTS worktrees;
-                DROP TABLE IF EXISTS semantic_index_config;
-                ",
-                [],
-            )
-            .context("failed to drop tables")?;
+            .execute("DROP TABLE IF EXISTS documents", [])
+            .context("failed to drop 'documents' table")?;
+        self.db
+            .execute("DROP TABLE IF EXISTS files", [])
+            .context("failed to drop 'files' table")?;
+        self.db
+            .execute("DROP TABLE IF EXISTS worktrees", [])
+            .context("failed to drop 'worktrees' table")?;
+        self.db
+            .execute("DROP TABLE IF EXISTS semantic_index_config", [])
+            .context("failed to drop 'semantic_index_config' table")?;
 
         // Initialize Vector Databasing Tables
         self.db.execute(
@@ -133,6 +137,7 @@ impl VectorDatabase {
             [],
         )?;
 
+        log::trace!("vector database initialized with updated schema.");
         Ok(())
     }
 
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index e6443870aa5312b4a7ea4ecfe72c134841923c66..f6575f6ad7188dbaf7ba56160d72cc12f678de10 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -33,7 +33,7 @@ use util::{
     ResultExt,
 };
 
-const SEMANTIC_INDEX_VERSION: usize = 3;
+const SEMANTIC_INDEX_VERSION: usize = 4;
 const EMBEDDINGS_BATCH_SIZE: usize = 150;
 
 pub fn init(
@@ -344,14 +344,6 @@ impl SemanticIndex {
             }
 
             for (worktree_id, documents, path, mtime, job_handle) in embeddings_queue.into_iter() {
-                // for document in documents.iter() {
-                //     // TODO: Update this so it doesn't panic
-                //     assert!(
-                //         document.embedding.len() > 0,
-                //         "Document Embedding Not Complete"
-                //     );
-                // }
-
                 db_update_tx
                     .send(DbOperation::InsertFile {
                         worktree_id,

From 8d0614ce741a7cd279777bd16dcff6349105f077 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 18 Jul 2023 11:44:58 -0700
Subject: [PATCH 20/34] Populate project search results multi-buffer from
 semantic search

Co-authored-by: Kyle <kyle@zed.dev>
---
 crates/search/src/project_search.rs           | 73 +++++++++++++------
 crates/semantic_index/src/db.rs               | 16 ++--
 crates/semantic_index/src/embedding.rs        |  5 --
 crates/semantic_index/src/semantic_index.rs   | 68 +++++++++--------
 .../src/semantic_index_tests.rs               | 15 ++--
 5 files changed, 104 insertions(+), 73 deletions(-)

diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 91d2b142ae27c3b1579d99b04fe6cb8b6f745705..1097969c00efca6c025da6193251e480b943d7aa 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -2,7 +2,7 @@ use crate::{
     SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex,
     ToggleWholeWord,
 };
-use anyhow::{Context, Result};
+use anyhow::Result;
 use collections::HashMap;
 use editor::{
     items::active_match_index, scroll::autoscroll::Autoscroll, Anchor, Editor, MultiBuffer,
@@ -187,6 +187,53 @@ impl ProjectSearch {
         }));
         cx.notify();
     }
+
+    fn semantic_search(&mut self, query: String, cx: &mut ModelContext<Self>) -> Option<()> {
+        let project = self.project.clone();
+        let semantic_index = SemanticIndex::global(cx)?;
+        let search_task = semantic_index.update(cx, |semantic_index, cx| {
+            semantic_index.search_project(project, query.clone(), 10, cx)
+        });
+
+        self.search_id += 1;
+        // self.active_query = Some(query);
+        self.match_ranges.clear();
+        self.pending_search = Some(cx.spawn(|this, mut cx| async move {
+            let results = search_task.await.log_err()?;
+
+            let (_task, mut match_ranges) = this.update(&mut cx, |this, cx| {
+                this.excerpts.update(cx, |excerpts, cx| {
+                    excerpts.clear(cx);
+
+                    let matches = results
+                        .into_iter()
+                        .map(|result| (result.buffer, vec![result.range]))
+                        .collect();
+
+                    excerpts.stream_excerpts_with_context_lines(matches, 3, cx)
+                })
+            });
+
+            while let Some(match_range) = match_ranges.next().await {
+                this.update(&mut cx, |this, cx| {
+                    this.match_ranges.push(match_range);
+                    while let Ok(Some(match_range)) = match_ranges.try_next() {
+                        this.match_ranges.push(match_range);
+                    }
+                    cx.notify();
+                });
+            }
+
+            this.update(&mut cx, |this, cx| {
+                this.pending_search.take();
+                cx.notify();
+            });
+
+            None
+        }));
+
+        Some(())
+    }
 }
 
 pub enum ViewEvent {
@@ -595,27 +642,9 @@ impl ProjectSearchView {
                 return;
             }
 
-            let search_phrase = self.query_editor.read(cx).text(cx);
-            let project = self.model.read(cx).project.clone();
-            if let Some(semantic_index) = SemanticIndex::global(cx) {
-                let search_task = semantic_index.update(cx, |semantic_index, cx| {
-                    semantic_index.search_project(project, search_phrase, 10, cx)
-                });
-                semantic.search_task = Some(cx.spawn(|this, mut cx| async move {
-                    let results = search_task.await.context("search task")?;
-
-                    this.update(&mut cx, |this, cx| {
-                        dbg!(&results);
-                        // TODO: Update results
-
-                        if let Some(semantic) = &mut this.semantic {
-                            semantic.search_task = None;
-                        }
-                    })?;
-
-                    anyhow::Ok(())
-                }));
-            }
+            let query = self.query_editor.read(cx).text(cx);
+            self.model
+                .update(cx, |model, cx| model.semantic_search(query, cx));
             return;
         }
 
diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index 74e1021b152f4bdaf36e3b780d9288bed374466b..fd99594aab578919f80bd8236270b352a8540993 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -252,7 +252,7 @@ impl VectorDatabase {
         worktree_ids: &[i64],
         query_embedding: &Vec<f32>,
         limit: usize,
-    ) -> Result<Vec<(i64, PathBuf, Range<usize>, String)>> {
+    ) -> Result<Vec<(i64, PathBuf, Range<usize>)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
         self.for_each_document(&worktree_ids, |id, embedding| {
             let similarity = dot(&embedding, &query_embedding);
@@ -296,10 +296,7 @@ impl VectorDatabase {
         Ok(())
     }
 
-    fn get_documents_by_ids(
-        &self,
-        ids: &[i64],
-    ) -> Result<Vec<(i64, PathBuf, Range<usize>, String)>> {
+    fn get_documents_by_ids(&self, ids: &[i64]) -> Result<Vec<(i64, PathBuf, Range<usize>)>> {
         let mut statement = self.db.prepare(
             "
                 SELECT
@@ -307,7 +304,7 @@ impl VectorDatabase {
                     files.worktree_id,
                     files.relative_path,
                     documents.start_byte,
-                    documents.end_byte, documents.name
+                    documents.end_byte
                 FROM
                     documents, files
                 WHERE
@@ -322,14 +319,13 @@ impl VectorDatabase {
                 row.get::<_, i64>(1)?,
                 row.get::<_, String>(2)?.into(),
                 row.get(3)?..row.get(4)?,
-                row.get(5)?,
             ))
         })?;
 
-        let mut values_by_id = HashMap::<i64, (i64, PathBuf, Range<usize>, String)>::default();
+        let mut values_by_id = HashMap::<i64, (i64, PathBuf, Range<usize>)>::default();
         for row in result_iter {
-            let (id, worktree_id, path, range, name) = row?;
-            values_by_id.insert(id, (worktree_id, path, range, name));
+            let (id, worktree_id, path, range) = row?;
+            values_by_id.insert(id, (worktree_id, path, range));
         }
 
         let mut results = Vec::with_capacity(ids.len());
diff --git a/crates/semantic_index/src/embedding.rs b/crates/semantic_index/src/embedding.rs
index d41350f321d3b13aea217b322163cf73cca07269..728fc9283a1ebcaf13bd035ac3fd0766c9112913 100644
--- a/crates/semantic_index/src/embedding.rs
+++ b/crates/semantic_index/src/embedding.rs
@@ -70,10 +70,6 @@ impl EmbeddingProvider for DummyEmbeddings {
 const OPENAI_INPUT_LIMIT: usize = 8190;
 
 impl OpenAIEmbeddings {
-    pub fn new(client: Arc<dyn HttpClient>, executor: Arc<Background>) -> Self {
-        Self { client, executor }
-    }
-
     fn truncate(span: String) -> String {
         let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref());
         if tokens.len() > OPENAI_INPUT_LIMIT {
@@ -81,7 +77,6 @@ impl OpenAIEmbeddings {
             let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone());
             if result.is_ok() {
                 let transformed = result.unwrap();
-                // assert_ne!(transformed, span);
                 return transformed;
             }
         }
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index f6575f6ad7188dbaf7ba56160d72cc12f678de10..5c6919d4fd46ee80d2e82515a3710cff044a4e10 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -12,7 +12,7 @@ use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
 use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
-use language::{Language, LanguageRegistry};
+use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
 use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES};
 use postage::watch;
@@ -93,7 +93,7 @@ pub struct SemanticIndex {
 struct ProjectState {
     worktree_db_ids: Vec<(WorktreeId, i64)>,
     outstanding_job_count_rx: watch::Receiver<usize>,
-    outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>,
+    _outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>,
 }
 
 struct JobHandle {
@@ -135,12 +135,9 @@ pub struct PendingFile {
     job_handle: JobHandle,
 }
 
-#[derive(Debug, Clone)]
 pub struct SearchResult {
-    pub worktree_id: WorktreeId,
-    pub name: String,
-    pub byte_range: Range<usize>,
-    pub file_path: PathBuf,
+    pub buffer: ModelHandle<Buffer>,
+    pub range: Range<Anchor>,
 }
 
 enum DbOperation {
@@ -520,7 +517,7 @@ impl SemanticIndex {
                             .map(|(a, b)| (*a, *b))
                             .collect(),
                         outstanding_job_count_rx: job_count_rx.clone(),
-                        outstanding_job_count_tx: job_count_tx.clone(),
+                        _outstanding_job_count_tx: job_count_tx.clone(),
                     },
                 );
             });
@@ -623,7 +620,7 @@ impl SemanticIndex {
         let embedding_provider = self.embedding_provider.clone();
         let database_url = self.database_url.clone();
         let fs = self.fs.clone();
-        cx.spawn(|this, cx| async move {
+        cx.spawn(|this, mut cx| async move {
             let documents = cx
                 .background()
                 .spawn(async move {
@@ -640,26 +637,39 @@ impl SemanticIndex {
                 })
                 .await?;
 
-            this.read_with(&cx, |this, _| {
-                let project_state = if let Some(state) = this.projects.get(&project.downgrade()) {
-                    state
-                } else {
-                    return Err(anyhow!("project not added"));
-                };
-
-                Ok(documents
-                    .into_iter()
-                    .filter_map(|(worktree_db_id, file_path, byte_range, name)| {
-                        let worktree_id = project_state.worktree_id_for_db_id(worktree_db_id)?;
-                        Some(SearchResult {
-                            worktree_id,
-                            name,
-                            byte_range,
-                            file_path,
-                        })
-                    })
-                    .collect())
-            })
+            let mut tasks = Vec::new();
+            let mut ranges = Vec::new();
+            let weak_project = project.downgrade();
+            project.update(&mut cx, |project, cx| {
+                for (worktree_db_id, file_path, byte_range) in documents {
+                    let project_state =
+                        if let Some(state) = this.read(cx).projects.get(&weak_project) {
+                            state
+                        } else {
+                            return Err(anyhow!("project not added"));
+                        };
+                    if let Some(worktree_id) = project_state.worktree_id_for_db_id(worktree_db_id) {
+                        tasks.push(project.open_buffer((worktree_id, file_path), cx));
+                        ranges.push(byte_range);
+                    }
+                }
+
+                Ok(())
+            })?;
+
+            let buffers = futures::future::join_all(tasks).await;
+
+            Ok(buffers
+                .into_iter()
+                .zip(ranges)
+                .filter_map(|(buffer, range)| {
+                    let buffer = buffer.log_err()?;
+                    let range = buffer.read_with(&cx, |buffer, _| {
+                        buffer.anchor_before(range.start)..buffer.anchor_after(range.end)
+                    });
+                    Some(SearchResult { buffer, range })
+                })
+                .collect::<Vec<_>>())
         })
     }
 }
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 2ccc52d64b598e56be41a0aae5284517c9f0b36b..63b28798ad91d67d6786b4b420900135050dfe5b 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -8,7 +8,7 @@ use crate::{
 use anyhow::Result;
 use async_trait::async_trait;
 use gpui::{Task, TestAppContext};
-use language::{Language, LanguageConfig, LanguageRegistry};
+use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
 use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
 use rand::{rngs::StdRng, Rng};
 use serde_json::json;
@@ -85,9 +85,6 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
     .unwrap();
 
     let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
-    let worktree_id = project.read_with(cx, |project, cx| {
-        project.worktrees(cx).next().unwrap().read(cx).id()
-    });
     let (file_count, outstanding_file_count) = store
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await
@@ -103,9 +100,13 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .await
         .unwrap();
 
-    assert_eq!(search_results[0].byte_range.start, 0);
-    assert_eq!(search_results[0].name, "aaa");
-    assert_eq!(search_results[0].worktree_id, worktree_id);
+    search_results[0].buffer.read_with(cx, |buffer, _cx| {
+        assert_eq!(search_results[0].range.start.to_offset(buffer), 0);
+        assert_eq!(
+            buffer.file().unwrap().path().as_ref(),
+            Path::new("file1.rs")
+        );
+    });
 
     fs.save(
         "/the-root/src/file2.rs".as_ref(),

From 342dbc69459d771f802d7e77fdd7fb20f7445d1f Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 18 Jul 2023 12:01:42 -0700
Subject: [PATCH 21/34] Fix rendering of project search while semantic index is
 indexing or running

Co-authored-by: Kyle <kyle@zed.dev>
---
 crates/search/src/project_search.rs | 32 +++++++++++++----------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 1097969c00efca6c025da6193251e480b943d7aa..5feb94426eb60c67a756c564982a826699bd20a1 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -110,7 +110,6 @@ struct SemanticSearchState {
     file_count: usize,
     outstanding_file_count: usize,
     _progress_task: Task<()>,
-    search_task: Option<Task<Result<()>>>,
 }
 
 pub struct ProjectSearchBar {
@@ -188,18 +187,17 @@ impl ProjectSearch {
         cx.notify();
     }
 
-    fn semantic_search(&mut self, query: String, cx: &mut ModelContext<Self>) -> Option<()> {
-        let project = self.project.clone();
-        let semantic_index = SemanticIndex::global(cx)?;
-        let search_task = semantic_index.update(cx, |semantic_index, cx| {
-            semantic_index.search_project(project, query.clone(), 10, cx)
+    fn semantic_search(&mut self, query: String, cx: &mut ModelContext<Self>) {
+        let search = SemanticIndex::global(cx).map(|index| {
+            index.update(cx, |semantic_index, cx| {
+                semantic_index.search_project(self.project.clone(), query.clone(), 10, cx)
+            })
         });
-
         self.search_id += 1;
         // self.active_query = Some(query);
         self.match_ranges.clear();
         self.pending_search = Some(cx.spawn(|this, mut cx| async move {
-            let results = search_task.await.log_err()?;
+            let results = search?.await.log_err()?;
 
             let (_task, mut match_ranges) = this.update(&mut cx, |this, cx| {
                 this.excerpts.update(cx, |excerpts, cx| {
@@ -231,8 +229,7 @@ impl ProjectSearch {
 
             None
         }));
-
-        Some(())
+        cx.notify();
     }
 }
 
@@ -257,12 +254,10 @@ impl View for ProjectSearchView {
             enum Status {}
 
             let theme = theme::current(cx).clone();
-            let text = if self.query_editor.read(cx).text(cx).is_empty() {
-                Cow::Borrowed("")
+            let text = if model.pending_search.is_some() {
+                Cow::Borrowed("Searching...")
             } else if let Some(semantic) = &self.semantic {
-                if semantic.search_task.is_some() {
-                    Cow::Borrowed("Searching...")
-                } else if semantic.outstanding_file_count > 0 {
+                if semantic.outstanding_file_count > 0 {
                     Cow::Owned(format!(
                         "Indexing. {} of {}...",
                         semantic.file_count - semantic.outstanding_file_count,
@@ -271,8 +266,8 @@ impl View for ProjectSearchView {
                 } else {
                     Cow::Borrowed("Indexing complete")
                 }
-            } else if model.pending_search.is_some() {
-                Cow::Borrowed("Searching...")
+            } else if self.query_editor.read(cx).text(cx).is_empty() {
+                Cow::Borrowed("")
             } else {
                 Cow::Borrowed("No results")
             };
@@ -978,10 +973,10 @@ impl ProjectSearchBar {
                         let (files_to_index, mut files_remaining_rx) = index_task.await?;
 
                         search_view.update(&mut cx, |search_view, cx| {
+                            cx.notify();
                             search_view.semantic = Some(SemanticSearchState {
                                 file_count: files_to_index,
                                 outstanding_file_count: files_to_index,
-                                search_task: None,
                                 _progress_task: cx.spawn(|search_view, mut cx| async move {
                                     while let Some(count) = files_remaining_rx.recv().await {
                                         search_view
@@ -1006,6 +1001,7 @@ impl ProjectSearchBar {
                     })
                     .detach_and_log_err(cx);
                 }
+                cx.notify();
             });
             cx.notify();
             true

From 0e071919a07967f781f87c060c5c94168a844ba6 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 18 Jul 2023 16:09:44 -0400
Subject: [PATCH 22/34] parallelize embedding api calls

---
 crates/semantic_index/src/embedding.rs      |  6 ++-
 crates/semantic_index/src/semantic_index.rs | 54 ++++++++++++++-------
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/crates/semantic_index/src/embedding.rs b/crates/semantic_index/src/embedding.rs
index 728fc9283a1ebcaf13bd035ac3fd0766c9112913..77457ec7f6e34961ab2a784ef6f0d8068c4c1dbb 100644
--- a/crates/semantic_index/src/embedding.rs
+++ b/crates/semantic_index/src/embedding.rs
@@ -106,7 +106,7 @@ impl OpenAIEmbeddings {
 #[async_trait]
 impl EmbeddingProvider for OpenAIEmbeddings {
     async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
-        const BACKOFF_SECONDS: [usize; 3] = [65, 180, 360];
+        const BACKOFF_SECONDS: [usize; 3] = [45, 75, 125];
         const MAX_RETRIES: usize = 3;
 
         let api_key = OPENAI_API_KEY
@@ -133,6 +133,10 @@ impl EmbeddingProvider for OpenAIEmbeddings {
             match response.status() {
                 StatusCode::TOO_MANY_REQUESTS => {
                     let delay = Duration::from_secs(BACKOFF_SECONDS[request_number - 1] as u64);
+                    log::trace!(
+                        "open ai rate limiting, delaying request by {:?} seconds",
+                        delay.as_secs()
+                    );
                     self.executor.timer(delay).await;
                 }
                 StatusCode::BAD_REQUEST => {
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 5c6919d4fd46ee80d2e82515a3710cff044a4e10..44ce45f457004c7167f8c61501c2b03ca239d199 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -24,7 +24,7 @@ use std::{
     ops::Range,
     path::{Path, PathBuf},
     sync::{Arc, Weak},
-    time::SystemTime,
+    time::{Instant, SystemTime},
 };
 use util::{
     channel::{ReleaseChannel, RELEASE_CHANNEL, RELEASE_CHANNEL_NAME},
@@ -34,7 +34,7 @@ use util::{
 };
 
 const SEMANTIC_INDEX_VERSION: usize = 4;
-const EMBEDDINGS_BATCH_SIZE: usize = 150;
+const EMBEDDINGS_BATCH_SIZE: usize = 80;
 
 pub fn init(
     fs: Arc<dyn Fs>,
@@ -84,7 +84,7 @@ pub struct SemanticIndex {
     db_update_tx: channel::Sender<DbOperation>,
     parsing_files_tx: channel::Sender<PendingFile>,
     _db_update_task: Task<()>,
-    _embed_batch_task: Task<()>,
+    _embed_batch_tasks: Vec<Task<()>>,
     _batch_files_task: Task<()>,
     _parsing_files_tasks: Vec<Task<()>>,
     projects: HashMap<WeakModelHandle<Project>, ProjectState>,
@@ -189,6 +189,7 @@ impl SemanticIndex {
         language_registry: Arc<LanguageRegistry>,
         mut cx: AsyncAppContext,
     ) -> Result<ModelHandle<Self>> {
+        let t0 = Instant::now();
         let database_url = Arc::new(database_url);
 
         let db = cx
@@ -196,7 +197,13 @@ impl SemanticIndex {
             .spawn(VectorDatabase::new(fs.clone(), database_url.clone()))
             .await?;
 
+        log::trace!(
+            "db initialization took {:?} milliseconds",
+            t0.elapsed().as_millis()
+        );
+
         Ok(cx.add_model(|cx| {
+            let t0 = Instant::now();
             // Perform database operations
             let (db_update_tx, db_update_rx) = channel::unbounded();
             let _db_update_task = cx.background().spawn({
@@ -210,20 +217,24 @@ impl SemanticIndex {
             // Group documents into batches and send them to the embedding provider.
             let (embed_batch_tx, embed_batch_rx) =
                 channel::unbounded::<Vec<(i64, Vec<Document>, PathBuf, SystemTime, JobHandle)>>();
-            let _embed_batch_task = cx.background().spawn({
-                let db_update_tx = db_update_tx.clone();
-                let embedding_provider = embedding_provider.clone();
-                async move {
-                    while let Ok(embeddings_queue) = embed_batch_rx.recv().await {
-                        Self::compute_embeddings_for_batch(
-                            embeddings_queue,
-                            &embedding_provider,
-                            &db_update_tx,
-                        )
-                        .await;
+            let mut _embed_batch_tasks = Vec::new();
+            for _ in 0..cx.background().num_cpus() {
+                let embed_batch_rx = embed_batch_rx.clone();
+                _embed_batch_tasks.push(cx.background().spawn({
+                    let db_update_tx = db_update_tx.clone();
+                    let embedding_provider = embedding_provider.clone();
+                    async move {
+                        while let Ok(embeddings_queue) = embed_batch_rx.recv().await {
+                            Self::compute_embeddings_for_batch(
+                                embeddings_queue,
+                                &embedding_provider,
+                                &db_update_tx,
+                            )
+                            .await;
+                        }
                     }
-                }
-            });
+                }));
+            }
 
             // Group documents into batches and send them to the embedding provider.
             let (batch_files_tx, batch_files_rx) = channel::unbounded::<EmbeddingJob>();
@@ -264,6 +275,10 @@ impl SemanticIndex {
                 }));
             }
 
+            log::trace!(
+                "semantic index task initialization took {:?} milliseconds",
+                t0.elapsed().as_millis()
+            );
             Self {
                 fs,
                 database_url,
@@ -272,7 +287,7 @@ impl SemanticIndex {
                 db_update_tx,
                 parsing_files_tx,
                 _db_update_task,
-                _embed_batch_task,
+                _embed_batch_tasks,
                 _batch_files_task,
                 _parsing_files_tasks,
                 projects: HashMap::new(),
@@ -460,6 +475,7 @@ impl SemanticIndex {
         project: ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
+        let t0 = Instant::now();
         let worktree_scans_complete = project
             .read(cx)
             .worktrees(cx)
@@ -577,6 +593,10 @@ impl SemanticIndex {
                         }
                     }
 
+                    log::trace!(
+                        "walking worktree took {:?} milliseconds",
+                        t0.elapsed().as_millis()
+                    );
                     anyhow::Ok((count, job_count_rx))
                 })
                 .await

From 9809ec3d706a19cd409a8a7494fabc06803e0ed7 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 19 Jul 2023 15:47:05 -0400
Subject: [PATCH 23/34] update treesitter parsing to accommodate for collapsed
 nested functions

Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 Cargo.lock                                    |    3 +-
 Cargo.toml                                    |    2 +-
 crates/language/src/language.rs               |   22 +
 crates/semantic_index/Cargo.toml              |    1 +
 crates/semantic_index/src/parsing.rs          |  257 +++-
 crates/semantic_index/src/semantic_index.rs   |    8 +-
 .../src/semantic_index_tests.rs               | 1079 +++++++++--------
 crates/zed/src/languages/rust/config.toml     |    1 +
 crates/zed/src/languages/rust/embedding.scm   |   64 +-
 9 files changed, 813 insertions(+), 624 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7719eb24c228613114bf999f207629ba0c6d4664..8ea6f61da04f215b91b31941ccce795be778a204 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6486,6 +6486,7 @@ dependencies = [
  "parking_lot 0.11.2",
  "picker",
  "postage",
+ "pretty_assertions",
  "project",
  "rand 0.8.5",
  "rpc",
@@ -7991,7 +7992,7 @@ dependencies = [
 [[package]]
 name = "tree-sitter"
 version = "0.20.10"
-source = "git+https://github.com/tree-sitter/tree-sitter?rev=49226023693107fba9a1191136a4f47f38cdca73#49226023693107fba9a1191136a4f47f38cdca73"
+source = "git+https://github.com/tree-sitter/tree-sitter?rev=1c65ca24bc9a734ab70115188f465e12eecf224e#1c65ca24bc9a734ab70115188f465e12eecf224e"
 dependencies = [
  "cc",
  "regex",
diff --git a/Cargo.toml b/Cargo.toml
index 4b6574534845456623d8c1a6510c15817c2b6151..04f2147431ffe183de21e250885ce16b28166ec9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -130,7 +130,7 @@ tree-sitter-yaml = { git = "https://github.com/zed-industries/tree-sitter-yaml",
 tree-sitter-lua = "0.0.14"
 
 [patch.crates-io]
-tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "49226023693107fba9a1191136a4f47f38cdca73" }
+tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "1c65ca24bc9a734ab70115188f465e12eecf224e" }
 async-task = { git = "https://github.com/zed-industries/async-task", rev = "341b57d6de98cdfd7b418567b8de2022ca993a6e" }
 
 # TODO - Remove when a version is released with this PR: https://github.com/servo/core-foundation-rs/pull/457
diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs
index 8c6d6e9c09f17f48c58c42e3d67d144ceb7e56cb..ec233716d6ce5345515600b14e00a212b3dcb3a5 100644
--- a/crates/language/src/language.rs
+++ b/crates/language/src/language.rs
@@ -339,6 +339,8 @@ pub struct LanguageConfig {
     #[serde(default)]
     pub line_comment: Option<Arc<str>>,
     #[serde(default)]
+    pub collapsed_placeholder: String,
+    #[serde(default)]
     pub block_comment: Option<(Arc<str>, Arc<str>)>,
     #[serde(default)]
     pub overrides: HashMap<String, LanguageConfigOverride>,
@@ -408,6 +410,7 @@ impl Default for LanguageConfig {
             line_comment: Default::default(),
             block_comment: Default::default(),
             overrides: Default::default(),
+            collapsed_placeholder: Default::default(),
         }
     }
 }
@@ -525,6 +528,8 @@ pub struct EmbeddingConfig {
     pub item_capture_ix: u32,
     pub name_capture_ix: u32,
     pub context_capture_ix: Option<u32>,
+    pub collapse_capture_ix: Option<u32>,
+    pub keep_capture_ix: Option<u32>,
 }
 
 struct InjectionConfig {
@@ -1246,12 +1251,16 @@ impl Language {
         let mut item_capture_ix = None;
         let mut name_capture_ix = None;
         let mut context_capture_ix = None;
+        let mut collapse_capture_ix = None;
+        let mut keep_capture_ix = None;
         get_capture_indices(
             &query,
             &mut [
                 ("item", &mut item_capture_ix),
                 ("name", &mut name_capture_ix),
                 ("context", &mut context_capture_ix),
+                ("keep", &mut keep_capture_ix),
+                ("collapse", &mut collapse_capture_ix),
             ],
         );
         if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
@@ -1260,6 +1269,8 @@ impl Language {
                 item_capture_ix,
                 name_capture_ix,
                 context_capture_ix,
+                collapse_capture_ix,
+                keep_capture_ix,
             });
         }
         Ok(self)
@@ -1544,9 +1555,20 @@ impl Language {
     pub fn grammar(&self) -> Option<&Arc<Grammar>> {
         self.grammar.as_ref()
     }
+
+    pub fn default_scope(self: &Arc<Self>) -> LanguageScope {
+        LanguageScope {
+            language: self.clone(),
+            override_id: None,
+        }
+    }
 }
 
 impl LanguageScope {
+    pub fn collapsed_placeholder(&self) -> &str {
+        self.language.config.collapsed_placeholder.as_ref()
+    }
+
     pub fn line_comment_prefix(&self) -> Option<&Arc<str>> {
         Override::as_option(
             self.config_override().map(|o| &o.line_comment),
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 2d21ff6c1c42710e597101cd024fdde9183bcbc5..1b3169bfe41940eef5c863901c560e106acf4816 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -46,6 +46,7 @@ rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
 
+pretty_assertions.workspace = true
 rand.workspace = true
 unindent.workspace = true
 tempdir.workspace = true
diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs
index 663f0f473b63358496c8dcbc337aa7ccbe452c76..0d2aeb60fb24dcb347c3a0f870ef6e348e08a88a 100644
--- a/crates/semantic_index/src/parsing.rs
+++ b/crates/semantic_index/src/parsing.rs
@@ -1,6 +1,6 @@
 use anyhow::{anyhow, Ok, Result};
-use language::Language;
-use std::{ops::Range, path::Path, sync::Arc};
+use language::{Grammar, Language};
+use std::{cmp, collections::HashSet, ops::Range, path::Path, sync::Arc};
 use tree_sitter::{Parser, QueryCursor};
 
 #[derive(Debug, PartialEq, Clone)]
@@ -22,6 +22,20 @@ pub struct CodeContextRetriever {
     pub cursor: QueryCursor,
 }
 
+// Every match has an item, this represents the fundamental treesitter symbol and anchors the search
+// Every match has one or more 'name' captures. These indicate the display range of the item for deduplication.
+// If there are preceding comments, we track this with a context capture
+// If there is a piece that should be collapsed in hierarchical queries, we capture it with a collapse capture
+// If there is a piece that should be kept inside a collapsed node, we capture it with a keep capture
+#[derive(Debug, Clone)]
+pub struct CodeContextMatch {
+    pub start_col: usize,
+    pub item_range: Range<usize>,
+    pub name_range: Range<usize>,
+    pub context_ranges: Vec<Range<usize>>,
+    pub collapse_ranges: Vec<Range<usize>>,
+}
+
 impl CodeContextRetriever {
     pub fn new() -> Self {
         Self {
@@ -49,24 +63,15 @@ impl CodeContextRetriever {
         }])
     }
 
-    pub fn parse_file(
+    fn get_matches_in_file(
         &mut self,
-        relative_path: &Path,
         content: &str,
-        language: Arc<Language>,
-    ) -> Result<Vec<Document>> {
-        if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) {
-            return self._parse_entire_file(relative_path, language.name(), &content);
-        }
-
-        let grammar = language
-            .grammar()
-            .ok_or_else(|| anyhow!("no grammar for language"))?;
+        grammar: &Arc<Grammar>,
+    ) -> Result<Vec<CodeContextMatch>> {
         let embedding_config = grammar
             .embedding_config
             .as_ref()
             .ok_or_else(|| anyhow!("no embedding queries"))?;
-
         self.parser.set_language(grammar.ts_language).unwrap();
 
         let tree = self
@@ -74,66 +79,204 @@ impl CodeContextRetriever {
             .parse(&content, None)
             .ok_or_else(|| anyhow!("parsing failed"))?;
 
-        let mut documents = Vec::new();
-
-        // Iterate through query matches
-        let mut name_ranges: Vec<Range<usize>> = vec![];
+        let mut captures: Vec<CodeContextMatch> = Vec::new();
+        let mut collapse_ranges: Vec<Range<usize>> = Vec::new();
+        let mut keep_ranges: Vec<Range<usize>> = Vec::new();
         for mat in self.cursor.matches(
             &embedding_config.query,
             tree.root_node(),
             content.as_bytes(),
         ) {
-            let mut name: Vec<&str> = vec![];
-            let mut item: Option<&str> = None;
-            let mut byte_range: Option<Range<usize>> = None;
-            let mut context_spans: Vec<&str> = vec![];
+            let mut start_col = 0;
+            let mut item_range: Option<Range<usize>> = None;
+            let mut name_range: Option<Range<usize>> = None;
+            let mut context_ranges: Vec<Range<usize>> = Vec::new();
+            collapse_ranges.clear();
+            keep_ranges.clear();
             for capture in mat.captures {
                 if capture.index == embedding_config.item_capture_ix {
-                    byte_range = Some(capture.node.byte_range());
-                    item = content.get(capture.node.byte_range());
+                    item_range = Some(capture.node.byte_range());
+                    start_col = capture.node.start_position().column;
                 } else if capture.index == embedding_config.name_capture_ix {
-                    let name_range = capture.node.byte_range();
-                    if name_ranges.contains(&name_range) {
-                        continue;
-                    }
-                    name_ranges.push(name_range.clone());
-                    if let Some(name_content) = content.get(name_range.clone()) {
-                        name.push(name_content);
-                    }
+                    name_range = Some(capture.node.byte_range());
+                } else if Some(capture.index) == embedding_config.context_capture_ix {
+                    context_ranges.push(capture.node.byte_range());
+                } else if Some(capture.index) == embedding_config.collapse_capture_ix {
+                    collapse_ranges.push(capture.node.byte_range());
+                } else if Some(capture.index) == embedding_config.keep_capture_ix {
+                    keep_ranges.push(capture.node.byte_range());
                 }
+            }
 
-                if let Some(context_capture_ix) = embedding_config.context_capture_ix {
-                    if capture.index == context_capture_ix {
-                        if let Some(context) = content.get(capture.node.byte_range()) {
-                            context_spans.push(context);
-                        }
-                    }
+            if item_range.is_some() && name_range.is_some() {
+                let item_range = item_range.unwrap();
+                captures.push(CodeContextMatch {
+                    start_col,
+                    item_range,
+                    name_range: name_range.unwrap(),
+                    context_ranges,
+                    collapse_ranges: subtract_ranges(&collapse_ranges, &keep_ranges),
+                });
+            }
+        }
+        Ok(captures)
+    }
+
+    pub fn parse_file_with_template(
+        &mut self,
+        relative_path: &Path,
+        content: &str,
+        language: Arc<Language>,
+    ) -> Result<Vec<Document>> {
+        let language_name = language.name();
+        let mut documents = self.parse_file(relative_path, content, language)?;
+        for document in &mut documents {
+            document.content = CODE_CONTEXT_TEMPLATE
+                .replace("<path>", relative_path.to_string_lossy().as_ref())
+                .replace("<language>", language_name.as_ref())
+                .replace("<item>", &document.content); // placeholder is "<item>", brackets included
+        }
+        Ok(documents)
+    }
+
+    pub fn parse_file(
+        &mut self,
+        relative_path: &Path,
+        content: &str,
+        language: Arc<Language>,
+    ) -> Result<Vec<Document>> {
+        if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) {
+            return self._parse_entire_file(relative_path, language.name(), &content);
+        }
+
+        let grammar = language
+            .grammar()
+            .ok_or_else(|| anyhow!("no grammar for language"))?;
+
+        // Iterate through query matches
+        let matches = self.get_matches_in_file(content, grammar)?;
+
+        let language_scope = language.default_scope();
+        let placeholder = language_scope.collapsed_placeholder();
+
+        let mut documents = Vec::new();
+        let mut collapsed_ranges_within = Vec::new();
+        let mut parsed_name_ranges = HashSet::new();
+        for (i, context_match) in matches.iter().enumerate() {
+            if parsed_name_ranges.contains(&context_match.name_range) {
+                continue;
+            }
+
+            collapsed_ranges_within.clear();
+            for remaining_match in &matches[(i + 1)..] {
+                if context_match
+                    .item_range
+                    .contains(&remaining_match.item_range.start)
+                    && context_match
+                        .item_range
+                        .contains(&remaining_match.item_range.end)
+                {
+                    collapsed_ranges_within.extend(remaining_match.collapse_ranges.iter().cloned());
+                } else {
+                    break;
                 }
             }
 
-            if let Some((item, byte_range)) = item.zip(byte_range) {
-                if !name.is_empty() {
-                    let item = if context_spans.is_empty() {
-                        item.to_string()
-                    } else {
-                        format!("{}\n{}", context_spans.join("\n"), item)
-                    };
-
-                    let document_text = CODE_CONTEXT_TEMPLATE
-                        .replace("<path>", relative_path.to_str().unwrap())
-                        .replace("<language>", &language.name().to_lowercase())
-                        .replace("<item>", item.as_str());
-
-                    documents.push(Document {
-                        range: byte_range,
-                        content: document_text,
-                        embedding: Vec::new(),
-                        name: name.join(" ").to_string(),
-                    });
+            let mut document_content = String::new();
+            for context_range in &context_match.context_ranges {
+                document_content.push_str(&content[context_range.clone()]);
+                document_content.push_str("\n");
+            }
+
+            let mut offset = context_match.item_range.start;
+            for collapsed_range in &collapsed_ranges_within {
+                if collapsed_range.start > offset {
+                    add_content_from_range(
+                        &mut document_content,
+                        content,
+                        offset..collapsed_range.start,
+                        context_match.start_col,
+                    );
                 }
+                document_content.push_str(placeholder);
+                offset = collapsed_range.end;
+            }
+
+            if offset < context_match.item_range.end {
+                add_content_from_range(
+                    &mut document_content,
+                    content,
+                    offset..context_match.item_range.end,
+                    context_match.start_col,
+                );
+            }
+
+            if let Some(name) = content.get(context_match.name_range.clone()) {
+                parsed_name_ranges.insert(context_match.name_range.clone());
+                documents.push(Document {
+                    name: name.to_string(),
+                    content: document_content,
+                    range: context_match.item_range.clone(),
+                    embedding: vec![],
+                })
             }
         }
 
         return Ok(documents);
     }
 }
+
+pub(crate) fn subtract_ranges(
+    ranges: &[Range<usize>],
+    ranges_to_subtract: &[Range<usize>],
+) -> Vec<Range<usize>> {
+    let mut result = Vec::new();
+
+    let mut ranges_to_subtract = ranges_to_subtract.iter().peekable();
+
+    for range in ranges {
+        let mut offset = range.start;
+
+        while offset < range.end {
+            if let Some(range_to_subtract) = ranges_to_subtract.peek() {
+                if offset < range_to_subtract.start {
+                    let next_offset = cmp::min(range_to_subtract.start, range.end);
+                    result.push(offset..next_offset);
+                    offset = next_offset;
+                } else {
+                    let next_offset = cmp::min(range_to_subtract.end, range.end);
+                    offset = next_offset;
+                }
+
+                if offset >= range_to_subtract.end {
+                    ranges_to_subtract.next();
+                }
+            } else {
+                result.push(offset..range.end);
+                offset = range.end;
+            }
+        }
+    }
+
+    result
+}
+
+fn add_content_from_range(
+    output: &mut String,
+    content: &str,
+    range: Range<usize>,
+    start_col: usize,
+) {
+    for mut line in content.get(range.clone()).unwrap_or("").lines() {
+        for _ in 0..start_col {
+            if line.starts_with(' ') {
+                line = &line[1..];
+            } else {
+                break;
+            }
+        }
+        output.push_str(line);
+        output.push('\n');
+    }
+    output.pop();
+}
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 44ce45f457004c7167f8c61501c2b03ca239d199..271fd741a643d1e04d5afe57a50b70b6e391cbf7 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -409,7 +409,11 @@ impl SemanticIndex {
     ) {
         if let Some(content) = fs.load(&pending_file.absolute_path).await.log_err() {
             if let Some(documents) = retriever
-                .parse_file(&pending_file.relative_path, &content, pending_file.language)
+                .parse_file_with_template(
+                    &pending_file.relative_path,
+                    &content,
+                    pending_file.language,
+                )
                 .log_err()
             {
                 log::trace!(
@@ -657,6 +661,8 @@ impl SemanticIndex {
                 })
                 .await?;
 
+            dbg!(&documents);
+
             let mut tasks = Vec::new();
             let mut ranges = Vec::new();
             let weak_project = project.downgrade();
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 63b28798ad91d67d6786b4b420900135050dfe5b..c54d5079d37f3b8ad5ce4dbb788f5eb5f68b02c8 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -1,7 +1,7 @@
 use crate::{
     db::dot,
     embedding::EmbeddingProvider,
-    parsing::{CodeContextRetriever, Document},
+    parsing::{subtract_ranges, CodeContextRetriever, Document},
     semantic_index_settings::SemanticIndexSettings,
     SemanticIndex,
 };
@@ -9,6 +9,7 @@ use anyhow::Result;
 use async_trait::async_trait;
 use gpui::{Task, TestAppContext};
 use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
+use pretty_assertions::assert_eq;
 use project::{project_settings::ProjectSettings, FakeFs, Fs, Project};
 use rand::{rngs::StdRng, Rng};
 use serde_json::json;
@@ -104,7 +105,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         assert_eq!(search_results[0].range.start.to_offset(buffer), 0);
         assert_eq!(
             buffer.file().unwrap().path().as_ref(),
-            Path::new("file1.rs")
+            Path::new("src/file1.rs")
         );
     });
 
@@ -147,503 +148,548 @@ async fn test_code_context_retrieval_rust() {
     let text = "
         /// A doc comment
         /// that spans multiple lines
+        #[gpui::test]
         fn a() {
             b
         }
 
         impl C for D {
         }
+
+        impl E {
+            // This is also a preceding comment
+            pub fn function_1() -> Option<()> {
+                todo!();
+            }
+
+            // This is a preceding comment
+            fn function_2() -> Result<()> {
+                todo!();
+            }
+        }
     "
     .unindent();
 
-    let parsed_files = retriever
+    let documents = retriever
         .parse_file(Path::new("foo.rs"), &text, language)
         .unwrap();
 
-    assert_eq!(
-        parsed_files,
+    assert_documents_eq(
+        &documents,
         &[
-            Document {
-                name: "a".into(),
-                range: text.find("fn a").unwrap()..(text.find("}").unwrap() + 1),
-                content: "
-                    The below code snippet is from file 'foo.rs'
-
-                    ```rust
-                    /// A doc comment
-                    /// that spans multiple lines
-                    fn a() {
-                        b
-                    }
-                    ```"
+            (
+                "
+                /// A doc comment
+                /// that spans multiple lines
+                #[gpui::test]
+                fn a() {
+                    b
+                }"
                 .unindent(),
-                embedding: vec![],
-            },
-            Document {
-                name: "C for D".into(),
-                range: text.find("impl C").unwrap()..(text.rfind("}").unwrap() + 1),
-                content: "
-                    The below code snippet is from file 'foo.rs'
-
-                    ```rust
-                    impl C for D {
-                    }
-                    ```"
+                text.find("fn a").unwrap(),
+            ),
+            (
+                "
+                impl C for D {
+                }"
                 .unindent(),
-                embedding: vec![],
-            }
-        ]
+                text.find("impl C").unwrap(),
+            ),
+            (
+                "
+                impl E {
+                    // This is also a preceding comment
+                    pub fn function_1() -> Option<()> { /* ... */ }
+
+                    // This is a preceding comment
+                    fn function_2() -> Result<()> { /* ... */ }
+                }"
+                .unindent(),
+                text.find("impl E").unwrap(),
+            ),
+            (
+                "
+                // This is also a preceding comment
+                pub fn function_1() -> Option<()> {
+                    todo!();
+                }"
+                .unindent(),
+                text.find("pub fn function_1").unwrap(),
+            ),
+            (
+                "
+                // This is a preceding comment
+                fn function_2() -> Result<()> {
+                    todo!();
+                }"
+                .unindent(),
+                text.find("fn function_2").unwrap(),
+            ),
+        ],
     );
 }
 
-#[gpui::test]
-async fn test_code_context_retrieval_javascript() {
-    let language = js_lang();
-    let mut retriever = CodeContextRetriever::new();
-
-    let text = "
-        /* globals importScripts, backend */
-        function _authorize() {}
-
-        /**
-         * Sometimes the frontend build is way faster than backend.
-         */
-        export async function authorizeBank() {
-            _authorize(pushModal, upgradingAccountId, {});
-        }
-
-        export class SettingsPage {
-            /* This is a test setting */
-            constructor(page) {
-                this.page = page;
-            }
-        }
-
-        /* This is a test comment */
-        class TestClass {}
-
-        /* Schema for editor_events in Clickhouse. */
-        export interface ClickhouseEditorEvent {
-            installation_id: string
-            operation: string
-        }
-        "
-    .unindent();
-
-    let parsed_files = retriever
-        .parse_file(Path::new("foo.js"), &text, language)
-        .unwrap();
-
-    let test_documents = &[
-        Document {
-            name: "function _authorize".into(),
-            range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
-            content: "
-                    The below code snippet is from file 'foo.js'
-
-                    ```javascript
-                    /* globals importScripts, backend */
-                    function _authorize() {}
-                    ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "async function authorizeBank".into(),
-            range: text.find("export async").unwrap()..223,
-            content: "
-                    The below code snippet is from file 'foo.js'
-
-                    ```javascript
-                    /**
-                     * Sometimes the frontend build is way faster than backend.
-                     */
-                    export async function authorizeBank() {
-                        _authorize(pushModal, upgradingAccountId, {});
-                    }
-                    ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "class SettingsPage".into(),
-            range: 225..343,
-            content: "
-                    The below code snippet is from file 'foo.js'
-
-                    ```javascript
-                    export class SettingsPage {
-                        /* This is a test setting */
-                        constructor(page) {
-                            this.page = page;
-                        }
-                    }
-                    ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "constructor".into(),
-            range: 290..341,
-            content: "
-                The below code snippet is from file 'foo.js'
-
-                ```javascript
-                /* This is a test setting */
-                constructor(page) {
-                        this.page = page;
-                    }
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "class TestClass".into(),
-            range: 374..392,
-            content: "
-                    The below code snippet is from file 'foo.js'
-
-                    ```javascript
-                    /* This is a test comment */
-                    class TestClass {}
-                    ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "interface ClickhouseEditorEvent".into(),
-            range: 440..532,
-            content: "
-                    The below code snippet is from file 'foo.js'
-
-                    ```javascript
-                    /* Schema for editor_events in Clickhouse. */
-                    export interface ClickhouseEditorEvent {
-                        installation_id: string
-                        operation: string
-                    }
-                    ```"
-            .unindent(),
-            embedding: vec![],
-        },
-    ];
-
-    for idx in 0..test_documents.len() {
-        assert_eq!(test_documents[idx], parsed_files[idx]);
-    }
-}
-
-#[gpui::test]
-async fn test_code_context_retrieval_elixir() {
-    let language = elixir_lang();
-    let mut retriever = CodeContextRetriever::new();
-
-    let text = r#"
-defmodule File.Stream do
-    @moduledoc """
-    Defines a `File.Stream` struct returned by `File.stream!/3`.
-
-    The following fields are public:
-
-    * `path`          - the file path
-    * `modes`         - the file modes
-    * `raw`           - a boolean indicating if bin functions should be used
-    * `line_or_bytes` - if reading should read lines or a given number of bytes
-    * `node`          - the node the file belongs to
-
-    """
-
-    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
-
-    @type t :: %__MODULE__{}
-
-    @doc false
-    def __build__(path, modes, line_or_bytes) do
-    raw = :lists.keyfind(:encoding, 1, modes) == false
-
-    modes =
-        case raw do
-        true ->
-            case :lists.keyfind(:read_ahead, 1, modes) do
-            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-            {:read_ahead, _} -> [:raw | modes]
-            false -> [:raw, :read_ahead | modes]
-            end
-
-        false ->
-            modes
-        end
-
-    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-    end
-"#
-    .unindent();
-
-    let parsed_files = retriever
-        .parse_file(Path::new("foo.ex"), &text, language)
-        .unwrap();
-
-    let test_documents = &[
-        Document{
-            name: "defmodule File.Stream".into(),
-            range: 0..1132,
-            content: r#"
-                The below code snippet is from file 'foo.ex'
-
-                ```elixir
-                defmodule File.Stream do
-                    @moduledoc """
-                    Defines a `File.Stream` struct returned by `File.stream!/3`.
-
-                    The following fields are public:
-
-                    * `path`          - the file path
-                    * `modes`         - the file modes
-                    * `raw`           - a boolean indicating if bin functions should be used
-                    * `line_or_bytes` - if reading should read lines or a given number of bytes
-                    * `node`          - the node the file belongs to
-
-                    """
-
-                    defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
-
-                    @type t :: %__MODULE__{}
-
-                    @doc false
-                    def __build__(path, modes, line_or_bytes) do
-                    raw = :lists.keyfind(:encoding, 1, modes) == false
-
-                    modes =
-                        case raw do
-                        true ->
-                            case :lists.keyfind(:read_ahead, 1, modes) do
-                            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-                            {:read_ahead, _} -> [:raw | modes]
-                            false -> [:raw, :read_ahead | modes]
-                            end
-
-                        false ->
-                            modes
-                        end
-
-                    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-                    end
-                ```"#.unindent(),
-            embedding: vec![],
-        },
-        Document {
-        name: "def __build__".into(),
-        range: 574..1132,
-        content: r#"
-The below code snippet is from file 'foo.ex'
-
-```elixir
-@doc false
-def __build__(path, modes, line_or_bytes) do
-    raw = :lists.keyfind(:encoding, 1, modes) == false
-
-    modes =
-        case raw do
-        true ->
-            case :lists.keyfind(:read_ahead, 1, modes) do
-            {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-            {:read_ahead, _} -> [:raw | modes]
-            false -> [:raw, :read_ahead | modes]
-            end
-
-        false ->
-            modes
-        end
-
-    %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-    end
-```"#
-            .unindent(),
-        embedding: vec![],
-    }];
-
-    for idx in 0..test_documents.len() {
-        assert_eq!(test_documents[idx], parsed_files[idx]);
-    }
+fn assert_documents_eq(
+    documents: &[Document],
+    expected_contents_and_start_offsets: &[(String, usize)],
+) {
+    assert_eq!(
+        documents
+            .iter()
+            .map(|document| (document.content.clone(), document.range.start))
+            .collect::<Vec<_>>(),
+        expected_contents_and_start_offsets
+    );
 }
 
-#[gpui::test]
-async fn test_code_context_retrieval_cpp() {
-    let language = cpp_lang();
-    let mut retriever = CodeContextRetriever::new();
-
-    let text = "
-    /**
-     * @brief Main function
-     * @returns 0 on exit
-     */
-    int main() { return 0; }
-
-    /**
-    * This is a test comment
-    */
-    class MyClass {       // The class
-        public:             // Access specifier
-        int myNum;        // Attribute (int variable)
-        string myString;  // Attribute (string variable)
-    };
-
-    // This is a test comment
-    enum Color { red, green, blue };
-
-    /** This is a preceeding block comment
-     * This is the second line
-     */
-    struct {           // Structure declaration
-        int myNum;       // Member (int variable)
-        string myString; // Member (string variable)
-    } myStructure;
-
-    /**
-    * @brief Matrix class.
-    */
-    template <typename T,
-              typename = typename std::enable_if<
-                std::is_integral<T>::value || std::is_floating_point<T>::value,
-                bool>::type>
-    class Matrix2 {
-        std::vector<std::vector<T>> _mat;
-
-    public:
-        /**
-        * @brief Constructor
-        * @tparam Integer ensuring integers are being evaluated and not other
-        * data types.
-        * @param size denoting the size of Matrix as size x size
-        */
-        template <typename Integer,
-                  typename = typename std::enable_if<std::is_integral<Integer>::value,
-                  Integer>::type>
-        explicit Matrix(const Integer size) {
-            for (size_t i = 0; i < size; ++i) {
-                _mat.emplace_back(std::vector<T>(size, 0));
-            }
-        }
-    }"
-    .unindent();
-
-    let parsed_files = retriever
-        .parse_file(Path::new("foo.cpp"), &text, language)
-        .unwrap();
-
-    let test_documents = &[
-        Document {
-            name: "int main".into(),
-            range: 54..78,
-            content: "
-                The below code snippet is from file 'foo.cpp'
-
-                ```cpp
-                /**
-                 * @brief Main function
-                 * @returns 0 on exit
-                 */
-                int main() { return 0; }
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "class MyClass".into(),
-            range: 112..295,
-            content: "
-                The below code snippet is from file 'foo.cpp'
-
-                ```cpp
-                /**
-                * This is a test comment
-                */
-                class MyClass {       // The class
-                    public:             // Access specifier
-                    int myNum;        // Attribute (int variable)
-                    string myString;  // Attribute (string variable)
-                }
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "enum Color".into(),
-            range: 324..355,
-            content: "
-                The below code snippet is from file 'foo.cpp'
-
-                ```cpp
-                // This is a test comment
-                enum Color { red, green, blue }
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "struct myStructure".into(),
-            range: 428..581,
-            content: "
-                The below code snippet is from file 'foo.cpp'
-
-                ```cpp
-                /** This is a preceeding block comment
-                 * This is the second line
-                 */
-                struct {           // Structure declaration
-                    int myNum;       // Member (int variable)
-                    string myString; // Member (string variable)
-                } myStructure;
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-        Document {
-            name: "class Matrix2".into(),
-            range: 613..1342,
-            content: "
-                The below code snippet is from file 'foo.cpp'
-
-                ```cpp
-                /**
-                * @brief Matrix class.
-                */
-                template <typename T,
-                          typename = typename std::enable_if<
-                            std::is_integral<T>::value || std::is_floating_point<T>::value,
-                            bool>::type>
-                class Matrix2 {
-                    std::vector<std::vector<T>> _mat;
-
-                public:
-                    /**
-                    * @brief Constructor
-                    * @tparam Integer ensuring integers are being evaluated and not other
-                    * data types.
-                    * @param size denoting the size of Matrix as size x size
-                    */
-                    template <typename Integer,
-                              typename = typename std::enable_if<std::is_integral<Integer>::value,
-                              Integer>::type>
-                    explicit Matrix(const Integer size) {
-                        for (size_t i = 0; i < size; ++i) {
-                            _mat.emplace_back(std::vector<T>(size, 0));
-                        }
-                    }
-                }
-                ```"
-            .unindent(),
-            embedding: vec![],
-        },
-    ];
-
-    for idx in 0..test_documents.len() {
-        assert_eq!(test_documents[idx], parsed_files[idx]);
-    }
-}
+// #[gpui::test]
+// async fn test_code_context_retrieval_javascript() {
+//     let language = js_lang();
+//     let mut retriever = CodeContextRetriever::new();
+
+//     let text = "
+//         /* globals importScripts, backend */
+//         function _authorize() {}
+
+//         /**
+//          * Sometimes the frontend build is way faster than backend.
+//          */
+//         export async function authorizeBank() {
+//             _authorize(pushModal, upgradingAccountId, {});
+//         }
+
+//         export class SettingsPage {
+//             /* This is a test setting */
+//             constructor(page) {
+//                 this.page = page;
+//             }
+//         }
+
+//         /* This is a test comment */
+//         class TestClass {}
+
+//         /* Schema for editor_events in Clickhouse. */
+//         export interface ClickhouseEditorEvent {
+//             installation_id: string
+//             operation: string
+//         }
+//         "
+//     .unindent();
+
+//     let parsed_files = retriever
+//         .parse_file(Path::new("foo.js"), &text, language)
+//         .unwrap();
+
+//     let test_documents = &[
+//         Document {
+//             name: "function _authorize".into(),
+//             range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
+//             content: "
+//                     The below code snippet is from file 'foo.js'
+
+//                     ```javascript
+//                     /* globals importScripts, backend */
+//                     function _authorize() {}
+//                     ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "async function authorizeBank".into(),
+//             range: text.find("export async").unwrap()..223,
+//             content: "
+//                     The below code snippet is from file 'foo.js'
+
+//                     ```javascript
+//                     /**
+//                      * Sometimes the frontend build is way faster than backend.
+//                      */
+//                     export async function authorizeBank() {
+//                         _authorize(pushModal, upgradingAccountId, {});
+//                     }
+//                     ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "class SettingsPage".into(),
+//             range: 225..343,
+//             content: "
+//                     The below code snippet is from file 'foo.js'
+
+//                     ```javascript
+//                     export class SettingsPage {
+//                         /* This is a test setting */
+//                         constructor(page) {
+//                             this.page = page;
+//                         }
+//                     }
+//                     ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "constructor".into(),
+//             range: 290..341,
+//             content: "
+//                 The below code snippet is from file 'foo.js'
+
+//                 ```javascript
+//                 /* This is a test setting */
+//                 constructor(page) {
+//                         this.page = page;
+//                     }
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "class TestClass".into(),
+//             range: 374..392,
+//             content: "
+//                     The below code snippet is from file 'foo.js'
+
+//                     ```javascript
+//                     /* This is a test comment */
+//                     class TestClass {}
+//                     ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "interface ClickhouseEditorEvent".into(),
+//             range: 440..532,
+//             content: "
+//                     The below code snippet is from file 'foo.js'
+
+//                     ```javascript
+//                     /* Schema for editor_events in Clickhouse. */
+//                     export interface ClickhouseEditorEvent {
+//                         installation_id: string
+//                         operation: string
+//                     }
+//                     ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//     ];
+
+//     for idx in 0..test_documents.len() {
+//         assert_eq!(test_documents[idx], parsed_files[idx]);
+//     }
+// }
+
+// #[gpui::test]
+// async fn test_code_context_retrieval_elixir() {
+//     let language = elixir_lang();
+//     let mut retriever = CodeContextRetriever::new();
+
+//     let text = r#"
+// defmodule File.Stream do
+//     @moduledoc """
+//     Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+//     The following fields are public:
+
+//     * `path`          - the file path
+//     * `modes`         - the file modes
+//     * `raw`           - a boolean indicating if bin functions should be used
+//     * `line_or_bytes` - if reading should read lines or a given number of bytes
+//     * `node`          - the node the file belongs to
+
+//     """
+
+//     defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+//     @type t :: %__MODULE__{}
+
+//     @doc false
+//     def __build__(path, modes, line_or_bytes) do
+//     raw = :lists.keyfind(:encoding, 1, modes) == false
+
+//     modes =
+//         case raw do
+//         true ->
+//             case :lists.keyfind(:read_ahead, 1, modes) do
+//             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+//             {:read_ahead, _} -> [:raw | modes]
+//             false -> [:raw, :read_ahead | modes]
+//             end
+
+//         false ->
+//             modes
+//         end
+
+//     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+//     end
+// "#
+//     .unindent();
+
+//     let parsed_files = retriever
+//         .parse_file(Path::new("foo.ex"), &text, language)
+//         .unwrap();
+
+//     let test_documents = &[
+//         Document{
+//             name: "defmodule File.Stream".into(),
+//             range: 0..1132,
+//             content: r#"
+//                 The below code snippet is from file 'foo.ex'
+
+//                 ```elixir
+//                 defmodule File.Stream do
+//                     @moduledoc """
+//                     Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+//                     The following fields are public:
+
+//                     * `path`          - the file path
+//                     * `modes`         - the file modes
+//                     * `raw`           - a boolean indicating if bin functions should be used
+//                     * `line_or_bytes` - if reading should read lines or a given number of bytes
+//                     * `node`          - the node the file belongs to
+
+//                     """
+
+//                     defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+//                     @type t :: %__MODULE__{}
+
+//                     @doc false
+//                     def __build__(path, modes, line_or_bytes) do
+//                     raw = :lists.keyfind(:encoding, 1, modes) == false
+
+//                     modes =
+//                         case raw do
+//                         true ->
+//                             case :lists.keyfind(:read_ahead, 1, modes) do
+//                             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+//                             {:read_ahead, _} -> [:raw | modes]
+//                             false -> [:raw, :read_ahead | modes]
+//                             end
+
+//                         false ->
+//                             modes
+//                         end
+
+//                     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+//                     end
+//                 ```"#.unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//         name: "def __build__".into(),
+//         range: 574..1132,
+//         content: r#"
+// The below code snippet is from file 'foo.ex'
+
+// ```elixir
+// @doc false
+// def __build__(path, modes, line_or_bytes) do
+//     raw = :lists.keyfind(:encoding, 1, modes) == false
+
+//     modes =
+//         case raw do
+//         true ->
+//             case :lists.keyfind(:read_ahead, 1, modes) do
+//             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+//             {:read_ahead, _} -> [:raw | modes]
+//             false -> [:raw, :read_ahead | modes]
+//             end
+
+//         false ->
+//             modes
+//         end
+
+//     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+//     end
+// ```"#
+//             .unindent(),
+//         embedding: vec![],
+//     }];
+
+//     for idx in 0..test_documents.len() {
+//         assert_eq!(test_documents[idx], parsed_files[idx]);
+//     }
+// }
+
+// #[gpui::test]
+// async fn test_code_context_retrieval_cpp() {
+//     let language = cpp_lang();
+//     let mut retriever = CodeContextRetriever::new();
+
+//     let text = "
+//     /**
+//      * @brief Main function
+//      * @returns 0 on exit
+//      */
+//     int main() { return 0; }
+
+//     /**
+//     * This is a test comment
+//     */
+//     class MyClass {       // The class
+//         public:             // Access specifier
+//         int myNum;        // Attribute (int variable)
+//         string myString;  // Attribute (string variable)
+//     };
+
+//     // This is a test comment
+//     enum Color { red, green, blue };
+
+//     /** This is a preceding block comment
+//      * This is the second line
+//      */
+//     struct {           // Structure declaration
+//         int myNum;       // Member (int variable)
+//         string myString; // Member (string variable)
+//     } myStructure;
+
+//     /**
+//     * @brief Matrix class.
+//     */
+//     template <typename T,
+//               typename = typename std::enable_if<
+//                 std::is_integral<T>::value || std::is_floating_point<T>::value,
+//                 bool>::type>
+//     class Matrix2 {
+//         std::vector<std::vector<T>> _mat;
+
+//     public:
+//         /**
+//         * @brief Constructor
+//         * @tparam Integer ensuring integers are being evaluated and not other
+//         * data types.
+//         * @param size denoting the size of Matrix as size x size
+//         */
+//         template <typename Integer,
+//                   typename = typename std::enable_if<std::is_integral<Integer>::value,
+//                   Integer>::type>
+//         explicit Matrix(const Integer size) {
+//             for (size_t i = 0; i < size; ++i) {
+//                 _mat.emplace_back(std::vector<T>(size, 0));
+//             }
+//         }
+//     }"
+//     .unindent();
+
+//     let parsed_files = retriever
+//         .parse_file(Path::new("foo.cpp"), &text, language)
+//         .unwrap();
+
+//     let test_documents = &[
+//         Document {
+//             name: "int main".into(),
+//             range: 54..78,
+//             content: "
+//                 The below code snippet is from file 'foo.cpp'
+
+//                 ```cpp
+//                 /**
+//                  * @brief Main function
+//                  * @returns 0 on exit
+//                  */
+//                 int main() { return 0; }
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "class MyClass".into(),
+//             range: 112..295,
+//             content: "
+//                 The below code snippet is from file 'foo.cpp'
+
+//                 ```cpp
+//                 /**
+//                 * This is a test comment
+//                 */
+//                 class MyClass {       // The class
+//                     public:             // Access specifier
+//                     int myNum;        // Attribute (int variable)
+//                     string myString;  // Attribute (string variable)
+//                 }
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "enum Color".into(),
+//             range: 324..355,
+//             content: "
+//                 The below code snippet is from file 'foo.cpp'
+
+//                 ```cpp
+//                 // This is a test comment
+//                 enum Color { red, green, blue }
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "struct myStructure".into(),
+//             range: 428..581,
+//             content: "
+//                 The below code snippet is from file 'foo.cpp'
+
+//                 ```cpp
+//                 /** This is a preceding block comment
+//                  * This is the second line
+//                  */
+//                 struct {           // Structure declaration
+//                     int myNum;       // Member (int variable)
+//                     string myString; // Member (string variable)
+//                 } myStructure;
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//         Document {
+//             name: "class Matrix2".into(),
+//             range: 613..1342,
+//             content: "
+//                 The below code snippet is from file 'foo.cpp'
+
+//                 ```cpp
+//                 /**
+//                 * @brief Matrix class.
+//                 */
+//                 template <typename T,
+//                           typename = typename std::enable_if<
+//                             std::is_integral<T>::value || std::is_floating_point<T>::value,
+//                             bool>::type>
+//                 class Matrix2 {
+//                     std::vector<std::vector<T>> _mat;
+
+//                 public:
+//                     /**
+//                     * @brief Constructor
+//                     * @tparam Integer ensuring integers are being evaluated and not other
+//                     * data types.
+//                     * @param size denoting the size of Matrix as size x size
+//                     */
+//                     template <typename Integer,
+//                               typename = typename std::enable_if<std::is_integral<Integer>::value,
+//                               Integer>::type>
+//                     explicit Matrix(const Integer size) {
+//                         for (size_t i = 0; i < size; ++i) {
+//                             _mat.emplace_back(std::vector<T>(size, 0));
+//                         }
+//                     }
+//                 }
+//                 ```"
+//             .unindent(),
+//             embedding: vec![],
+//         },
+//     ];
+
+//     for idx in 0..test_documents.len() {
+//         assert_eq!(test_documents[idx], parsed_files[idx]);
+//     }
+// }
 
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {
@@ -826,6 +872,7 @@ fn rust_lang() -> Arc<Language> {
             LanguageConfig {
                 name: "Rust".into(),
                 path_suffixes: vec!["rs".into()],
+                collapsed_placeholder: " /* ... */ ".to_string(),
                 ..Default::default()
             },
             Some(tree_sitter_rust::language()),
@@ -833,54 +880,32 @@ fn rust_lang() -> Arc<Language> {
         .with_embedding_query(
             r#"
             (
-                (line_comment)* @context
-                .
-                (enum_item
-                    name: (_) @name) @item
-            )
-
-            (
-                (line_comment)* @context
+                [(line_comment) (attribute_item)]* @context
                 .
-                (struct_item
-                    name: (_) @name) @item
-            )
+                [
+                    (struct_item
+                        name: (_) @name)
 
-            (
-                (line_comment)* @context
-                .
-                (impl_item
-                    trait: (_)? @name
-                    "for"? @name
-                    type: (_) @name) @item
-            )
+                    (enum_item
+                        name: (_) @name)
 
-            (
-                (line_comment)* @context
-                .
-                (trait_item
-                    name: (_) @name) @item
-            )
+                    (impl_item
+                        trait: (_)? @name
+                        "for"? @name
+                        type: (_) @name)
 
-            (
-                (line_comment)* @context
-                .
-                (function_item
-                    name: (_) @name) @item
-            )
+                    (trait_item
+                        name: (_) @name)
 
-            (
-                (line_comment)* @context
-                .
-                (macro_definition
-                    name: (_) @name) @item
-            )
+                    (function_item
+                        name: (_) @name
+                        body: (block
+                            "{" @keep
+                            "}" @keep) @collapse)
 
-            (
-                (line_comment)* @context
-                .
-                (function_signature_item
-                    name: (_) @name) @item
+                    (macro_definition
+                        name: (_) @name)
+                ] @item
             )
             "#,
         )
@@ -1023,3 +1048,15 @@ fn elixir_lang() -> Arc<Language> {
         .unwrap(),
     )
 }
+
+#[gpui::test]
+fn test_subtract_ranges() {
+    // collapsed_ranges: Vec<Range<usize>>, keep_ranges: Vec<Range<usize>>
+
+    assert_eq!(
+        subtract_ranges(&[0..5, 10..21], &[0..1, 4..5]),
+        vec![1..4, 10..21]
+    );
+
+    assert_eq!(subtract_ranges(&[0..5], &[1..2]), &[0..1, 2..5]);
+}
diff --git a/crates/zed/src/languages/rust/config.toml b/crates/zed/src/languages/rust/config.toml
index 705287f0a758045ce8179bfc8a6bf18e564970b8..8216ba0a74a90a16f2e29be77021f56530649c52 100644
--- a/crates/zed/src/languages/rust/config.toml
+++ b/crates/zed/src/languages/rust/config.toml
@@ -10,3 +10,4 @@ brackets = [
     { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
     { start = "/*", end = " */", close = true, newline = false, not_in = ["string", "comment"] },
 ]
+collapsed_placeholder = " /* ... */ "
diff --git a/crates/zed/src/languages/rust/embedding.scm b/crates/zed/src/languages/rust/embedding.scm
index 66e4083de5f0fe8b1adfa2ea657668e4453e4b61..e4218382a9b1ceb7e087b0d9247d5a4e66b77236 100644
--- a/crates/zed/src/languages/rust/embedding.scm
+++ b/crates/zed/src/languages/rust/embedding.scm
@@ -1,50 +1,28 @@
 (
-    (line_comment)* @context
+    [(line_comment) (attribute_item)]* @context
     .
-    (enum_item
-        name: (_) @name) @item
-)
+    [
+        (struct_item
+            name: (_) @name)
 
-(
-    (line_comment)* @context
-    .
-    (struct_item
-        name: (_) @name) @item
-)
+        (enum_item
+            name: (_) @name)
 
-(
-    (line_comment)* @context
-    .
-    (impl_item
-        trait: (_)? @name
-        "for"? @name
-        type: (_) @name) @item
-)
+        (impl_item
+            trait: (_)? @name
+            "for"? @name
+            type: (_) @name)
 
-(
-    (line_comment)* @context
-    .
-    (trait_item
-        name: (_) @name) @item
-)
+        (trait_item
+            name: (_) @name)
 
-(
-    (line_comment)* @context
-    .
-    (function_item
-        name: (_) @name) @item
-)
-
-(
-    (line_comment)* @context
-    .
-    (macro_definition
-        name: (_) @name) @item
-)
+        (function_item
+            name: (_) @name
+            body: (block
+                "{" @keep
+                "}" @keep) @collapse)
 
-(
-    (line_comment)* @context
-    .
-    (function_signature_item
-        name: (_) @name) @item
-)
+        (macro_definition
+            name: (_) @name)
+        ] @item
+    )

From efe973ebe2f6c4c92159542eb4d8e1bc12455df4 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 19 Jul 2023 16:52:44 -0400
Subject: [PATCH 24/34] add embedding query for json with nested arrays and
 strings

Co-authored-by: maxbrunsfeld <max@zed.dev>
---
 Cargo.lock                                    |   1 +
 crates/language/src/language.rs               |   4 +-
 crates/semantic_index/Cargo.toml              |   1 +
 crates/semantic_index/src/parsing.rs          | 123 ++++++++++--------
 crates/semantic_index/src/semantic_index.rs   |   2 +-
 .../src/semantic_index_tests.rs               | 103 ++++++++++++++-
 crates/zed/src/languages/json/embedding.scm   |  14 ++
 7 files changed, 189 insertions(+), 59 deletions(-)
 create mode 100644 crates/zed/src/languages/json/embedding.scm

diff --git a/Cargo.lock b/Cargo.lock
index 8ea6f61da04f215b91b31941ccce795be778a204..75f66163e3fbf5048b01cbf5079f00f2e9c5ce46 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6502,6 +6502,7 @@ dependencies = [
  "tree-sitter",
  "tree-sitter-cpp",
  "tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tree-sitter-json 0.19.0",
  "tree-sitter-rust",
  "tree-sitter-toml 0.20.0",
  "tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs
index ec233716d6ce5345515600b14e00a212b3dcb3a5..e34358c7c5def79e8141e86e54693bcc99188da0 100644
--- a/crates/language/src/language.rs
+++ b/crates/language/src/language.rs
@@ -526,7 +526,7 @@ pub struct OutlineConfig {
 pub struct EmbeddingConfig {
     pub query: Query,
     pub item_capture_ix: u32,
-    pub name_capture_ix: u32,
+    pub name_capture_ix: Option<u32>,
     pub context_capture_ix: Option<u32>,
     pub collapse_capture_ix: Option<u32>,
     pub keep_capture_ix: Option<u32>,
@@ -1263,7 +1263,7 @@ impl Language {
                 ("collapse", &mut collapse_capture_ix),
             ],
         );
-        if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
+        if let Some(item_capture_ix) = item_capture_ix {
             grammar.embedding_config = Some(EmbeddingConfig {
                 query,
                 item_capture_ix,
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 1b3169bfe41940eef5c863901c560e106acf4816..35b97245124e8922d5e7a46a369e26c71af7731a 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -54,6 +54,7 @@ ctor.workspace = true
 env_logger.workspace = true
 
 tree-sitter-typescript = "*"
+tree-sitter-json = "*"
 tree-sitter-rust = "*"
 tree-sitter-toml = "*"
 tree-sitter-cpp = "*"
diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs
index 0d2aeb60fb24dcb347c3a0f870ef6e348e08a88a..c952ef3a4edf939ddc8ad17c9bab6e17ec5e0cce 100644
--- a/crates/semantic_index/src/parsing.rs
+++ b/crates/semantic_index/src/parsing.rs
@@ -1,6 +1,12 @@
 use anyhow::{anyhow, Ok, Result};
 use language::{Grammar, Language};
-use std::{cmp, collections::HashSet, ops::Range, path::Path, sync::Arc};
+use std::{
+    cmp::{self, Reverse},
+    collections::HashSet,
+    ops::Range,
+    path::Path,
+    sync::Arc,
+};
 use tree_sitter::{Parser, QueryCursor};
 
 #[derive(Debug, PartialEq, Clone)]
@@ -15,7 +21,7 @@ const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
 const ENTIRE_FILE_TEMPLATE: &str =
     "The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
-pub const PARSEABLE_ENTIRE_FILE_TYPES: [&str; 4] = ["TOML", "YAML", "JSON", "CSS"];
+pub const PARSEABLE_ENTIRE_FILE_TYPES: &[&str] = &["TOML", "YAML", "CSS"];
 
 pub struct CodeContextRetriever {
     pub parser: Parser,
@@ -30,8 +36,8 @@ pub struct CodeContextRetriever {
 #[derive(Debug, Clone)]
 pub struct CodeContextMatch {
     pub start_col: usize,
-    pub item_range: Range<usize>,
-    pub name_range: Range<usize>,
+    pub item_range: Option<Range<usize>>,
+    pub name_range: Option<Range<usize>>,
     pub context_ranges: Vec<Range<usize>>,
     pub collapse_ranges: Vec<Range<usize>>,
 }
@@ -44,7 +50,7 @@ impl CodeContextRetriever {
         }
     }
 
-    fn _parse_entire_file(
+    fn parse_entire_file(
         &self,
         relative_path: &Path,
         language_name: Arc<str>,
@@ -97,7 +103,7 @@ impl CodeContextRetriever {
                 if capture.index == embedding_config.item_capture_ix {
                     item_range = Some(capture.node.byte_range());
                     start_col = capture.node.start_position().column;
-                } else if capture.index == embedding_config.name_capture_ix {
+                } else if Some(capture.index) == embedding_config.name_capture_ix {
                     name_range = Some(capture.node.byte_range());
                 } else if Some(capture.index) == embedding_config.context_capture_ix {
                     context_ranges.push(capture.node.byte_range());
@@ -108,16 +114,13 @@ impl CodeContextRetriever {
                 }
             }
 
-            if item_range.is_some() && name_range.is_some() {
-                let item_range = item_range.unwrap();
-                captures.push(CodeContextMatch {
-                    start_col,
-                    item_range,
-                    name_range: name_range.unwrap(),
-                    context_ranges,
-                    collapse_ranges: subtract_ranges(&collapse_ranges, &keep_ranges),
-                });
-            }
+            captures.push(CodeContextMatch {
+                start_col,
+                item_range,
+                name_range,
+                context_ranges,
+                collapse_ranges: subtract_ranges(&collapse_ranges, &keep_ranges),
+            });
         }
         Ok(captures)
     }
@@ -129,7 +132,12 @@ impl CodeContextRetriever {
         language: Arc<Language>,
     ) -> Result<Vec<Document>> {
         let language_name = language.name();
-        let mut documents = self.parse_file(relative_path, content, language)?;
+
+        if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
+            return self.parse_entire_file(relative_path, language_name, &content);
+        }
+
+        let mut documents = self.parse_file(content, language)?;
         for document in &mut documents {
             document.content = CODE_CONTEXT_TEMPLATE
                 .replace("<path>", relative_path.to_string_lossy().as_ref())
@@ -139,16 +147,7 @@ impl CodeContextRetriever {
         Ok(documents)
     }
 
-    pub fn parse_file(
-        &mut self,
-        relative_path: &Path,
-        content: &str,
-        language: Arc<Language>,
-    ) -> Result<Vec<Document>> {
-        if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref()) {
-            return self._parse_entire_file(relative_path, language.name(), &content);
-        }
-
+    pub fn parse_file(&mut self, content: &str, language: Arc<Language>) -> Result<Vec<Document>> {
         let grammar = language
             .grammar()
             .ok_or_else(|| anyhow!("no grammar for language"))?;
@@ -163,32 +162,49 @@ impl CodeContextRetriever {
         let mut collapsed_ranges_within = Vec::new();
         let mut parsed_name_ranges = HashSet::new();
         for (i, context_match) in matches.iter().enumerate() {
-            if parsed_name_ranges.contains(&context_match.name_range) {
+            // Items which are collapsible but not embeddable have no item range
+            let item_range = if let Some(item_range) = context_match.item_range.clone() {
+                item_range
+            } else {
                 continue;
+            };
+
+            // Checks for deduplication
+            let name;
+            if let Some(name_range) = context_match.name_range.clone() {
+                name = content
+                    .get(name_range.clone())
+                    .map_or(String::new(), |s| s.to_string());
+                if parsed_name_ranges.contains(&name_range) {
+                    continue;
+                }
+                parsed_name_ranges.insert(name_range);
+            } else {
+                name = String::new();
             }
 
             collapsed_ranges_within.clear();
-            for remaining_match in &matches[(i + 1)..] {
-                if context_match
-                    .item_range
-                    .contains(&remaining_match.item_range.start)
-                    && context_match
-                        .item_range
-                        .contains(&remaining_match.item_range.end)
-                {
-                    collapsed_ranges_within.extend(remaining_match.collapse_ranges.iter().cloned());
-                } else {
-                    break;
+            'outer: for remaining_match in &matches[(i + 1)..] {
+                for collapsed_range in &remaining_match.collapse_ranges {
+                    if item_range.start <= collapsed_range.start
+                        && item_range.end >= collapsed_range.end
+                    {
+                        collapsed_ranges_within.push(collapsed_range.clone());
+                    } else {
+                        break 'outer;
+                    }
                 }
             }
 
+            collapsed_ranges_within.sort_by_key(|r| (r.start, Reverse(r.end)));
+
             let mut document_content = String::new();
             for context_range in &context_match.context_ranges {
                 document_content.push_str(&content[context_range.clone()]);
                 document_content.push_str("\n");
             }
 
-            let mut offset = context_match.item_range.start;
+            let mut offset = item_range.start;
             for collapsed_range in &collapsed_ranges_within {
                 if collapsed_range.start > offset {
                     add_content_from_range(
@@ -197,29 +213,30 @@ impl CodeContextRetriever {
                         offset..collapsed_range.start,
                         context_match.start_col,
                     );
+                    offset = collapsed_range.start;
+                }
+
+                if collapsed_range.end > offset {
+                    document_content.push_str(placeholder);
+                    offset = collapsed_range.end;
                 }
-                document_content.push_str(placeholder);
-                offset = collapsed_range.end;
             }
 
-            if offset < context_match.item_range.end {
+            if offset < item_range.end {
                 add_content_from_range(
                     &mut document_content,
                     content,
-                    offset..context_match.item_range.end,
+                    offset..item_range.end,
                     context_match.start_col,
                 );
             }
 
-            if let Some(name) = content.get(context_match.name_range.clone()) {
-                parsed_name_ranges.insert(context_match.name_range.clone());
-                documents.push(Document {
-                    name: name.to_string(),
-                    content: document_content,
-                    range: context_match.item_range.clone(),
-                    embedding: vec![],
-                })
-            }
+            documents.push(Document {
+                name,
+                content: document_content,
+                range: item_range.clone(),
+                embedding: vec![],
+            })
         }
 
         return Ok(documents);
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 271fd741a643d1e04d5afe57a50b70b6e391cbf7..6e0477491518a0c4a18ebfa1c24ddaf51eaf1948 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -33,7 +33,7 @@ use util::{
     ResultExt,
 };
 
-const SEMANTIC_INDEX_VERSION: usize = 4;
+const SEMANTIC_INDEX_VERSION: usize = 5;
 const EMBEDDINGS_BATCH_SIZE: usize = 80;
 
 pub fn init(
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index c54d5079d37f3b8ad5ce4dbb788f5eb5f68b02c8..31c96ca207bb3da1ace202bd81df461f27ba229b 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -170,9 +170,7 @@ async fn test_code_context_retrieval_rust() {
     "
     .unindent();
 
-    let documents = retriever
-        .parse_file(Path::new("foo.rs"), &text, language)
-        .unwrap();
+    let documents = retriever.parse_file(&text, language).unwrap();
 
     assert_documents_eq(
         &documents,
@@ -229,6 +227,76 @@ async fn test_code_context_retrieval_rust() {
     );
 }
 
+#[gpui::test]
+async fn test_code_context_retrieval_json() {
+    let language = json_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = r#"
+        {
+            "array": [1, 2, 3, 4],
+            "string": "abcdefg",
+            "nested_object": {
+                "array_2": [5, 6, 7, 8],
+                "string_2": "hijklmnop",
+                "boolean": true,
+                "none": null
+            }
+        }
+    "#
+    .unindent();
+
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
+
+    assert_documents_eq(
+        &documents,
+        &[(
+            r#"
+                {
+                    "array": [],
+                    "string": "",
+                    "nested_object": {
+                        "array_2": [],
+                        "string_2": "",
+                        "boolean": true,
+                        "none": null
+                    }
+                }"#
+            .unindent(),
+            text.find("{").unwrap(),
+        )],
+    );
+
+    let text = r#"
+        [
+            {
+                "name": "somebody",
+                "age": 42
+            },
+            {
+                "name": "somebody else",
+                "age": 43
+            }
+        ]
+    "#
+    .unindent();
+
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
+
+    assert_documents_eq(
+        &documents,
+        &[(
+            r#"
+            [{
+                    "name": "",
+                    "age": 42
+                }]"#
+            .unindent(),
+            text.find("[").unwrap(),
+        )],
+    );
+}
+
 fn assert_documents_eq(
     documents: &[Document],
     expected_contents_and_start_offsets: &[(String, usize)],
@@ -913,6 +981,35 @@ fn rust_lang() -> Arc<Language> {
     )
 }
 
+fn json_lang() -> Arc<Language> {
+    Arc::new(
+        Language::new(
+            LanguageConfig {
+                name: "JSON".into(),
+                path_suffixes: vec!["json".into()],
+                ..Default::default()
+            },
+            Some(tree_sitter_json::language()),
+        )
+        .with_embedding_query(
+            r#"
+            (document) @item
+
+            (array
+                "[" @keep
+                .
+                (object)? @keep
+                "]" @keep) @collapse
+
+            (pair value: (string
+                "\"" @keep
+                "\"" @keep) @collapse)
+            "#,
+        )
+        .unwrap(),
+    )
+}
+
 fn toml_lang() -> Arc<Language> {
     Arc::new(Language::new(
         LanguageConfig {
diff --git a/crates/zed/src/languages/json/embedding.scm b/crates/zed/src/languages/json/embedding.scm
new file mode 100644
index 0000000000000000000000000000000000000000..fa286e3880aa67d49f710f991d6839ebbd306104
--- /dev/null
+++ b/crates/zed/src/languages/json/embedding.scm
@@ -0,0 +1,14 @@
+; Only produce one embedding for the entire file.
+(document) @item
+
+; Collapse arrays, except for the first object.
+(array
+  "[" @keep
+  .
+  (object)? @keep
+  "]" @keep) @collapse
+
+; Collapse string values (but not keys).
+(pair value: (string
+  "\"" @keep
+  "\"" @keep) @collapse)

From e02d6bc0d41fe5006307833f5e4c2cd62ba7add1 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Thu, 20 Jul 2023 13:46:27 -0400
Subject: [PATCH 25/34] add glob filtering functionality to semantic search

---
 Cargo.lock                                    |  1 +
 crates/search/src/project_search.rs           | 60 +++++++++++++++++--
 crates/semantic_index/Cargo.toml              |  1 +
 crates/semantic_index/src/db.rs               | 39 ++++++++----
 crates/semantic_index/src/semantic_index.rs   | 13 +++-
 .../src/semantic_index_tests.rs               | 57 +++++++++++++++++-
 6 files changed, 149 insertions(+), 22 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 75f66163e3fbf5048b01cbf5079f00f2e9c5ce46..f534a4fe7d68a362fd910f0bd02cbf72b24955fa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6477,6 +6477,7 @@ dependencies = [
  "editor",
  "env_logger 0.9.3",
  "futures 0.3.28",
+ "globset",
  "gpui",
  "isahc",
  "language",
diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 5feb94426eb60c67a756c564982a826699bd20a1..25fc897707af6be8b97b277a2d65b8d4cf1eeb17 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -187,14 +187,26 @@ impl ProjectSearch {
         cx.notify();
     }
 
-    fn semantic_search(&mut self, query: String, cx: &mut ModelContext<Self>) {
+    fn semantic_search(
+        &mut self,
+        query: String,
+        include_files: Vec<GlobMatcher>,
+        exclude_files: Vec<GlobMatcher>,
+        cx: &mut ModelContext<Self>,
+    ) {
         let search = SemanticIndex::global(cx).map(|index| {
             index.update(cx, |semantic_index, cx| {
-                semantic_index.search_project(self.project.clone(), query.clone(), 10, cx)
+                semantic_index.search_project(
+                    self.project.clone(),
+                    query.clone(),
+                    10,
+                    include_files,
+                    exclude_files,
+                    cx,
+                )
             })
         });
         self.search_id += 1;
-        // self.active_query = Some(query);
         self.match_ranges.clear();
         self.pending_search = Some(cx.spawn(|this, mut cx| async move {
             let results = search?.await.log_err()?;
@@ -638,8 +650,13 @@ impl ProjectSearchView {
             }
 
             let query = self.query_editor.read(cx).text(cx);
-            self.model
-                .update(cx, |model, cx| model.semantic_search(query, cx));
+            if let Some((included_files, exclude_files)) =
+                self.get_included_and_excluded_globsets(cx)
+            {
+                self.model.update(cx, |model, cx| {
+                    model.semantic_search(query, included_files, exclude_files, cx)
+                });
+            }
             return;
         }
 
@@ -648,6 +665,39 @@ impl ProjectSearchView {
         }
     }
 
+    fn get_included_and_excluded_globsets(
+        &mut self,
+        cx: &mut ViewContext<Self>,
+    ) -> Option<(Vec<GlobMatcher>, Vec<GlobMatcher>)> {
+        let text = self.query_editor.read(cx).text(cx);
+        let included_files =
+            match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) {
+                Ok(included_files) => {
+                    self.panels_with_errors.remove(&InputPanel::Include);
+                    included_files
+                }
+                Err(_e) => {
+                    self.panels_with_errors.insert(InputPanel::Include);
+                    cx.notify();
+                    return None;
+                }
+            };
+        let excluded_files =
+            match Self::load_glob_set(&self.excluded_files_editor.read(cx).text(cx)) {
+                Ok(excluded_files) => {
+                    self.panels_with_errors.remove(&InputPanel::Exclude);
+                    excluded_files
+                }
+                Err(_e) => {
+                    self.panels_with_errors.insert(InputPanel::Exclude);
+                    cx.notify();
+                    return None;
+                }
+            };
+
+        Some((included_files, excluded_files))
+    }
+
     fn build_search_query(&mut self, cx: &mut ViewContext<Self>) -> Option<SearchQuery> {
         let text = self.query_editor.read(cx).text(cx);
         let included_files =
diff --git a/crates/semantic_index/Cargo.toml b/crates/semantic_index/Cargo.toml
index 35b97245124e8922d5e7a46a369e26c71af7731a..a1f126bfb841ecb8334aeca391ac4959ef9f57b0 100644
--- a/crates/semantic_index/Cargo.toml
+++ b/crates/semantic_index/Cargo.toml
@@ -37,6 +37,7 @@ tiktoken-rs = "0.5.0"
 parking_lot.workspace = true
 rand.workspace = true
 schemars.workspace = true
+globset.workspace = true
 
 [dev-dependencies]
 gpui = { path = "../gpui", features = ["test-support"] }
diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index fd99594aab578919f80bd8236270b352a8540993..3ba85a275d0a0d6b197bbad22d5ad5bd792a2fbf 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -1,5 +1,6 @@
 use crate::{parsing::Document, SEMANTIC_INDEX_VERSION};
 use anyhow::{anyhow, Context, Result};
+use globset::{Glob, GlobMatcher};
 use project::Fs;
 use rpc::proto::Timestamp;
 use rusqlite::{
@@ -252,18 +253,30 @@ impl VectorDatabase {
         worktree_ids: &[i64],
         query_embedding: &Vec<f32>,
         limit: usize,
+        include_globs: Vec<GlobMatcher>,
+        exclude_globs: Vec<GlobMatcher>,
     ) -> Result<Vec<(i64, PathBuf, Range<usize>)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
-        self.for_each_document(&worktree_ids, |id, embedding| {
-            let similarity = dot(&embedding, &query_embedding);
-            let ix = match results
-                .binary_search_by(|(_, s)| similarity.partial_cmp(&s).unwrap_or(Ordering::Equal))
+        self.for_each_document(&worktree_ids, |relative_path, id, embedding| {
+            if (include_globs.is_empty()
+                || include_globs
+                    .iter()
+                    .any(|include_glob| include_glob.is_match(relative_path.clone())))
+                && (exclude_globs.is_empty()
+                    || !exclude_globs
+                        .iter()
+                        .any(|exclude_glob| exclude_glob.is_match(relative_path.clone())))
             {
-                Ok(ix) => ix,
-                Err(ix) => ix,
-            };
-            results.insert(ix, (id, similarity));
-            results.truncate(limit);
+                let similarity = dot(&embedding, &query_embedding);
+                let ix = match results.binary_search_by(|(_, s)| {
+                    similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)
+                }) {
+                    Ok(ix) => ix,
+                    Err(ix) => ix,
+                };
+                results.insert(ix, (id, similarity));
+                results.truncate(limit);
+            }
         })?;
 
         let ids = results.into_iter().map(|(id, _)| id).collect::<Vec<_>>();
@@ -273,12 +286,12 @@ impl VectorDatabase {
     fn for_each_document(
         &self,
         worktree_ids: &[i64],
-        mut f: impl FnMut(i64, Vec<f32>),
+        mut f: impl FnMut(String, i64, Vec<f32>),
     ) -> Result<()> {
         let mut query_statement = self.db.prepare(
             "
             SELECT
-                documents.id, documents.embedding
+                files.relative_path, documents.id, documents.embedding
             FROM
                 documents, files
             WHERE
@@ -289,10 +302,10 @@ impl VectorDatabase {
 
         query_statement
             .query_map(params![ids_to_sql(worktree_ids)], |row| {
-                Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
+                Ok((row.get(0)?, row.get(1)?, row.get::<_, Embedding>(2)?))
             })?
             .filter_map(|row| row.ok())
-            .for_each(|(id, embedding)| f(id, embedding.0));
+            .for_each(|(relative_path, id, embedding)| f(relative_path, id, embedding.0));
         Ok(())
     }
 
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 6e0477491518a0c4a18ebfa1c24ddaf51eaf1948..32a11a42ebdcb01205869bcb273784582e291dcf 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -11,6 +11,7 @@ use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
+use globset::{Glob, GlobMatcher};
 use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
 use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
@@ -624,6 +625,8 @@ impl SemanticIndex {
         project: ModelHandle<Project>,
         phrase: String,
         limit: usize,
+        include_globs: Vec<GlobMatcher>,
+        exclude_globs: Vec<GlobMatcher>,
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<Vec<SearchResult>>> {
         let project_state = if let Some(state) = self.projects.get(&project.downgrade()) {
@@ -657,12 +660,16 @@ impl SemanticIndex {
                         .next()
                         .unwrap();
 
-                    database.top_k_search(&worktree_db_ids, &phrase_embedding, limit)
+                    database.top_k_search(
+                        &worktree_db_ids,
+                        &phrase_embedding,
+                        limit,
+                        include_globs,
+                        exclude_globs,
+                    )
                 })
                 .await?;
 
-            dbg!(&documents);
-
             let mut tasks = Vec::new();
             let mut ranges = Vec::new();
             let weak_project = project.downgrade();
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 31c96ca207bb3da1ace202bd81df461f27ba229b..366d634ddb68df629832b23c3777d6f5cc775b7c 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -7,6 +7,7 @@ use crate::{
 };
 use anyhow::Result;
 use async_trait::async_trait;
+use globset::Glob;
 use gpui::{Task, TestAppContext};
 use language::{Language, LanguageConfig, LanguageRegistry, ToOffset};
 use pretty_assertions::assert_eq;
@@ -96,7 +97,7 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
 
     let search_results = store
         .update(cx, |store, cx| {
-            store.search_project(project.clone(), "aaaa".to_string(), 5, cx)
+            store.search_project(project.clone(), "aaaa".to_string(), 5, vec![], vec![], cx)
         })
         .await
         .unwrap();
@@ -109,6 +110,60 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         );
     });
 
+    // Test Include Files Functonality
+    let include_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
+    let exclude_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
+    let search_results = store
+        .update(cx, |store, cx| {
+            store.search_project(
+                project.clone(),
+                "aaaa".to_string(),
+                5,
+                include_files,
+                vec![],
+                cx,
+            )
+        })
+        .await
+        .unwrap();
+
+    for res in &search_results {
+        res.buffer.read_with(cx, |buffer, _cx| {
+            assert!(buffer
+                .file()
+                .unwrap()
+                .path()
+                .to_str()
+                .unwrap()
+                .ends_with("rs"));
+        });
+    }
+
+    let search_results = store
+        .update(cx, |store, cx| {
+            store.search_project(
+                project.clone(),
+                "aaaa".to_string(),
+                5,
+                vec![],
+                exclude_files,
+                cx,
+            )
+        })
+        .await
+        .unwrap();
+
+    for res in &search_results {
+        res.buffer.read_with(cx, |buffer, _cx| {
+            assert!(!buffer
+                .file()
+                .unwrap()
+                .path()
+                .to_str()
+                .unwrap()
+                .ends_with("rs"));
+        });
+    }
     fs.save(
         "/the-root/src/file2.rs".as_ref(),
         &"

From 81b05f2a083e999751646f53b72212ceedf3b6d9 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 20 Jul 2023 14:23:11 -0700
Subject: [PATCH 26/34] Optimize glob filtering of semantic search

Co-authored-by: Kyle <kyle@zed.dev>
---
 crates/search/src/project_search.rs           |   1 -
 crates/semantic_index/src/db.rs               |  66 +++++++----
 crates/semantic_index/src/semantic_index.rs   |   2 +-
 .../src/semantic_index_tests.rs               | 103 +++++++++++-------
 4 files changed, 109 insertions(+), 63 deletions(-)

diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 25fc897707af6be8b97b277a2d65b8d4cf1eeb17..28085f59feb16bd9158ee766ba752f4d2cd72340 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -669,7 +669,6 @@ impl ProjectSearchView {
         &mut self,
         cx: &mut ViewContext<Self>,
     ) -> Option<(Vec<GlobMatcher>, Vec<GlobMatcher>)> {
-        let text = self.query_editor.read(cx).text(cx);
         let included_files =
             match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) {
                 Ok(included_files) => {
diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index 3ba85a275d0a0d6b197bbad22d5ad5bd792a2fbf..b1e78b7aff994ca977fbbea41d595f08fb65766a 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -1,6 +1,6 @@
 use crate::{parsing::Document, SEMANTIC_INDEX_VERSION};
 use anyhow::{anyhow, Context, Result};
-use globset::{Glob, GlobMatcher};
+use globset::GlobMatcher;
 use project::Fs;
 use rpc::proto::Timestamp;
 use rusqlite::{
@@ -257,16 +257,11 @@ impl VectorDatabase {
         exclude_globs: Vec<GlobMatcher>,
     ) -> Result<Vec<(i64, PathBuf, Range<usize>)>> {
         let mut results = Vec::<(i64, f32)>::with_capacity(limit + 1);
-        self.for_each_document(&worktree_ids, |relative_path, id, embedding| {
-            if (include_globs.is_empty()
-                || include_globs
-                    .iter()
-                    .any(|include_glob| include_glob.is_match(relative_path.clone())))
-                && (exclude_globs.is_empty()
-                    || !exclude_globs
-                        .iter()
-                        .any(|exclude_glob| exclude_glob.is_match(relative_path.clone())))
-            {
+        self.for_each_document(
+            &worktree_ids,
+            include_globs,
+            exclude_globs,
+            |id, embedding| {
                 let similarity = dot(&embedding, &query_embedding);
                 let ix = match results.binary_search_by(|(_, s)| {
                     similarity.partial_cmp(&s).unwrap_or(Ordering::Equal)
@@ -276,8 +271,8 @@ impl VectorDatabase {
                 };
                 results.insert(ix, (id, similarity));
                 results.truncate(limit);
-            }
-        })?;
+            },
+        )?;
 
         let ids = results.into_iter().map(|(id, _)| id).collect::<Vec<_>>();
         self.get_documents_by_ids(&ids)
@@ -286,26 +281,55 @@ impl VectorDatabase {
     fn for_each_document(
         &self,
         worktree_ids: &[i64],
-        mut f: impl FnMut(String, i64, Vec<f32>),
+        include_globs: Vec<GlobMatcher>,
+        exclude_globs: Vec<GlobMatcher>,
+        mut f: impl FnMut(i64, Vec<f32>),
     ) -> Result<()> {
+        let mut file_query = self.db.prepare(
+            "
+            SELECT
+                id, relative_path
+            FROM
+                files
+            WHERE
+                worktree_id IN rarray(?)
+            ",
+        )?;
+
+        let mut file_ids = Vec::<i64>::new();
+        let mut rows = file_query.query([ids_to_sql(worktree_ids)])?;
+        while let Some(row) = rows.next()? {
+            let file_id = row.get(0)?;
+            let relative_path = row.get_ref(1)?.as_str()?;
+            let included = include_globs.is_empty()
+                || include_globs
+                    .iter()
+                    .any(|glob| glob.is_match(relative_path));
+            let excluded = exclude_globs
+                .iter()
+                .any(|glob| glob.is_match(relative_path));
+            if included && !excluded {
+                file_ids.push(file_id);
+            }
+        }
+
         let mut query_statement = self.db.prepare(
             "
             SELECT
-                files.relative_path, documents.id, documents.embedding
+                id, embedding
             FROM
-                documents, files
+                documents
             WHERE
-                documents.file_id = files.id AND
-                files.worktree_id IN rarray(?)
+                file_id IN rarray(?)
             ",
         )?;
 
         query_statement
-            .query_map(params![ids_to_sql(worktree_ids)], |row| {
-                Ok((row.get(0)?, row.get(1)?, row.get::<_, Embedding>(2)?))
+            .query_map(params![ids_to_sql(&file_ids)], |row| {
+                Ok((row.get(0)?, row.get::<_, Embedding>(1)?))
             })?
             .filter_map(|row| row.ok())
-            .for_each(|(relative_path, id, embedding)| f(relative_path, id, embedding.0));
+            .for_each(|(id, embedding)| f(id, embedding.0));
         Ok(())
     }
 
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 32a11a42ebdcb01205869bcb273784582e291dcf..215ca38a28845fdf8b24d8c5d0a5d1249a03bcec 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -11,7 +11,7 @@ use anyhow::{anyhow, Result};
 use db::VectorDatabase;
 use embedding::{EmbeddingProvider, OpenAIEmbeddings};
 use futures::{channel::oneshot, Future};
-use globset::{Glob, GlobMatcher};
+use globset::GlobMatcher;
 use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle};
 use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 366d634ddb68df629832b23c3777d6f5cc775b7c..432f6b5b5328ed7bf72c52a34a762de0baacb7de 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -3,7 +3,7 @@ use crate::{
     embedding::EmbeddingProvider,
     parsing::{subtract_ranges, CodeContextRetriever, Document},
     semantic_index_settings::SemanticIndexSettings,
-    SemanticIndex,
+    SearchResult, SemanticIndex,
 };
 use anyhow::Result;
 use async_trait::async_trait;
@@ -46,21 +46,21 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
             "src": {
                 "file1.rs": "
                     fn aaa() {
-                        println!(\"aaaa!\");
+                        println!(\"aaaaaaaaaaaa!\");
                     }
 
-                    fn zzzzzzzzz() {
+                    fn zzzzz() {
                         println!(\"SLEEPING\");
                     }
                 ".unindent(),
                 "file2.rs": "
                     fn bbb() {
-                        println!(\"bbbb!\");
+                        println!(\"bbbbbbbbbbbbb!\");
                     }
                 ".unindent(),
                 "file3.toml": "
-                    ZZZZZZZ = 5
-                    ".unindent(),
+                    ZZZZZZZZZZZZZZZZZZ = 5
+                ".unindent(),
             }
         }),
     )
@@ -97,27 +97,37 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
 
     let search_results = store
         .update(cx, |store, cx| {
-            store.search_project(project.clone(), "aaaa".to_string(), 5, vec![], vec![], cx)
+            store.search_project(
+                project.clone(),
+                "aaaaaabbbbzz".to_string(),
+                5,
+                vec![],
+                vec![],
+                cx,
+            )
         })
         .await
         .unwrap();
 
-    search_results[0].buffer.read_with(cx, |buffer, _cx| {
-        assert_eq!(search_results[0].range.start.to_offset(buffer), 0);
-        assert_eq!(
-            buffer.file().unwrap().path().as_ref(),
-            Path::new("src/file1.rs")
-        );
-    });
+    assert_search_results(
+        &search_results,
+        &[
+            (Path::new("src/file1.rs").into(), 0),
+            (Path::new("src/file2.rs").into(), 0),
+            (Path::new("src/file3.toml").into(), 0),
+            (Path::new("src/file1.rs").into(), 45),
+        ],
+        cx,
+    );
 
     // Test Include Files Functonality
     let include_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
     let exclude_files = vec![Glob::new("*.rs").unwrap().compile_matcher()];
-    let search_results = store
+    let rust_only_search_results = store
         .update(cx, |store, cx| {
             store.search_project(
                 project.clone(),
-                "aaaa".to_string(),
+                "aaaaaabbbbzz".to_string(),
                 5,
                 include_files,
                 vec![],
@@ -127,23 +137,21 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .await
         .unwrap();
 
-    for res in &search_results {
-        res.buffer.read_with(cx, |buffer, _cx| {
-            assert!(buffer
-                .file()
-                .unwrap()
-                .path()
-                .to_str()
-                .unwrap()
-                .ends_with("rs"));
-        });
-    }
+    assert_search_results(
+        &rust_only_search_results,
+        &[
+            (Path::new("src/file1.rs").into(), 0),
+            (Path::new("src/file2.rs").into(), 0),
+            (Path::new("src/file1.rs").into(), 45),
+        ],
+        cx,
+    );
 
-    let search_results = store
+    let no_rust_search_results = store
         .update(cx, |store, cx| {
             store.search_project(
                 project.clone(),
-                "aaaa".to_string(),
+                "aaaaaabbbbzz".to_string(),
                 5,
                 vec![],
                 exclude_files,
@@ -153,17 +161,12 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
         .await
         .unwrap();
 
-    for res in &search_results {
-        res.buffer.read_with(cx, |buffer, _cx| {
-            assert!(!buffer
-                .file()
-                .unwrap()
-                .path()
-                .to_str()
-                .unwrap()
-                .ends_with("rs"));
-        });
-    }
+    assert_search_results(
+        &no_rust_search_results,
+        &[(Path::new("src/file3.toml").into(), 0)],
+        cx,
+    );
+
     fs.save(
         "/the-root/src/file2.rs".as_ref(),
         &"
@@ -195,6 +198,26 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
     );
 }
 
+#[track_caller]
+fn assert_search_results(
+    actual: &[SearchResult],
+    expected: &[(Arc<Path>, usize)],
+    cx: &TestAppContext,
+) {
+    // Reduce every result to (file path, start offset of its range) so the
+    // whole list can be compared against `expected` in one ordered assert.
+    let mut found = Vec::with_capacity(actual.len());
+    for result in actual {
+        let entry = result.buffer.read_with(cx, |buffer, _| {
+            let path = buffer.file().unwrap().path().clone();
+            let start = result.range.start.to_offset(buffer);
+            (path, start)
+        });
+        found.push(entry);
+    }
+    assert_eq!(found, expected);
+}
+
 #[gpui::test]
 async fn test_code_context_retrieval_rust() {
     let language = rust_lang();

From c86096a886701c96c6dd09fca36c0281bc140111 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 25 Jul 2023 10:38:37 -0400
Subject: [PATCH 27/34] update semantic index tests for javascript

---
 .../src/semantic_index_tests.rs               | 196 +++++++++++-------
 .../src/languages/javascript/embedding.scm    |  92 ++++----
 crates/zed/src/languages/tsx/embedding.scm    | 106 ++++------
 .../src/languages/typescript/embedding.scm    | 116 +++++------
 4 files changed, 259 insertions(+), 251 deletions(-)

diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 432f6b5b5328ed7bf72c52a34a762de0baacb7de..9bd6efc954ced0f505d8532c3049be9be13466af 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -388,43 +388,103 @@ fn assert_documents_eq(
     );
 }
 
-// #[gpui::test]
-// async fn test_code_context_retrieval_javascript() {
-//     let language = js_lang();
-//     let mut retriever = CodeContextRetriever::new();
+#[gpui::test]
+async fn test_code_context_retrieval_javascript() {
+    let language = js_lang();
+    let mut retriever = CodeContextRetriever::new();
 
-//     let text = "
-//         /* globals importScripts, backend */
-//         function _authorize() {}
+    let text = "
+        /* globals importScripts, backend */
+        function _authorize() {}
+
+        /**
+         * Sometimes the frontend build is way faster than backend.
+         */
+        export async function authorizeBank() {
+            _authorize(pushModal, upgradingAccountId, {});
+        }
 
-//         /**
-//          * Sometimes the frontend build is way faster than backend.
-//          */
-//         export async function authorizeBank() {
-//             _authorize(pushModal, upgradingAccountId, {});
-//         }
+        export class SettingsPage {
+            /* This is a test setting */
+            constructor(page) {
+                this.page = page;
+            }
+        }
 
-//         export class SettingsPage {
-//             /* This is a test setting */
-//             constructor(page) {
-//                 this.page = page;
-//             }
-//         }
+        /* This is a test comment */
+        class TestClass {}
 
-//         /* This is a test comment */
-//         class TestClass {}
+        /* Schema for editor_events in Clickhouse. */
+        export interface ClickhouseEditorEvent {
+            installation_id: string
+            operation: string
+        }
+        "
+    .unindent();
 
-//         /* Schema for editor_events in Clickhouse. */
-//         export interface ClickhouseEditorEvent {
-//             installation_id: string
-//             operation: string
-//         }
-//         "
-//     .unindent();
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 
-//     let parsed_files = retriever
-//         .parse_file(Path::new("foo.js"), &text, language)
-//         .unwrap();
+    assert_documents_eq(
+        &documents,
+        &[
+            (
+                "
+            /* globals importScripts, backend */
+            function _authorize() {}"
+                    .unindent(),
+                37,
+            ),
+            (
+                "
+            /**
+             * Sometimes the frontend build is way faster than backend.
+             */
+            export async function authorizeBank() {
+                _authorize(pushModal, upgradingAccountId, {});
+            }"
+                .unindent(),
+                131,
+            ),
+            (
+                "
+                export class SettingsPage {
+                    /* This is a test setting */
+                    constructor(page) {
+                        this.page = page;
+                    }
+                }"
+                .unindent(),
+                225,
+            ),
+            (
+                "
+                /* This is a test setting */
+                constructor(page) {
+                    this.page = page;
+                }"
+                .unindent(),
+                290,
+            ),
+            (
+                "
+                /* This is a test comment */
+                class TestClass {}"
+                    .unindent(),
+                374,
+            ),
+            (
+                "
+                /* Schema for editor_events in Clickhouse. */
+                export interface ClickhouseEditorEvent {
+                    installation_id: string
+                    operation: string
+                }"
+                .unindent(),
+                440,
+            ),
+        ],
+    )
+}
 
 //     let test_documents = &[
 //         Document {
@@ -924,86 +984,74 @@ fn js_lang() -> Arc<Language> {
             (
                 (comment)* @context
                 .
+                [
                 (export_statement
                     (function_declaration
                         "async"? @name
                         "function" @name
-                        name: (_) @name)) @item
-                    )
-
-            (
-                (comment)* @context
-                .
+                        name: (_) @name))
                 (function_declaration
                     "async"? @name
                     "function" @name
-                    name: (_) @name) @item
-                    )
+                    name: (_) @name)
+                ] @item
+            )
 
             (
                 (comment)* @context
                 .
+                [
                 (export_statement
                     (class_declaration
                         "class" @name
-                        name: (_) @name)) @item
-                    )
-
-            (
-                (comment)* @context
-                .
+                        name: (_) @name))
                 (class_declaration
                     "class" @name
-                    name: (_) @name) @item
-                    )
-
-            (
-                (comment)* @context
-                .
-                (method_definition
-                    [
-                        "get"
-                        "set"
-                        "async"
-                        "*"
-                        "static"
-                    ]* @name
-                    name: (_) @name) @item
-                )
+                    name: (_) @name)
+                ] @item
+            )
 
             (
                 (comment)* @context
                 .
+                [
                 (export_statement
                     (interface_declaration
                         "interface" @name
-                        name: (_) @name)) @item
-                )
-
-            (
-                (comment)* @context
-                .
+                        name: (_) @name))
                 (interface_declaration
                     "interface" @name
-                    name: (_) @name) @item
-                )
+                    name: (_) @name)
+                ] @item
+            )
 
             (
                 (comment)* @context
                 .
+                [
                 (export_statement
                     (enum_declaration
                         "enum" @name
-                        name: (_) @name)) @item
-                )
+                        name: (_) @name))
+                (enum_declaration
+                    "enum" @name
+                    name: (_) @name)
+                ] @item
+            )
 
             (
                 (comment)* @context
                 .
-                (enum_declaration
-                    "enum" @name
+                (method_definition
+                    [
+                        "get"
+                        "set"
+                        "async"
+                        "*"
+                        "static"
+                    ]* @name
                     name: (_) @name) @item
-                )
+            )
 
                     "#
             .unindent(),
diff --git a/crates/zed/src/languages/javascript/embedding.scm b/crates/zed/src/languages/javascript/embedding.scm
index a2140400318db95a8d29074402ab2d212561a79b..ab1a3b6b063c3bf57adad3c302a156fcd0239448 100644
--- a/crates/zed/src/languages/javascript/embedding.scm
+++ b/crates/zed/src/languages/javascript/embedding.scm
@@ -1,38 +1,60 @@
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (function_declaration
+                "async"? @name
+                "function" @name
+                name: (_) @name))
         (function_declaration
             "async"? @name
             "function" @name
-            name: (_) @name)) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (function_declaration
-        "async"? @name
-        "function" @name
-        name: (_) @name) @item
-    )
+    [
+        (export_statement
+            (class_declaration
+                "class" @name
+                name: (_) @name))
+        (class_declaration
+            "class" @name
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (export_statement
-        (class_declaration
-            "class" @name
-            name: (_) @name)) @item
-    )
+    [
+        (export_statement
+            (interface_declaration
+                "interface" @name
+                name: (_) @name))
+        (interface_declaration
+            "interface" @name
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (class_declaration
-        "class" @name
-        name: (_) @name) @item
-    )
+    [
+        (export_statement
+            (enum_declaration
+                "enum" @name
+                name: (_) @name))
+        (enum_declaration
+            "enum" @name
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
@@ -46,38 +68,4 @@
             "static"
             ]* @name
         name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
-        (interface_declaration
-            "interface" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
-        (enum_declaration
-            "enum" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (enum_declaration
-        "enum" @name
-        name: (_) @name) @item
-    )
+)
diff --git a/crates/zed/src/languages/tsx/embedding.scm b/crates/zed/src/languages/tsx/embedding.scm
index 4bb4fea254d0cf86f2fbb9d5c8f657e06238971f..ddcff665841091aa170bd5f9bb60439a2cadb2c5 100644
--- a/crates/zed/src/languages/tsx/embedding.scm
+++ b/crates/zed/src/languages/tsx/embedding.scm
@@ -1,99 +1,85 @@
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (function_declaration
+                "async"? @name
+                "function" @name
+                name: (_) @name))
         (function_declaration
             "async"? @name
             "function" @name
-            name: (_) @name)) @item
+            name: (_) @name)
+        ] @item
     )
 
 (
     (comment)* @context
     .
-    (function_declaration
-        "async"? @name
-        "function" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
+    [
+        (export_statement
+            (class_declaration
+                "class" @name
+                name: (_) @name))
         (class_declaration
             "class" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (class_declaration
-        "class" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (method_definition
-        [
-            "get"
-            "set"
-            "async"
-            "*"
-            "static"
-            ]* @name
-        name: (_) @name) @item
+            name: (_) @name)
+        ] @item
     )
 
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (interface_declaration
+                "interface" @name
+                name: (_) @name))
         (interface_declaration
             "interface" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
+            name: (_) @name)
+        ] @item
     )
 
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (enum_declaration
+                "enum" @name
+                name: (_) @name))
         (enum_declaration
             "enum" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (enum_declaration
-        "enum" @name
-        name: (_) @name) @item
+            name: (_) @name)
+        ] @item
     )
 
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (type_alias_declaration
+                "type" @name
+                name: (_) @name))
         (type_alias_declaration
             "type" @name
-            name: (_) @name)) @item
+            name: (_) @name)
+        ] @item
     )
 
 (
     (comment)* @context
     .
-    (type_alias_declaration
-        "type" @name
-        name: (_) @name) @item)
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "static"
+            ]* @name
+        name: (_) @name) @item
+    )
diff --git a/crates/zed/src/languages/typescript/embedding.scm b/crates/zed/src/languages/typescript/embedding.scm
index 4bb4fea254d0cf86f2fbb9d5c8f657e06238971f..3170cb7c957e51e00c175c7eaa2b4b51deda042a 100644
--- a/crates/zed/src/languages/typescript/embedding.scm
+++ b/crates/zed/src/languages/typescript/embedding.scm
@@ -1,99 +1,85 @@
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (function_declaration
+                "async"? @name
+                "function" @name
+                name: (_) @name))
         (function_declaration
             "async"? @name
             "function" @name
-            name: (_) @name)) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (function_declaration
-        "async"? @name
-        "function" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
+    [
+        (export_statement
+            (class_declaration
+                "class" @name
+                name: (_) @name))
         (class_declaration
             "class" @name
-            name: (_) @name)) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (class_declaration
-        "class" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (method_definition
-        [
-            "get"
-            "set"
-            "async"
-            "*"
-            "static"
-            ]* @name
-        name: (_) @name) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (export_statement
+    [
+        (export_statement
+            (interface_declaration
+                "interface" @name
+                name: (_) @name))
         (interface_declaration
             "interface" @name
-            name: (_) @name)) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (interface_declaration
-        "interface" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
+    [
+        (export_statement
+            (enum_declaration
+                "enum" @name
+                name: (_) @name))
         (enum_declaration
             "enum" @name
-            name: (_) @name)) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (enum_declaration
-        "enum" @name
-        name: (_) @name) @item
-    )
-
-(
-    (comment)* @context
-    .
-    (export_statement
+    [
+        (export_statement
+            (type_alias_declaration
+                "type" @name
+                name: (_) @name))
         (type_alias_declaration
             "type" @name
-            name: (_) @name)) @item
-    )
+            name: (_) @name)
+    ] @item
+)
 
 (
     (comment)* @context
     .
-    (type_alias_declaration
-        "type" @name
-        name: (_) @name) @item)
+    (method_definition
+        [
+            "get"
+            "set"
+            "async"
+            "*"
+            "static"
+            ]* @name
+        name: (_) @name) @item
+)

From 97c3d97792ec0feb1e93de302c377fe8df28fcf0 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 25 Jul 2023 13:30:38 -0400
Subject: [PATCH 28/34] update semantic index tests for cpp

---
 .../src/semantic_index_tests.rs               | 418 +++++++-----------
 1 file changed, 148 insertions(+), 270 deletions(-)

diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 9bd6efc954ced0f505d8532c3049be9be13466af..2ae9a06c0fbbeeb3371ff958612b9d94bc88daef 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -486,105 +486,6 @@ async fn test_code_context_retrieval_javascript() {
     )
 }
 
-//     let test_documents = &[
-//         Document {
-//             name: "function _authorize".into(),
-//             range: text.find("function _authorize").unwrap()..(text.find("}").unwrap() + 1),
-//             content: "
-//                     The below code snippet is from file 'foo.js'
-
-//                     ```javascript
-//                     /* globals importScripts, backend */
-//                     function _authorize() {}
-//                     ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "async function authorizeBank".into(),
-//             range: text.find("export async").unwrap()..223,
-//             content: "
-//                     The below code snippet is from file 'foo.js'
-
-//                     ```javascript
-//                     /**
-//                      * Sometimes the frontend build is way faster than backend.
-//                      */
-//                     export async function authorizeBank() {
-//                         _authorize(pushModal, upgradingAccountId, {});
-//                     }
-//                     ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "class SettingsPage".into(),
-//             range: 225..343,
-//             content: "
-//                     The below code snippet is from file 'foo.js'
-
-//                     ```javascript
-//                     export class SettingsPage {
-//                         /* This is a test setting */
-//                         constructor(page) {
-//                             this.page = page;
-//                         }
-//                     }
-//                     ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "constructor".into(),
-//             range: 290..341,
-//             content: "
-//                 The below code snippet is from file 'foo.js'
-
-//                 ```javascript
-//                 /* This is a test setting */
-//                 constructor(page) {
-//                         this.page = page;
-//                     }
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "class TestClass".into(),
-//             range: 374..392,
-//             content: "
-//                     The below code snippet is from file 'foo.js'
-
-//                     ```javascript
-//                     /* This is a test comment */
-//                     class TestClass {}
-//                     ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "interface ClickhouseEditorEvent".into(),
-//             range: 440..532,
-//             content: "
-//                     The below code snippet is from file 'foo.js'
-
-//                     ```javascript
-//                     /* Schema for editor_events in Clickhouse. */
-//                     export interface ClickhouseEditorEvent {
-//                         installation_id: string
-//                         operation: string
-//                     }
-//                     ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//     ];
-
-//     for idx in 0..test_documents.len() {
-//         assert_eq!(test_documents[idx], parsed_files[idx]);
-//     }
-// }
-
 // #[gpui::test]
 // async fn test_code_context_retrieval_elixir() {
 //     let language = elixir_lang();
@@ -722,180 +623,157 @@ async fn test_code_context_retrieval_javascript() {
 //     }
 // }
 
-// #[gpui::test]
-// async fn test_code_context_retrieval_cpp() {
-//     let language = cpp_lang();
-//     let mut retriever = CodeContextRetriever::new();
-
-//     let text = "
-//     /**
-//      * @brief Main function
-//      * @returns 0 on exit
-//      */
-//     int main() { return 0; }
-
-//     /**
-//     * This is a test comment
-//     */
-//     class MyClass {       // The class
-//         public:             // Access specifier
-//         int myNum;        // Attribute (int variable)
-//         string myString;  // Attribute (string variable)
-//     };
-
-//     // This is a test comment
-//     enum Color { red, green, blue };
-
-//     /** This is a preceding block comment
-//      * This is the second line
-//      */
-//     struct {           // Structure declaration
-//         int myNum;       // Member (int variable)
-//         string myString; // Member (string variable)
-//     } myStructure;
-
-//     /**
-//     * @brief Matrix class.
-//     */
-//     template <typename T,
-//               typename = typename std::enable_if<
-//                 std::is_integral<T>::value || std::is_floating_point<T>::value,
-//                 bool>::type>
-//     class Matrix2 {
-//         std::vector<std::vector<T>> _mat;
-
-//     public:
-//         /**
-//         * @brief Constructor
-//         * @tparam Integer ensuring integers are being evaluated and not other
-//         * data types.
-//         * @param size denoting the size of Matrix as size x size
-//         */
-//         template <typename Integer,
-//                   typename = typename std::enable_if<std::is_integral<Integer>::value,
-//                   Integer>::type>
-//         explicit Matrix(const Integer size) {
-//             for (size_t i = 0; i < size; ++i) {
-//                 _mat.emplace_back(std::vector<T>(size, 0));
-//             }
-//         }
-//     }"
-//     .unindent();
+#[gpui::test]
+async fn test_code_context_retrieval_cpp() {
+    let language = cpp_lang();
+    let mut retriever = CodeContextRetriever::new();
 
-//     let parsed_files = retriever
-//         .parse_file(Path::new("foo.cpp"), &text, language)
-//         .unwrap();
+    let text = "
+    /**
+     * @brief Main function
+     * @returns 0 on exit
+     */
+    int main() { return 0; }
+
+    /**
+    * This is a test comment
+    */
+    class MyClass {       // The class
+        public:           // Access specifier
+        int myNum;        // Attribute (int variable)
+        string myString;  // Attribute (string variable)
+    };
+
+    // This is a test comment
+    enum Color { red, green, blue };
+
+    /** This is a preceding block comment
+     * This is the second line
+     */
+    struct {           // Structure declaration
+        int myNum;       // Member (int variable)
+        string myString; // Member (string variable)
+    } myStructure;
+
+    /**
+     * @brief Matrix class.
+     */
+    template <typename T,
+              typename = typename std::enable_if<
+                std::is_integral<T>::value || std::is_floating_point<T>::value,
+                bool>::type>
+    class Matrix2 {
+        std::vector<std::vector<T>> _mat;
+
+        public:
+            /**
+            * @brief Constructor
+            * @tparam Integer ensuring integers are being evaluated and not other
+            * data types.
+            * @param size denoting the size of Matrix as size x size
+            */
+            template <typename Integer,
+                    typename = typename std::enable_if<std::is_integral<Integer>::value,
+                    Integer>::type>
+            explicit Matrix(const Integer size) {
+                for (size_t i = 0; i < size; ++i) {
+                    _mat.emplace_back(std::vector<T>(size, 0));
+                }
+            }
+    }"
+    .unindent();
 
-//     let test_documents = &[
-//         Document {
-//             name: "int main".into(),
-//             range: 54..78,
-//             content: "
-//                 The below code snippet is from file 'foo.cpp'
-
-//                 ```cpp
-//                 /**
-//                  * @brief Main function
-//                  * @returns 0 on exit
-//                  */
-//                 int main() { return 0; }
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "class MyClass".into(),
-//             range: 112..295,
-//             content: "
-//                 The below code snippet is from file 'foo.cpp'
-
-//                 ```cpp
-//                 /**
-//                 * This is a test comment
-//                 */
-//                 class MyClass {       // The class
-//                     public:             // Access specifier
-//                     int myNum;        // Attribute (int variable)
-//                     string myString;  // Attribute (string variable)
-//                 }
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "enum Color".into(),
-//             range: 324..355,
-//             content: "
-//                 The below code snippet is from file 'foo.cpp'
-
-//                 ```cpp
-//                 // This is a test comment
-//                 enum Color { red, green, blue }
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "struct myStructure".into(),
-//             range: 428..581,
-//             content: "
-//                 The below code snippet is from file 'foo.cpp'
-
-//                 ```cpp
-//                 /** This is a preceding block comment
-//                  * This is the second line
-//                  */
-//                 struct {           // Structure declaration
-//                     int myNum;       // Member (int variable)
-//                     string myString; // Member (string variable)
-//                 } myStructure;
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//             name: "class Matrix2".into(),
-//             range: 613..1342,
-//             content: "
-//                 The below code snippet is from file 'foo.cpp'
-
-//                 ```cpp
-//                 /**
-//                 * @brief Matrix class.
-//                 */
-//                 template <typename T,
-//                           typename = typename std::enable_if<
-//                             std::is_integral<T>::value || std::is_floating_point<T>::value,
-//                             bool>::type>
-//                 class Matrix2 {
-//                     std::vector<std::vector<T>> _mat;
-
-//                 public:
-//                     /**
-//                     * @brief Constructor
-//                     * @tparam Integer ensuring integers are being evaluated and not other
-//                     * data types.
-//                     * @param size denoting the size of Matrix as size x size
-//                     */
-//                     template <typename Integer,
-//                               typename = typename std::enable_if<std::is_integral<Integer>::value,
-//                               Integer>::type>
-//                     explicit Matrix(const Integer size) {
-//                         for (size_t i = 0; i < size; ++i) {
-//                             _mat.emplace_back(std::vector<T>(size, 0));
-//                         }
-//                     }
-//                 }
-//                 ```"
-//             .unindent(),
-//             embedding: vec![],
-//         },
-//     ];
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
 
-//     for idx in 0..test_documents.len() {
-//         assert_eq!(test_documents[idx], parsed_files[idx]);
-//     }
-// }
+    assert_documents_eq(
+        &documents,
+        &[
+            (
+                "
+        /**
+         * @brief Main function
+         * @returns 0 on exit
+         */
+        int main() { return 0; }"
+                    .unindent(),
+                54,
+            ),
+            (
+                "
+                /**
+                * This is a test comment
+                */
+                class MyClass {       // The class
+                    public:           // Access specifier
+                    int myNum;        // Attribute (int variable)
+                    string myString;  // Attribute (string variable)
+                }"
+                .unindent(),
+                112,
+            ),
+            (
+                "
+                // This is a test comment
+                enum Color { red, green, blue }"
+                    .unindent(),
+                322,
+            ),
+            (
+                "
+                /** This is a preceding block comment
+                 * This is the second line
+                 */
+                struct {           // Structure declaration
+                    int myNum;       // Member (int variable)
+                    string myString; // Member (string variable)
+                } myStructure;"
+                    .unindent(),
+                425,
+            ),
+            (
+                "
+                /**
+                 * @brief Matrix class.
+                 */
+                template <typename T,
+                          typename = typename std::enable_if<
+                            std::is_integral<T>::value || std::is_floating_point<T>::value,
+                            bool>::type>
+                class Matrix2 {
+                    std::vector<std::vector<T>> _mat;
+
+                    public:
+                        /**
+                        * @brief Constructor
+                        * @tparam Integer ensuring integers are being evaluated and not other
+                        * data types.
+                        * @param size denoting the size of Matrix as size x size
+                        */
+                        template <typename Integer,
+                                typename = typename std::enable_if<std::is_integral<Integer>::value,
+                                Integer>::type>
+                        explicit Matrix(const Integer size) {
+                            for (size_t i = 0; i < size; ++i) {
+                                _mat.emplace_back(std::vector<T>(size, 0));
+                            }
+                        }
+                }"
+                .unindent(),
+                612,
+            ),
+            (
+                "
+                explicit Matrix(const Integer size) {
+                    for (size_t i = 0; i < size; ++i) {
+                        _mat.emplace_back(std::vector<T>(size, 0));
+                    }
+                }"
+                .unindent(),
+                1226,
+            ),
+        ],
+    );
+}
 
 #[gpui::test]
 fn test_dot_product(mut rng: StdRng) {

From cdceddd2cc3d21f3efd504e8b568a67535093626 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 25 Jul 2023 15:20:35 -0400
Subject: [PATCH 29/34] update semantic index tests for elixir

---
 .../src/semantic_index_tests.rs               | 251 ++++++++----------
 1 file changed, 115 insertions(+), 136 deletions(-)

diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs
index 2ae9a06c0fbbeeb3371ff958612b9d94bc88daef..acf5a9d72b43a1123102e105da2b4b039fba87c6 100644
--- a/crates/semantic_index/src/semantic_index_tests.rs
+++ b/crates/semantic_index/src/semantic_index_tests.rs
@@ -486,142 +486,121 @@ async fn test_code_context_retrieval_javascript() {
     )
 }
 
-// #[gpui::test]
-// async fn test_code_context_retrieval_elixir() {
-//     let language = elixir_lang();
-//     let mut retriever = CodeContextRetriever::new();
-
-//     let text = r#"
-// defmodule File.Stream do
-//     @moduledoc """
-//     Defines a `File.Stream` struct returned by `File.stream!/3`.
-
-//     The following fields are public:
-
-//     * `path`          - the file path
-//     * `modes`         - the file modes
-//     * `raw`           - a boolean indicating if bin functions should be used
-//     * `line_or_bytes` - if reading should read lines or a given number of bytes
-//     * `node`          - the node the file belongs to
-
-//     """
-
-//     defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
-
-//     @type t :: %__MODULE__{}
-
-//     @doc false
-//     def __build__(path, modes, line_or_bytes) do
-//     raw = :lists.keyfind(:encoding, 1, modes) == false
-
-//     modes =
-//         case raw do
-//         true ->
-//             case :lists.keyfind(:read_ahead, 1, modes) do
-//             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-//             {:read_ahead, _} -> [:raw | modes]
-//             false -> [:raw, :read_ahead | modes]
-//             end
-
-//         false ->
-//             modes
-//         end
-
-//     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-//     end
-// "#
-//     .unindent();
-
-//     let parsed_files = retriever
-//         .parse_file(Path::new("foo.ex"), &text, language)
-//         .unwrap();
-
-//     let test_documents = &[
-//         Document{
-//             name: "defmodule File.Stream".into(),
-//             range: 0..1132,
-//             content: r#"
-//                 The below code snippet is from file 'foo.ex'
-
-//                 ```elixir
-//                 defmodule File.Stream do
-//                     @moduledoc """
-//                     Defines a `File.Stream` struct returned by `File.stream!/3`.
-
-//                     The following fields are public:
-
-//                     * `path`          - the file path
-//                     * `modes`         - the file modes
-//                     * `raw`           - a boolean indicating if bin functions should be used
-//                     * `line_or_bytes` - if reading should read lines or a given number of bytes
-//                     * `node`          - the node the file belongs to
-
-//                     """
-
-//                     defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
-
-//                     @type t :: %__MODULE__{}
-
-//                     @doc false
-//                     def __build__(path, modes, line_or_bytes) do
-//                     raw = :lists.keyfind(:encoding, 1, modes) == false
-
-//                     modes =
-//                         case raw do
-//                         true ->
-//                             case :lists.keyfind(:read_ahead, 1, modes) do
-//                             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-//                             {:read_ahead, _} -> [:raw | modes]
-//                             false -> [:raw, :read_ahead | modes]
-//                             end
-
-//                         false ->
-//                             modes
-//                         end
-
-//                     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-//                     end
-//                 ```"#.unindent(),
-//             embedding: vec![],
-//         },
-//         Document {
-//         name: "def __build__".into(),
-//         range: 574..1132,
-//         content: r#"
-// The below code snippet is from file 'foo.ex'
-
-// ```elixir
-// @doc false
-// def __build__(path, modes, line_or_bytes) do
-//     raw = :lists.keyfind(:encoding, 1, modes) == false
-
-//     modes =
-//         case raw do
-//         true ->
-//             case :lists.keyfind(:read_ahead, 1, modes) do
-//             {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
-//             {:read_ahead, _} -> [:raw | modes]
-//             false -> [:raw, :read_ahead | modes]
-//             end
-
-//         false ->
-//             modes
-//         end
-
-//     %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
-
-//     end
-// ```"#
-//             .unindent(),
-//         embedding: vec![],
-//     }];
-
-//     for idx in 0..test_documents.len() {
-//         assert_eq!(test_documents[idx], parsed_files[idx]);
-//     }
-// }
+#[gpui::test]
+async fn test_code_context_retrieval_elixir() {
+    let language = elixir_lang();
+    let mut retriever = CodeContextRetriever::new();
+
+    let text = r#"
+        defmodule File.Stream do
+            @moduledoc """
+            Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+            The following fields are public:
+
+            * `path`          - the file path
+            * `modes`         - the file modes
+            * `raw`           - a boolean indicating if bin functions should be used
+            * `line_or_bytes` - if reading should read lines or a given number of bytes
+            * `node`          - the node the file belongs to
+
+            """
+
+            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+            @type t :: %__MODULE__{}
+
+            @doc false
+            def __build__(path, modes, line_or_bytes) do
+            raw = :lists.keyfind(:encoding, 1, modes) == false
+
+            modes =
+                case raw do
+                true ->
+                    case :lists.keyfind(:read_ahead, 1, modes) do
+                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+                    {:read_ahead, _} -> [:raw | modes]
+                    false -> [:raw, :read_ahead | modes]
+                    end
+
+                false ->
+                    modes
+                end
+
+            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+            end"#
+    .unindent();
+
+    let documents = retriever.parse_file(&text, language.clone()).unwrap();
+
+    assert_documents_eq(
+        &documents,
+        &[(
+            r#"
+        defmodule File.Stream do
+            @moduledoc """
+            Defines a `File.Stream` struct returned by `File.stream!/3`.
+
+            The following fields are public:
+
+            * `path`          - the file path
+            * `modes`         - the file modes
+            * `raw`           - a boolean indicating if bin functions should be used
+            * `line_or_bytes` - if reading should read lines or a given number of bytes
+            * `node`          - the node the file belongs to
+
+            """
+
+            defstruct path: nil, modes: [], line_or_bytes: :line, raw: true, node: nil
+
+            @type t :: %__MODULE__{}
+
+            @doc false
+            def __build__(path, modes, line_or_bytes) do
+            raw = :lists.keyfind(:encoding, 1, modes) == false
+
+            modes =
+                case raw do
+                true ->
+                    case :lists.keyfind(:read_ahead, 1, modes) do
+                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+                    {:read_ahead, _} -> [:raw | modes]
+                    false -> [:raw, :read_ahead | modes]
+                    end
+
+                false ->
+                    modes
+                end
+
+            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+            end"#
+                .unindent(),
+            0,
+        ),(r#"
+            @doc false
+            def __build__(path, modes, line_or_bytes) do
+            raw = :lists.keyfind(:encoding, 1, modes) == false
+
+            modes =
+                case raw do
+                true ->
+                    case :lists.keyfind(:read_ahead, 1, modes) do
+                    {:read_ahead, false} -> [:raw | :lists.keydelete(:read_ahead, 1, modes)]
+                    {:read_ahead, _} -> [:raw | modes]
+                    false -> [:raw, :read_ahead | modes]
+                    end
+
+                false ->
+                    modes
+                end
+
+            %File.Stream{path: path, modes: modes, raw: raw, line_or_bytes: line_or_bytes, node: node()}
+
+            end"#.unindent(), 574)],
+    );
+}
 
 #[gpui::test]
 async fn test_code_context_retrieval_cpp() {

From e8210b827d8bb0871dbebf046506e12d4d6a934d Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 25 Jul 2023 15:24:27 -0400
Subject: [PATCH 30/34] move visible text to just start anchor with context
 lines for semantic search

---
 crates/search/src/project_search.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 28085f59feb16bd9158ee766ba752f4d2cd72340..6903337e07160bdb1bec048ad9d3fe4672c91f09 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -217,7 +217,7 @@ impl ProjectSearch {
 
                     let matches = results
                         .into_iter()
-                        .map(|result| (result.buffer, vec![result.range]))
+                        .map(|result| (result.buffer, vec![result.range.start..result.range.start]))
                         .collect();
 
                     excerpts.stream_excerpts_with_context_lines(matches, 3, cx)

From 75999204adcee661da958dd27fc3acc536d05b67 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Tue, 25 Jul 2023 16:26:37 -0400
Subject: [PATCH 31/34] update project search to only show semantic button
 visible with semantic_index enabled

---
 crates/search/src/project_search.rs         | 24 ++++++++++++++++-----
 crates/semantic_index/src/semantic_index.rs |  6 +++++-
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs
index 6903337e07160bdb1bec048ad9d3fe4672c91f09..ec9108f92cffa392233f22625f40c15da59bf3cc 100644
--- a/crates/search/src/project_search.rs
+++ b/crates/search/src/project_search.rs
@@ -996,6 +996,10 @@ impl ProjectSearchBar {
                     SearchOption::Regex => &mut search_view.regex,
                 };
                 *value = !*value;
+
+                if value.clone() {
+                    search_view.semantic = None;
+                }
                 search_view.search(cx);
             });
             cx.notify();
@@ -1012,6 +1016,9 @@ impl ProjectSearchBar {
                     search_view.semantic = None;
                 } else if let Some(semantic_index) = SemanticIndex::global(cx) {
                     // TODO: confirm that it's ok to send this project
+                    search_view.regex = false;
+                    search_view.case_sensitive = false;
+                    search_view.whole_word = false;
 
                     let project = search_view.model.read(cx).project.clone();
                     let index_task = semantic_index.update(cx, |semantic_index, cx| {
@@ -1266,9 +1273,14 @@ impl View for ProjectSearchBar {
                                 .with_child(self.render_nav_button(">", Direction::Next, cx))
                                 .aligned(),
                         )
-                        .with_child(
-                            Flex::row()
-                                .with_child(self.render_semantic_search_button(cx))
+                        .with_child({
+                            let row = if SemanticIndex::enabled(cx) {
+                                Flex::row().with_child(self.render_semantic_search_button(cx))
+                            } else {
+                                Flex::row()
+                            };
+
+                            let row = row
                                 .with_child(self.render_option_button(
                                     "Case",
                                     SearchOption::CaseSensitive,
@@ -1286,8 +1298,10 @@ impl View for ProjectSearchBar {
                                 ))
                                 .contained()
                                 .with_style(theme.search.option_button_group)
-                                .aligned(),
-                        )
+                                .aligned();
+
+                            row
+                        })
                         .contained()
                         .with_margin_bottom(row_spacing),
                 )
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 215ca38a28845fdf8b24d8c5d0a5d1249a03bcec..7e8d183ba00a9626af53a881a9fa3d272c257a83 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -1,7 +1,7 @@
 mod db;
 mod embedding;
 mod parsing;
-mod semantic_index_settings;
+pub mod semantic_index_settings;
 
 #[cfg(test)]
 mod semantic_index_tests;
@@ -183,6 +183,10 @@ impl SemanticIndex {
         }
     }
 
+    pub fn enabled(cx: &AppContext) -> bool {
+        settings::get::<SemanticIndexSettings>(cx).enabled
+    }
+
     async fn new(
         fs: Arc<dyn Fs>,
         database_url: PathBuf,

From ca6f7d8a804ec486a4608cd627a7e9182f51a3c8 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 26 Jul 2023 09:17:04 -0400
Subject: [PATCH 32/34] add worktree previously indexed functionality to vector
 db

---
 crates/semantic_index/src/db.rs             | 17 +++++++
 crates/semantic_index/src/semantic_index.rs | 53 ++++++++++++++++++++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs
index b1e78b7aff994ca977fbbea41d595f08fb65766a..4bc97da0f08e3d56dd249da72cd8deaff56f7e0f 100644
--- a/crates/semantic_index/src/db.rs
+++ b/crates/semantic_index/src/db.rs
@@ -197,6 +197,23 @@ impl VectorDatabase {
         Ok(())
     }
 
+    /// Reports whether the `worktrees` table already contains a row whose
+    /// `absolute_path` equals `worktree_root_path`.
+    ///
+    /// A statement that cannot be prepared is returned as an error. A failed row
+    /// lookup, including the "no matching row" case, is reported as `Ok(false)`.
+    pub fn worktree_previously_indexed(&self, worktree_root_path: &Path) -> Result<bool> {
+        let absolute_path = worktree_root_path.to_string_lossy();
+        let mut worktree_query = self
+            .db
+            .prepare("SELECT id FROM worktrees WHERE absolute_path = ?1")?;
+
+        // Only the existence of a row matters, so the fetched id is discarded.
+        let lookup = worktree_query.query_row(params![absolute_path], |row| row.get::<_, i64>(0));
+
+        Ok(lookup.is_ok())
+    }
+
     pub fn find_or_create_worktree(&self, worktree_root_path: &Path) -> Result<i64> {
         // Check that the absolute path doesnt exist
         let mut worktree_query = self
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 7e8d183ba00a9626af53a881a9fa3d272c257a83..7fee09dcff1d3cdae5b85ec01a11de632da71528 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -34,7 +34,7 @@ use util::{
     ResultExt,
 };
 
-const SEMANTIC_INDEX_VERSION: usize = 5;
+const SEMANTIC_INDEX_VERSION: usize = 6;
 const EMBEDDINGS_BATCH_SIZE: usize = 80;
 
 pub fn init(
@@ -161,6 +161,10 @@ enum DbOperation {
         worktree_id: i64,
         sender: oneshot::Sender<Result<HashMap<PathBuf, SystemTime>>>,
     },
+    WorktreePreviouslyIndexed {
+        path: Arc<Path>,
+        sender: oneshot::Sender<Result<bool>>,
+    },
 }
 
 enum EmbeddingJob {
@@ -327,6 +331,10 @@ impl SemanticIndex {
                 let file_mtimes = db.get_file_mtimes(worktree_db_id);
                 sender.send(file_mtimes).ok();
             }
+            DbOperation::WorktreePreviouslyIndexed { path, sender } => {
+                let worktree_indexed = db.worktree_previously_indexed(path.as_ref());
+                sender.send(worktree_indexed).ok();
+            }
         }
     }
 
@@ -479,6 +487,49 @@ impl SemanticIndex {
         async move { rx.await? }
     }
 
+    fn worktree_previously_indexed(&self, path: Arc<Path>) -> impl Future<Output = Result<bool>> {
+        let (tx, rx) = oneshot::channel();
+        self.db_update_tx
+            .try_send(DbOperation::WorktreePreviouslyIndexed { path, sender: tx })
+            .unwrap();
+        async move { rx.await? }
+    }
+
+    pub fn project_previously_indexed(
+        &mut self,
+        project: ModelHandle<Project>,
+        cx: &mut ModelContext<Self>,
+    ) -> Task<Result<bool>> {
+        let worktree_scans_complete = project
+            .read(cx)
+            .worktrees(cx)
+            .map(|worktree| {
+                let scan_complete = worktree.read(cx).as_local().unwrap().scan_complete();
+                async move {
+                    scan_complete.await;
+                }
+            })
+            .collect::<Vec<_>>();
+
+        let worktrees_indexed_previously = project
+            .read(cx)
+            .worktrees(cx)
+            .map(|worktree| self.worktree_previously_indexed(worktree.read(cx).abs_path()))
+            .collect::<Vec<_>>();
+
+        cx.spawn(|this, mut cx| async move {
+            futures::future::join_all(worktree_scans_complete).await;
+
+            let worktree_indexed_previously =
+                futures::future::join_all(worktrees_indexed_previously).await;
+
+            Ok(worktree_indexed_previously
+                .iter()
+                .filter(|worktree| worktree.is_ok())
+                .all(|v| v.as_ref().log_err().is_some_and(|v| v.to_owned())))
+        })
+    }
+
     pub fn index_project(
         &mut self,
         project: ModelHandle<Project>,

From 394a105639413d83c8486ff3ccac2530f6d7dcf2 Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 26 Jul 2023 10:03:30 -0400
Subject: [PATCH 33/34] fix warnings

---
 crates/semantic_index/src/semantic_index.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 7fee09dcff1d3cdae5b85ec01a11de632da71528..396a0a8607dc6d31d3ff13a6eb1c00a54ccf061f 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -517,7 +517,7 @@ impl SemanticIndex {
             .map(|worktree| self.worktree_previously_indexed(worktree.read(cx).abs_path()))
             .collect::<Vec<_>>();
 
-        cx.spawn(|this, mut cx| async move {
+        cx.spawn(|_, _cx| async move {
             futures::future::join_all(worktree_scans_complete).await;
 
             let worktree_indexed_previously =

From 0b61c93a25c23487c1bb52107d9fa5cec80618cf Mon Sep 17 00:00:00 2001
From: KCaverly <kyle@zed.dev>
Date: Wed, 26 Jul 2023 10:22:33 -0400
Subject: [PATCH 34/34] ensure semantic search is not enabled on stable

---
 crates/semantic_index/src/semantic_index.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index 396a0a8607dc6d31d3ff13a6eb1c00a54ccf061f..e4a307573aabc00336863b23a799138c52adc895 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -189,6 +189,7 @@ impl SemanticIndex {
 
     pub fn enabled(cx: &AppContext) -> bool {
         settings::get::<SemanticIndexSettings>(cx).enabled
+            && *RELEASE_CHANNEL != ReleaseChannel::Stable
     }
 
     async fn new(