Added Go parsing for semantic search, and added preceding comments on Go and Rust.

KCaverly, Alex, and maxbrunsfeld created

Co-authored-by: Alex <alexviscreanu@gmail.com>
Co-authored-by: maxbrunsfeld <max@zed.dev>

Change summary

crates/language/src/language.rs             |  4 -
crates/vector_store/src/parsing.rs          | 23 +++-----
crates/zed/src/languages/go/embedding.scm   | 24 +++++++++
crates/zed/src/languages/rust/embedding.scm | 58 ++++++++--------------
4 files changed, 55 insertions(+), 54 deletions(-)

Detailed changes

crates/language/src/language.rs 🔗

@@ -525,7 +525,6 @@ pub struct EmbeddingConfig {
     pub item_capture_ix: u32,
     pub name_capture_ix: u32,
     pub context_capture_ix: Option<u32>,
-    pub extra_context_capture_ix: Option<u32>,
 }
 
 struct InjectionConfig {
@@ -1246,14 +1245,12 @@ impl Language {
         let mut item_capture_ix = None;
         let mut name_capture_ix = None;
         let mut context_capture_ix = None;
-        let mut extra_context_capture_ix = None;
         get_capture_indices(
             &query,
             &mut [
                 ("item", &mut item_capture_ix),
                 ("name", &mut name_capture_ix),
                 ("context", &mut context_capture_ix),
-                ("context.extra", &mut extra_context_capture_ix),
             ],
         );
         if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
@@ -1262,7 +1259,6 @@ impl Language {
                 item_capture_ix,
                 name_capture_ix,
                 context_capture_ix,
-                extra_context_capture_ix,
             });
         }
         Ok(self)

crates/vector_store/src/parsing.rs 🔗

@@ -53,7 +53,7 @@ impl CodeContextRetriever {
             .ok_or_else(|| anyhow!("parsing failed"))?;
 
         let mut documents = Vec::new();
-        let mut context_spans = Vec::new();
+        let mut document_texts = Vec::new();
 
         // Iterate through query matches
         for mat in self.cursor.matches(
@@ -61,11 +61,10 @@ impl CodeContextRetriever {
             tree.root_node(),
             content.as_bytes(),
         ) {
-            // log::info!("-----MATCH-----");
-
             let mut name: Vec<&str> = vec![];
             let mut item: Option<&str> = None;
             let mut offset: Option<usize> = None;
+            let mut context_spans: Vec<&str> = vec![];
             for capture in mat.captures {
                 if capture.index == embedding_config.item_capture_ix {
                     offset = Some(capture.node.byte_range().start);
@@ -79,25 +78,21 @@ impl CodeContextRetriever {
                 if let Some(context_capture_ix) = embedding_config.context_capture_ix {
                     if capture.index == context_capture_ix {
                         if let Some(context) = content.get(capture.node.byte_range()) {
-                            name.push(context);
+                            context_spans.push(context);
                         }
                     }
                 }
             }
 
             if item.is_some() && offset.is_some() && name.len() > 0 {
-                let context_span = CODE_CONTEXT_TEMPLATE
+                let item = format!("{}\n{}", context_spans.join("\n"), item.unwrap());
+
+                let document_text = CODE_CONTEXT_TEMPLATE
                     .replace("<path>", pending_file.relative_path.to_str().unwrap())
                     .replace("<language>", &pending_file.language.name().to_lowercase())
-                    .replace("<item>", item.unwrap());
-
-                let mut truncated_span = context_span.clone();
-                truncated_span.truncate(100);
-
-                // log::info!("Name:       {:?}", name);
-                // log::info!("Span:       {:?}", truncated_span);
+                    .replace("<item>", item.as_str());
 
-                context_spans.push(context_span);
+                document_texts.push(document_text);
                 documents.push(Document {
                     name: name.join(" "),
                     offset: offset.unwrap(),
@@ -112,7 +107,7 @@ impl CodeContextRetriever {
                 mtime: pending_file.modified_time,
                 documents,
             },
-            context_spans,
+            document_texts,
         ));
     }
 }

crates/zed/src/languages/go/embedding.scm 🔗

@@ -0,0 +1,24 @@
+(
+    (comment)* @context
+    .
+    (type_declaration
+        (type_spec
+            name: (_) @name)
+    ) @item
+)
+
+(
+    (comment)* @context
+    .
+    (function_declaration
+        name: (_) @name
+    ) @item
+)
+
+(
+    (comment)* @context
+    .
+    (method_declaration
+        name: (_) @name
+    ) @item
+)

crates/zed/src/languages/rust/embedding.scm 🔗

@@ -1,36 +1,22 @@
-(struct_item
-    (visibility_modifier)? @context
-    "struct" @context
-    name: (_) @name) @item
-
-(enum_item
-    (visibility_modifier)? @context
-    "enum" @context
-    name: (_) @name) @item
-
-(impl_item
-    "impl" @context
-    trait: (_)? @name
-    "for"? @context
-    type: (_) @name) @item
-
-(trait_item
-    (visibility_modifier)? @context
-    "trait" @context
-    name: (_) @name) @item
-
-(function_item
-    (visibility_modifier)? @context
-    (function_modifiers)? @context
-    "fn" @context
-    name: (_) @name) @item
-
-(function_signature_item
-    (visibility_modifier)? @context
-    (function_modifiers)? @context
-    "fn" @context
-    name: (_) @name) @item
-
-(macro_definition
-    . "macro_rules!" @context
-    name: (_) @name) @item
+(
+    (line_comment)* @context
+    .
+    [
+        (enum_item
+            name: (_) @name) @item
+        (struct_item
+            name: (_) @name) @item
+        (impl_item
+            trait: (_)? @name
+            "for"? @name
+            type: (_) @name) @item
+        (trait_item
+            name: (_) @name) @item
+        (function_item
+            name: (_) @name) @item
+        (macro_definition
+            name: (_) @name) @item
+        (function_signature_item
+            name: (_) @name) @item
+    ]
+)