add support for markdown files to semantic search

KCaverly created

Change summary

crates/semantic_index/src/parsing.rs        | 16 ++++++++++++++++
crates/semantic_index/src/semantic_index.rs |  1 +
2 files changed, 17 insertions(+)

Detailed changes

crates/semantic_index/src/parsing.rs 🔗

@@ -21,6 +21,7 @@ const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
 const ENTIRE_FILE_TEMPLATE: &str =
     "The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
+const MARKDOWN_CONTEXT_TEMPLATE: &str = "The below file contents is from file '<path>'\n\n<item>"; // Markdown prompt: no code fence, unlike the templates above; placeholders are filled in parse_markdown_file
 pub const PARSEABLE_ENTIRE_FILE_TYPES: &[&str] =
     &["TOML", "YAML", "CSS", "HEEX", "ERB", "SVELTE", "HTML"];
 
@@ -70,6 +71,19 @@ impl CodeContextRetriever {
         }])
     }
 
+    fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Document>> {
+        let document_span = MARKDOWN_CONTEXT_TEMPLATE // build one prompt text covering the entire file
+            .replace("<path>", relative_path.to_string_lossy().as_ref()) // path goes in first, so a literal "<path>" inside `content` is never rewritten
+            .replace("<item>", &content); // NOTE(review): `content` is already `&str`; the extra `&` looks unnecessary
+
+        Ok(vec![Document { // always exactly one Document per Markdown file
+            range: 0..content.len(), // byte range of the raw file contents, not of the templated text
+            content: document_span,
+            embedding: Vec::new(), // left empty here; nothing in this function computes it
+            name: "Markdown".to_string(), // fixed label, same for every Markdown file
+        }])
+    }
+
     fn get_matches_in_file(
         &mut self,
         content: &str,
@@ -136,6 +150,8 @@ impl CodeContextRetriever {
 
         if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
             return self.parse_entire_file(relative_path, language_name, &content);
+        } else if &language_name.to_string() == &"Markdown".to_string() {
+            return self.parse_markdown_file(relative_path, &content);
         }
 
         let mut documents = self.parse_file(content, language)?;

crates/semantic_index/src/semantic_index.rs 🔗

@@ -613,6 +613,7 @@ impl SemanticIndex {
                                 .await
                             {
                                 if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+                                    && &language.name().as_ref() != &"Markdown"
                                     && language
                                         .grammar()
                                         .and_then(|grammar| grammar.embedding_config.as_ref())