diff --git a/crates/semantic_index/src/parsing.rs b/crates/semantic_index/src/parsing.rs
index 643db8c79827819a2e274aa2eb82d37cfb6bc4a2..cef23862c563f470000306fde5ac32f95a50a458 100644
--- a/crates/semantic_index/src/parsing.rs
+++ b/crates/semantic_index/src/parsing.rs
@@ -21,6 +21,9 @@
 const CODE_CONTEXT_TEMPLATE: &str =
     "The below code snippet is from file ''\n\n```\n\n```";
 const ENTIRE_FILE_TEMPLATE: &str =
     "The below snippet is from file ''\n\n```\n\n```";
+/// Template for Markdown documents: `<path>` is replaced with the file's relative path
+/// and `<item>` with the file contents (no code fence is added around the contents).
+const MARKDOWN_CONTEXT_TEMPLATE: &str = "The below file contents is from file '<path>'\n\n<item>";
 pub const PARSEABLE_ENTIRE_FILE_TYPES: &[&str] =
     &["TOML", "YAML", "CSS", "HEEX", "ERB", "SVELTE", "HTML"];
@@ -70,6 +73,26 @@ impl CodeContextRetriever {
         }])
     }
 
+    /// Builds the single [`Document`] that represents an entire Markdown file.
+    ///
+    /// The document text is `MARKDOWN_CONTEXT_TEMPLATE` with `<path>` replaced by
+    /// `relative_path` (converted lossily to UTF-8) and `<item>` replaced by `content`.
+    /// Its range spans all of `content`, its embedding starts empty, and it is named
+    /// `"Markdown"`.
+    fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Document>> {
+        // Fill in the path first so a literal `<path>` inside the file body is left alone.
+        let document_span = MARKDOWN_CONTEXT_TEMPLATE
+            .replace("<path>", relative_path.to_string_lossy().as_ref())
+            .replace("<item>", content);
+
+        Ok(vec![Document {
+            range: 0..content.len(),
+            content: document_span,
+            embedding: Vec::new(),
+            name: "Markdown".to_string(),
+        }])
+    }
+
     fn get_matches_in_file(
         &mut self,
         content: &str,
@@ -136,6 +159,9 @@ impl CodeContextRetriever {
 
         if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
             return self.parse_entire_file(relative_path, language_name, &content);
+        } else if &*language_name == "Markdown" {
+            // Markdown files are embedded as a single whole-file document.
+            return self.parse_markdown_file(relative_path, &content);
         }
 
         let mut documents = self.parse_file(content, language)?;
diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs
index bd114de216a0a30b3271b56c2b627439a7e70a0e..23c75f40149cc4af2fb981844a6330dec3a638bb 100644
--- a/crates/semantic_index/src/semantic_index.rs
+++ b/crates/semantic_index/src/semantic_index.rs
@@ -613,6 +613,7 @@ impl SemanticIndex {
                             .await
                         {
                             if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+                                && language.name().as_ref() != "Markdown"
                                 && language
                                     .grammar()
                                     .and_then(|grammar| grammar.embedding_config.as_ref())