From cb610f37f2dd0f6d449b1aa076e83a8fae808828 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Fri, 20 Jan 2023 10:56:20 +0100 Subject: [PATCH] WIP: Search language injections also by file extension There are still a few things left: 1. Add test to verify we can successfully locate a language by its extension 2. Add test to reproduce bug where changing the fenced code block language won't reparse the block with the new language 3. Reparse injections for which we couldn't find a language when the language registry changes. 4. Check why the markdown grammar considers the trailing triple backtick as `(code_block_content)`, as opposed to being part of the outer markdown. --- Cargo.lock | 1 + crates/language/Cargo.toml | 3 ++- crates/language/src/language.rs | 15 +++++++++++++++ crates/language/src/syntax_map.rs | 27 +++++++++++++++++++++++++-- 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 174965952fde76c8d260bbf9f363922609f38b9c..abdf8b8a552cfd594bed8579c6afb6b44ef98e1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3153,6 +3153,7 @@ dependencies = [ "tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json 0.19.0", + "tree-sitter-markdown", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index 62de0c4e44f5836489e75c83fbbdde1f7efe1fec..64db58c8470722fd365dc25f7656f2b152832e0c 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -66,12 +66,13 @@ util = { path = "../util", features = ["test-support"] } ctor = "0.1" env_logger = "0.9" rand = "0.8.3" +tree-sitter-embedded-template = "*" tree-sitter-html = "*" tree-sitter-javascript = "*" tree-sitter-json = "*" +tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } tree-sitter-rust = "*" tree-sitter-python = "*" tree-sitter-typescript = "*" tree-sitter-ruby = "*" 
-tree-sitter-embedded-template = "*" unindent = "0.1.7" diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 046076a48ec51244a5fc310f5cf0a7e9ea8e98cc..1ddd3e3939956dac4bbecf8362b24648171f42ef 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -476,6 +476,21 @@ impl LanguageRegistry { .cloned() } + pub fn language_for_extension(&self, extension: &str) -> Option<Arc<Language>> { + let extension = UniCase::new(extension); + self.languages + .read() + .iter() + .find(|language| { + language + .config + .path_suffixes + .iter() + .any(|suffix| UniCase::new(suffix) == extension) + }) + .cloned() + } + pub fn to_vec(&self) -> Vec<Arc<Language>> { self.languages.read().iter().cloned().collect() } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 9ef4d82fd1ff13ef0747f96322a280101f95d519..9707cf5471ea94fe56b18174603f290c0a1119d6 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -1015,8 +1015,10 @@ fn get_injections( }); if let Some(language_name) = language_name { - if let Some(language) = language_registry.language_for_name(language_name.as_ref()) - { + let language = language_registry + .language_for_name(&language_name) + .or_else(|| language_registry.language_for_extension(&language_name)); + if let Some(language) = language { result = true; let range = text.anchor_before(content_range.start) ..text.anchor_after(content_range.end); @@ -2255,6 +2257,7 @@ mod tests { registry.add(Arc::new(ruby_lang())); registry.add(Arc::new(html_lang())); registry.add(Arc::new(erb_lang())); + registry.add(Arc::new(markdown_lang())); let language = registry.language_for_name(language_name).unwrap(); let mut buffer = Buffer::new(0, 0, Default::default()); @@ -2393,6 +2396,26 @@ mod tests { .unwrap() } + fn markdown_lang() -> Language { + Language::new( + LanguageConfig { + name: "Markdown".into(), + path_suffixes: vec!["md".into()], + ..Default::default() + }, 
Some(tree_sitter_markdown::language()), + ) + .with_injection_query( + r#" + (fenced_code_block + (info_string + (language) @language) + (code_fence_content) @content) + "#, + ) + .unwrap() + } + fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> { let start = buffer.as_rope().to_string().find(text).unwrap(); start..start + text.len()