Cargo.lock 🔗
@@ -3153,6 +3153,7 @@ dependencies = [
"tree-sitter-html",
"tree-sitter-javascript",
"tree-sitter-json 0.19.0",
+ "tree-sitter-markdown",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
Antonio Scandurra created
There are still a few things left:
1. Add a test to verify that we can successfully locate a language by its extension.
2. Add a test to reproduce the bug where changing the fenced code block's language
won't reparse the block with the new language.
3. When the language registry changes, reparse the injections for which we
couldn't previously find a language.
4. Check why the markdown grammar considers the trailing triple backtick to be part of
`(code_fence_content)`, as opposed to being part of the outer markdown.
Cargo.lock | 1 +
crates/language/Cargo.toml | 3 ++-
crates/language/src/language.rs | 15 +++++++++++++++
crates/language/src/syntax_map.rs | 27 +++++++++++++++++++++++++--
4 files changed, 43 insertions(+), 3 deletions(-)
@@ -3153,6 +3153,7 @@ dependencies = [
"tree-sitter-html",
"tree-sitter-javascript",
"tree-sitter-json 0.19.0",
+ "tree-sitter-markdown",
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
@@ -66,12 +66,13 @@ util = { path = "../util", features = ["test-support"] }
ctor = "0.1"
env_logger = "0.9"
rand = "0.8.3"
+tree-sitter-embedded-template = "*"
tree-sitter-html = "*"
tree-sitter-javascript = "*"
tree-sitter-json = "*"
+tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" }
tree-sitter-rust = "*"
tree-sitter-python = "*"
tree-sitter-typescript = "*"
tree-sitter-ruby = "*"
-tree-sitter-embedded-template = "*"
unindent = "0.1.7"
@@ -476,6 +476,21 @@ impl LanguageRegistry {
.cloned()
}
+ pub fn language_for_extension(&self, extension: &str) -> Option<Arc<Language>> { // Looks up a language by one of its configured path suffixes; yields None when no language matches.
+ let extension = UniCase::new(extension); // wrapped once up front; NOTE(review): UniCase presumably makes the `==` below case-insensitive — confirm
+ self.languages
+ .read() // NOTE(review): presumably acquires a read guard over the language list — confirm
+ .iter()
+ .find(|language| { // stops at the first language, in iteration order, that has a matching suffix
+ language
+ .config
+ .path_suffixes
+ .iter()
+ .any(|suffix| UniCase::new(suffix) == extension) // true as soon as one configured suffix equals the requested extension
+ })
+ .cloned() // turn the borrowed match into an owned Arc<Language> for the caller
+ }
+
pub fn to_vec(&self) -> Vec<Arc<Language>> { // Returns a new Vec holding a clone of every Arc<Language> currently in `self.languages`.
self.languages.read().iter().cloned().collect()
}
@@ -1015,8 +1015,10 @@ fn get_injections(
});
if let Some(language_name) = language_name {
- if let Some(language) = language_registry.language_for_name(language_name.as_ref())
- {
+ let language = language_registry
+ .language_for_name(&language_name)
+ .or_else(|| language_registry.language_for_extension(&language_name));
+ if let Some(language) = language {
result = true;
let range = text.anchor_before(content_range.start)
..text.anchor_after(content_range.end);
@@ -2255,6 +2257,7 @@ mod tests {
registry.add(Arc::new(ruby_lang()));
registry.add(Arc::new(html_lang()));
registry.add(Arc::new(erb_lang()));
+ registry.add(Arc::new(markdown_lang()));
let language = registry.language_for_name(language_name).unwrap();
let mut buffer = Buffer::new(0, 0, Default::default());
@@ -2393,6 +2396,26 @@ mod tests {
.unwrap()
}
+ fn markdown_lang() -> Language { // Test fixture: builds a "Markdown" Language using the tree_sitter_markdown grammar plus an injection query.
+ Language::new(
+ LanguageConfig {
+ name: "Markdown".into(),
+ path_suffixes: vec!["md".into()], // "md" is the only suffix configured for this fixture
+ ..Default::default()
+ },
+ Some(tree_sitter_markdown::language()),
+ )
+ .with_injection_query( // query below captures a fenced block's info-string language as @language and its body as @content
+ r#"
+ (fenced_code_block
+ (info_string
+ (language) @language)
+ (code_fence_content) @content)
+ "#,
+ )
+ .unwrap() // panics if with_injection_query did not succeed
+ }
+
fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
let start = buffer.as_rope().to_string().find(text).unwrap();
start..start + text.len()