diff --git a/Cargo.lock b/Cargo.lock index f08a13902ebae7fcf33c163a330193f2013d4b53..f75fd3648bf4fe1e13a896d5254aedda2e19b009 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3180,10 +3180,12 @@ dependencies = [ "tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json 0.19.0", + "tree-sitter-markdown", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-rust", "tree-sitter-typescript", + "unicase", "unindent", "util", ] diff --git a/crates/editor/src/hover_popover.rs b/crates/editor/src/hover_popover.rs index 6d003cae5dbaf42d54a85b56ea941d0851816538..f92b07da1dd49ef4de90e7bb9fc9a00ee4993fd4 100644 --- a/crates/editor/src/hover_popover.rs +++ b/crates/editor/src/hover_popover.rs @@ -331,7 +331,7 @@ impl InfoPopover { if let Some(language) = content .language .clone() - .and_then(|language| project.languages().get_language(&language)) + .and_then(|language| project.languages().language_for_name(&language)) { let runs = language .highlight_text(&content.text.as_str().into(), 0..content.text.len()); diff --git a/crates/feedback/src/feedback_editor.rs b/crates/feedback/src/feedback_editor.rs index 8185fbad9ac61244b1acad73baa9d2323222e974..ce0da1cf3c12bb6921608d900084e673c3b8aa97 100644 --- a/crates/feedback/src/feedback_editor.rs +++ b/crates/feedback/src/feedback_editor.rs @@ -123,7 +123,7 @@ impl FeedbackEditor { } fn new(project: ModelHandle, cx: &mut ViewContext) -> Self { - let markdown_language = project.read(cx).languages().get_language("Markdown"); + let markdown_language = project.read(cx).languages().language_for_name("Markdown"); let buffer = project .update(cx, |project, cx| { diff --git a/crates/language/Cargo.toml b/crates/language/Cargo.toml index ab6c687b7a8e1e00e3b44433ffeab7138de80668..8f1d3d39edd1b2ef34dc902f56bfa8ce00f78d8e 100644 --- a/crates/language/Cargo.toml +++ b/crates/language/Cargo.toml @@ -54,6 +54,7 @@ smol = "1.2" tree-sitter = "0.20" tree-sitter-rust = { version = "*", optional = true } tree-sitter-typescript = { version = "*", 
optional = true } +unicase = "2.6" [dev-dependencies] client = { path = "../client", features = ["test-support"] } @@ -66,12 +67,13 @@ util = { path = "../util", features = ["test-support"] } ctor = "0.1" env_logger = "0.9" rand = "0.8.3" +tree-sitter-embedded-template = "*" tree-sitter-html = "*" tree-sitter-javascript = "*" tree-sitter-json = "*" +tree-sitter-markdown = { git = "https://github.com/MDeiml/tree-sitter-markdown", rev = "330ecab87a3e3a7211ac69bbadc19eabecdb1cca" } tree-sitter-rust = "*" tree-sitter-python = "*" tree-sitter-typescript = "*" tree-sitter-ruby = "*" -tree-sitter-embedded-template = "*" unindent = "0.1.7" diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index 3c382b83b651741b385adc83a3b59449ff480bda..13b3a86822b0d7b61ff1414c7fe58fa87d026a52 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -797,6 +797,10 @@ impl Buffer { self.parsing_in_background } + pub fn contains_unknown_injections(&self) -> bool { + self.syntax_map.lock().contains_unknown_injections() + } + #[cfg(test)] pub fn set_sync_parse_timeout(&mut self, timeout: Duration) { self.sync_parse_timeout = timeout; @@ -825,7 +829,7 @@ impl Buffer { /// initiate an additional reparse recursively. To avoid concurrent parses /// for the same buffer, we only initiate a new parse if we are not already /// parsing in the background. 
- fn reparse(&mut self, cx: &mut ModelContext) { + pub fn reparse(&mut self, cx: &mut ModelContext) { if self.parsing_in_background { return; } @@ -842,13 +846,13 @@ impl Buffer { syntax_map.interpolate(&text); let language_registry = syntax_map.language_registry(); let mut syntax_snapshot = syntax_map.snapshot(); - let syntax_map_version = syntax_map.parsed_version(); drop(syntax_map); let parse_task = cx.background().spawn({ let language = language.clone(); + let language_registry = language_registry.clone(); async move { - syntax_snapshot.reparse(&syntax_map_version, &text, language_registry, language); + syntax_snapshot.reparse(&text, language_registry, language); syntax_snapshot } }); @@ -858,7 +862,7 @@ impl Buffer { .block_with_timeout(self.sync_parse_timeout, parse_task) { Ok(new_syntax_snapshot) => { - self.did_finish_parsing(new_syntax_snapshot, parsed_version, cx); + self.did_finish_parsing(new_syntax_snapshot, cx); return; } Err(parse_task) => { @@ -870,9 +874,15 @@ impl Buffer { this.language.as_ref().map_or(true, |current_language| { !Arc::ptr_eq(&language, current_language) }); - let parse_again = - this.version.changed_since(&parsed_version) || grammar_changed; - this.did_finish_parsing(new_syntax_map, parsed_version, cx); + let language_registry_changed = new_syntax_map + .contains_unknown_injections() + && language_registry.map_or(false, |registry| { + registry.version() != new_syntax_map.language_registry_version() + }); + let parse_again = language_registry_changed + || grammar_changed + || this.version.changed_since(&parsed_version); + this.did_finish_parsing(new_syntax_map, cx); this.parsing_in_background = false; if parse_again { this.reparse(cx); @@ -884,14 +894,9 @@ impl Buffer { } } - fn did_finish_parsing( - &mut self, - syntax_snapshot: SyntaxSnapshot, - version: clock::Global, - cx: &mut ModelContext, - ) { + fn did_finish_parsing(&mut self, syntax_snapshot: SyntaxSnapshot, cx: &mut ModelContext) { self.parse_count += 1; - 
self.syntax_map.lock().did_parse(syntax_snapshot, version); + self.syntax_map.lock().did_parse(syntax_snapshot); self.request_autoindent(cx); cx.emit(Event::Reparsed); cx.notify(); diff --git a/crates/language/src/language.rs b/crates/language/src/language.rs index 045e8dcd6f510772c2050a55cbbbb228823f40f3..6e1a120c819d830a713183d1666c13a34a6dcf2a 100644 --- a/crates/language/src/language.rs +++ b/crates/language/src/language.rs @@ -41,6 +41,7 @@ use std::{ use syntax_map::SyntaxSnapshot; use theme::{SyntaxTheme, Theme}; use tree_sitter::{self, Query}; +use unicase::UniCase; use util::ResultExt; #[cfg(any(test, feature = "test-support"))] @@ -421,6 +422,7 @@ pub struct LanguageRegistry { >, subscription: RwLock<(watch::Sender<()>, watch::Receiver<()>)>, theme: RwLock>>, + version: AtomicUsize, } impl LanguageRegistry { @@ -435,6 +437,7 @@ impl LanguageRegistry { lsp_binary_paths: Default::default(), subscription: RwLock::new(watch::channel()), theme: Default::default(), + version: Default::default(), } } @@ -448,6 +451,7 @@ impl LanguageRegistry { language.set_theme(&theme.editor.syntax); } self.languages.write().push(language); + self.version.fetch_add(1, SeqCst); *self.subscription.write().0.borrow_mut() = (); } @@ -455,6 +459,10 @@ impl LanguageRegistry { self.subscription.read().1.clone() } + pub fn version(&self) -> usize { + self.version.load(SeqCst) + } + pub fn set_theme(&self, theme: Arc) { *self.theme.write() = Some(theme.clone()); for language in self.languages.read().iter() { @@ -466,11 +474,27 @@ impl LanguageRegistry { self.language_server_download_dir = Some(path.into()); } - pub fn get_language(&self, name: &str) -> Option> { + pub fn language_for_name(&self, name: &str) -> Option> { + let name = UniCase::new(name); + self.languages + .read() + .iter() + .find(|language| UniCase::new(language.name()) == name) + .cloned() + } + + pub fn language_for_extension(&self, extension: &str) -> Option> { + let extension = UniCase::new(extension); 
self.languages .read() .iter() - .find(|language| language.name().to_lowercase() == name.to_lowercase()) + .find(|language| { + language + .config + .path_suffixes + .iter() + .any(|suffix| UniCase::new(suffix) == extension) + }) .cloned() } diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 8d6673085494263970baf3fdbb3a0f6c21a939d0..ada981ec264fabe41e7aa123fc160519ed256ac9 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -5,8 +5,9 @@ use parking_lot::Mutex; use std::{ borrow::Cow, cell::RefCell, - cmp::{Ordering, Reverse}, + cmp::{self, Ordering, Reverse}, collections::BinaryHeap, + iter, ops::{Deref, DerefMut, Range}, sync::Arc, }; @@ -26,8 +27,6 @@ lazy_static! { #[derive(Default)] pub struct SyntaxMap { - parsed_version: clock::Global, - interpolated_version: clock::Global, snapshot: SyntaxSnapshot, language_registry: Option>, } @@ -35,6 +34,9 @@ pub struct SyntaxMap { #[derive(Clone, Default)] pub struct SyntaxSnapshot { layers: SumTree, + parsed_version: clock::Global, + interpolated_version: clock::Global, + language_registry_version: usize, } #[derive(Default)] @@ -89,8 +91,34 @@ struct SyntaxMapMatchesLayer<'a> { struct SyntaxLayer { depth: usize, range: Range, - tree: tree_sitter::Tree, - language: Arc, + content: SyntaxLayerContent, +} + +#[derive(Clone)] +enum SyntaxLayerContent { + Parsed { + tree: tree_sitter::Tree, + language: Arc, + }, + Pending { + language_name: Arc, + }, +} + +impl SyntaxLayerContent { + fn language_id(&self) -> Option { + match self { + SyntaxLayerContent::Parsed { language, .. } => language.id(), + SyntaxLayerContent::Pending { .. } => None, + } + } + + fn tree(&self) -> Option<&Tree> { + match self { + SyntaxLayerContent::Parsed { tree, .. } => Some(tree), + SyntaxLayerContent::Pending { .. 
} => None, + } + } } #[derive(Debug)] @@ -107,6 +135,7 @@ struct SyntaxLayerSummary { range: Range, last_layer_range: Range, last_layer_language: Option, + contains_unknown_injections: bool, } #[derive(Clone, Debug)] @@ -130,12 +159,26 @@ struct SyntaxLayerPositionBeforeChange { struct ParseStep { depth: usize, - language: Arc, + language: ParseStepLanguage, range: Range, included_ranges: Vec, mode: ParseMode, } +enum ParseStepLanguage { + Loaded { language: Arc }, + Pending { name: Arc }, +} + +impl ParseStepLanguage { + fn id(&self) -> Option { + match self { + ParseStepLanguage::Loaded { language } => language.id(), + ParseStepLanguage::Pending { .. } => None, + } + } +} + enum ParseMode { Single, Combined { @@ -176,30 +219,17 @@ impl SyntaxMap { self.language_registry.clone() } - pub fn parsed_version(&self) -> clock::Global { - self.parsed_version.clone() - } - pub fn interpolate(&mut self, text: &BufferSnapshot) { - self.snapshot.interpolate(&self.interpolated_version, text); - self.interpolated_version = text.version.clone(); + self.snapshot.interpolate(text); } #[cfg(test)] pub fn reparse(&mut self, language: Arc, text: &BufferSnapshot) { - self.snapshot.reparse( - &self.parsed_version, - text, - self.language_registry.clone(), - language, - ); - self.parsed_version = text.version.clone(); - self.interpolated_version = text.version.clone(); + self.snapshot + .reparse(text, self.language_registry.clone(), language); } - pub fn did_parse(&mut self, snapshot: SyntaxSnapshot, version: clock::Global) { - self.interpolated_version = version.clone(); - self.parsed_version = version; + pub fn did_parse(&mut self, snapshot: SyntaxSnapshot) { self.snapshot = snapshot; } @@ -213,10 +243,12 @@ impl SyntaxSnapshot { self.layers.is_empty() } - pub fn interpolate(&mut self, from_version: &clock::Global, text: &BufferSnapshot) { + fn interpolate(&mut self, text: &BufferSnapshot) { let edits = text - .anchored_edits_since::<(usize, Point)>(&from_version) + 
.anchored_edits_since::<(usize, Point)>(&self.interpolated_version) .collect::>(); + self.interpolated_version = text.version().clone(); + if edits.is_empty() { return; } @@ -276,46 +308,48 @@ impl SyntaxSnapshot { } let mut layer = layer.clone(); - for (edit, edit_range) in &edits[first_edit_ix_for_depth..] { - // Ignore any edits that follow this layer. - if edit_range.start.cmp(&layer.range.end, text).is_ge() { - break; - } - - // Apply any edits that intersect this layer to the layer's syntax tree. - let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() { - tree_sitter::InputEdit { - start_byte: edit.new.start.0 - start_byte, - old_end_byte: edit.new.start.0 - start_byte - + (edit.old.end.0 - edit.old.start.0), - new_end_byte: edit.new.end.0 - start_byte, - start_position: (edit.new.start.1 - start_point).to_ts_point(), - old_end_position: (edit.new.start.1 - start_point - + (edit.old.end.1 - edit.old.start.1)) - .to_ts_point(), - new_end_position: (edit.new.end.1 - start_point).to_ts_point(), - } - } else { - let node = layer.tree.root_node(); - tree_sitter::InputEdit { - start_byte: 0, - old_end_byte: node.end_byte(), - new_end_byte: 0, - start_position: Default::default(), - old_end_position: node.end_position(), - new_end_position: Default::default(), + if let SyntaxLayerContent::Parsed { tree, .. } = &mut layer.content { + for (edit, edit_range) in &edits[first_edit_ix_for_depth..] { + // Ignore any edits that follow this layer. + if edit_range.start.cmp(&layer.range.end, text).is_ge() { + break; } - }; - layer.tree.edit(&tree_edit); - } + // Apply any edits that intersect this layer to the layer's syntax tree. 
+ let tree_edit = if edit_range.start.cmp(&layer.range.start, text).is_ge() { + tree_sitter::InputEdit { + start_byte: edit.new.start.0 - start_byte, + old_end_byte: edit.new.start.0 - start_byte + + (edit.old.end.0 - edit.old.start.0), + new_end_byte: edit.new.end.0 - start_byte, + start_position: (edit.new.start.1 - start_point).to_ts_point(), + old_end_position: (edit.new.start.1 - start_point + + (edit.old.end.1 - edit.old.start.1)) + .to_ts_point(), + new_end_position: (edit.new.end.1 - start_point).to_ts_point(), + } + } else { + let node = tree.root_node(); + tree_sitter::InputEdit { + start_byte: 0, + old_end_byte: node.end_byte(), + new_end_byte: 0, + start_position: Default::default(), + old_end_position: node.end_position(), + new_end_position: Default::default(), + } + }; - debug_assert!( - layer.tree.root_node().end_byte() <= text.len(), - "tree's size {}, is larger than text size {}", - layer.tree.root_node().end_byte(), - text.len(), - ); + tree.edit(&tree_edit); + } + + debug_assert!( + tree.root_node().end_byte() <= text.len(), + "tree's size {}, is larger than text size {}", + tree.root_node().end_byte(), + text.len(), + ); + } layers.push(layer, text); cursor.next(text); @@ -328,12 +362,53 @@ impl SyntaxSnapshot { pub fn reparse( &mut self, - from_version: &clock::Global, text: &BufferSnapshot, registry: Option>, root_language: Arc, ) { - let edits = text.edits_since::(from_version).collect::>(); + let edit_ranges = text + .edits_since::(&self.parsed_version) + .map(|edit| edit.new) + .collect::>(); + self.reparse_with_ranges(text, root_language.clone(), edit_ranges, registry.as_ref()); + + if let Some(registry) = registry { + if registry.version() != self.language_registry_version { + let mut resolved_injection_ranges = Vec::new(); + let mut cursor = self + .layers + .filter::<_, ()>(|summary| summary.contains_unknown_injections); + cursor.next(text); + while let Some(layer) = cursor.item() { + let SyntaxLayerContent::Pending { language_name } = 
&layer.content else { unreachable!() }; + if language_for_injection(language_name, &registry).is_some() { + resolved_injection_ranges.push(layer.range.to_offset(text)); + } + + cursor.next(text); + } + drop(cursor); + + if !resolved_injection_ranges.is_empty() { + self.reparse_with_ranges( + text, + root_language, + resolved_injection_ranges, + Some(&registry), + ); + } + self.language_registry_version = registry.version(); + } + } + } + + fn reparse_with_ranges( + &mut self, + text: &BufferSnapshot, + root_language: Arc, + invalidated_ranges: Vec>, + registry: Option<&Arc>, + ) { let max_depth = self.layers.summary().max_depth; let mut cursor = self.layers.cursor::(); cursor.next(&text); @@ -344,7 +419,9 @@ impl SyntaxSnapshot { let mut combined_injection_ranges = HashMap::default(); queue.push(ParseStep { depth: 0, - language: root_language.clone(), + language: ParseStepLanguage::Loaded { + language: root_language, + }, included_ranges: vec![tree_sitter::Range { start_byte: 0, end_byte: text.len(), @@ -415,12 +492,11 @@ impl SyntaxSnapshot { let (step_start_byte, step_start_point) = step.range.start.summary::<(usize, Point)>(text); let step_end_byte = step.range.end.to_offset(text); - let Some(grammar) = step.language.grammar.as_deref() else { continue }; let mut old_layer = cursor.item(); if let Some(layer) = old_layer { if layer.range.to_offset(text) == (step_start_byte..step_end_byte) - && layer.language.id() == step.language.id() + && layer.content.language_id() == step.language.id() { cursor.next(&text); } else { @@ -428,89 +504,105 @@ impl SyntaxSnapshot { } } - let tree; - let changed_ranges; - let mut included_ranges = step.included_ranges; - if let Some(old_layer) = old_layer { - if let ParseMode::Combined { - parent_layer_changed_ranges, - ..
- } = step.mode - { - included_ranges = splice_included_ranges( - old_layer.tree.included_ranges(), - &parent_layer_changed_ranges, - &included_ranges, - ); - } + let content = match step.language { + ParseStepLanguage::Loaded { language } => { + let Some(grammar) = language.grammar() else { continue }; + let tree; + let changed_ranges; + let mut included_ranges = step.included_ranges; + if let Some(SyntaxLayerContent::Parsed { tree: old_tree, .. }) = + old_layer.map(|layer| &layer.content) + { + if let ParseMode::Combined { + parent_layer_changed_ranges, + .. + } = step.mode + { + included_ranges = splice_included_ranges( + old_tree.included_ranges(), + &parent_layer_changed_ranges, + &included_ranges, + ); + } - tree = parse_text( - grammar, - text.as_rope(), - step_start_byte, - step_start_point, - included_ranges, - Some(old_layer.tree.clone()), - ); - changed_ranges = join_ranges( - edits.iter().map(|e| e.new.clone()).filter(|range| { - range.start <= step_end_byte && range.end >= step_start_byte - }), - old_layer - .tree - .changed_ranges(&tree) - .map(|r| step_start_byte + r.start_byte..step_start_byte + r.end_byte), - ); - } else { - tree = parse_text( - grammar, - text.as_rope(), - step_start_byte, - step_start_point, - included_ranges, - None, - ); - changed_ranges = vec![step_start_byte..step_end_byte]; - } + tree = parse_text( + grammar, + text.as_rope(), + step_start_byte, + step_start_point, + included_ranges, + Some(old_tree.clone()), + ); + changed_ranges = join_ranges( + invalidated_ranges.iter().cloned().filter(|range| { + range.start <= step_end_byte && range.end >= step_start_byte + }), + old_tree.changed_ranges(&tree).map(|r| { + step_start_byte + r.start_byte..step_start_byte + r.end_byte + }), + ); + } else { + tree = parse_text( + grammar, + text.as_rope(), + step_start_byte, + step_start_point, + included_ranges, + None, + ); + changed_ranges = vec![step_start_byte..step_end_byte]; + } + + if let (Some((config, registry)), false) = ( + 
grammar.injection_config.as_ref().zip(registry.as_ref()), + changed_ranges.is_empty(), + ) { + for range in &changed_ranges { + changed_regions.insert( + ChangedRegion { + depth: step.depth + 1, + range: text.anchor_before(range.start) + ..text.anchor_after(range.end), + }, + text, + ); + } + get_injections( + config, + text, + tree.root_node_with_offset( + step_start_byte, + step_start_point.to_ts_point(), + ), + registry, + step.depth + 1, + &changed_ranges, + &mut combined_injection_ranges, + &mut queue, + ); + } + + SyntaxLayerContent::Parsed { tree, language } + } + ParseStepLanguage::Pending { name } => SyntaxLayerContent::Pending { + language_name: name, + }, + }; layers.push( SyntaxLayer { depth: step.depth, range: step.range, - tree: tree.clone(), - language: step.language.clone(), + content, }, &text, ); - - if let (Some((config, registry)), false) = ( - grammar.injection_config.as_ref().zip(registry.as_ref()), - changed_ranges.is_empty(), - ) { - for range in &changed_ranges { - changed_regions.insert( - ChangedRegion { - depth: step.depth + 1, - range: text.anchor_before(range.start)..text.anchor_after(range.end), - }, - text, - ); - } - get_injections( - config, - text, - tree.root_node_with_offset(step_start_byte, step_start_point.to_ts_point()), - registry, - step.depth + 1, - &changed_ranges, - &mut combined_injection_ranges, - &mut queue, - ); - } } drop(cursor); self.layers = layers; + self.interpolated_version = text.version.clone(); + self.parsed_version = text.version.clone(); } pub fn single_tree_captures<'a>( @@ -585,23 +677,34 @@ impl SyntaxSnapshot { }); cursor.next(buffer); - std::iter::from_fn(move || { - if let Some(layer) = cursor.item() { - let info = SyntaxLayerInfo { - language: &layer.language, - depth: layer.depth, - node: layer.tree.root_node_with_offset( - layer.range.start.to_offset(buffer), - layer.range.start.to_point(buffer).to_ts_point(), - ), - }; - cursor.next(buffer); - Some(info) - } else { - None + iter::from_fn(move || 
{ + while let Some(layer) = cursor.item() { + if let SyntaxLayerContent::Parsed { tree, language } = &layer.content { + let info = SyntaxLayerInfo { + language, + depth: layer.depth, + node: tree.root_node_with_offset( + layer.range.start.to_offset(buffer), + layer.range.start.to_point(buffer).to_ts_point(), + ), + }; + cursor.next(buffer); + return Some(info); + } else { + cursor.next(buffer); + } } + None }) } + + pub fn contains_unknown_injections(&self) -> bool { + self.layers.summary().contains_unknown_injections + } + + pub fn language_registry_version(&self) -> usize { + self.language_registry_version + } } impl<'a> SyntaxMapCaptures<'a> { @@ -968,15 +1071,14 @@ fn get_injections( changed_ranges: &[Range], combined_injection_ranges: &mut HashMap, Vec>, queue: &mut BinaryHeap, -) -> bool { - let mut result = false; +) { let mut query_cursor = QueryCursorHandle::new(); let mut prev_match = None; combined_injection_ranges.clear(); for pattern in &config.patterns { if let (Some(language_name), true) = (pattern.language.as_ref(), pattern.combined) { - if let Some(language) = language_registry.get_language(language_name) { + if let Some(language) = language_for_injection(language_name, language_registry) { combined_injection_ranges.insert(language, Vec::new()); } } @@ -1004,21 +1106,26 @@ fn get_injections( prev_match = Some((mat.pattern_index, content_range.clone())); let combined = config.patterns[mat.pattern_index].combined; - let language_name = config.patterns[mat.pattern_index] - .language - .as_ref() - .map(|s| Cow::Borrowed(s.as_ref())) - .or_else(|| { - let ix = config.language_capture_ix?; - let node = mat.nodes_for_capture_index(ix).next()?; - Some(Cow::Owned(text.text_for_range(node.byte_range()).collect())) - }); + + let mut language_name = None; + let mut step_range = content_range.clone(); + if let Some(name) = config.patterns[mat.pattern_index].language.as_ref() { + language_name = Some(Cow::Borrowed(name.as_ref())) + } else if let 
Some(language_node) = config + .language_capture_ix + .and_then(|ix| mat.nodes_for_capture_index(ix).next()) + { + step_range.start = cmp::min(content_range.start, language_node.start_byte()); + step_range.end = cmp::max(content_range.end, language_node.end_byte()); + language_name = Some(Cow::Owned( + text.text_for_range(language_node.byte_range()).collect(), + )) + }; if let Some(language_name) = language_name { - if let Some(language) = language_registry.get_language(language_name.as_ref()) { - result = true; - let range = text.anchor_before(content_range.start) - ..text.anchor_after(content_range.end); + let language = language_for_injection(&language_name, language_registry); + let range = text.anchor_before(step_range.start)..text.anchor_after(step_range.end); + if let Some(language) = language { if combined { combined_injection_ranges .get_mut(&language.clone()) @@ -1027,12 +1134,22 @@ fn get_injections( } else { queue.push(ParseStep { depth, - language, + language: ParseStepLanguage::Loaded { language }, included_ranges: content_ranges, range, mode: ParseMode::Single, }); } + } else { + queue.push(ParseStep { + depth, + language: ParseStepLanguage::Pending { + name: language_name.into(), + }, + included_ranges: content_ranges, + range, + mode: ParseMode::Single, + }); } } } @@ -1043,7 +1160,7 @@ fn get_injections( let range = text.anchor_before(node.start_byte())..text.anchor_after(node.end_byte()); queue.push(ParseStep { depth, - language, + language: ParseStepLanguage::Loaded { language }, range, included_ranges, mode: ParseMode::Combined { @@ -1052,8 +1169,15 @@ fn get_injections( }, }) } +} - result +fn language_for_injection( + language_name: &str, + language_registry: &LanguageRegistry, +) -> Option> { + language_registry + .language_for_name(language_name) + .or_else(|| language_registry.language_for_extension(language_name)) } fn splice_included_ranges( @@ -1282,6 +1406,7 @@ impl Default for SyntaxLayerSummary { range: Anchor::MAX..Anchor::MIN, 
last_layer_range: Anchor::MIN..Anchor::MAX, last_layer_language: None, + contains_unknown_injections: false, } } } @@ -1303,6 +1428,7 @@ impl sum_tree::Summary for SyntaxLayerSummary { } self.last_layer_range = other.last_layer_range.clone(); self.last_layer_language = other.last_layer_language; + self.contains_unknown_injections |= other.contains_unknown_injections; } } @@ -1352,7 +1478,8 @@ impl sum_tree::Item for SyntaxLayer { max_depth: self.depth, range: self.range.clone(), last_layer_range: self.range.clone(), - last_layer_language: self.language.id(), + last_layer_language: self.content.language_id(), + contains_unknown_injections: matches!(self.content, SyntaxLayerContent::Pending { .. }), } } } @@ -1362,7 +1489,7 @@ impl std::fmt::Debug for SyntaxLayer { f.debug_struct("SyntaxLayer") .field("depth", &self.depth) .field("range", &self.range) - .field("tree", &self.tree) + .field("tree", &self.content.tree()) .finish() } } @@ -1593,6 +1720,84 @@ mod tests { ); } + #[gpui::test] + fn test_dynamic_language_injection() { + let registry = Arc::new(LanguageRegistry::test()); + let markdown = Arc::new(markdown_lang()); + registry.add(markdown.clone()); + registry.add(Arc::new(rust_lang())); + registry.add(Arc::new(ruby_lang())); + + let mut buffer = Buffer::new( + 0, + 0, + r#" + This is a code block: + + ```rs + fn foo() {} + ``` + "# + .unindent(), + ); + + let mut syntax_map = SyntaxMap::new(); + syntax_map.set_language_registry(registry.clone()); + syntax_map.reparse(markdown.clone(), &buffer); + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 0)..Point::new(3, 0), + &[ + "...(fenced_code_block (fenced_code_block_delimiter) (info_string (language)) (code_fence_content) (fenced_code_block_delimiter...", + "...(function_item name: (identifier) parameters: (parameters) body: (block)...", + ], + ); + + // Replace Rust with Ruby in code block. 
+ let macro_name_range = range_for_text(&buffer, "rs"); + buffer.edit([(macro_name_range, "ruby")]); + syntax_map.interpolate(&buffer); + syntax_map.reparse(markdown.clone(), &buffer); + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 0)..Point::new(3, 0), + &[ + "...(fenced_code_block (fenced_code_block_delimiter) (info_string (language)) (code_fence_content) (fenced_code_block_delimiter...", + "...(call method: (identifier) arguments: (argument_list (call method: (identifier) arguments: (argument_list) block: (block)...", + ], + ); + + // Replace Ruby with a language that hasn't been loaded yet. + let macro_name_range = range_for_text(&buffer, "ruby"); + buffer.edit([(macro_name_range, "html")]); + syntax_map.interpolate(&buffer); + syntax_map.reparse(markdown.clone(), &buffer); + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 0)..Point::new(3, 0), + &[ + "...(fenced_code_block (fenced_code_block_delimiter) (info_string (language)) (code_fence_content) (fenced_code_block_delimiter..." 
+ ], + ); + assert!(syntax_map.contains_unknown_injections()); + + registry.add(Arc::new(html_lang())); + syntax_map.reparse(markdown.clone(), &buffer); + assert_layers_for_range( + &syntax_map, + &buffer, + Point::new(3, 0)..Point::new(3, 0), + &[ + "...(fenced_code_block (fenced_code_block_delimiter) (info_string (language)) (code_fence_content) (fenced_code_block_delimiter...", + "(fragment (text))", + ], + ); + assert!(!syntax_map.contains_unknown_injections()); + } + #[gpui::test] fn test_typing_multiple_new_injections() { let (buffer, syntax_map) = test_edit_sequence( @@ -2157,16 +2362,14 @@ mod tests { .zip(new_syntax_map.layers.iter()) { assert_eq!(old_layer.range, new_layer.range); + let Some(old_tree) = old_layer.content.tree() else { continue }; + let Some(new_tree) = new_layer.content.tree() else { continue }; let old_start_byte = old_layer.range.start.to_offset(old_buffer); let new_start_byte = new_layer.range.start.to_offset(new_buffer); let old_start_point = old_layer.range.start.to_point(old_buffer).to_ts_point(); let new_start_point = new_layer.range.start.to_point(new_buffer).to_ts_point(); - let old_node = old_layer - .tree - .root_node_with_offset(old_start_byte, old_start_point); - let new_node = new_layer - .tree - .root_node_with_offset(new_start_byte, new_start_point); + let old_node = old_tree.root_node_with_offset(old_start_byte, old_start_point); + let new_node = new_tree.root_node_with_offset(new_start_byte, new_start_point); check_node_edits( old_layer.depth, &old_layer.range, @@ -2254,7 +2457,8 @@ mod tests { registry.add(Arc::new(ruby_lang())); registry.add(Arc::new(html_lang())); registry.add(Arc::new(erb_lang())); - let language = registry.get_language(language_name).unwrap(); + registry.add(Arc::new(markdown_lang())); + let language = registry.language_for_name(language_name).unwrap(); let mut buffer = Buffer::new(0, 0, Default::default()); let mut mutated_syntax_map = SyntaxMap::new(); @@ -2392,6 +2596,26 @@ mod tests { .unwrap() 
} + fn markdown_lang() -> Language { + Language::new( + LanguageConfig { + name: "Markdown".into(), + path_suffixes: vec!["md".into()], + ..Default::default() + }, + Some(tree_sitter_markdown::language()), + ) + .with_injection_query( + r#" + (fenced_code_block + (info_string + (language) @language) + (code_fence_content) @content) + "#, + ) + .unwrap() + } + fn range_for_text(buffer: &Buffer, text: &str) -> Range { let start = buffer.as_rope().to_string().find(text).unwrap(); start..start + text.len() diff --git a/crates/project/src/project.rs b/crates/project/src/project.rs index 995a6514c5d3d8472b295f8d30769989177ef76e..f324865b5cd291aac25b330940475893266c7b26 100644 --- a/crates/project/src/project.rs +++ b/crates/project/src/project.rs @@ -1765,10 +1765,14 @@ impl Project { if let Some(project) = project.upgrade(&cx) { project.update(&mut cx, |project, cx| { let mut buffers_without_language = Vec::new(); + let mut buffers_with_unknown_injections = Vec::new(); for buffer in project.opened_buffers.values() { - if let Some(buffer) = buffer.upgrade(cx) { - if buffer.read(cx).language().is_none() { - buffers_without_language.push(buffer); + if let Some(handle) = buffer.upgrade(cx) { + let buffer = &handle.read(cx); + if buffer.language().is_none() { + buffers_without_language.push(handle); + } else if buffer.contains_unknown_injections() { + buffers_with_unknown_injections.push(handle); } } } @@ -1777,6 +1781,10 @@ impl Project { project.assign_language_to_buffer(&buffer, cx); project.register_buffer_with_language_server(&buffer, cx); } + + for buffer in buffers_with_unknown_injections { + buffer.update(cx, |buffer, cx| buffer.reparse(cx)); + } }); } } diff --git a/crates/zed/src/languages/markdown/injections.scm b/crates/zed/src/languages/markdown/injections.scm new file mode 100644 index 0000000000000000000000000000000000000000..577054b4040d174954e365371842c459e1dfc1ba --- /dev/null +++ b/crates/zed/src/languages/markdown/injections.scm @@ -0,0 +1,4 @@ 
+(fenced_code_block + (info_string + (language) @language) + (code_fence_content) @content) diff --git a/crates/zed/src/zed.rs b/crates/zed/src/zed.rs index 78d10670f7347e6d0055277500198424587c6710..c9f8b2d408441aee28a63b1b6242e62aafb61dcc 100644 --- a/crates/zed/src/zed.rs +++ b/crates/zed/src/zed.rs @@ -234,7 +234,11 @@ pub fn init(app_state: &Arc, cx: &mut gpui::MutableAppContext) { |workspace: &mut Workspace, _: &DebugElements, cx: &mut ViewContext| { let content = to_string_pretty(&cx.debug_elements()).unwrap(); let project = workspace.project().clone(); - let json_language = project.read(cx).languages().get_language("JSON").unwrap(); + let json_language = project + .read(cx) + .languages() + .language_for_name("JSON") + .unwrap(); if project.read(cx).is_remote() { cx.propagate_action(); } else if let Some(buffer) = project @@ -597,7 +601,7 @@ fn open_telemetry_log_file( .update(cx, |project, cx| project.create_buffer("", None, cx)) .expect("creating buffers on a local workspace always succeeds"); buffer.update(cx, |buffer, cx| { - buffer.set_language(app_state.languages.get_language("JSON"), cx); + buffer.set_language(app_state.languages.language_for_name("JSON"), cx); buffer.edit( [( 0..0, @@ -646,7 +650,7 @@ fn open_bundled_file( .unwrap_or_else(|| Cow::Borrowed(b"File not found")); let text = str::from_utf8(text.as_ref()).unwrap(); project - .create_buffer(text, project.languages().get_language(language), cx) + .create_buffer(text, project.languages().language_for_name(language), cx) .expect("creating buffers on a local workspace always succeeds") }); let buffer =