Add a test demonstrating ERB language loading bug (#32278)

Max Brunsfeld created

Fixes https://github.com/zed-industries/zed/issues/12174

Release Notes:

- Fixed a bug where ERB files were not parsed correctly when the
languages were initially loaded.

Change summary

Cargo.toml                                         |  2 
crates/language/src/language_registry.rs           |  4 
crates/language/src/syntax_map.rs                  | 69 +++++++++--
crates/language/src/syntax_map/syntax_map_tests.rs | 88 +++++++++++++++
4 files changed, 145 insertions(+), 18 deletions(-)

Detailed changes

Cargo.toml 🔗

@@ -698,6 +698,8 @@ codegen-units = 16
 [profile.dev.package]
 taffy = { opt-level = 3 }
 cranelift-codegen = { opt-level = 3 }
+cranelift-codegen-meta = { opt-level = 3 }
+cranelift-codegen-shared = { opt-level = 3 }
 resvg = { opt-level = 3 }
 rustybuzz = { opt-level = 3 }
 ttf-parser = { opt-level = 3 }

crates/language/src/language_registry.rs 🔗

@@ -39,7 +39,7 @@ use util::{ResultExt, maybe, post_inc};
 #[derive(
     Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize, JsonSchema,
 )]
-pub struct LanguageName(SharedString);
+pub struct LanguageName(pub SharedString);
 
 impl LanguageName {
     pub fn new(s: &str) -> Self {
@@ -1000,6 +1000,7 @@ impl LanguageRegistry {
                     txs.push(tx);
                 }
                 AvailableGrammar::Unloaded(wasm_path) => {
+                    log::trace!("start loading grammar {name:?}");
                     let this = self.clone();
                     let wasm_path = wasm_path.clone();
                     *grammar = AvailableGrammar::Loading(wasm_path.clone(), vec![tx]);
@@ -1025,6 +1026,7 @@ impl LanguageRegistry {
                                 Err(error) => AvailableGrammar::LoadFailed(error.clone()),
                             };
 
+                            log::trace!("finish loading grammar {name:?}");
                             let old_value = this.state.write().grammars.insert(name, value);
                             if let Some(AvailableGrammar::Loading(_, txs)) = old_value {
                                 for tx in txs {

crates/language/src/syntax_map.rs 🔗

@@ -7,6 +7,7 @@ use crate::{
 use anyhow::Context as _;
 use collections::HashMap;
 use futures::FutureExt;
+use gpui::SharedString;
 use std::{
     borrow::Cow,
     cmp::{self, Ordering, Reverse},
@@ -183,6 +184,13 @@ enum ParseStepLanguage {
 }
 
 impl ParseStepLanguage {
+    fn name(&self) -> SharedString {
+        match self {
+            ParseStepLanguage::Loaded { language } => language.name().0,
+            ParseStepLanguage::Pending { name } => name.into(),
+        }
+    }
+
     fn id(&self) -> Option<LanguageId> {
         match self {
             ParseStepLanguage::Loaded { language } => Some(language.id),
@@ -415,7 +423,9 @@ impl SyntaxSnapshot {
                         .and_then(|language| language.ok())
                         .is_some()
                     {
-                        resolved_injection_ranges.push(layer.range.to_offset(text));
+                        let range = layer.range.to_offset(text);
+                        log::trace!("reparse range {range:?} for language {language_name:?}");
+                        resolved_injection_ranges.push(range);
                     }
 
                     cursor.next(text);
@@ -442,7 +452,10 @@ impl SyntaxSnapshot {
         invalidated_ranges: Vec<Range<usize>>,
         registry: Option<&Arc<LanguageRegistry>>,
     ) {
-        log::trace!("reparse. invalidated ranges:{:?}", invalidated_ranges);
+        log::trace!(
+            "reparse. invalidated ranges:{:?}",
+            LogOffsetRanges(&invalidated_ranges, text),
+        );
 
         let max_depth = self.layers.summary().max_depth;
         let mut cursor = self.layers.cursor::<SyntaxLayerSummary>(text);
@@ -470,6 +483,13 @@ impl SyntaxSnapshot {
         loop {
             let step = queue.pop();
             let position = if let Some(step) = &step {
+                log::trace!(
+                    "parse step depth:{}, range:{:?}, language:{} ({:?})",
+                    step.depth,
+                    LogAnchorRange(&step.range, text),
+                    step.language.name(),
+                    step.language.id(),
+                );
                 SyntaxLayerPosition {
                     depth: step.depth,
                     range: step.range.clone(),
@@ -568,13 +588,13 @@ impl SyntaxSnapshot {
                             .to_ts_point();
                     }
 
-                    if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_start)) =
-                        old_layer.map(|layer| (&layer.content, layer.range.start))
+                    if let Some((SyntaxLayerContent::Parsed { tree: old_tree, .. }, layer_range)) =
+                        old_layer.map(|layer| (&layer.content, layer.range.clone()))
                     {
                         log::trace!(
-                            "existing layer. language:{}, start:{:?}, ranges:{:?}",
+                            "existing layer. language:{}, range:{:?}, included_ranges:{:?}",
                             language.name(),
-                            LogPoint(layer_start.to_point(text)),
+                            LogAnchorRange(&layer_range, text),
                             LogIncludedRanges(&old_tree.included_ranges())
                         );
 
@@ -613,7 +633,7 @@ impl SyntaxSnapshot {
                         }
 
                         log::trace!(
-                            "update layer. language:{}, start:{:?}, included_ranges:{:?}",
+                            "update layer. language:{}, range:{:?}, included_ranges:{:?}",
                             language.name(),
                             LogAnchorRange(&step.range, text),
                             LogIncludedRanges(&included_ranges),
@@ -761,28 +781,36 @@ impl SyntaxSnapshot {
     #[cfg(debug_assertions)]
     fn check_invariants(&self, text: &BufferSnapshot) {
         let mut max_depth = 0;
-        let mut prev_range: Option<Range<Anchor>> = None;
+        let mut prev_layer: Option<(Range<Anchor>, Option<LanguageId>)> = None;
         for layer in self.layers.iter() {
             match Ord::cmp(&layer.depth, &max_depth) {
                 Ordering::Less => {
                     panic!("layers out of order")
                 }
                 Ordering::Equal => {
-                    if let Some(prev_range) = prev_range {
+                    if let Some((prev_range, prev_language_id)) = prev_layer {
                         match layer.range.start.cmp(&prev_range.start, text) {
                             Ordering::Less => panic!("layers out of order"),
-                            Ordering::Equal => {
-                                assert!(layer.range.end.cmp(&prev_range.end, text).is_ge())
-                            }
+                            Ordering::Equal => match layer.range.end.cmp(&prev_range.end, text) {
+                                Ordering::Less => panic!("layers out of order"),
+                                Ordering::Equal => {
+                                    if layer.content.language_id() < prev_language_id {
+                                        panic!("layers out of order")
+                                    }
+                                }
+                                Ordering::Greater => {}
+                            },
                             Ordering::Greater => {}
                         }
                     }
+                    prev_layer = Some((layer.range.clone(), layer.content.language_id()));
+                }
+                Ordering::Greater => {
+                    prev_layer = None;
                 }
-                Ordering::Greater => {}
             }
 
             max_depth = layer.depth;
-            prev_range = Some(layer.range.clone());
         }
     }
 
@@ -1642,7 +1670,7 @@ impl Ord for ParseStep {
         Ord::cmp(&other.depth, &self.depth)
             .then_with(|| Ord::cmp(&range_b.start, &range_a.start))
             .then_with(|| Ord::cmp(&range_a.end, &range_b.end))
-            .then_with(|| self.language.id().cmp(&other.language.id()))
+            .then_with(|| other.language.id().cmp(&self.language.id()))
     }
 }
 
@@ -1888,6 +1916,7 @@ impl ToTreeSitterPoint for Point {
 struct LogIncludedRanges<'a>(&'a [tree_sitter::Range]);
 struct LogPoint(Point);
 struct LogAnchorRange<'a>(&'a Range<Anchor>, &'a text::BufferSnapshot);
+struct LogOffsetRanges<'a>(&'a [Range<usize>], &'a text::BufferSnapshot);
 struct LogChangedRegions<'a>(&'a ChangeRegionSet, &'a text::BufferSnapshot);
 
 impl fmt::Debug for LogIncludedRanges<'_> {
@@ -1909,6 +1938,16 @@ impl fmt::Debug for LogAnchorRange<'_> {
     }
 }
 
+impl fmt::Debug for LogOffsetRanges<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_list()
+            .entries(self.0.iter().map(|range| {
+                LogPoint(range.start.to_point(self.1))..LogPoint(range.end.to_point(self.1))
+            }))
+            .finish()
+    }
+}
+
 impl fmt::Debug for LogChangedRegions<'_> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_list()

crates/language/src/syntax_map/syntax_map_tests.rs 🔗

@@ -788,15 +788,99 @@ fn test_empty_combined_injections_inside_injections(cx: &mut App) {
             "(template...",
             // Markdown inline content
             "(inline)",
-            // HTML within the ERB
-            "(document (text))",
             // The ruby syntax tree should be empty, since there are
             // no interpolations in the ERB template.
             "(program)",
+            // HTML within the ERB
+            "(document (text))",
         ],
     );
 }
 
+#[gpui::test]
+fn test_syntax_map_languages_loading_with_erb(cx: &mut App) {
+    let text = r#"
+        <body>
+            <% if @one %>
+                <div class=one>
+            <% else %>
+                <div class=two>
+            <% end %>
+            </div>
+        </body>
+    "#
+    .unindent();
+
+    let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
+    let mut buffer = Buffer::new(0, BufferId::new(1).unwrap(), text);
+
+    let mut syntax_map = SyntaxMap::new(&buffer);
+    syntax_map.set_language_registry(registry.clone());
+
+    let language = Arc::new(erb_lang());
+
+    log::info!("parsing");
+    registry.add(language.clone());
+    syntax_map.reparse(language.clone(), &buffer);
+
+    log::info!("loading html");
+    registry.add(Arc::new(html_lang()));
+    syntax_map.reparse(language.clone(), &buffer);
+
+    log::info!("loading ruby");
+    registry.add(Arc::new(ruby_lang()));
+    syntax_map.reparse(language.clone(), &buffer);
+
+    assert_capture_ranges(
+        &syntax_map,
+        &buffer,
+        &["tag", "ivar"],
+        "
+            <«body»>
+                <% if «@one» %>
+                    <«div» class=one>
+                <% else %>
+                    <«div» class=two>
+                <% end %>
+                </«div»>
+            </«body»>
+        ",
+    );
+
+    let text = r#"
+        <body>
+            <% if @one«_hundred» %>
+                <div class=one>
+            <% else %>
+                <div class=two>
+            <% end %>
+            </div>
+        </body>
+    "#
+    .unindent();
+
+    log::info!("editing");
+    buffer.edit_via_marked_text(&text);
+    syntax_map.interpolate(&buffer);
+    syntax_map.reparse(language.clone(), &buffer);
+
+    assert_capture_ranges(
+        &syntax_map,
+        &buffer,
+        &["tag", "ivar"],
+        "
+            <«body»>
+                <% if «@one_hundred» %>
+                    <«div» class=one>
+                <% else %>
+                    <«div» class=two>
+                <% end %>
+                </«div»>
+            </«body»>
+        ",
+    );
+}
+
 #[gpui::test(iterations = 50)]
 fn test_random_syntax_map_edits_rust_macros(rng: StdRng, cx: &mut App) {
     let text = r#"