From cd87307289eb94f288e560e779597a73b3744814 Mon Sep 17 00:00:00 2001 From: Vitaly Slobodin Date: Wed, 5 Nov 2025 11:16:28 +0000 Subject: [PATCH] language: Fix language detection for injected syntax layers (#41111) Closes #40632 **TL;DR:** The `wrap selections in tag` action was unavailable in ERB files, even when the cursor was positioned in HTML content (outside of Ruby code blocks). This happened because `syntax_layer_at()` incorrectly returned the Ruby language for positions that were actually in HTML. **NOTE:** I am not familiar with that part of Zed so it could be that the fix here is completely incorrect. Previously, `syntax_layer_at` incorrectly reported injected languages (e.g., Ruby in ERB files) even when the cursor was in the base language content (HTML). This broke actions like `wrap selections in tag` that depend on language-specific configuration. The issue had two parts: 1. Missing start boundary check: The filter only checked if a layer's end was after the cursor (`end_byte() > offset`), not if it started before, causing layers outside the cursor position to be included. See the `BEFORE` video: when I click on the HTML part it reports `Ruby` language instead of `HTML`. 2. Wrong boundary reference for injections: For injected layers with `included_sub_ranges` (like Ruby code blocks in ERB), checking the root node boundaries returned the entire file range instead of the actual injection ranges. This fix: - Adds the containment check using half-open range semantics [start, end) for root node boundaries. That ensures proper reporting of the detected language when a cursor (`|`) is located right after the injection: ``` <%= yield %>| ``` - Checks `included_sub_ranges` for injected layers to determine if the cursor is actually within an injection - Falls back to root node boundaries for base layers without sub-ranges. This is the original behavior. 
Fixes ERB language support where actions should be available based on the cursor's actual language context. I think that also applies to some other template languages like HEEx (Phoenix) and `*.pug`. In the short videos below, I navigate through the ERB template while the terminal on the right prints the detected language. To reproduce that output, apply the following debugging patch: ```diff diff --git i/crates/editor/src/editor.rs w/crates/editor/src/editor.rs index 15af61f5d2..54a8e0ae37 100644 --- i/crates/editor/src/editor.rs +++ w/crates/editor/src/editor.rs @@ -10671,6 +10671,7 @@ impl Editor { for selection in self.selections.disjoint_anchors_arc().iter() { if snapshot .language_at(selection.start) + .inspect(|language| println!("Detected language: {:?}", language)) .and_then(|lang| lang.config().wrap_characters.as_ref()) .is_some() { ``` **Before:** https://github.com/user-attachments/assets/3f8358f4-d343-462e-b6b1-3f1f2e8c533d **After:** https://github.com/user-attachments/assets/c1b9f065-1b44-45a2-8a24-76b7d812130d Here is the ERB template: ``` <%= yield %> ``` Release Notes: - N/A --- crates/language/src/buffer.rs | 14 ++++++- crates/language/src/buffer_tests.rs | 60 +++++++++++++++++++++++++---- crates/language/src/syntax_map.rs | 12 ++++-- 3 files changed, 74 insertions(+), 12 deletions(-) diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index c72350f38561e7aea62b7d3402eaa24bbdb08044..69e6b0a553cdb8c7ec90f1f19099f7cbc2a03e97 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -3366,7 +3366,19 @@ impl BufferSnapshot { pub fn syntax_layer_at(&self, position: D) -> Option> { let offset = position.to_offset(self); self.syntax_layers_for_range(offset..offset, false) - .filter(|l| l.node().end_byte() > offset) + .filter(|l| { + if let Some(ranges) = l.included_sub_ranges { + ranges.iter().any(|range| { + let start = range.start.to_offset(self); + start <= offset && { + let end = range.end.to_offset(self); + offset < end + 
} + }) + } else { + l.node().start_byte() <= offset && l.node().end_byte() > offset + } + }) .last() } diff --git a/crates/language/src/buffer_tests.rs b/crates/language/src/buffer_tests.rs index f824639ad762191f4168586551af51fb4e37c8dc..f0267ebd99b3b1bf806058f98453714daed93ef5 100644 --- a/crates/language/src/buffer_tests.rs +++ b/crates/language/src/buffer_tests.rs @@ -2633,7 +2633,7 @@ fn test_language_scope_at_with_combined_injections(cx: &mut App) { buffer.set_language_registry(language_registry.clone()); buffer.set_language( language_registry - .language_for_name("ERB") + .language_for_name("HTML+ERB") .now_or_never() .unwrap() .ok(), @@ -2753,6 +2753,50 @@ fn test_language_at_for_markdown_code_block(cx: &mut App) { }); } +#[gpui::test] +fn test_syntax_layer_at_for_injected_languages(cx: &mut App) { + init_settings(cx, |_| {}); + + cx.new(|cx| { + let text = r#" + ```html+erb +
Hello
+ <%= link_to "Some", "https://zed.dev" %> + ``` + "# + .unindent(); + + let language_registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone())); + language_registry.add(Arc::new(erb_lang())); + language_registry.add(Arc::new(html_lang())); + language_registry.add(Arc::new(ruby_lang())); + + let mut buffer = Buffer::local(text, cx); + buffer.set_language_registry(language_registry.clone()); + buffer.set_language( + language_registry + .language_for_name("HTML+ERB") + .now_or_never() + .unwrap() + .ok(), + cx, + ); + + let snapshot = buffer.snapshot(); + + // Test points in the code line + let html_point = Point::new(1, 4); + let language = snapshot.language_at(html_point).unwrap(); + assert_eq!(language.name().as_ref(), "HTML"); + + let ruby_point = Point::new(2, 6); + let language = snapshot.language_at(ruby_point).unwrap(); + assert_eq!(language.name().as_ref(), "Ruby"); + + buffer + }); +} + #[gpui::test] fn test_serialization(cx: &mut gpui::App) { let mut now = Instant::now(); @@ -3655,7 +3699,7 @@ fn html_lang() -> Language { fn erb_lang() -> Language { Language::new( LanguageConfig { - name: "ERB".into(), + name: "HTML+ERB".into(), matcher: LanguageMatcher { path_suffixes: vec!["erb".to_string()], ..Default::default() @@ -3673,15 +3717,15 @@ fn erb_lang() -> Language { .with_injection_query( r#" ( - (code) @injection.content - (#set! injection.language "ruby") - (#set! injection.combined) + (code) @content + (#set! "language" "ruby") + (#set! "combined") ) ( - (content) @injection.content - (#set! injection.language "html") - (#set! injection.combined) + (content) @content + (#set! "language" "html") + (#set! 
"combined") ) "#, ) diff --git a/crates/language/src/syntax_map.rs b/crates/language/src/syntax_map.rs index 528b25d47193ebdbbf0dbe12cd18c66e31ad37d0..a9ac2faad9da9d5e07261ec826dda138921717a6 100644 --- a/crates/language/src/syntax_map.rs +++ b/crates/language/src/syntax_map.rs @@ -587,6 +587,8 @@ impl SyntaxSnapshot { let changed_ranges; let mut included_ranges = step.included_ranges; + let is_combined = matches!(step.mode, ParseMode::Combined { .. }); + for range in &mut included_ranges { range.start_byte -= step_start_byte; range.end_byte -= step_start_byte; @@ -749,16 +751,20 @@ impl SyntaxSnapshot { ); } - let included_sub_ranges: Option>> = - (included_ranges.len() > 1).then_some( + let included_sub_ranges: Option>> = if is_combined { + Some( included_ranges .into_iter() + .filter(|r| r.start_byte < r.end_byte) .map(|r| { text.anchor_before(r.start_byte + step_start_byte) ..text.anchor_after(r.end_byte + step_start_byte) }) .collect(), - ); + ) + } else { + None + }; SyntaxLayerContent::Parsed { tree, language,