language: Fix language detection for injected syntax layers (#41111)

Vitaly Slobodin created

Closes #40632

**TL;DR:** The `wrap selections in tag` action was unavailable in ERB
files, even when the cursor was positioned in HTML content (outside of
Ruby code blocks). This happened because `syntax_layer_at()` incorrectly
returned the Ruby language for positions that were actually in HTML.
**NOTE:** I am not familiar with that part of Zed so it could be that
the fix here is completely incorrect.

Previously, `syntax_layer_at` incorrectly reported injected languages
(e.g., Ruby in ERB files) even when the cursor was in the base language
content (HTML). This broke actions like `wrap selections in tag` that
depend on language-specific configuration.

The issue had two parts:
1. Missing start boundary check: The filter only checked if a layer's
end was after the cursor (`end_byte() > offset`), not if it started
before, causing layers outside the cursor position to be included. See
the `BEFORE` video: when I click on the HTML part it reports `Ruby`
language instead of `HTML`.
2. Wrong boundary reference for injections: For injected layers with
`included_sub_ranges` (like Ruby code blocks in ERB), checking the root
node boundaries returned the entire file range instead of the actual
injection ranges.

This fix:
- Adds the containment check using half-open range semantics [start,
end) for root node boundaries. That ensures proper reporting of the
detected language when a cursor (`|`) is located right after the
injection:

   ```
   <body>
    <%= yield %>|
  </body>
   ```

- Checks `included_sub_ranges` for injected layers to determine if the
cursor is actually within an injection
- Falls back to root node boundaries for base layers without sub-ranges.
This is the original behavior.

Fixes ERB language support where actions should be available based on
the cursor's actual language context. I think that also applies to some
other template languages like HEEX (Phoenix) and `*.pug`. On short
videos below you can see how I navigate through the ERB template and the
terminal on the right outputs the detected language if you apply the
following patch:

```diff
diff --git i/crates/editor/src/editor.rs w/crates/editor/src/editor.rs
index 15af61f5d2..54a8e0ae37 100644
--- i/crates/editor/src/editor.rs
+++ w/crates/editor/src/editor.rs
@@ -10671,6 +10671,7 @@ impl Editor {
         for selection in self.selections.disjoint_anchors_arc().iter() {
             if snapshot
                 .language_at(selection.start)
+                .inspect(|language| println!("Detected language: {:?}", language))
                 .and_then(|lang| lang.config().wrap_characters.as_ref())
                 .is_some()
             {
```

**Before:**


https://github.com/user-attachments/assets/3f8358f4-d343-462e-b6b1-3f1f2e8c533d


**After:**



https://github.com/user-attachments/assets/c1b9f065-1b44-45a2-8a24-76b7d812130d



Here is the ERB template:

```
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <style>
      /* Email styles need to be inline */
    </style>
  </head>
  <body>
    <%= yield %>
  </body>
</html>
```

Release Notes:

- N/A

Change summary

crates/language/src/buffer.rs       | 14 ++++++
crates/language/src/buffer_tests.rs | 60 ++++++++++++++++++++++++++----
crates/language/src/syntax_map.rs   | 12 ++++-
3 files changed, 74 insertions(+), 12 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -3366,7 +3366,19 @@ impl BufferSnapshot {
     pub fn syntax_layer_at<D: ToOffset>(&self, position: D) -> Option<SyntaxLayer<'_>> {
         let offset = position.to_offset(self);
         self.syntax_layers_for_range(offset..offset, false)
-            .filter(|l| l.node().end_byte() > offset)
+            .filter(|l| {
+                if let Some(ranges) = l.included_sub_ranges {
+                    ranges.iter().any(|range| {
+                        let start = range.start.to_offset(self);
+                        start <= offset && {
+                            let end = range.end.to_offset(self);
+                            offset < end
+                        }
+                    })
+                } else {
+                    l.node().start_byte() <= offset && l.node().end_byte() > offset
+                }
+            })
             .last()
     }
 

crates/language/src/buffer_tests.rs 🔗

@@ -2633,7 +2633,7 @@ fn test_language_scope_at_with_combined_injections(cx: &mut App) {
         buffer.set_language_registry(language_registry.clone());
         buffer.set_language(
             language_registry
-                .language_for_name("ERB")
+                .language_for_name("HTML+ERB")
                 .now_or_never()
                 .unwrap()
                 .ok(),
@@ -2753,6 +2753,50 @@ fn test_language_at_for_markdown_code_block(cx: &mut App) {
     });
 }
 
+#[gpui::test]
+fn test_syntax_layer_at_for_injected_languages(cx: &mut App) {
+    init_settings(cx, |_| {});
+
+    cx.new(|cx| {
+        let text = r#"
+            ```html+erb
+            <div>Hello</div>
+            <%= link_to "Some", "https://zed.dev" %>
+            ```
+        "#
+        .unindent();
+
+        let language_registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
+        language_registry.add(Arc::new(erb_lang()));
+        language_registry.add(Arc::new(html_lang()));
+        language_registry.add(Arc::new(ruby_lang()));
+
+        let mut buffer = Buffer::local(text, cx);
+        buffer.set_language_registry(language_registry.clone());
+        buffer.set_language(
+            language_registry
+                .language_for_name("HTML+ERB")
+                .now_or_never()
+                .unwrap()
+                .ok(),
+            cx,
+        );
+
+        let snapshot = buffer.snapshot();
+
+        // Test points in the code line
+        let html_point = Point::new(1, 4);
+        let language = snapshot.language_at(html_point).unwrap();
+        assert_eq!(language.name().as_ref(), "HTML");
+
+        let ruby_point = Point::new(2, 6);
+        let language = snapshot.language_at(ruby_point).unwrap();
+        assert_eq!(language.name().as_ref(), "Ruby");
+
+        buffer
+    });
+}
+
 #[gpui::test]
 fn test_serialization(cx: &mut gpui::App) {
     let mut now = Instant::now();
@@ -3655,7 +3699,7 @@ fn html_lang() -> Language {
 fn erb_lang() -> Language {
     Language::new(
         LanguageConfig {
-            name: "ERB".into(),
+            name: "HTML+ERB".into(),
             matcher: LanguageMatcher {
                 path_suffixes: vec!["erb".to_string()],
                 ..Default::default()
@@ -3673,15 +3717,15 @@ fn erb_lang() -> Language {
     .with_injection_query(
         r#"
             (
-                (code) @injection.content
-                (#set! injection.language "ruby")
-                (#set! injection.combined)
+                (code) @content
+                (#set! "language" "ruby")
+                (#set! "combined")
             )
 
             (
-                (content) @injection.content
-                (#set! injection.language "html")
-                (#set! injection.combined)
+                (content) @content
+                (#set! "language" "html")
+                (#set! "combined")
             )
         "#,
     )

crates/language/src/syntax_map.rs 🔗

@@ -587,6 +587,8 @@ impl SyntaxSnapshot {
                     let changed_ranges;
 
                     let mut included_ranges = step.included_ranges;
+                    let is_combined = matches!(step.mode, ParseMode::Combined { .. });
+
                     for range in &mut included_ranges {
                         range.start_byte -= step_start_byte;
                         range.end_byte -= step_start_byte;
@@ -749,16 +751,20 @@ impl SyntaxSnapshot {
                         );
                     }
 
-                    let included_sub_ranges: Option<Vec<Range<Anchor>>> =
-                        (included_ranges.len() > 1).then_some(
+                    let included_sub_ranges: Option<Vec<Range<Anchor>>> = if is_combined {
+                        Some(
                             included_ranges
                                 .into_iter()
+                                .filter(|r| r.start_byte < r.end_byte)
                                 .map(|r| {
                                     text.anchor_before(r.start_byte + step_start_byte)
                                         ..text.anchor_after(r.end_byte + step_start_byte)
                                 })
                                 .collect(),
-                        );
+                        )
+                    } else {
+                        None
+                    };
                     SyntaxLayerContent::Parsed {
                         tree,
                         language,