language: Fix `language_scope_at` for markdown code comments (#29230)

Smit Barmase created

Closes #29176

This PR fixes an issue where uncommenting a code block in Markdown would
add Markdown comments instead of removing the language-specific
comments.

Why?
`language_scope_at` for comments in a code block in Markdown would
result in the language being detected as Markdown. This happens because
the layer with the smallest range at that point wins, and the range on
the Markdown layer, such as `//` or `#`, is smaller than the
`// whole comment line` range for any other language. As a result, the
language at that point is detected as Markdown.

To fix this, we also take layer depth into account and prefer the layer
with greater depth over one with lesser depth. In this case, the code
block's language layer is deeper than the Markdown layer, so it is
preferred. The smallest range is now used only as a tiebreaker between
layers of equal depth.

Added a test for this case.

Release Notes:

- Fixed issue where uncommenting a code block in Markdown would add
Markdown comments instead of removing the language comments.

Change summary

crates/language/src/buffer.rs       | 22 ++++++++----
crates/language/src/buffer_tests.rs | 53 +++++++++++++++++++++++++++++++
2 files changed, 68 insertions(+), 7 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -3167,7 +3167,7 @@ impl BufferSnapshot {
     pub fn language_scope_at<D: ToOffset>(&self, position: D) -> Option<LanguageScope> {
         let offset = position.to_offset(self);
         let mut scope = None;
-        let mut smallest_range: Option<Range<usize>> = None;
+        let mut smallest_range_and_depth: Option<(Range<usize>, usize)> = None;
 
         // Use the layer that has the smallest node intersecting the given point.
         for layer in self
@@ -3179,7 +3179,7 @@ impl BufferSnapshot {
             let mut range = None;
             loop {
                 let child_range = cursor.node().byte_range();
-                if !child_range.to_inclusive().contains(&offset) {
+                if !child_range.contains(&offset) {
                     break;
                 }
 
@@ -3190,11 +3190,19 @@ impl BufferSnapshot {
             }
 
             if let Some(range) = range {
-                if smallest_range
-                    .as_ref()
-                    .map_or(true, |smallest_range| range.len() < smallest_range.len())
-                {
-                    smallest_range = Some(range);
+                if smallest_range_and_depth.as_ref().map_or(
+                    true,
+                    |(smallest_range, smallest_range_depth)| {
+                        if layer.depth > *smallest_range_depth {
+                            true
+                        } else if layer.depth == *smallest_range_depth {
+                            range.len() < smallest_range.len()
+                        } else {
+                            false
+                        }
+                    },
+                ) {
+                    smallest_range_and_depth = Some((range, layer.depth));
                     scope = Some(LanguageScope {
                         language: layer.language.clone(),
                         override_id: layer.override_id(offset, &self.text),

crates/language/src/buffer_tests.rs 🔗

@@ -2507,6 +2507,59 @@ fn test_language_at_with_hidden_languages(cx: &mut App) {
     });
 }
 
+#[gpui::test]
+fn test_language_at_for_markdown_code_block(cx: &mut App) {
+    init_settings(cx, |_| {});
+
+    cx.new(|cx| {
+        let text = r#"
+            ```rs
+            let a = 2;
+            // let b = 3;
+            ```
+        "#
+        .unindent();
+
+        let language_registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
+        language_registry.add(Arc::new(markdown_lang()));
+        language_registry.add(Arc::new(markdown_inline_lang()));
+        language_registry.add(Arc::new(rust_lang()));
+
+        let mut buffer = Buffer::local(text, cx);
+        buffer.set_language_registry(language_registry.clone());
+        buffer.set_language(
+            language_registry
+                .language_for_name("Markdown")
+                .now_or_never()
+                .unwrap()
+                .ok(),
+            cx,
+        );
+
+        let snapshot = buffer.snapshot();
+
+        // Test points in the code line
+        for point in [Point::new(1, 4), Point::new(1, 6)] {
+            let config = snapshot.language_scope_at(point).unwrap();
+            assert_eq!(config.language_name(), "Rust".into());
+
+            let language = snapshot.language_at(point).unwrap();
+            assert_eq!(language.name().as_ref(), "Rust");
+        }
+
+        // Test points in the comment line to verify it's still detected as Rust
+        for point in [Point::new(2, 4), Point::new(2, 6)] {
+            let config = snapshot.language_scope_at(point).unwrap();
+            assert_eq!(config.language_name(), "Rust".into());
+
+            let language = snapshot.language_at(point).unwrap();
+            assert_eq!(language.name().as_ref(), "Rust");
+        }
+
+        buffer
+    });
+}
+
 #[gpui::test]
 fn test_serialization(cx: &mut gpui::App) {
     let mut now = Instant::now();