Fix language injections sticking after language comment removed (#46134)

Jeff Brennan and Joseph T. Lyons created

Closes #46104

Release Notes: 

- Fixed language injections sticking after the language comment is removed

---

This is working well for Python `# sql` comments:

![comment_sync_demo](https://github.com/user-attachments/assets/ba5160ce-4867-4b49-9f44-141ddc2730a1)

```python
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

# sql
cmd = "SELECT col1, col2 FROM tbl"

df = spark.sql(  # sql
    """
    WITH cte AS (
        SELECT col1, col2, COUNT(*) AS n
        FROM tbl
        GROUP BY ALL
    )
    SELECT * FROM cte
    """
)
```


And for Go `/* sql */` comments:

![comment_sync_demo_go_updated](https://github.com/user-attachments/assets/73e81a94-e8fc-4dc3-82f8-09e966d35bc9)


```go
package main

func test() {
    var _ = /* sql */ `SELECT id, name FROM products`
    var _ = /* sql */ "SELECT id, name FROM products"

    var _ = /* sql */ `SELECT id, name FROM products`

    var _ = /* sql*/ "SELECT id, name FROM products"
}
```

Note: `f04b252dd9` was a simpler implementation that worked for
non-inline comments in Python, but it produced the following behavior in Go:

![comment_sync_demo_go](https://github.com/user-attachments/assets/40336705-9799-4b0a-b457-4974d172e1c8)

---------

Co-authored-by: Joseph T. Lyons <JosephTLyons@gmail.com>

Change summary

crates/language/src/syntax_map.rs                  | 23 +++
crates/language/src/syntax_map/syntax_map_tests.rs | 93 +++++++++++++++
2 files changed, 113 insertions(+), 3 deletions(-)

Detailed changes

crates/language/src/syntax_map.rs 🔗

@@ -779,7 +779,26 @@ impl SyntaxSnapshot {
                         grammar.injection_config.as_ref().zip(registry.as_ref()),
                         changed_ranges.is_empty(),
                     ) {
-                        for range in &changed_ranges {
+                        // Handle invalidation and reactivation of injections on comment update
+                        let mut expanded_ranges: Vec<_> = changed_ranges
+                            .iter()
+                            .map(|range| {
+                                let start_row = range.start.to_point(text).row.saturating_sub(1);
+                                let end_row = range.end.to_point(text).row.saturating_add(2);
+                                text.point_to_offset(Point::new(start_row, 0))
+                                    ..text.point_to_offset(Point::new(end_row, 0)).min(text.len())
+                            })
+                            .collect();
+                        expanded_ranges.sort_unstable_by_key(|r| r.start);
+                        expanded_ranges.dedup_by(|b, a| {
+                            let overlaps = b.start <= a.end;
+                            if overlaps {
+                                a.end = a.end.max(b.end);
+                            }
+                            overlaps
+                        });
+
+                        for range in &expanded_ranges {
                             changed_regions.insert(
                                 ChangedRegion {
                                     depth: step.depth + 1,
@@ -799,7 +818,7 @@ impl SyntaxSnapshot {
                             ),
                             registry,
                             step.depth + 1,
-                            &changed_ranges,
+                            &expanded_ranges,
                             &mut combined_injection_ranges,
                             &mut queue,
                         );

crates/language/src/syntax_map/syntax_map_tests.rs 🔗

@@ -1,10 +1,12 @@
 use super::*;
 use crate::{
-    LanguageConfig, LanguageMatcher, buffer_tests::markdown_inline_lang, markdown_lang, rust_lang,
+    LanguageConfig, LanguageMatcher, LanguageQueries, buffer_tests::markdown_inline_lang,
+    markdown_lang, rust_lang,
 };
 use gpui::App;
 use pretty_assertions::assert_eq;
 use rand::rngs::StdRng;
+use std::borrow::Cow;
 use std::{env, ops::Range, sync::Arc};
 use text::{Buffer, BufferId, ReplicaId};
 use tree_sitter::Node;
@@ -796,6 +798,61 @@ fn test_empty_combined_injections_inside_injections(cx: &mut App) {
     );
 }
 
+#[gpui::test]
+fn test_comment_triggered_injection_toggle(cx: &mut App) {
+    let registry = Arc::new(LanguageRegistry::test(cx.background_executor().clone()));
+
+    let python = Arc::new(python_lang());
+    let comment = Arc::new(comment_lang());
+    registry.add(python.clone());
+    registry.add(comment);
+    // Note: SQL is an extension language (not built-in as of v0.222.0), so we can use
+    // contains_unknown_injections() to detect when the injection is triggered.
+    // We register a mock "comment" language because Python injects all comments as
+    // language "comment", and we only want SQL to trigger unknown injections.
+
+    // Start with Python code with incomplete #sq comment (not enough to trigger injection)
+    let mut buffer = Buffer::new(
+        ReplicaId::LOCAL,
+        BufferId::new(1).unwrap(),
+        "#sq\ncmd = \"SELECT col1, col2 FROM tbl\"".to_string(),
+    );
+
+    let mut syntax_map = SyntaxMap::new(&buffer);
+    syntax_map.set_language_registry(registry);
+    syntax_map.reparse(python.clone(), &buffer);
+
+    // Should have no unknown injections (#sq doesn't match the injection pattern)
+    assert!(
+        !syntax_map.contains_unknown_injections(),
+        "Expected no unknown injections with incomplete #sq comment"
+    );
+
+    // Complete the comment by adding 'l' to make #sql
+    let sq_end = buffer.as_rope().to_string().find('\n').unwrap();
+    buffer.edit([(sq_end..sq_end, "l")]);
+    syntax_map.interpolate(&buffer);
+    syntax_map.reparse(python.clone(), &buffer);
+
+    // Should now have unknown injections (SQL injection triggered but SQL not registered)
+    assert!(
+        syntax_map.contains_unknown_injections(),
+        "Expected unknown injections after completing #sql comment"
+    );
+
+    // Remove the 'l' to go back to #sq
+    let l_position = buffer.as_rope().to_string().find("l\n").unwrap();
+    buffer.edit([(l_position..l_position + 1, "")]);
+    syntax_map.interpolate(&buffer);
+    syntax_map.reparse(python, &buffer);
+
+    // Should have no unknown injections again - SQL injection should be invalidated
+    assert!(
+        !syntax_map.contains_unknown_injections(),
+        "Expected no unknown injections after removing 'l' from #sql comment"
+    );
+}
+
 #[gpui::test]
 fn test_syntax_map_languages_loading_with_erb(cx: &mut App) {
     let text = r#"
@@ -1339,6 +1396,40 @@ fn heex_lang() -> Language {
     .unwrap()
 }
 
+fn python_lang() -> Language {
+    Language::new(
+        LanguageConfig {
+            name: "Python".into(),
+            matcher: LanguageMatcher {
+                path_suffixes: vec!["py".to_string()],
+                ..Default::default()
+            },
+            line_comments: vec!["# ".into()],
+            ..Default::default()
+        },
+        Some(tree_sitter_python::LANGUAGE.into()),
+    )
+    .with_queries(LanguageQueries {
+        injections: Some(Cow::from(include_str!(
+            "../../../languages/src/python/injections.scm"
+        ))),
+        ..Default::default()
+    })
+    .expect("Could not parse Python queries")
+}
+
+fn comment_lang() -> Language {
+    // Mock "comment" language to satisfy Python's comment injection.
+    // Uses JSON grammar as a stand-in since we just need it to be registered.
+    Language::new(
+        LanguageConfig {
+            name: "comment".into(),
+            ..Default::default()
+        },
+        Some(tree_sitter_json::LANGUAGE.into()),
+    )
+}
+
 fn range_for_text(buffer: &Buffer, text: &str) -> Range<usize> {
     let start = buffer.as_rope().to_string().find(text).unwrap();
     start..start + text.len()