Add inclusive range scope overrides. Don't auto-close quotes at the ends of line comments (#20206)

Max Brunsfeld created

Closes #9195
Closes #19787

Release Notes:

- Fixed an issue where single quotation marks were spuriously
auto-closed when typing in line comments

Change summary

crates/language/src/buffer_tests.rs              | 15 ++
crates/language/src/language.rs                  | 85 ++++++++++-------
crates/language/src/syntax_map.rs                | 16 ++-
crates/languages/src/c/overrides.scm             |  2 
crates/languages/src/cpp/overrides.scm           |  2 
crates/languages/src/css/overrides.scm           |  2 
crates/languages/src/go/overrides.scm            |  2 
crates/languages/src/javascript/overrides.scm    |  2 
crates/languages/src/python/overrides.scm        |  2 
crates/languages/src/rust/overrides.scm          |  2 
crates/languages/src/tsx/overrides.scm           |  2 
crates/languages/src/typescript/overrides.scm    |  2 
docs/src/extensions/languages.md                 | 37 ++++++
extensions/astro/languages/astro/overrides.scm   |  2 
extensions/elixir/languages/elixir/overrides.scm |  2 
extensions/scheme/languages/scheme/overrides.scm |  2 
extensions/toml/languages/toml/overrides.scm     |  2 
17 files changed, 119 insertions(+), 60 deletions(-)

Detailed changes

crates/language/src/buffer_tests.rs 🔗

@@ -2120,8 +2120,8 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
                         },
                     ],
                     disabled_scopes_by_bracket_ix: vec![
-                        Vec::new(), //
-                        vec!["string".into()],
+                        Vec::new(),                              //
+                        vec!["string".into(), "comment".into()], // single quotes disabled
                     ],
                 },
                 overrides: [(
@@ -2142,6 +2142,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
             r#"
                 (jsx_element) @element
                 (string) @string
+                (comment) @comment.inclusive
                 [
                     (jsx_opening_element)
                     (jsx_closing_element)
@@ -2155,7 +2156,7 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
             a["b"] = <C d="e">
                 <F></F>
                 { g() }
-            </C>;
+            </C>; // a comment
         "#
         .unindent();
 
@@ -2170,6 +2171,14 @@ fn test_language_scope_at_with_javascript(cx: &mut AppContext) {
             &[true, true]
         );
 
+        let comment_config = snapshot
+            .language_scope_at(text.find("comment").unwrap() + "comment".len())
+            .unwrap();
+        assert_eq!(
+            comment_config.brackets().map(|e| e.1).collect::<Vec<_>>(),
+            &[true, false]
+        );
+
         let string_config = snapshot
             .language_scope_at(text.find("b\"").unwrap())
             .unwrap();

crates/language/src/language.rs 🔗

@@ -945,7 +945,14 @@ struct RunnableConfig {
 
 struct OverrideConfig {
     query: Query,
-    values: HashMap<u32, (String, LanguageConfigOverride)>,
+    values: HashMap<u32, OverrideEntry>,
+}
+
+#[derive(Debug)]
+struct OverrideEntry {
+    name: String,
+    range_is_inclusive: bool,
+    value: LanguageConfigOverride,
 }
 
 #[derive(Default, Clone)]
@@ -1265,58 +1272,66 @@ impl Language {
         };
 
         let mut override_configs_by_id = HashMap::default();
-        for (ix, name) in query.capture_names().iter().enumerate() {
-            if !name.starts_with('_') {
-                let value = self.config.overrides.remove(*name).unwrap_or_default();
-                for server_name in &value.opt_into_language_servers {
-                    if !self
-                        .config
-                        .scope_opt_in_language_servers
-                        .contains(server_name)
-                    {
-                        util::debug_panic!("Server {server_name:?} has been opted-in by scope {name:?} but has not been marked as an opt-in server");
-                    }
-                }
+        for (ix, mut name) in query.capture_names().iter().copied().enumerate() {
+            let mut range_is_inclusive = false;
+            if name.starts_with('_') {
+                continue;
+            }
+            if let Some(prefix) = name.strip_suffix(".inclusive") {
+                name = prefix;
+                range_is_inclusive = true;
+            }
 
-                override_configs_by_id.insert(ix as u32, (name.to_string(), value));
+            let value = self.config.overrides.get(name).cloned().unwrap_or_default();
+            for server_name in &value.opt_into_language_servers {
+                if !self
+                    .config
+                    .scope_opt_in_language_servers
+                    .contains(server_name)
+                {
+                    util::debug_panic!("Server {server_name:?} has been opted-in by scope {name:?} but has not been marked as an opt-in server");
+                }
             }
-        }
 
-        if !self.config.overrides.is_empty() {
-            let keys = self.config.overrides.keys().collect::<Vec<_>>();
-            Err(anyhow!(
-                "language {:?} has overrides in config not in query: {keys:?}",
-                self.config.name
-            ))?;
+            override_configs_by_id.insert(
+                ix as u32,
+                OverrideEntry {
+                    name: name.to_string(),
+                    range_is_inclusive,
+                    value,
+                },
+            );
         }
 
-        for disabled_scope_name in self
-            .config
-            .brackets
-            .disabled_scopes_by_bracket_ix
-            .iter()
-            .flatten()
-        {
+        let referenced_override_names = self.config.overrides.keys().chain(
+            self.config
+                .brackets
+                .disabled_scopes_by_bracket_ix
+                .iter()
+                .flatten(),
+        );
+
+        for referenced_name in referenced_override_names {
             if !override_configs_by_id
                 .values()
-                .any(|(scope_name, _)| scope_name == disabled_scope_name)
+                .any(|entry| entry.name == *referenced_name)
             {
                 Err(anyhow!(
-                    "language {:?} has overrides in config not in query: {disabled_scope_name:?}",
+                    "language {:?} has overrides in config not in query: {referenced_name:?}",
                     self.config.name
                 ))?;
             }
         }
 
-        for (name, override_config) in override_configs_by_id.values_mut() {
-            override_config.disabled_bracket_ixs = self
+        for entry in override_configs_by_id.values_mut() {
+            entry.value.disabled_bracket_ixs = self
                 .config
                 .brackets
                 .disabled_scopes_by_bracket_ix
                 .iter()
                 .enumerate()
                 .filter_map(|(ix, disabled_scope_names)| {
-                    if disabled_scope_names.contains(name) {
+                    if disabled_scope_names.contains(&entry.name) {
                         Some(ix as u16)
                     } else {
                         None
@@ -1534,14 +1549,14 @@ impl LanguageScope {
         let id = self.override_id?;
         let grammar = self.language.grammar.as_ref()?;
         let override_config = grammar.override_config.as_ref()?;
-        override_config.values.get(&id).map(|e| e.0.as_str())
+        override_config.values.get(&id).map(|e| e.name.as_str())
     }
 
     fn config_override(&self) -> Option<&LanguageConfigOverride> {
         let id = self.override_id?;
         let grammar = self.language.grammar.as_ref()?;
         let override_config = grammar.override_config.as_ref()?;
-        override_config.values.get(&id).map(|e| &e.1)
+        override_config.values.get(&id).map(|e| &e.value)
     }
 }
 

crates/language/src/syntax_map.rs 🔗

@@ -1520,18 +1520,24 @@ impl<'a> SyntaxLayer<'a> {
         let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
 
         let mut query_cursor = QueryCursorHandle::new();
-        query_cursor.set_byte_range(offset..offset);
+        query_cursor.set_byte_range(offset.saturating_sub(1)..offset.saturating_add(1));
 
         let mut smallest_match: Option<(u32, Range<usize>)> = None;
         for mat in query_cursor.matches(&config.query, self.node(), text) {
             for capture in mat.captures {
-                if !config.values.contains_key(&capture.index) {
+                let Some(override_entry) = config.values.get(&capture.index) else {
                     continue;
-                }
+                };
 
                 let range = capture.node.byte_range();
-                if offset <= range.start || offset >= range.end {
-                    continue;
+                if override_entry.range_is_inclusive {
+                    if offset < range.start || offset > range.end {
+                        continue;
+                    }
+                } else {
+                    if offset <= range.start || offset >= range.end {
+                        continue;
+                    }
                 }
 
                 if let Some((_, smallest_range)) = &smallest_match {

docs/src/extensions/languages.md 🔗

@@ -218,15 +218,44 @@ Note that we couldn't use JSON as an example here because it doesn't support lan
 
 ### Syntax overrides
 
-The `overrides.scm` file defines syntax overrides.
+The `overrides.scm` file defines syntactic _scopes_ that can be used to override certain editor settings within specific language constructs.
 
-Here's an example from an `overrides.scm` file for JSON:
+For example, there is a language-specific setting called `word_characters` that controls which non-alphabetic characters are considered part of a word, for filtering autocomplete suggestions. In JavaScript, "$" and "#" are considered word characters. But when your cursor is within a _string_ in JavaScript, "-" is _also_ considered a word character. To achieve this, the JavaScript `overrides.scm` file contains the following pattern:
 
 ```scheme
-(string) @string
+[
+  (string)
+  (template_string)
+] @string
+```
+
+And the JavaScript `config.toml` contains this setting:
+
+```toml
+word_characters = ["#", "$"]
+
+[overrides.string]
+word_characters = ["-"]
 ```
 
-This query explicitly marks strings for highlighting, potentially overriding default behavior. For a complete list of supported captures, refer to the [Syntax highlighting](#syntax-highlighting) section above.
+You can also disable certain auto-closing brackets in a specific scope. For example, to prevent auto-closing `'` within strings, you could put the following in the JavaScript `config.toml`:
+
+```toml
+brackets = [
+  { start = "'", end = "'", close = true, newline = false, not_in = ["string"] },
+  # other pairs...
+]
+```
+
+#### Range inclusivity
+
+By default, the ranges defined in `overrides.scm` are _exclusive_. So in the case above, if your cursor were _outside_ the quotation marks delimiting the string, the `string` scope would not take effect. Sometimes, you may want to make the range _inclusive_. You can do this by adding the `.inclusive` suffix to the capture name in the query.
+
+For example, in JavaScript, we also disable auto-closing of single quotes within comments. For this to work while typing at the end of a line comment, the comment scope must extend all the way to the newline that follows the comment. To achieve this, the JavaScript `overrides.scm` contains the following pattern:
+
+```scheme
+(comment) @comment.inclusive
+```
 
 ### Text redactions