Start work on language config overrides

Created by Max Brunsfeld and Julia Risley

Co-authored-by: Julia Risley <julia@zed.dev>

Change summary

crates/editor/src/editor.rs                      |   9 
crates/editor/src/multi_buffer.rs                |  11 +
crates/language/src/buffer.rs                    |  23 +++
crates/language/src/language.rs                  | 123 +++++++++++++++--
crates/language/src/syntax_map.rs                |  35 +++++
crates/zed/src/languages/javascript/contexts.scm |   0 
crates/zed/src/languages/tsx/config.toml         |   9 +
crates/zed/src/languages/tsx/overrides.scm       |   2 
8 files changed, 189 insertions(+), 23 deletions(-)

Detailed changes

crates/editor/src/editor.rs 🔗

@@ -1737,7 +1737,7 @@ impl Editor {
         for (selection, autoclose_region) in
             self.selections_with_autoclose_regions(selections, &snapshot)
         {
-            if let Some(language) = snapshot.language_at(selection.head()) {
+            if let Some(language) = snapshot.language_config_at(selection.head()) {
                 // Determine if the inserted text matches the opening or closing
                 // bracket of any of this language's bracket pairs.
                 let mut bracket_pair = None;
@@ -1898,7 +1898,7 @@ impl Editor {
                         let end = selection.end;
 
                         let mut insert_extra_newline = false;
-                        if let Some(language) = buffer.language_at(start) {
+                        if let Some(language) = buffer.language_config_at(start) {
                             let leading_whitespace_len = buffer
                                 .reversed_chars_at(start)
                                 .take_while(|c| c.is_whitespace() && *c != '\n')
@@ -4533,7 +4533,10 @@ impl Editor {
 
             // TODO: Handle selections that cross excerpts
             for selection in &mut selections {
-                let language = if let Some(language) = snapshot.language_at(selection.start) {
+                let start_column = snapshot.indent_size_for_line(selection.start.row).len;
+                let language = if let Some(language) =
+                    snapshot.language_config_at(Point::new(selection.start.row, start_column))
+                {
                     language
                 } else {
                     continue;

crates/editor/src/multi_buffer.rs 🔗

@@ -10,9 +10,9 @@ use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task};
 pub use language::Completion;
 use language::{
     char_kind, AutoindentMode, Buffer, BufferChunks, BufferSnapshot, CharKind, Chunk, CursorShape,
-    DiagnosticEntry, IndentSize, Language, OffsetRangeExt, OffsetUtf16, Outline, OutlineItem,
-    Point, PointUtf16, Selection, TextDimension, ToOffset as _, ToOffsetUtf16 as _, ToPoint as _,
-    ToPointUtf16 as _, TransactionId, Unclipped,
+    DiagnosticEntry, IndentSize, Language, LanguageConfigYeet, OffsetRangeExt, OffsetUtf16,
+    Outline, OutlineItem, Point, PointUtf16, Selection, TextDimension, ToOffset as _,
+    ToOffsetUtf16 as _, ToPoint as _, ToPointUtf16 as _, TransactionId, Unclipped,
 };
 use std::{
     borrow::Cow,
@@ -2691,6 +2691,11 @@ impl MultiBufferSnapshot {
             .and_then(|(buffer, offset)| buffer.language_at(offset))
     }
 
+    pub fn language_config_at<'a, T: ToOffset>(&'a self, point: T) -> Option<LanguageConfigYeet> {
+        self.point_to_buffer_offset(point)
+            .and_then(|(buffer, offset)| buffer.language_config_at(offset))
+    }
+
     pub fn is_dirty(&self) -> bool {
         self.is_dirty
     }

crates/language/src/buffer.rs 🔗

@@ -9,7 +9,7 @@ use crate::{
     syntax_map::{
         SyntaxMap, SyntaxMapCapture, SyntaxMapCaptures, SyntaxSnapshot, ToTreeSitterPoint,
     },
-    CodeLabel, Outline,
+    CodeLabel, LanguageConfigYeet, Outline,
 };
 use anyhow::{anyhow, Result};
 use clock::ReplicaId;
@@ -2015,6 +2015,27 @@ impl BufferSnapshot {
             .or(self.language.as_ref())
     }
 
+    pub fn language_config_at<D: ToOffset>(&self, position: D) -> Option<LanguageConfigYeet> {
+        let offset = position.to_offset(self);
+
+        if let Some(layer_info) = self
+            .syntax
+            .layers_for_range(offset..offset, &self.text)
+            .filter(|l| l.node.end_byte() > offset)
+            .last()
+        {
+            Some(LanguageConfigYeet {
+                language: layer_info.language.clone(),
+                override_id: layer_info.override_id(offset, &self.text),
+            })
+        } else {
+            self.language.clone().map(|language| LanguageConfigYeet {
+                language,
+                override_id: None,
+            })
+        }
+    }
+
     pub fn surrounding_word<T: ToOffset>(&self, start: T) -> (Range<usize>, Option<CharKind>) {
         let mut start = start.to_offset(self);
         let mut end = start;

crates/language/src/language.rs 🔗

@@ -22,7 +22,10 @@ use lazy_static::lazy_static;
 use parking_lot::{Mutex, RwLock};
 use postage::watch;
 use regex::Regex;
-use serde::{de, Deserialize, Deserializer};
+use serde::{
+    de::{self},
+    Deserialize, Deserializer,
+};
 use serde_json::Value;
 use std::{
     any::Any,
@@ -243,6 +246,45 @@ pub struct LanguageConfig {
     pub line_comment: Option<Arc<str>>,
     #[serde(default)]
     pub block_comment: Option<(Arc<str>, Arc<str>)>,
+    #[serde(default)]
+    pub overrides: HashMap<String, LanguageConfigOverride>,
+}
+
+#[derive(Clone)]
+pub struct LanguageConfigYeet {
+    language: Arc<Language>,
+    override_id: Option<u32>,
+}
+
+#[derive(Deserialize)]
+pub struct LanguageConfigOverride {
+    #[serde(default)]
+    pub line_comment: Override<Arc<str>>,
+    #[serde(default)]
+    pub block_comment: Override<(Arc<str>, Arc<str>)>,
+}
+
+#[derive(Deserialize)]
+#[serde(untagged)]
+pub enum Override<T> {
+    Remove { remove: bool },
+    Set(T),
+}
+
+impl<T> Default for Override<T> {
+    fn default() -> Self {
+        Override::Remove { remove: false }
+    }
+}
+
+impl<T> Override<T> {
+    fn as_option<'a>(this: Option<&'a Self>, original: &'a Option<T>) -> Option<&'a T> {
+        match this {
+            Some(Self::Set(value)) => Some(value),
+            Some(Self::Remove { remove: true }) => None,
+            Some(Self::Remove { remove: false }) | None => original.as_ref(),
+        }
+    }
 }
 
 impl Default for LanguageConfig {
@@ -257,6 +299,7 @@ impl Default for LanguageConfig {
             autoclose_before: Default::default(),
             line_comment: Default::default(),
             block_comment: Default::default(),
+            overrides: Default::default(),
         }
     }
 }
@@ -311,6 +354,7 @@ pub struct Grammar {
     pub(crate) indents_config: Option<IndentConfig>,
     pub(crate) outline_config: Option<OutlineConfig>,
     pub(crate) injection_config: Option<InjectionConfig>,
+    pub(crate) override_config: Option<OverrideConfig>,
     pub(crate) highlight_map: Mutex<HighlightMap>,
 }
 
@@ -336,6 +380,11 @@ struct InjectionConfig {
     patterns: Vec<InjectionPatternConfig>,
 }
 
+struct OverrideConfig {
+    query: Query,
+    values: HashMap<u32, LanguageConfigOverride>,
+}
+
 #[derive(Default, Clone)]
 struct InjectionPatternConfig {
     language: Option<Box<str>>,
@@ -635,6 +684,7 @@ impl Language {
                     outline_config: None,
                     indents_config: None,
                     injection_config: None,
+                    override_config: None,
                     ts_language,
                     highlight_map: Default::default(),
                 })
@@ -775,6 +825,25 @@ impl Language {
         Ok(self)
     }
 
+    pub fn with_override_query(mut self, source: &str) -> Result<Self> {
+        let query = Query::new(self.grammar_mut().ts_language, source)?;
+
+        let mut values = HashMap::default();
+        for (ix, name) in query.capture_names().iter().enumerate() {
+            if let Some(override_name) = name.strip_prefix("override.") {
+                let value = self
+                    .config
+                    .overrides
+                    .remove(override_name)
+                    .ok_or_else(|| anyhow!("no such override {override_name}"))?;
+                values.insert(ix as u32, value);
+            }
+        }
+
+        self.grammar_mut().override_config = Some(OverrideConfig { query, values });
+        Ok(self)
+    }
+
     fn grammar_mut(&mut self) -> &mut Grammar {
         Arc::get_mut(self.grammar.as_mut().unwrap()).unwrap()
     }
@@ -800,17 +869,6 @@ impl Language {
         self.config.name.clone()
     }
 
-    pub fn line_comment_prefix(&self) -> Option<&Arc<str>> {
-        self.config.line_comment.as_ref()
-    }
-
-    pub fn block_comment_delimiters(&self) -> Option<(&Arc<str>, &Arc<str>)> {
-        self.config
-            .block_comment
-            .as_ref()
-            .map(|(start, end)| (start, end))
-    }
-
     pub async fn disk_based_diagnostic_sources(&self) -> &[String] {
         match self.adapter.as_ref() {
             Some(adapter) => &adapter.disk_based_diagnostic_sources,
@@ -886,10 +944,6 @@ impl Language {
         result
     }
 
-    pub fn brackets(&self) -> &[BracketPair] {
-        &self.config.brackets
-    }
-
     pub fn path_suffixes(&self) -> &[String] {
         &self.config.path_suffixes
     }
@@ -912,6 +966,43 @@ impl Language {
     }
 }
 
+impl LanguageConfigYeet {
+    pub fn line_comment_prefix(&self) -> Option<&Arc<str>> {
+        Override::as_option(
+            self.over_ride().map(|o| &o.line_comment),
+            &self.language.config.line_comment,
+        )
+    }
+
+    pub fn block_comment_delimiters(&self) -> Option<(&Arc<str>, &Arc<str>)> {
+        Override::as_option(
+            self.over_ride().map(|o| &o.block_comment),
+            &self.language.config.block_comment,
+        )
+        .map(|e| (&e.0, &e.1))
+    }
+
+    pub fn brackets(&self) -> &[BracketPair] {
+        &self.language.config.brackets
+    }
+
+    pub fn should_autoclose_before(&self, c: char) -> bool {
+        c.is_whitespace() || self.language.config.autoclose_before.contains(c)
+    }
+
+    fn over_ride(&self) -> Option<&LanguageConfigOverride> {
+        self.override_id.and_then(|id| {
+            self.language
+                .grammar
+                .as_ref()?
+                .override_config
+                .as_ref()?
+                .values
+                .get(&id)
+        })
+    }
+}
+
 impl Hash for Language {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
         self.id().hash(state)

crates/language/src/syntax_map.rs 🔗

@@ -1127,6 +1127,41 @@ fn splice_included_ranges(
     ranges
 }
 
+impl<'a> SyntaxLayerInfo<'a> {
+    pub(crate) fn override_id(&self, offset: usize, text: &text::BufferSnapshot) -> Option<u32> {
+        let text = TextProvider(text.as_rope());
+        let config = self.language.grammar.as_ref()?.override_config.as_ref()?;
+
+        let mut query_cursor = QueryCursorHandle::new();
+        query_cursor.set_byte_range(offset..offset);
+
+        let mut smallest_match: Option<(u32, Range<usize>)> = None;
+        for mat in query_cursor.matches(&config.query, self.node, text) {
+            for capture in mat.captures {
+                if !config.values.contains_key(&capture.index) {
+                    continue;
+                }
+
+                let range = capture.node.byte_range();
+                if offset <= range.start || offset >= range.end {
+                    continue;
+                }
+
+                if let Some((_, smallest_range)) = &smallest_match {
+                    if range.len() < smallest_range.len() {
+                        smallest_match = Some((capture.index, range))
+                    }
+                    continue;
+                }
+
+                smallest_match = Some((capture.index, range));
+            }
+        }
+
+        smallest_match.map(|(index, _)| index)
+    }
+}
+
 impl std::ops::Deref for SyntaxMap {
     type Target = SyntaxSnapshot;
 

crates/zed/src/languages/tsx/config.toml 🔗

@@ -12,3 +12,12 @@ brackets = [
     { start = "`", end = "`", close = true, newline = false },
     { start = "/*", end = " */", close = true, newline = false },
 ]
+
+[overrides.element]
+line_comment = { remove = true }
+block_comment = ["{/* ", " */}"]
+
+[overrides.string]
+brackets = [
+    { start = "{", end = "}", close = true, newline = true },
+]