language_config.rs

  1use crate::LanguageName;
  2use collections::{HashMap, HashSet, IndexSet};
  3use gpui_shared_string::SharedString;
  4use lsp::LanguageServerName;
  5use regex::Regex;
  6use schemars::{JsonSchema, SchemaGenerator, json_schema};
  7use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
  8use std::{num::NonZeroU32, path::Path, sync::Arc};
  9use util::serde::default_true;
 10
/// Controls the soft-wrapping behavior in the editor.
///
/// Variant names are serialized and deserialized in `snake_case`.
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum SoftWrap {
    /// Prefer a single line generally, unless an overly long line is encountered.
    None,
    /// Deprecated: use `None` instead. Left to avoid breaking existing users' configs.
    /// Prefer a single line generally, unless an overly long line is encountered.
    PreferLine,
    /// Soft wrap lines that exceed the editor width.
    EditorWidth,
    /// Soft wrap lines at the preferred line length or the editor width (whichever is smaller).
    /// Also accepted under the name `preferred_line_length` when deserializing.
    #[serde(alias = "preferred_line_length")]
    Bounded,
}
 26
/// Top-level configuration for a language, typically loaded from a `config.toml`
/// shipped alongside the grammar.
///
/// Only `name` is required when deserializing; every other field is either an
/// `Option` or carries a serde default.
#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct LanguageConfig {
    /// Human-readable name of the language.
    pub name: LanguageName,
    /// The name of this language for a Markdown code fence block.
    pub code_fence_block_name: Option<Arc<str>>,
    /// Alternative language names that Jupyter kernels may report for this language.
    /// Used when a kernel's `language` field differs from Zed's language name.
    /// For example, the Nu extension would set this to `["nushell"]`.
    #[serde(default)]
    pub kernel_language_names: Vec<Arc<str>>,
    /// The name of the grammar in a WASM bundle (experimental).
    pub grammar: Option<Arc<str>>,
    /// The criteria for matching this language to a given file.
    /// Flattened: the matcher's fields appear at the top level of the config.
    #[serde(flatten)]
    pub matcher: LanguageMatcher,
    /// List of bracket types in a language.
    #[serde(default)]
    pub brackets: BracketPairConfig,
    /// If set to true, auto indentation uses last non empty line to determine
    /// the indentation level for a new line.
    #[serde(default = "auto_indent_using_last_non_empty_line_default")]
    pub auto_indent_using_last_non_empty_line: bool,
    /// Whether indentation of pasted content should be adjusted based on the context.
    #[serde(default)]
    pub auto_indent_on_paste: Option<bool>,
    /// A regex that is used to determine whether the indentation level should be
    /// increased in the following line.
    #[serde(default, deserialize_with = "deserialize_regex")]
    #[schemars(schema_with = "regex_json_schema")]
    pub increase_indent_pattern: Option<Regex>,
    /// A regex that is used to determine whether the indentation level should be
    /// decreased in the following line.
    #[serde(default, deserialize_with = "deserialize_regex")]
    #[schemars(schema_with = "regex_json_schema")]
    pub decrease_indent_pattern: Option<Regex>,
    /// A list of rules for decreasing indentation. Each rule pairs a regex with a set of valid
    /// "block-starting" tokens. When a line matches a pattern, its indentation is aligned with
    /// the most recent line that began with a corresponding token. This enables context-aware
    /// outdenting, like aligning an `else` with its `if`.
    #[serde(default)]
    pub decrease_indent_patterns: Vec<DecreaseIndentConfig>,
    /// A list of characters that trigger the automatic insertion of a closing
    /// bracket when they immediately precede the point where an opening
    /// bracket is inserted.
    #[serde(default)]
    pub autoclose_before: String,
    /// A placeholder used internally by Semantic Index.
    #[serde(default)]
    pub collapsed_placeholder: String,
    /// A line comment string that is inserted in e.g. `toggle comments` action.
    /// A language can have multiple flavours of line comments. All of the provided line comments are
    /// used for comment continuations on the next line, but only the first one is used for Editor::ToggleComments.
    #[serde(default)]
    pub line_comments: Vec<Arc<str>>,
    /// Delimiters and configuration for recognizing and formatting block comments.
    #[serde(default)]
    pub block_comment: Option<BlockCommentConfig>,
    /// Delimiters and configuration for recognizing and formatting documentation comments.
    /// Also accepted under the key `documentation`.
    #[serde(default, alias = "documentation")]
    pub documentation_comment: Option<BlockCommentConfig>,
    /// List markers that are inserted unchanged on newline (e.g., `- `, `* `, `+ `).
    #[serde(default)]
    pub unordered_list: Vec<Arc<str>>,
    /// Configuration for ordered lists with auto-incrementing numbers on newline (e.g., `1. ` becomes `2. `).
    #[serde(default)]
    pub ordered_list: Vec<OrderedListConfig>,
    /// Configuration for task lists where multiple markers map to a single continuation prefix (e.g., `- [x] ` continues as `- [ ] `).
    #[serde(default)]
    pub task_list: Option<TaskListConfig>,
    /// A list of additional regex patterns that should be treated as prefixes
    /// for creating boundaries during rewrapping, ensuring content from one
    /// prefixed section doesn't merge with another (e.g., markdown list items).
    /// By default, Zed treats paragraph and comment prefixes as boundaries.
    #[serde(default, deserialize_with = "deserialize_regex_vec")]
    #[schemars(schema_with = "regex_vec_json_schema")]
    pub rewrap_prefixes: Vec<Regex>,
    /// A list of language servers that are allowed to run on subranges of a given language.
    #[serde(default)]
    pub scope_opt_in_language_servers: Vec<LanguageServerName>,
    /// Named sets of overrides for parts of this configuration
    /// (see [`LanguageConfigOverride`]).
    #[serde(default)]
    pub overrides: HashMap<String, LanguageConfigOverride>,
    /// A list of characters that Zed should treat as word characters for the
    /// purpose of features that operate on word boundaries, like 'move to next word end'
    /// or a whole-word search in buffer search.
    #[serde(default)]
    pub word_characters: HashSet<char>,
    /// Whether to indent lines using tab characters, as opposed to multiple
    /// spaces.
    #[serde(default)]
    pub hard_tabs: Option<bool>,
    /// How many columns a tab should occupy.
    #[serde(default)]
    #[schemars(range(min = 1, max = 128))]
    pub tab_size: Option<NonZeroU32>,
    /// How to soft-wrap long lines of text.
    #[serde(default)]
    pub soft_wrap: Option<SoftWrap>,
    /// When set, selections can be wrapped using prefix/suffix pairs on both sides.
    #[serde(default)]
    pub wrap_characters: Option<WrapCharactersConfig>,
    /// The name of a Prettier parser that will be used for this language when no file path is available.
    /// If there's a parser name in the language settings, that will be used instead.
    #[serde(default)]
    pub prettier_parser_name: Option<String>,
    /// If true, this language is only for syntax highlighting via an injection into other
    /// languages, but should not appear to the user as a distinct language.
    #[serde(default)]
    pub hidden: bool,
    /// If configured, this language contains JSX style tags, and should support auto-closing of those tags.
    #[serde(default)]
    pub jsx_tag_auto_close: Option<JsxTagAutoCloseConfig>,
    /// A list of characters that Zed should treat as word characters for completion queries.
    #[serde(default)]
    pub completion_query_characters: HashSet<char>,
    /// A list of characters that Zed should treat as word characters for linked edit operations.
    #[serde(default)]
    pub linked_edit_characters: HashSet<char>,
    /// A list of preferred debuggers for this language.
    #[serde(default)]
    pub debuggers: IndexSet<SharedString>,
}
151
152impl LanguageConfig {
153    pub const FILE_NAME: &str = "config.toml";
154
155    pub fn load(config_path: impl AsRef<Path>) -> anyhow::Result<Self> {
156        let config = std::fs::read_to_string(config_path.as_ref())?;
157        toml::from_str(&config).map_err(Into::into)
158    }
159}
160
161impl Default for LanguageConfig {
162    fn default() -> Self {
163        Self {
164            name: LanguageName::new_static(""),
165            code_fence_block_name: None,
166            kernel_language_names: Default::default(),
167            grammar: None,
168            matcher: LanguageMatcher::default(),
169            brackets: Default::default(),
170            auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(),
171            auto_indent_on_paste: None,
172            increase_indent_pattern: Default::default(),
173            decrease_indent_pattern: Default::default(),
174            decrease_indent_patterns: Default::default(),
175            autoclose_before: Default::default(),
176            line_comments: Default::default(),
177            block_comment: Default::default(),
178            documentation_comment: Default::default(),
179            unordered_list: Default::default(),
180            ordered_list: Default::default(),
181            task_list: Default::default(),
182            rewrap_prefixes: Default::default(),
183            scope_opt_in_language_servers: Default::default(),
184            overrides: Default::default(),
185            word_characters: Default::default(),
186            collapsed_placeholder: Default::default(),
187            hard_tabs: None,
188            tab_size: None,
189            soft_wrap: None,
190            wrap_characters: None,
191            prettier_parser_name: None,
192            hidden: false,
193            jsx_tag_auto_close: None,
194            completion_query_characters: Default::default(),
195            linked_edit_characters: Default::default(),
196            debuggers: Default::default(),
197        }
198    }
199}
200
/// One rule for context-aware outdenting: a regex paired with the
/// block-starting tokens it may follow (see
/// `LanguageConfig::decrease_indent_patterns`).
#[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
pub struct DecreaseIndentConfig {
    /// Regex that a line must match for this rule to apply. Optional; absent
    /// when the key is omitted.
    #[serde(default, deserialize_with = "deserialize_regex")]
    #[schemars(schema_with = "regex_json_schema")]
    pub pattern: Option<Regex>,
    /// Block-starting tokens associated with `pattern`. Empty when omitted.
    #[serde(default)]
    pub valid_after: Vec<String>,
}
209
/// Configuration for continuing ordered lists with auto-incrementing numbers.
///
/// Both fields are required and are stored as plain strings; `pattern` is not
/// compiled into a regex by this type.
#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct OrderedListConfig {
    /// A regex pattern with a capture group for the number portion (e.g., `(\\d+)\\. `).
    pub pattern: String,
    /// A format string where `{1}` is replaced with the incremented number (e.g., `{1}. `).
    pub format: String,
}
218
/// Configuration for continuing task lists on newline.
///
/// Several markers (`prefixes`) map to one `continuation` marker. Both fields
/// are required.
#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct TaskListConfig {
    /// The list markers to match (e.g., `- [ ] `, `- [x] `).
    pub prefixes: Vec<Arc<str>>,
    /// The marker to insert when continuing the list on a new line (e.g., `- [ ] `).
    pub continuation: Arc<str>,
}
227
/// The criteria for matching a language to a given file.
///
/// Comparison and equality are implemented by hand in this file because
/// `Regex` is compared through its source text (`Regex::as_str`).
#[derive(Clone, Debug, Serialize, Deserialize, Default, JsonSchema)]
pub struct LanguageMatcher {
    /// Given a list of `LanguageConfig`s, the language of a file can be determined based on the path extension matching any of the `path_suffixes`.
    #[serde(default)]
    pub path_suffixes: Vec<String>,
    /// A regex pattern that determines whether the language should be assigned to a file or not.
    /// Serialized as its source string, or as `none` when absent.
    #[serde(
        default,
        serialize_with = "serialize_regex",
        deserialize_with = "deserialize_regex"
    )]
    #[schemars(schema_with = "regex_json_schema")]
    pub first_line_pattern: Option<Regex>,
    /// Alternative names for this language used in vim/emacs modelines.
    /// These are matched case-insensitively against the `mode` (emacs) or
    /// `filetype`/`ft` (vim) specified in the modeline.
    #[serde(default)]
    pub modeline_aliases: Vec<String>,
}
247
248impl Ord for LanguageMatcher {
249    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
250        self.path_suffixes
251            .cmp(&other.path_suffixes)
252            .then_with(|| {
253                self.first_line_pattern
254                    .as_ref()
255                    .map(Regex::as_str)
256                    .cmp(&other.first_line_pattern.as_ref().map(Regex::as_str))
257            })
258            .then_with(|| self.modeline_aliases.cmp(&other.modeline_aliases))
259    }
260}
261
impl PartialOrd for LanguageMatcher {
    /// Delegates to the `Ord` implementation, so the partial order always
    /// agrees with the total order and never returns `None`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}
267
// Marker impl: the hand-written `PartialEq` for `LanguageMatcher` compares only
// strings (path suffixes, regex source text, modeline aliases).
impl Eq for LanguageMatcher {}
269
270impl PartialEq for LanguageMatcher {
271    fn eq(&self, other: &Self) -> bool {
272        self.path_suffixes == other.path_suffixes
273            && self.first_line_pattern.as_ref().map(Regex::as_str)
274                == other.first_line_pattern.as_ref().map(Regex::as_str)
275            && self.modeline_aliases == other.modeline_aliases
276    }
277}
278
/// The configuration for JSX tag auto-closing.
///
/// The first four node names are required; the remaining fields are optional.
#[derive(Clone, Deserialize, JsonSchema, Debug)]
pub struct JsxTagAutoCloseConfig {
    /// The name of the node for an opening tag.
    pub open_tag_node_name: String,
    /// The name of the node for a closing tag.
    pub close_tag_node_name: String,
    /// The name of the node for a complete element with children for open and close tags.
    pub jsx_element_node_name: String,
    /// The name of the node found within both opening and closing
    /// tags that describes the tag name.
    pub tag_name_node_name: String,
    /// Alternate node names for tag names.
    /// Specifically needed as TSX represents the name in `<Foo.Bar>`
    /// as `member_expression` rather than `identifier` as usual.
    #[serde(default)]
    pub tag_name_node_name_alternates: Vec<String>,
    /// Some grammars are smart enough to detect a closing tag
    /// that is not valid, i.e. doesn't match its corresponding
    /// opening tag or does not have a corresponding opening tag.
    /// This should be set to the name of the node for invalid
    /// closing tags if the grammar contains such a node, otherwise
    /// detecting already closed tags will not work properly.
    #[serde(default)]
    pub erroneous_close_tag_node_name: Option<String>,
    /// See `erroneous_close_tag_node_name` above for details.
    /// This should be set if the node used for the tag name
    /// within erroneous closing tags is different from the
    /// normal tag name node name.
    #[serde(default)]
    pub erroneous_close_tag_name_node_name: Option<String>,
}
311
/// The configuration for block comments for this language.
///
/// Deserialization is implemented by hand in this file and accepts either a
/// table with all four fields or a two-element `[start, end]` array; in the
/// array form `prefix` is empty and `tab_size` is 0.
#[derive(Clone, Debug, JsonSchema, PartialEq)]
pub struct BlockCommentConfig {
    /// The start tag of a block comment.
    pub start: Arc<str>,
    /// The end tag of a block comment.
    pub end: Arc<str>,
    /// A character to add as a prefix when a new line is added to a block comment.
    pub prefix: Arc<str>,
    /// An indent to add for prefix and end line upon new line.
    #[schemars(range(min = 1, max = 128))]
    pub tab_size: u32,
}
325
326impl<'de> Deserialize<'de> for BlockCommentConfig {
327    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
328    where
329        D: Deserializer<'de>,
330    {
331        #[derive(Deserialize)]
332        #[serde(untagged)]
333        enum BlockCommentConfigHelper {
334            New {
335                start: Arc<str>,
336                end: Arc<str>,
337                prefix: Arc<str>,
338                tab_size: u32,
339            },
340            Old([Arc<str>; 2]),
341        }
342
343        match BlockCommentConfigHelper::deserialize(deserializer)? {
344            BlockCommentConfigHelper::New {
345                start,
346                end,
347                prefix,
348                tab_size,
349            } => Ok(BlockCommentConfig {
350                start,
351                end,
352                prefix,
353                tab_size,
354            }),
355            BlockCommentConfigHelper::Old([start, end]) => Ok(BlockCommentConfig {
356                start,
357                end,
358                prefix: "".into(),
359                tab_size: 0,
360            }),
361        }
362    }
363}
364
/// A set of overrides stored in the `overrides` map of a `LanguageConfig`.
///
/// Every deserialized field is optional and falls back to its `Default`.
// NOTE(review): the `Override<_>` fields share their names with `LanguageConfig`
// fields; presumably each one replaces or removes the same-named base value —
// confirm against the callers of `Override::as_option`.
#[derive(Clone, Deserialize, Default, Debug, JsonSchema)]
pub struct LanguageConfigOverride {
    #[serde(default)]
    pub line_comments: Override<Vec<Arc<str>>>,
    #[serde(default)]
    pub block_comment: Override<BlockCommentConfig>,
    // Never read from the config file (`serde(skip)`), so it is empty right
    // after deserialization.
    // NOTE(review): presumably filled in later with indices into the bracket
    // pair list — confirm where this is populated.
    #[serde(skip)]
    pub disabled_bracket_ixs: Vec<u16>,
    #[serde(default)]
    pub word_characters: Override<HashSet<char>>,
    #[serde(default)]
    pub completion_query_characters: Override<HashSet<char>>,
    #[serde(default)]
    pub linked_edit_characters: Override<HashSet<char>>,
    #[serde(default)]
    pub opt_into_language_servers: Vec<LanguageServerName>,
    #[serde(default)]
    pub prefer_label_for_snippet: Option<bool>,
}
384
/// An override for a value of type `T`: either a replacement value or a
/// request to remove the original value.
///
/// The representation is untagged, so the variants are distinguished by shape
/// and tried in declaration order: a `{ remove = <bool> }` table first, then
/// anything that deserializes as `T`.
#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)]
#[serde(untagged)]
pub enum Override<T> {
    /// Removes the original value when `remove` is `true`; keeps it when `false`
    /// (see `Override::as_option`).
    Remove { remove: bool },
    /// Replaces the original value with the contained one.
    Set(T),
}
391
392impl<T> Default for Override<T> {
393    fn default() -> Self {
394        Override::Remove { remove: false }
395    }
396}
397
398impl<T> Override<T> {
399    pub fn as_option<'a>(this: Option<&'a Self>, original: Option<&'a T>) -> Option<&'a T> {
400        match this {
401            Some(Self::Set(value)) => Some(value),
402            Some(Self::Remove { remove: true }) => None,
403            Some(Self::Remove { remove: false }) | None => original,
404        }
405    }
406}
407
/// Configuration of handling bracket pairs for a given language.
///
/// This struct includes settings for defining which pairs of characters are considered brackets and
/// also specifies any language-specific scopes where these pairs should be ignored for bracket matching purposes.
///
/// Its JSON schema and its deserialized form are a list of [`BracketPairContent`] entries,
/// which the hand-written `Deserialize` impl splits into the two parallel vectors below.
#[derive(Clone, Debug, Default, JsonSchema)]
#[schemars(with = "Vec::<BracketPairContent>")]
pub struct BracketPairConfig {
    /// A list of character pairs that should be treated as brackets in the context of a given language.
    pub pairs: Vec<BracketPair>,
    /// A list of tree-sitter scopes for which a given bracket should not be active.
    /// The N-th entry in [`Self::disabled_scopes_by_bracket_ix`] contains a list of disabled scopes
    /// for the N-th entry in [`Self::pairs`].
    pub disabled_scopes_by_bracket_ix: Vec<Vec<String>>,
}
421
422impl BracketPairConfig {
423    pub fn is_closing_brace(&self, c: char) -> bool {
424        self.pairs.iter().any(|pair| pair.end.starts_with(c))
425    }
426}
427
/// One entry of the bracket list as written in a config: the fields of a
/// [`BracketPair`] (flattened into the entry) plus an optional `not_in` list.
#[derive(Deserialize, JsonSchema)]
pub struct BracketPairContent {
    /// The bracket pair itself; its fields appear directly in the entry.
    #[serde(flatten)]
    pub bracket_pair: BracketPair,
    /// Scopes in which this pair is disabled; becomes the matching entry of
    /// `BracketPairConfig::disabled_scopes_by_bracket_ix`. Empty when omitted.
    #[serde(default)]
    pub not_in: Vec<String>,
}
435
436impl<'de> Deserialize<'de> for BracketPairConfig {
437    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
438    where
439        D: Deserializer<'de>,
440    {
441        let result = Vec::<BracketPairContent>::deserialize(deserializer)?;
442        let (brackets, disabled_scopes_by_bracket_ix) = result
443            .into_iter()
444            .map(|entry| (entry.bracket_pair, entry.not_in))
445            .unzip();
446
447        Ok(BracketPairConfig {
448            pairs: brackets,
449            disabled_scopes_by_bracket_ix,
450        })
451    }
452}
453
/// Describes a single bracket pair and how an editor should react to e.g. inserting
/// an opening bracket or to a newline character insertion in between `start` and `end` characters.
///
/// When deserializing, every field except `surround` is required.
#[derive(Clone, Debug, Default, Deserialize, PartialEq, JsonSchema)]
pub struct BracketPair {
    /// Starting substring for a bracket.
    pub start: String,
    /// Ending substring for a bracket.
    pub end: String,
    /// True if `end` should be automatically inserted right after `start` characters.
    pub close: bool,
    /// True if selected text should be surrounded by `start` and `end` characters.
    /// When omitted, the value comes from the `default_true` helper.
    #[serde(default = "default_true")]
    pub surround: bool,
    /// True if an extra newline should be inserted while the cursor is in the middle
    /// of that bracket pair.
    pub newline: bool,
}
471
/// Opening and closing tokens used to wrap a selection, each split into a
/// prefix and a suffix. All four fields are required.
#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct WrapCharactersConfig {
    /// Opening token split into a prefix and suffix. The first caret goes
    /// after the prefix (i.e., between prefix and suffix).
    pub start_prefix: String,
    /// Suffix part of the opening token (see `start_prefix`).
    pub start_suffix: String,
    /// Closing token split into a prefix and suffix. The second caret goes
    /// after the prefix (i.e., between prefix and suffix).
    pub end_prefix: String,
    /// Suffix part of the closing token (see `end_prefix`).
    pub end_suffix: String,
}
483
/// Default for `LanguageConfig::auto_indent_using_last_non_empty_line`, used
/// both as its serde default and in `LanguageConfig::default`: enabled.
pub fn auto_indent_using_last_non_empty_line_default() -> bool {
    true
}
487
488pub fn deserialize_regex<'de, D: Deserializer<'de>>(d: D) -> Result<Option<Regex>, D::Error> {
489    let source = Option::<String>::deserialize(d)?;
490    if let Some(source) = source {
491        Ok(Some(regex::Regex::new(&source).map_err(de::Error::custom)?))
492    } else {
493        Ok(None)
494    }
495}
496
497pub fn regex_json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
498    json_schema!({
499        "type": "string"
500    })
501}
502
503pub fn serialize_regex<S>(regex: &Option<Regex>, serializer: S) -> Result<S::Ok, S::Error>
504where
505    S: Serializer,
506{
507    match regex {
508        Some(regex) => serializer.serialize_str(regex.as_str()),
509        None => serializer.serialize_none(),
510    }
511}
512
513pub fn deserialize_regex_vec<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<Regex>, D::Error> {
514    let sources = Vec::<String>::deserialize(d)?;
515    sources
516        .into_iter()
517        .map(|source| regex::Regex::new(&source))
518        .collect::<Result<_, _>>()
519        .map_err(de::Error::custom)
520}
521
522pub fn regex_vec_json_schema(_: &mut SchemaGenerator) -> schemars::Schema {
523    json_schema!({
524        "type": "array",
525        "items": { "type": "string" }
526    })
527}