language_config.rs

  1use crate::LanguageName;
  2use collections::{HashMap, HashSet, IndexSet};
  3use gpui::SharedString;
  4use lsp::LanguageServerName;
  5use regex::Regex;
  6use schemars::{JsonSchema, SchemaGenerator, json_schema};
  7use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
  8use std::{num::NonZeroU32, path::Path, sync::Arc};
  9use util::serde::default_true;
 10
 11/// Controls the soft-wrapping behavior in the editor.
 12#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
 13#[serde(rename_all = "snake_case")]
 14pub enum SoftWrap {
 15    /// Prefer a single line generally, unless an overly long line is encountered.
 16    None,
 17    /// Deprecated: use None instead. Left to avoid breaking existing users' configs.
 18    /// Prefer a single line generally, unless an overly long line is encountered.
 19    PreferLine,
 20    /// Soft wrap lines that exceed the editor width.
 21    EditorWidth,
 22    /// Soft wrap lines at the preferred line length.
 23    PreferredLineLength,
 24    /// Soft wrap line at the preferred line length or the editor width (whichever is smaller).
 25    Bounded,
 26}
 27
 28/// Top-level configuration for a language, typically loaded from a `config.toml`
 29/// shipped alongside the grammar.
 30#[derive(Clone, Debug, Deserialize, JsonSchema)]
 31pub struct LanguageConfig {
 32    /// Human-readable name of the language.
 33    pub name: LanguageName,
 34    /// The name of this language for a Markdown code fence block
 35    pub code_fence_block_name: Option<Arc<str>>,
 36    /// Alternative language names that Jupyter kernels may report for this language.
 37    /// Used when a kernel's `language` field differs from Zed's language name.
 38    /// For example, the Nu extension would set this to `["nushell"]`.
 39    #[serde(default)]
 40    pub kernel_language_names: Vec<Arc<str>>,
 41    // The name of the grammar in a WASM bundle (experimental).
 42    pub grammar: Option<Arc<str>>,
 43    /// The criteria for matching this language to a given file.
 44    #[serde(flatten)]
 45    pub matcher: LanguageMatcher,
 46    /// List of bracket types in a language.
 47    #[serde(default)]
 48    pub brackets: BracketPairConfig,
 49    /// If set to true, auto indentation uses last non empty line to determine
 50    /// the indentation level for a new line.
 51    #[serde(default = "auto_indent_using_last_non_empty_line_default")]
 52    pub auto_indent_using_last_non_empty_line: bool,
 53    // Whether indentation of pasted content should be adjusted based on the context.
 54    #[serde(default)]
 55    pub auto_indent_on_paste: Option<bool>,
 56    /// A regex that is used to determine whether the indentation level should be
 57    /// increased in the following line.
 58    #[serde(default, deserialize_with = "deserialize_regex")]
 59    #[schemars(schema_with = "regex_json_schema")]
 60    pub increase_indent_pattern: Option<Regex>,
 61    /// A regex that is used to determine whether the indentation level should be
 62    /// decreased in the following line.
 63    #[serde(default, deserialize_with = "deserialize_regex")]
 64    #[schemars(schema_with = "regex_json_schema")]
 65    pub decrease_indent_pattern: Option<Regex>,
 66    /// A list of rules for decreasing indentation. Each rule pairs a regex with a set of valid
 67    /// "block-starting" tokens. When a line matches a pattern, its indentation is aligned with
 68    /// the most recent line that began with a corresponding token. This enables context-aware
 69    /// outdenting, like aligning an `else` with its `if`.
 70    #[serde(default)]
 71    pub decrease_indent_patterns: Vec<DecreaseIndentConfig>,
 72    /// A list of characters that trigger the automatic insertion of a closing
 73    /// bracket when they immediately precede the point where an opening
 74    /// bracket is inserted.
 75    #[serde(default)]
 76    pub autoclose_before: String,
 77    /// A placeholder used internally by Semantic Index.
 78    #[serde(default)]
 79    pub collapsed_placeholder: String,
 80    /// A line comment string that is inserted in e.g. `toggle comments` action.
 81    /// A language can have multiple flavours of line comments. All of the provided line comments are
 82    /// used for comment continuations on the next line, but only the first one is used for Editor::ToggleComments.
 83    #[serde(default)]
 84    pub line_comments: Vec<Arc<str>>,
 85    /// Delimiters and configuration for recognizing and formatting block comments.
 86    #[serde(default)]
 87    pub block_comment: Option<BlockCommentConfig>,
 88    /// Delimiters and configuration for recognizing and formatting documentation comments.
 89    #[serde(default, alias = "documentation")]
 90    pub documentation_comment: Option<BlockCommentConfig>,
 91    /// List markers that are inserted unchanged on newline (e.g., `- `, `* `, `+ `).
 92    #[serde(default)]
 93    pub unordered_list: Vec<Arc<str>>,
 94    /// Configuration for ordered lists with auto-incrementing numbers on newline (e.g., `1. ` becomes `2. `).
 95    #[serde(default)]
 96    pub ordered_list: Vec<OrderedListConfig>,
 97    /// Configuration for task lists where multiple markers map to a single continuation prefix (e.g., `- [x] ` continues as `- [ ] `).
 98    #[serde(default)]
 99    pub task_list: Option<TaskListConfig>,
100    /// A list of additional regex patterns that should be treated as prefixes
101    /// for creating boundaries during rewrapping, ensuring content from one
102    /// prefixed section doesn't merge with another (e.g., markdown list items).
103    /// By default, Zed treats as paragraph and comment prefixes as boundaries.
104    #[serde(default, deserialize_with = "deserialize_regex_vec")]
105    #[schemars(schema_with = "regex_vec_json_schema")]
106    pub rewrap_prefixes: Vec<Regex>,
107    /// A list of language servers that are allowed to run on subranges of a given language.
108    #[serde(default)]
109    pub scope_opt_in_language_servers: Vec<LanguageServerName>,
110    #[serde(default)]
111    pub overrides: HashMap<String, LanguageConfigOverride>,
112    /// A list of characters that Zed should treat as word characters for the
113    /// purpose of features that operate on word boundaries, like 'move to next word end'
114    /// or a whole-word search in buffer search.
115    #[serde(default)]
116    pub word_characters: HashSet<char>,
117    /// Whether to indent lines using tab characters, as opposed to multiple
118    /// spaces.
119    #[serde(default)]
120    pub hard_tabs: Option<bool>,
121    /// How many columns a tab should occupy.
122    #[serde(default)]
123    #[schemars(range(min = 1, max = 128))]
124    pub tab_size: Option<NonZeroU32>,
125    /// How to soft-wrap long lines of text.
126    #[serde(default)]
127    pub soft_wrap: Option<SoftWrap>,
128    /// When set, selections can be wrapped using prefix/suffix pairs on both sides.
129    #[serde(default)]
130    pub wrap_characters: Option<WrapCharactersConfig>,
131    /// The name of a Prettier parser that will be used for this language when no file path is available.
132    /// If there's a parser name in the language settings, that will be used instead.
133    #[serde(default)]
134    pub prettier_parser_name: Option<String>,
135    /// If true, this language is only for syntax highlighting via an injection into other
136    /// languages, but should not appear to the user as a distinct language.
137    #[serde(default)]
138    pub hidden: bool,
139    /// If configured, this language contains JSX style tags, and should support auto-closing of those tags.
140    #[serde(default)]
141    pub jsx_tag_auto_close: Option<JsxTagAutoCloseConfig>,
142    /// A list of characters that Zed should treat as word characters for completion queries.
143    #[serde(default)]
144    pub completion_query_characters: HashSet<char>,
145    /// A list of characters that Zed should treat as word characters for linked edit operations.
146    #[serde(default)]
147    pub linked_edit_characters: HashSet<char>,
148    /// A list of preferred debuggers for this language.
149    #[serde(default)]
150    pub debuggers: IndexSet<SharedString>,
151    /// A list of import namespace segments that aren't expected to appear in file paths. For
152    /// example, "super" and "crate" in Rust.
153    #[serde(default)]
154    pub ignored_import_segments: HashSet<Arc<str>>,
155    /// Regular expression that matches substrings to omit from import paths, to make the paths more
156    /// similar to how they are specified when imported. For example, "/mod\.rs$" or "/__init__\.py$".
157    #[serde(default, deserialize_with = "deserialize_regex")]
158    #[schemars(schema_with = "regex_json_schema")]
159    pub import_path_strip_regex: Option<Regex>,
160}
161
162impl LanguageConfig {
163    pub const FILE_NAME: &str = "config.toml";
164
165    pub fn load(config_path: impl AsRef<Path>) -> anyhow::Result<Self> {
166        let config = std::fs::read_to_string(config_path.as_ref())?;
167        toml::from_str(&config).map_err(Into::into)
168    }
169}
170
171impl Default for LanguageConfig {
172    fn default() -> Self {
173        Self {
174            name: LanguageName::new_static(""),
175            code_fence_block_name: None,
176            kernel_language_names: Default::default(),
177            grammar: None,
178            matcher: LanguageMatcher::default(),
179            brackets: Default::default(),
180            auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(),
181            auto_indent_on_paste: None,
182            increase_indent_pattern: Default::default(),
183            decrease_indent_pattern: Default::default(),
184            decrease_indent_patterns: Default::default(),
185            autoclose_before: Default::default(),
186            line_comments: Default::default(),
187            block_comment: Default::default(),
188            documentation_comment: Default::default(),
189            unordered_list: Default::default(),
190            ordered_list: Default::default(),
191            task_list: Default::default(),
192            rewrap_prefixes: Default::default(),
193            scope_opt_in_language_servers: Default::default(),
194            overrides: Default::default(),
195            word_characters: Default::default(),
196            collapsed_placeholder: Default::default(),
197            hard_tabs: None,
198            tab_size: None,
199            soft_wrap: None,
200            wrap_characters: None,
201            prettier_parser_name: None,
202            hidden: false,
203            jsx_tag_auto_close: None,
204            completion_query_characters: Default::default(),
205            linked_edit_characters: Default::default(),
206            debuggers: Default::default(),
207            ignored_import_segments: Default::default(),
208            import_path_strip_regex: None,
209        }
210    }
211}
212
213#[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
214pub struct DecreaseIndentConfig {
215    #[serde(default, deserialize_with = "deserialize_regex")]
216    #[schemars(schema_with = "regex_json_schema")]
217    pub pattern: Option<Regex>,
218    #[serde(default)]
219    pub valid_after: Vec<String>,
220}
221
222/// Configuration for continuing ordered lists with auto-incrementing numbers.
223#[derive(Clone, Debug, Deserialize, JsonSchema)]
224pub struct OrderedListConfig {
225    /// A regex pattern with a capture group for the number portion (e.g., `(\\d+)\\. `).
226    pub pattern: String,
227    /// A format string where `{1}` is replaced with the incremented number (e.g., `{1}. `).
228    pub format: String,
229}
230
231/// Configuration for continuing task lists on newline.
232#[derive(Clone, Debug, Deserialize, JsonSchema)]
233pub struct TaskListConfig {
234    /// The list markers to match (e.g., `- [ ] `, `- [x] `).
235    pub prefixes: Vec<Arc<str>>,
236    /// The marker to insert when continuing the list on a new line (e.g., `- [ ] `).
237    pub continuation: Arc<str>,
238}
239
240#[derive(Clone, Debug, Serialize, Deserialize, Default, JsonSchema)]
241pub struct LanguageMatcher {
242    /// Given a list of `LanguageConfig`'s, the language of a file can be determined based on the path extension matching any of the `path_suffixes`.
243    #[serde(default)]
244    pub path_suffixes: Vec<String>,
245    /// A regex pattern that determines whether the language should be assigned to a file or not.
246    #[serde(
247        default,
248        serialize_with = "serialize_regex",
249        deserialize_with = "deserialize_regex"
250    )]
251    #[schemars(schema_with = "regex_json_schema")]
252    pub first_line_pattern: Option<Regex>,
253    /// Alternative names for this language used in vim/emacs modelines.
254    /// These are matched case-insensitively against the `mode` (emacs) or
255    /// `filetype`/`ft` (vim) specified in the modeline.
256    #[serde(default)]
257    pub modeline_aliases: Vec<String>,
258}
259
260impl Ord for LanguageMatcher {
261    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
262        self.path_suffixes
263            .cmp(&other.path_suffixes)
264            .then_with(|| {
265                self.first_line_pattern
266                    .as_ref()
267                    .map(Regex::as_str)
268                    .cmp(&other.first_line_pattern.as_ref().map(Regex::as_str))
269            })
270            .then_with(|| self.modeline_aliases.cmp(&other.modeline_aliases))
271    }
272}
273
274impl PartialOrd for LanguageMatcher {
275    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
276        Some(self.cmp(other))
277    }
278}
279
280impl Eq for LanguageMatcher {}
281
282impl PartialEq for LanguageMatcher {
283    fn eq(&self, other: &Self) -> bool {
284        self.path_suffixes == other.path_suffixes
285            && self.first_line_pattern.as_ref().map(Regex::as_str)
286                == other.first_line_pattern.as_ref().map(Regex::as_str)
287            && self.modeline_aliases == other.modeline_aliases
288    }
289}
290
291/// The configuration for JSX tag auto-closing.
292#[derive(Clone, Deserialize, JsonSchema, Debug)]
293pub struct JsxTagAutoCloseConfig {
294    /// The name of the node for a opening tag
295    pub open_tag_node_name: String,
296    /// The name of the node for an closing tag
297    pub close_tag_node_name: String,
298    /// The name of the node for a complete element with children for open and close tags
299    pub jsx_element_node_name: String,
300    /// The name of the node found within both opening and closing
301    /// tags that describes the tag name
302    pub tag_name_node_name: String,
303    /// Alternate Node names for tag names.
304    /// Specifically needed as TSX represents the name in `<Foo.Bar>`
305    /// as `member_expression` rather than `identifier` as usual
306    #[serde(default)]
307    pub tag_name_node_name_alternates: Vec<String>,
308    /// Some grammars are smart enough to detect a closing tag
309    /// that is not valid i.e. doesn't match it's corresponding
310    /// opening tag or does not have a corresponding opening tag
311    /// This should be set to the name of the node for invalid
312    /// closing tags if the grammar contains such a node, otherwise
313    /// detecting already closed tags will not work properly
314    #[serde(default)]
315    pub erroneous_close_tag_node_name: Option<String>,
316    /// See above for erroneous_close_tag_node_name for details
317    /// This should be set if the node used for the tag name
318    /// within erroneous closing tags is different from the
319    /// normal tag name node name
320    #[serde(default)]
321    pub erroneous_close_tag_name_node_name: Option<String>,
322}
323
324/// The configuration for block comments for this language.
325#[derive(Clone, Debug, JsonSchema, PartialEq)]
326pub struct BlockCommentConfig {
327    /// A start tag of block comment.
328    pub start: Arc<str>,
329    /// A end tag of block comment.
330    pub end: Arc<str>,
331    /// A character to add as a prefix when a new line is added to a block comment.
332    pub prefix: Arc<str>,
333    /// A indent to add for prefix and end line upon new line.
334    #[schemars(range(min = 1, max = 128))]
335    pub tab_size: u32,
336}
337
338impl<'de> Deserialize<'de> for BlockCommentConfig {
339    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
340    where
341        D: Deserializer<'de>,
342    {
343        #[derive(Deserialize)]
344        #[serde(untagged)]
345        enum BlockCommentConfigHelper {
346            New {
347                start: Arc<str>,
348                end: Arc<str>,
349                prefix: Arc<str>,
350                tab_size: u32,
351            },
352            Old([Arc<str>; 2]),
353        }
354
355        match BlockCommentConfigHelper::deserialize(deserializer)? {
356            BlockCommentConfigHelper::New {
357                start,
358                end,
359                prefix,
360                tab_size,
361            } => Ok(BlockCommentConfig {
362                start,
363                end,
364                prefix,
365                tab_size,
366            }),
367            BlockCommentConfigHelper::Old([start, end]) => Ok(BlockCommentConfig {
368                start,
369                end,
370                prefix: "".into(),
371                tab_size: 0,
372            }),
373        }
374    }
375}
376
377#[derive(Clone, Deserialize, Default, Debug, JsonSchema)]
378pub struct LanguageConfigOverride {
379    #[serde(default)]
380    pub line_comments: Override<Vec<Arc<str>>>,
381    #[serde(default)]
382    pub block_comment: Override<BlockCommentConfig>,
383    #[serde(skip)]
384    pub disabled_bracket_ixs: Vec<u16>,
385    #[serde(default)]
386    pub word_characters: Override<HashSet<char>>,
387    #[serde(default)]
388    pub completion_query_characters: Override<HashSet<char>>,
389    #[serde(default)]
390    pub linked_edit_characters: Override<HashSet<char>>,
391    #[serde(default)]
392    pub opt_into_language_servers: Vec<LanguageServerName>,
393    #[serde(default)]
394    pub prefer_label_for_snippet: Option<bool>,
395}
396
397#[derive(Clone, Deserialize, Debug, Serialize, JsonSchema)]
398#[serde(untagged)]
399pub enum Override<T> {
400    Remove { remove: bool },
401    Set(T),
402}
403
404impl<T> Default for Override<T> {
405    fn default() -> Self {
406        Override::Remove { remove: false }
407    }
408}
409
410impl<T> Override<T> {
411    pub fn as_option<'a>(this: Option<&'a Self>, original: Option<&'a T>) -> Option<&'a T> {
412        match this {
413            Some(Self::Set(value)) => Some(value),
414            Some(Self::Remove { remove: true }) => None,
415            Some(Self::Remove { remove: false }) | None => original,
416        }
417    }
418}
419
420/// Configuration of handling bracket pairs for a given language.
421///
422/// This struct includes settings for defining which pairs of characters are considered brackets and
423/// also specifies any language-specific scopes where these pairs should be ignored for bracket matching purposes.
424#[derive(Clone, Debug, Default, JsonSchema)]
425#[schemars(with = "Vec::<BracketPairContent>")]
426pub struct BracketPairConfig {
427    /// A list of character pairs that should be treated as brackets in the context of a given language.
428    pub pairs: Vec<BracketPair>,
429    /// A list of tree-sitter scopes for which a given bracket should not be active.
430    /// N-th entry in `[Self::disabled_scopes_by_bracket_ix]` contains a list of disabled scopes for an n-th entry in `[Self::pairs]`
431    pub disabled_scopes_by_bracket_ix: Vec<Vec<String>>,
432}
433
434impl BracketPairConfig {
435    pub fn is_closing_brace(&self, c: char) -> bool {
436        self.pairs.iter().any(|pair| pair.end.starts_with(c))
437    }
438}
439
440#[derive(Deserialize, JsonSchema)]
441pub struct BracketPairContent {
442    #[serde(flatten)]
443    pub bracket_pair: BracketPair,
444    #[serde(default)]
445    pub not_in: Vec<String>,
446}
447
448impl<'de> Deserialize<'de> for BracketPairConfig {
449    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
450    where
451        D: Deserializer<'de>,
452    {
453        let result = Vec::<BracketPairContent>::deserialize(deserializer)?;
454        let (brackets, disabled_scopes_by_bracket_ix) = result
455            .into_iter()
456            .map(|entry| (entry.bracket_pair, entry.not_in))
457            .unzip();
458
459        Ok(BracketPairConfig {
460            pairs: brackets,
461            disabled_scopes_by_bracket_ix,
462        })
463    }
464}
465
466/// Describes a single bracket pair and how an editor should react to e.g. inserting
467/// an opening bracket or to a newline character insertion in between `start` and `end` characters.
468#[derive(Clone, Debug, Default, Deserialize, PartialEq, JsonSchema)]
469pub struct BracketPair {
470    /// Starting substring for a bracket.
471    pub start: String,
472    /// Ending substring for a bracket.
473    pub end: String,
474    /// True if `end` should be automatically inserted right after `start` characters.
475    pub close: bool,
476    /// True if selected text should be surrounded by `start` and `end` characters.
477    #[serde(default = "default_true")]
478    pub surround: bool,
479    /// True if an extra newline should be inserted while the cursor is in the middle
480    /// of that bracket pair.
481    pub newline: bool,
482}
483
484#[derive(Clone, Debug, Deserialize, JsonSchema)]
485pub struct WrapCharactersConfig {
486    /// Opening token split into a prefix and suffix. The first caret goes
487    /// after the prefix (i.e., between prefix and suffix).
488    pub start_prefix: String,
489    pub start_suffix: String,
490    /// Closing token split into a prefix and suffix. The second caret goes
491    /// after the prefix (i.e., between prefix and suffix).
492    pub end_prefix: String,
493    pub end_suffix: String,
494}
495
/// Default value for `LanguageConfig::auto_indent_using_last_non_empty_line`: enabled.
pub fn auto_indent_using_last_non_empty_line_default() -> bool {
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
499
500pub fn deserialize_regex<'de, D: Deserializer<'de>>(d: D) -> Result<Option<Regex>, D::Error> {
501    let source = Option::<String>::deserialize(d)?;
502    if let Some(source) = source {
503        Ok(Some(regex::Regex::new(&source).map_err(de::Error::custom)?))
504    } else {
505        Ok(None)
506    }
507}
508
509pub fn regex_json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
510    json_schema!({
511        "type": "string"
512    })
513}
514
515pub fn serialize_regex<S>(regex: &Option<Regex>, serializer: S) -> Result<S::Ok, S::Error>
516where
517    S: Serializer,
518{
519    match regex {
520        Some(regex) => serializer.serialize_str(regex.as_str()),
521        None => serializer.serialize_none(),
522    }
523}
524
525pub fn deserialize_regex_vec<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<Regex>, D::Error> {
526    let sources = Vec::<String>::deserialize(d)?;
527    sources
528        .into_iter()
529        .map(|source| regex::Regex::new(&source))
530        .collect::<Result<_, _>>()
531        .map_err(de::Error::custom)
532}
533
534pub fn regex_vec_json_schema(_: &mut SchemaGenerator) -> schemars::Schema {
535    json_schema!({
536        "type": "array",
537        "items": { "type": "string" }
538    })
539}