language: Add context-aware decrease indent for Python (#33370)

Smit Barmase created

Closes #33238, follow-up to
https://github.com/zed-industries/zed/pull/29625.

Changes:

- Removed `significant_indentation`, which was the way to introduce
indentation scoping in languages like Python. However, it turned out to
be unnecessarily complicated to define and maintain.
- Introduced `decrease_indent_patterns`, which takes a `pattern` keyword
to automatically outdent and `valid_after` keywords to treat as valid
code points to snap to. The outdent happens to the most recent
`valid_after` keyword that also has less or equal indentation than the
currently typed keyword.

Fixes:

1. In Python, typing `except`, `finally`, `else`, and so on now
automatically indents intelligently based on the context in which it
appears. For instance:

```py
try:
    if a == 1:
        try:
             b = 2
             ^  # <-- typing "except:" here would indent it to inner try block
```

but,

```py
try:
    if a == 1:
        try:
             b = 2
    ^  # <-- typing "except:" here would indent it to outer try block
```

2. Fixes comments not maintaining indent.

Release Notes:

- Improved auto outdent for Python while typing keywords like `except`,
`else`, `finally`, etc.
- Fixed the issue where comments in Python would not maintain their
indentation.

Change summary

crates/editor/src/editor_tests.rs       | 167 +++++++++++++++-----------
crates/language/src/buffer.rs           | 102 ++++++++++-----
crates/language/src/language.rs         |  31 ++++
crates/languages/src/python.rs          |   2 
crates/languages/src/python/config.toml |  11 +
crates/languages/src/python/indents.scm |  83 ++-----------
6 files changed, 213 insertions(+), 183 deletions(-)

Detailed changes

crates/editor/src/editor_tests.rs 🔗

@@ -21771,9 +21771,9 @@ async fn test_tab_in_leading_whitespace_auto_indents_for_python(cx: &mut TestApp
     cx.set_state(indoc! {"
         def main():
         ˇ    try:
-        ˇ       fetch()
+        ˇ        fetch()
         ˇ    except ValueError:
-        ˇ       handle_error()
+        ˇ        handle_error()
         ˇ    else:
         ˇ        match value:
         ˇ            case _:
@@ -21901,74 +21901,101 @@ async fn test_outdent_after_input_for_python(cx: &mut TestAppContext) {
             finally:ˇ
     "});
 
-    // TODO: test `except` auto outdents when typed inside `try` block right after for block
-    // cx.set_state(indoc! {"
-    //     def main():
-    //         try:
-    //             for i in range(n):
-    //                 pass
-    //             ˇ
-    // "});
-    // cx.update_editor(|editor, window, cx| {
-    //     editor.handle_input("except:", window, cx);
-    // });
-    // cx.assert_editor_state(indoc! {"
-    //     def main():
-    //         try:
-    //             for i in range(n):
-    //                 pass
-    //         except:ˇ
-    // "});
-
-    // TODO: test `else` auto outdents when typed inside `except` block right after for block
-    // cx.set_state(indoc! {"
-    //     def main():
-    //         try:
-    //             i = 2
-    //         except:
-    //             for i in range(n):
-    //                 pass
-    //             ˇ
-    // "});
-    // cx.update_editor(|editor, window, cx| {
-    //     editor.handle_input("else:", window, cx);
-    // });
-    // cx.assert_editor_state(indoc! {"
-    //     def main():
-    //         try:
-    //             i = 2
-    //         except:
-    //             for i in range(n):
-    //                 pass
-    //         else:ˇ
-    // "});
-
-    // TODO: test `finally` auto outdents when typed inside `else` block right after for block
-    // cx.set_state(indoc! {"
-    //     def main():
-    //         try:
-    //             i = 2
-    //         except:
-    //             j = 2
-    //         else:
-    //             for i in range(n):
-    //                 pass
-    //             ˇ
-    // "});
-    // cx.update_editor(|editor, window, cx| {
-    //     editor.handle_input("finally:", window, cx);
-    // });
-    // cx.assert_editor_state(indoc! {"
-    //     def main():
-    //         try:
-    //             i = 2
-    //         except:
-    //             j = 2
-    //         else:
-    //             for i in range(n):
-    //                 pass
-    //         finally:ˇ
-    // "});
+    // test `else` does not outdents when typed inside `except` block right after for block
+    cx.set_state(indoc! {"
+        def main():
+            try:
+                i = 2
+            except:
+                for i in range(n):
+                    pass
+                ˇ
+    "});
+    cx.update_editor(|editor, window, cx| {
+        editor.handle_input("else:", window, cx);
+    });
+    cx.assert_editor_state(indoc! {"
+        def main():
+            try:
+                i = 2
+            except:
+                for i in range(n):
+                    pass
+                else:ˇ
+    "});
+
+    // test `finally` auto outdents when typed inside `else` block right after for block
+    cx.set_state(indoc! {"
+        def main():
+            try:
+                i = 2
+            except:
+                j = 2
+            else:
+                for i in range(n):
+                    pass
+                ˇ
+    "});
+    cx.update_editor(|editor, window, cx| {
+        editor.handle_input("finally:", window, cx);
+    });
+    cx.assert_editor_state(indoc! {"
+        def main():
+            try:
+                i = 2
+            except:
+                j = 2
+            else:
+                for i in range(n):
+                    pass
+            finally:ˇ
+    "});
+
+    // test `except` outdents to inner "try" block
+    cx.set_state(indoc! {"
+        def main():
+            try:
+                i = 2
+                if i == 2:
+                    try:
+                        i = 3
+                        ˇ
+    "});
+    cx.update_editor(|editor, window, cx| {
+        editor.handle_input("except:", window, cx);
+    });
+    cx.assert_editor_state(indoc! {"
+        def main():
+            try:
+                i = 2
+                if i == 2:
+                    try:
+                        i = 3
+                    except:ˇ
+    "});
+
+    // test `except` outdents to outer "try" block
+    cx.set_state(indoc! {"
+        def main():
+            try:
+                i = 2
+                if i == 2:
+                    try:
+                        i = 3
+                ˇ
+    "});
+    cx.update_editor(|editor, window, cx| {
+        editor.handle_input("except:", window, cx);
+    });
+    cx.assert_editor_state(indoc! {"
+        def main():
+            try:
+                i = 2
+                if i == 2:
+                    try:
+                        i = 3
+            except:ˇ
+    "});
 
     // test `else` stays at correct indent when typed after `for` block
     cx.set_state(indoc! {"

crates/language/src/buffer.rs 🔗

@@ -2913,7 +2913,12 @@ impl BufferSnapshot {
     ) -> Option<impl Iterator<Item = Option<IndentSuggestion>> + '_> {
         let config = &self.language.as_ref()?.config;
         let prev_non_blank_row = self.prev_non_blank_row(row_range.start);
-        let significant_indentation = config.significant_indentation;
+
+        #[derive(Debug, Clone)]
+        struct StartPosition {
+            start: Point,
+            suffix: SharedString,
+        }
 
         // Find the suggested indentation ranges based on the syntax tree.
         let start = Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0);
@@ -2929,13 +2934,13 @@ impl BufferSnapshot {
             .collect::<Vec<_>>();
 
         let mut indent_ranges = Vec::<Range<Point>>::new();
+        let mut start_positions = Vec::<StartPosition>::new();
         let mut outdent_positions = Vec::<Point>::new();
         while let Some(mat) = matches.peek() {
             let mut start: Option<Point> = None;
             let mut end: Option<Point> = None;
-            let mut outdent: Option<Point> = None;
 
-            let config = &indent_configs[mat.grammar_index];
+            let config = indent_configs[mat.grammar_index];
             for capture in mat.captures {
                 if capture.index == config.indent_capture_ix {
                     start.get_or_insert(Point::from_ts_point(capture.node.start_position()));
@@ -2945,21 +2950,18 @@ impl BufferSnapshot {
                 } else if Some(capture.index) == config.end_capture_ix {
                     end = Some(Point::from_ts_point(capture.node.start_position()));
                 } else if Some(capture.index) == config.outdent_capture_ix {
-                    let point = Point::from_ts_point(capture.node.start_position());
-                    outdent.get_or_insert(point);
-                    outdent_positions.push(point);
+                    outdent_positions.push(Point::from_ts_point(capture.node.start_position()));
+                } else if let Some(suffix) = config.suffixed_start_captures.get(&capture.index) {
+                    start_positions.push(StartPosition {
+                        start: Point::from_ts_point(capture.node.start_position()),
+                        suffix: suffix.clone(),
+                    });
                 }
             }
 
             matches.advance();
-            // in case of significant indentation expand end to outdent position
-            let end = if significant_indentation {
-                outdent.or(end)
-            } else {
-                end
-            };
             if let Some((start, end)) = start.zip(end) {
-                if start.row == end.row && (!significant_indentation || start.column < end.column) {
+                if start.row == end.row {
                     continue;
                 }
                 let range = start..end;
@@ -2997,24 +2999,26 @@ impl BufferSnapshot {
             matches.advance();
         }
 
-        // we don't use outdent positions to truncate in case of significant indentation
-        // rather we use them to expand (handled above)
-        if !significant_indentation {
-            outdent_positions.sort();
-            for outdent_position in outdent_positions {
-                // find the innermost indent range containing this outdent_position
-                // set its end to the outdent position
-                if let Some(range_to_truncate) = indent_ranges
-                    .iter_mut()
-                    .filter(|indent_range| indent_range.contains(&outdent_position))
-                    .next_back()
-                {
-                    range_to_truncate.end = outdent_position;
-                }
+        outdent_positions.sort();
+        for outdent_position in outdent_positions {
+            // find the innermost indent range containing this outdent_position
+            // set its end to the outdent position
+            if let Some(range_to_truncate) = indent_ranges
+                .iter_mut()
+                .filter(|indent_range| indent_range.contains(&outdent_position))
+                .next_back()
+            {
+                range_to_truncate.end = outdent_position;
             }
         }
 
+        start_positions.sort_by_key(|b| b.start);
+
         // Find the suggested indentation increases and decreased based on regexes.
+        let mut regex_outdent_map = HashMap::default();
+        let mut last_seen_suffix: HashMap<String, Vec<Point>> = HashMap::default();
+        let mut start_positions_iter = start_positions.iter().peekable();
+
         let mut indent_change_rows = Vec::<(u32, Ordering)>::new();
         self.for_each_line(
             Point::new(prev_non_blank_row.unwrap_or(row_range.start), 0)
@@ -3034,6 +3038,33 @@ impl BufferSnapshot {
                 {
                     indent_change_rows.push((row + 1, Ordering::Greater));
                 }
+                while let Some(pos) = start_positions_iter.peek() {
+                    if pos.start.row < row {
+                        let pos = start_positions_iter.next().unwrap();
+                        last_seen_suffix
+                            .entry(pos.suffix.to_string())
+                            .or_default()
+                            .push(pos.start);
+                    } else {
+                        break;
+                    }
+                }
+                for rule in &config.decrease_indent_patterns {
+                    if rule.pattern.as_ref().map_or(false, |r| r.is_match(line)) {
+                        let row_start_column = self.indent_size_for_line(row).len;
+                        let basis_row = rule
+                            .valid_after
+                            .iter()
+                            .filter_map(|valid_suffix| last_seen_suffix.get(valid_suffix))
+                            .flatten()
+                            .filter(|start_point| start_point.column <= row_start_column)
+                            .max_by_key(|start_point| start_point.row);
+                        if let Some(outdent_to_row) = basis_row {
+                            regex_outdent_map.insert(row, outdent_to_row.row);
+                        }
+                        break;
+                    }
+                }
             },
         );
 
@@ -3043,6 +3074,7 @@ impl BufferSnapshot {
         } else {
             row_range.start.saturating_sub(1)
         };
+
         let mut prev_row_start = Point::new(prev_row, self.indent_size_for_line(prev_row).len);
         Some(row_range.map(move |row| {
             let row_start = Point::new(row, self.indent_size_for_line(row).len);
@@ -3080,17 +3112,17 @@ impl BufferSnapshot {
                 if range.start.row == prev_row && range.end > row_start {
                     indent_from_prev_row = true;
                 }
-                if significant_indentation && self.is_line_blank(row) && range.start.row == prev_row
-                {
-                    indent_from_prev_row = true;
-                }
-                if !significant_indentation || !self.is_line_blank(row) {
-                    if range.end > prev_row_start && range.end <= row_start {
-                        outdent_to_row = outdent_to_row.min(range.start.row);
-                    }
+                if range.end > prev_row_start && range.end <= row_start {
+                    outdent_to_row = outdent_to_row.min(range.start.row);
                 }
             }
 
+            if let Some(basis_row) = regex_outdent_map.get(&row) {
+                indent_from_prev_row = false;
+                outdent_to_row = *basis_row;
+                from_regex = true;
+            }
+
             let within_error = error_ranges
                 .iter()
                 .any(|e| e.start.row < row && e.end > row_start);

crates/language/src/language.rs 🔗

@@ -696,10 +696,6 @@ pub struct LanguageConfig {
     #[serde(default)]
     #[schemars(schema_with = "bracket_pair_config_json_schema")]
     pub brackets: BracketPairConfig,
-    /// If set to true, indicates the language uses significant whitespace/indentation
-    /// for syntax structure (like Python) rather than brackets/braces for code blocks.
-    #[serde(default)]
-    pub significant_indentation: bool,
     /// If set to true, auto indentation uses last non empty line to determine
     /// the indentation level for a new line.
     #[serde(default = "auto_indent_using_last_non_empty_line_default")]
@@ -717,6 +713,12 @@ pub struct LanguageConfig {
     #[serde(default, deserialize_with = "deserialize_regex")]
     #[schemars(schema_with = "regex_json_schema")]
     pub decrease_indent_pattern: Option<Regex>,
+    /// A list of rules for decreasing indentation. Each rule pairs a regex with a set of valid
+    /// "block-starting" tokens. When a line matches a pattern, its indentation is aligned with
+    /// the most recent line that began with a corresponding token. This enables context-aware
+    /// outdenting, like aligning an `else` with its `if`.
+    #[serde(default)]
+    pub decrease_indent_patterns: Vec<DecreaseIndentConfig>,
     /// A list of characters that trigger the automatic insertion of a closing
     /// bracket when they immediately precede the point where an opening
     /// bracket is inserted.
@@ -776,6 +778,15 @@ pub struct LanguageConfig {
     pub documentation: Option<DocumentationConfig>,
 }
 
+#[derive(Clone, Debug, Deserialize, Default, JsonSchema)]
+pub struct DecreaseIndentConfig {
+    #[serde(default, deserialize_with = "deserialize_regex")]
+    #[schemars(schema_with = "regex_json_schema")]
+    pub pattern: Option<Regex>,
+    #[serde(default)]
+    pub valid_after: Vec<String>,
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize, Default, JsonSchema)]
 pub struct LanguageMatcher {
     /// Given a list of `LanguageConfig`'s, the language of a file can be determined based on the path extension matching any of the `path_suffixes`.
@@ -899,6 +910,7 @@ impl Default for LanguageConfig {
             auto_indent_on_paste: None,
             increase_indent_pattern: Default::default(),
             decrease_indent_pattern: Default::default(),
+            decrease_indent_patterns: Default::default(),
             autoclose_before: Default::default(),
             line_comments: Default::default(),
             block_comment: Default::default(),
@@ -914,7 +926,6 @@ impl Default for LanguageConfig {
             jsx_tag_auto_close: None,
             completion_query_characters: Default::default(),
             debuggers: Default::default(),
-            significant_indentation: Default::default(),
             documentation: None,
         }
     }
@@ -1092,6 +1103,7 @@ struct IndentConfig {
     start_capture_ix: Option<u32>,
     end_capture_ix: Option<u32>,
     outdent_capture_ix: Option<u32>,
+    suffixed_start_captures: HashMap<u32, SharedString>,
 }
 
 pub struct OutlineConfig {
@@ -1522,6 +1534,14 @@ impl Language {
                 ("outdent", &mut outdent_capture_ix),
             ],
         );
+
+        let mut suffixed_start_captures = HashMap::default();
+        for (ix, name) in query.capture_names().iter().enumerate() {
+            if let Some(suffix) = name.strip_prefix("start.") {
+                suffixed_start_captures.insert(ix as u32, suffix.to_owned().into());
+            }
+        }
+
         if let Some(indent_capture_ix) = indent_capture_ix {
             grammar.indents_config = Some(IndentConfig {
                 query,
@@ -1529,6 +1549,7 @@ impl Language {
                 start_capture_ix,
                 end_capture_ix,
                 outdent_capture_ix,
+                suffixed_start_captures,
             });
         }
         Ok(self)

crates/languages/src/python.rs 🔗

@@ -1395,7 +1395,7 @@ mod tests {
 
             // dedent "else" on the line after a closing paren
             append(&mut buffer, "\n  else:\n", cx);
-            assert_eq!(buffer.text(), "if a:\n  b(\n  )\nelse:\n");
+            assert_eq!(buffer.text(), "if a:\n  b(\n  )\nelse:\n  ");
 
             buffer
         });

crates/languages/src/python/config.toml 🔗

@@ -28,6 +28,11 @@ brackets = [
 
 auto_indent_using_last_non_empty_line = false
 debuggers = ["Debugpy"]
-significant_indentation = true
-increase_indent_pattern = "^\\s*(try)\\b.*:"
-decrease_indent_pattern = "^\\s*(else|elif|except|finally)\\b.*:"
+increase_indent_pattern = "^[^#].*:\\s*$"
+decrease_indent_patterns = [
+  { pattern = "^\\s*elif\\b.*:",    valid_after = ["if", "elif"] },
+  { pattern = "^\\s*else\\b.*:",    valid_after = ["if", "elif", "for", "while", "except"] },
+  { pattern = "^\\s*except\\b.*:",  valid_after = ["try", "except"] },
+  { pattern = "^\\s*finally\\b.*:", valid_after = ["try", "except", "else"] },
+  { pattern = "^\\s*case\\b.*:",    valid_after = ["match", "case"] }
+]

crates/languages/src/python/indents.scm 🔗

@@ -1,72 +1,17 @@
-(_ "(" ")" @end) @indent
 (_ "[" "]" @end) @indent
 (_ "{" "}" @end) @indent
+(_ "(" ")" @end) @indent
 
-(function_definition
-  ":" @start
-  body: (block) @indent
-)
-
-(if_statement
-  ":" @start
-  consequence: (block) @indent
-  alternative: (_)? @outdent
-)
-
-(else_clause
-  ":" @start
-  body: (block) @indent
-)
-
-(elif_clause
-  ":" @start
-  consequence: (block) @indent
-)
-
-(for_statement
-  ":" @start
-  body: (block) @indent
-)
-
-(with_statement
-  ":" @start
-  body: (block) @indent
-)
-
-(while_statement
-  ":" @start
-  body: (block) @indent
-)
-
-(match_statement
-  ":" @start
-  body: (block) @indent
-)
-
-(class_definition
-  ":" @start
-  body: (block) @indent
-)
-
-(case_clause
-  ":" @start
-  consequence: (block) @indent
-)
-
-(try_statement
-  ":" @start
-  body: (block) @indent
-  (except_clause)? @outdent
-  (else_clause)? @outdent
-  (finally_clause)? @outdent
-)
-
-(except_clause
-  ":" @start
-  (block) @indent
-)
-
-(finally_clause
-  ":" @start
-  (block) @indent
-)
+(function_definition) @start.def
+(class_definition) @start.class
+(if_statement) @start.if
+(for_statement) @start.for
+(while_statement) @start.while
+(with_statement) @start.with
+(match_statement) @start.match
+(try_statement) @start.try
+(elif_clause) @start.elif
+(else_clause) @start.else
+(except_clause) @start.except
+(finally_clause) @start.finally
+(case_pattern) @start.case