Cargo.lock 🔗
@@ -4813,6 +4813,7 @@ dependencies = [
"pretty_assertions",
"project",
"rand 0.8.5",
+ "regex",
"release_channel",
"rpc",
"schemars",
Smit Barmase created
Closes #19644 #18151
Rewrapping Markdown lists (unordered, ordered, and to-do lists) and block
quotes now wraps each item separately instead of merging them together.
Additionally, continuation lines are indented to align with the item's text.
With this input:
```md
1. This is a list item that is short.
2. This list item is a bit longer because I want to see if it wraps correctly after a rewrap operation in Zed. What do you think?
3. another short item
```
Output would be:
```md
1. This is a list item that is short.
2. This list item is a bit longer because I want to see if it wraps correctly
   after a rewrap operation in Zed. What do you think?
3. another short item
```
Instead of:
```md
1. This is a list item that is short. 2. This list item is a bit longer because
I want to see if it wraps correctly after a rewrap operation in Zed. What
do you think? 3. another short item
```
Release Notes:
- Improved rewrap for markdown lists, todos, and block quotes.
Cargo.lock | 1
crates/editor/Cargo.toml | 1
crates/editor/src/editor.rs | 194 +++++++++++++++++-------
crates/editor/src/editor_tests.rs | 81 ++++++++++
crates/language/src/language.rs | 34 ++++
crates/languages/src/markdown/config.toml | 6
6 files changed, 256 insertions(+), 61 deletions(-)
@@ -4813,6 +4813,7 @@ dependencies = [
"pretty_assertions",
"project",
"rand 0.8.5",
+ "regex",
"release_channel",
"rpc",
"schemars",
@@ -61,6 +61,7 @@ parking_lot.workspace = true
pretty_assertions.workspace = true
project.workspace = true
rand.workspace = true
+regex.workspace = true
rpc.workspace = true
schemars.workspace = true
serde.workspace = true
@@ -11541,66 +11541,90 @@ impl Editor {
let language_settings = buffer.language_settings_at(selection.head(), cx);
let language_scope = buffer.language_scope_at(selection.head());
+ let indent_and_prefix_for_row =
+ |row: u32| -> (IndentSize, Option<String>, Option<String>) {
+ let indent = buffer.indent_size_for_line(MultiBufferRow(row));
+ let (comment_prefix, rewrap_prefix) =
+ if let Some(language_scope) = &language_scope {
+ let indent_end = Point::new(row, indent.len);
+ let comment_prefix = language_scope
+ .line_comment_prefixes()
+ .iter()
+ .find(|prefix| buffer.contains_str_at(indent_end, prefix))
+ .map(|prefix| prefix.to_string());
+ let line_end = Point::new(row, buffer.line_len(MultiBufferRow(row)));
+ let line_text_after_indent = buffer
+ .text_for_range(indent_end..line_end)
+ .collect::<String>();
+ let rewrap_prefix = language_scope
+ .rewrap_prefixes()
+ .iter()
+ .find_map(|prefix_regex| {
+ prefix_regex.find(&line_text_after_indent).map(|mat| {
+ if mat.start() == 0 {
+ Some(mat.as_str().to_string())
+ } else {
+ None
+ }
+ })
+ })
+ .flatten();
+ (comment_prefix, rewrap_prefix)
+ } else {
+ (None, None)
+ };
+ (indent, comment_prefix, rewrap_prefix)
+ };
+
let mut ranges = Vec::new();
- let mut current_range_start = first_row;
let from_empty_selection = selection.is_empty();
+ let mut current_range_start = first_row;
let mut prev_row = first_row;
- let mut prev_indent = buffer.indent_size_for_line(MultiBufferRow(first_row));
- let mut prev_comment_prefix = if let Some(language_scope) = &language_scope {
- let indent = buffer.indent_size_for_line(MultiBufferRow(first_row));
- let indent_end = Point::new(first_row, indent.len);
- language_scope
- .line_comment_prefixes()
- .iter()
- .find(|prefix| buffer.contains_str_at(indent_end, prefix))
- .cloned()
- } else {
- None
- };
+ let (
+ mut current_range_indent,
+ mut current_range_comment_prefix,
+ mut current_range_rewrap_prefix,
+ ) = indent_and_prefix_for_row(first_row);
for row in non_blank_rows_iter.skip(1) {
let has_paragraph_break = row > prev_row + 1;
- let row_indent = buffer.indent_size_for_line(MultiBufferRow(row));
- let row_comment_prefix = if let Some(language_scope) = &language_scope {
- let indent = buffer.indent_size_for_line(MultiBufferRow(row));
- let indent_end = Point::new(row, indent.len);
- language_scope
- .line_comment_prefixes()
- .iter()
- .find(|prefix| buffer.contains_str_at(indent_end, prefix))
- .cloned()
- } else {
- None
- };
+ let (row_indent, row_comment_prefix, row_rewrap_prefix) =
+ indent_and_prefix_for_row(row);
+
+ let has_indent_change = row_indent != current_range_indent;
+ let has_comment_change = row_comment_prefix != current_range_comment_prefix;
- let has_boundary_change =
- row_indent != prev_indent || row_comment_prefix != prev_comment_prefix;
+ let has_boundary_change = has_comment_change
+ || row_rewrap_prefix.is_some()
+ || (has_indent_change && current_range_comment_prefix.is_some());
if has_paragraph_break || has_boundary_change {
ranges.push((
language_settings.clone(),
Point::new(current_range_start, 0)
..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))),
- prev_indent,
- prev_comment_prefix.clone(),
+ current_range_indent,
+ current_range_comment_prefix.clone(),
+ current_range_rewrap_prefix.clone(),
from_empty_selection,
));
current_range_start = row;
+ current_range_indent = row_indent;
+ current_range_comment_prefix = row_comment_prefix;
+ current_range_rewrap_prefix = row_rewrap_prefix;
}
-
prev_row = row;
- prev_indent = row_indent;
- prev_comment_prefix = row_comment_prefix;
}
ranges.push((
language_settings.clone(),
Point::new(current_range_start, 0)
..Point::new(prev_row, buffer.line_len(MultiBufferRow(prev_row))),
- prev_indent,
- prev_comment_prefix,
+ current_range_indent,
+ current_range_comment_prefix,
+ current_range_rewrap_prefix,
from_empty_selection,
));
@@ -11610,8 +11634,14 @@ impl Editor {
let mut edits = Vec::new();
let mut rewrapped_row_ranges = Vec::<RangeInclusive<u32>>::new();
- for (language_settings, wrap_range, indent_size, comment_prefix, from_empty_selection) in
- wrap_ranges
+ for (
+ language_settings,
+ wrap_range,
+ indent_size,
+ comment_prefix,
+ rewrap_prefix,
+ from_empty_selection,
+ ) in wrap_ranges
{
let mut start_row = wrap_range.start.row;
let mut end_row = wrap_range.end.row;
@@ -11627,12 +11657,16 @@ impl Editor {
let tab_size = language_settings.tab_size;
- let mut line_prefix = indent_size.chars().collect::<String>();
+ let indent_prefix = indent_size.chars().collect::<String>();
+ let mut line_prefix = indent_prefix.clone();
let mut inside_comment = false;
if let Some(prefix) = &comment_prefix {
line_prefix.push_str(prefix);
inside_comment = true;
}
+ if let Some(prefix) = &rewrap_prefix {
+ line_prefix.push_str(prefix);
+ }
let allow_rewrap_based_on_language = match language_settings.allow_rewrap {
RewrapBehavior::InComments => inside_comment,
@@ -11679,12 +11713,18 @@ impl Editor {
let selection_text = buffer.text_for_range(start..end).collect::<String>();
let Some(lines_without_prefixes) = selection_text
.lines()
- .map(|line| {
- line.strip_prefix(&line_prefix)
- .or_else(|| line.trim_start().strip_prefix(&line_prefix.trim_start()))
- .with_context(|| {
- format!("line did not start with prefix {line_prefix:?}: {line:?}")
- })
+ .enumerate()
+ .map(|(ix, line)| {
+ let line_trimmed = line.trim_start();
+ if rewrap_prefix.is_some() && ix > 0 {
+ Ok(line_trimmed)
+ } else {
+ line_trimmed
+ .strip_prefix(&line_prefix.trim_start())
+ .with_context(|| {
+ format!("line did not start with prefix {line_prefix:?}: {line:?}")
+ })
+ }
})
.collect::<Result<Vec<_>, _>>()
.log_err()
@@ -11697,8 +11737,16 @@ impl Editor {
.language_settings_at(Point::new(start_row, 0), cx)
.preferred_line_length as usize
});
+
+ let subsequent_lines_prefix = if let Some(rewrap_prefix_str) = &rewrap_prefix {
+ format!("{}{}", indent_prefix, " ".repeat(rewrap_prefix_str.len()))
+ } else {
+ line_prefix.clone()
+ };
+
let wrapped_text = wrap_with_prefix(
line_prefix,
+ subsequent_lines_prefix,
lines_without_prefixes.join("\n"),
wrap_column,
tab_size,
@@ -21200,18 +21248,22 @@ fn test_word_breaking_tokenizer() {
}
fn wrap_with_prefix(
- line_prefix: String,
+ first_line_prefix: String,
+ subsequent_lines_prefix: String,
unwrapped_text: String,
wrap_column: usize,
tab_size: NonZeroU32,
preserve_existing_whitespace: bool,
) -> String {
- let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size);
+ let first_line_prefix_len = char_len_with_expanded_tabs(0, &first_line_prefix, tab_size);
+ let subsequent_lines_prefix_len =
+ char_len_with_expanded_tabs(0, &subsequent_lines_prefix, tab_size);
let mut wrapped_text = String::new();
- let mut current_line = line_prefix.clone();
+ let mut current_line = first_line_prefix.clone();
+ let mut is_first_line = true;
let tokenizer = WordBreakingTokenizer::new(&unwrapped_text);
- let mut current_line_len = line_prefix_len;
+ let mut current_line_len = first_line_prefix_len;
let mut in_whitespace = false;
for token in tokenizer {
let have_preceding_whitespace = in_whitespace;
@@ -21221,13 +21273,19 @@ fn wrap_with_prefix(
grapheme_len,
} => {
in_whitespace = false;
+ let current_prefix_len = if is_first_line {
+ first_line_prefix_len
+ } else {
+ subsequent_lines_prefix_len
+ };
if current_line_len + grapheme_len > wrap_column
- && current_line_len != line_prefix_len
+ && current_line_len != current_prefix_len
{
wrapped_text.push_str(current_line.trim_end());
wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
+ is_first_line = false;
+ current_line = subsequent_lines_prefix.clone();
+ current_line_len = subsequent_lines_prefix_len;
}
current_line.push_str(token);
current_line_len += grapheme_len;
@@ -21244,32 +21302,46 @@ fn wrap_with_prefix(
token = " ";
grapheme_len = 1;
}
+ let current_prefix_len = if is_first_line {
+ first_line_prefix_len
+ } else {
+ subsequent_lines_prefix_len
+ };
if current_line_len + grapheme_len > wrap_column {
wrapped_text.push_str(current_line.trim_end());
wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- } else if current_line_len != line_prefix_len || preserve_existing_whitespace {
+ is_first_line = false;
+ current_line = subsequent_lines_prefix.clone();
+ current_line_len = subsequent_lines_prefix_len;
+ } else if current_line_len != current_prefix_len || preserve_existing_whitespace {
current_line.push_str(token);
current_line_len += grapheme_len;
}
}
WordBreakToken::Newline => {
in_whitespace = true;
+ let current_prefix_len = if is_first_line {
+ first_line_prefix_len
+ } else {
+ subsequent_lines_prefix_len
+ };
if preserve_existing_whitespace {
wrapped_text.push_str(current_line.trim_end());
wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
+ is_first_line = false;
+ current_line = subsequent_lines_prefix.clone();
+ current_line_len = subsequent_lines_prefix_len;
} else if have_preceding_whitespace {
continue;
- } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len
+ } else if current_line_len + 1 > wrap_column
+ && current_line_len != current_prefix_len
{
wrapped_text.push_str(current_line.trim_end());
wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- } else if current_line_len != line_prefix_len {
+ is_first_line = false;
+ current_line = subsequent_lines_prefix.clone();
+ current_line_len = subsequent_lines_prefix_len;
+ } else if current_line_len != current_prefix_len {
current_line.push(' ');
current_line_len += 1;
}
@@ -21287,6 +21359,7 @@ fn wrap_with_prefix(
fn test_wrap_with_prefix() {
assert_eq!(
wrap_with_prefix(
+ "# ".to_string(),
"# ".to_string(),
"abcdefg".to_string(),
4,
@@ -21297,6 +21370,7 @@ fn test_wrap_with_prefix() {
);
assert_eq!(
wrap_with_prefix(
+ "".to_string(),
"".to_string(),
"\thello world".to_string(),
8,
@@ -21307,6 +21381,7 @@ fn test_wrap_with_prefix() {
);
assert_eq!(
wrap_with_prefix(
+ "// ".to_string(),
"// ".to_string(),
"xx \nyy zz aa bb cc".to_string(),
12,
@@ -21317,6 +21392,7 @@ fn test_wrap_with_prefix() {
);
assert_eq!(
wrap_with_prefix(
+ String::new(),
String::new(),
"这是什么 \n 钢笔".to_string(),
3,
@@ -5210,6 +5210,10 @@ async fn test_rewrap(cx: &mut TestAppContext) {
let markdown_language = Arc::new(Language::new(
LanguageConfig {
name: "Markdown".into(),
+ rewrap_prefixes: vec![
+ regex::Regex::new("\\d+\\.\\s+").unwrap(),
+ regex::Regex::new("[-*+]\\s+").unwrap(),
+ ],
..LanguageConfig::default()
},
None,
@@ -5372,7 +5376,82 @@ async fn test_rewrap(cx: &mut TestAppContext) {
A long long long line of markdown text
to wrap.ˇ
"},
- markdown_language,
+ markdown_language.clone(),
+ &mut cx,
+ );
+
+ // Test that rewrapping boundary works and preserves relative indent for Markdown documents
+ assert_rewrap(
+ indoc! {"
+ «1. This is a numbered list item that is very long and needs to be wrapped properly.
+ 2. This is a numbered list item that is very long and needs to be wrapped properly.
+ - This is an unordered list item that is also very long and should not merge with the numbered item.ˇ»
+ "},
+ indoc! {"
+ «1. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ 2. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ - This is an unordered list item that is
+ also very long and should not merge
+ with the numbered item.ˇ»
+ "},
+ markdown_language.clone(),
+ &mut cx,
+ );
+
+ // Test that rewrapping adds indentation to continuation lines of a prefixed item when it is not already present.
+ assert_rewrap(
+ indoc! {"
+ «1. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ 2. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ - This is an unordered list item that is
+ also very long and should not merge with
+ the numbered item.ˇ»
+ "},
+ indoc! {"
+ «1. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ 2. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ - This is an unordered list item that is
+ also very long and should not merge
+ with the numbered item.ˇ»
+ "},
+ markdown_language.clone(),
+ &mut cx,
+ );
+
+ // Test that rewrapping maintains continuation-line indentation when it already exists.
+ assert_rewrap(
+ indoc! {"
+ «1. This is a numbered list
+ item that is very long and needs to be wrapped properly.
+ 2. This is a numbered list
+ item that is very long and needs to be wrapped properly.
+ - This is an unordered list item that is also very long and
+ should not merge with the numbered item.ˇ»
+ "},
+ indoc! {"
+ «1. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ 2. This is a numbered list item that is
+ very long and needs to be wrapped
+ properly.
+ - This is an unordered list item that is
+ also very long and should not merge
+ with the numbered item.ˇ»
+ "},
+ markdown_language.clone(),
&mut cx,
);
@@ -39,7 +39,7 @@ use lsp::{CodeActionKind, InitializeParams, LanguageServerBinary, LanguageServer
pub use manifest::{ManifestDelegate, ManifestName, ManifestProvider, ManifestQuery};
use parking_lot::Mutex;
use regex::Regex;
-use schemars::{JsonSchema, json_schema};
+use schemars::{JsonSchema, SchemaGenerator, json_schema};
use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
use serde_json::Value;
use settings::WorktreeId;
@@ -730,6 +730,13 @@ pub struct LanguageConfig {
/// Starting and closing characters of a block comment.
#[serde(default)]
pub block_comment: Option<(Arc<str>, Arc<str>)>,
+ /// A list of additional regex patterns that should be treated as prefixes
+ /// for creating boundaries during rewrapping, ensuring content from one
+ /// prefixed section doesn't merge with another (e.g., markdown list items).
+ /// By default, Zed treats paragraph breaks and comment prefixes as boundaries.
+ #[serde(default, deserialize_with = "deserialize_regex_vec")]
+ #[schemars(schema_with = "regex_vec_json_schema")]
+ pub rewrap_prefixes: Vec<Regex>,
/// A list of language servers that are allowed to run on subranges of a given language.
#[serde(default)]
pub scope_opt_in_language_servers: Vec<LanguageServerName>,
@@ -909,6 +916,7 @@ impl Default for LanguageConfig {
autoclose_before: Default::default(),
line_comments: Default::default(),
block_comment: Default::default(),
+ rewrap_prefixes: Default::default(),
scope_opt_in_language_servers: Default::default(),
overrides: Default::default(),
word_characters: Default::default(),
@@ -955,6 +963,22 @@ where
}
}
+fn deserialize_regex_vec<'de, D: Deserializer<'de>>(d: D) -> Result<Vec<Regex>, D::Error> {
+ let sources = Vec::<String>::deserialize(d)?;
+ let mut regexes = Vec::new();
+ for source in sources {
+ regexes.push(regex::Regex::new(&source).map_err(de::Error::custom)?);
+ }
+ Ok(regexes)
+}
+
+fn regex_vec_json_schema(_: &mut SchemaGenerator) -> schemars::Schema {
+ json_schema!({
+ "type": "array",
+ "items": { "type": "string" }
+ })
+}
+
#[doc(hidden)]
#[cfg(any(test, feature = "test-support"))]
pub struct FakeLspAdapter {
@@ -1831,6 +1855,14 @@ impl LanguageScope {
.map(|e| (&e.0, &e.1))
}
+ /// Returns additional regex patterns that act as prefix markers for creating
+ /// boundaries during rewrapping.
+ ///
+ /// By default, Zed treats paragraph breaks and comment prefixes as boundaries.
+ pub fn rewrap_prefixes(&self) -> &[Regex] {
+ &self.language.config.rewrap_prefixes
+ }
+
/// Returns a list of language-specific word characters.
///
/// By default, Zed treats alphanumeric characters (and '_') as word characters for
@@ -13,6 +13,12 @@ brackets = [
{ start = "'", end = "'", close = false, newline = false },
{ start = "`", end = "`", close = false, newline = false },
]
+rewrap_prefixes = [
+ "[-*+]\\s+",
+ "\\d+\\.\\s+",
+ ">\\s*",
+ "[-*+]\\s+\\[[\\sx]\\]\\s+"
+]
auto_indent_on_paste = false
auto_indent_using_last_non_empty_line = false