Detailed changes
@@ -4729,7 +4729,6 @@ dependencies = [
"tree-sitter-rust",
"tree-sitter-typescript",
"ui",
- "unicode-script",
"unicode-segmentation",
"unindent",
"url",
@@ -17106,6 +17105,8 @@ dependencies = [
"tempfile",
"tendril",
"unicase",
+ "unicode-script",
+ "unicode-segmentation",
"util_macros",
"walkdir",
"workspace-hack",
@@ -1431,7 +1431,9 @@
"language_servers": ["erlang-ls", "!elp", "..."]
},
"Git Commit": {
- "allow_rewrap": "anywhere"
+ "allow_rewrap": "anywhere",
+ "preferred_line_length": 72,
+ "soft_wrap": "bounded"
},
"Go": {
"code_actions_on_format": {
@@ -82,7 +82,6 @@ tree-sitter-rust = { workspace = true, optional = true }
tree-sitter-typescript = { workspace = true, optional = true }
tree-sitter-python = { workspace = true, optional = true }
unicode-segmentation.workspace = true
-unicode-script.workspace = true
unindent = { workspace = true, optional = true }
ui.workspace = true
url.workspace = true
@@ -201,7 +201,7 @@ use ui::{
ButtonSize, ButtonStyle, ContextMenu, Disclosure, IconButton, IconButtonShape, IconName,
IconSize, Indicator, Key, Tooltip, h_flex, prelude::*,
};
-use util::{RangeExt, ResultExt, TryFutureExt, maybe, post_inc};
+use util::{RangeExt, ResultExt, TryFutureExt, maybe, post_inc, wrap_with_prefix};
use workspace::{
CollaboratorId, Item as WorkspaceItem, ItemId, ItemNavHistory, OpenInTerminal, OpenTerminal,
RestoreOnStartupBehavior, SERIALIZATION_THROTTLE_TIME, SplitDirection, TabBarSettings, Toast,
@@ -19440,347 +19440,6 @@ fn update_uncommitted_diff_for_buffer(
})
}
-fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize {
- let tab_size = tab_size.get() as usize;
- let mut width = offset;
-
- for ch in text.chars() {
- width += if ch == '\t' {
- tab_size - (width % tab_size)
- } else {
- 1
- };
- }
-
- width - offset
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_string_size_with_expanded_tabs() {
- let nz = |val| NonZeroU32::new(val).unwrap();
- assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0);
- assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5);
- assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9);
- assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6);
- assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8);
- assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16);
- assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8);
- assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9);
- }
-}
-
-/// Tokenizes a string into runs of text that should stick together, or that is whitespace.
-struct WordBreakingTokenizer<'a> {
- input: &'a str,
-}
-
-impl<'a> WordBreakingTokenizer<'a> {
- fn new(input: &'a str) -> Self {
- Self { input }
- }
-}
-
-fn is_char_ideographic(ch: char) -> bool {
- use unicode_script::Script::*;
- use unicode_script::UnicodeScript;
- matches!(ch.script(), Han | Tangut | Yi)
-}
-
-fn is_grapheme_ideographic(text: &str) -> bool {
- text.chars().any(is_char_ideographic)
-}
-
-fn is_grapheme_whitespace(text: &str) -> bool {
- text.chars().any(|x| x.is_whitespace())
-}
-
-fn should_stay_with_preceding_ideograph(text: &str) -> bool {
- text.chars().next().map_or(false, |ch| {
- matches!(ch, '。' | '、' | ',' | '?' | '!' | ':' | ';' | '…')
- })
-}
-
-#[derive(PartialEq, Eq, Debug, Clone, Copy)]
-enum WordBreakToken<'a> {
- Word { token: &'a str, grapheme_len: usize },
- InlineWhitespace { token: &'a str, grapheme_len: usize },
- Newline,
-}
-
-impl<'a> Iterator for WordBreakingTokenizer<'a> {
- /// Yields a span, the count of graphemes in the token, and whether it was
- /// whitespace. Note that it also breaks at word boundaries.
- type Item = WordBreakToken<'a>;
-
- fn next(&mut self) -> Option<Self::Item> {
- use unicode_segmentation::UnicodeSegmentation;
- if self.input.is_empty() {
- return None;
- }
-
- let mut iter = self.input.graphemes(true).peekable();
- let mut offset = 0;
- let mut grapheme_len = 0;
- if let Some(first_grapheme) = iter.next() {
- let is_newline = first_grapheme == "\n";
- let is_whitespace = is_grapheme_whitespace(first_grapheme);
- offset += first_grapheme.len();
- grapheme_len += 1;
- if is_grapheme_ideographic(first_grapheme) && !is_whitespace {
- if let Some(grapheme) = iter.peek().copied() {
- if should_stay_with_preceding_ideograph(grapheme) {
- offset += grapheme.len();
- grapheme_len += 1;
- }
- }
- } else {
- let mut words = self.input[offset..].split_word_bound_indices().peekable();
- let mut next_word_bound = words.peek().copied();
- if next_word_bound.map_or(false, |(i, _)| i == 0) {
- next_word_bound = words.next();
- }
- while let Some(grapheme) = iter.peek().copied() {
- if next_word_bound.map_or(false, |(i, _)| i == offset) {
- break;
- };
- if is_grapheme_whitespace(grapheme) != is_whitespace
- || (grapheme == "\n") != is_newline
- {
- break;
- };
- offset += grapheme.len();
- grapheme_len += 1;
- iter.next();
- }
- }
- let token = &self.input[..offset];
- self.input = &self.input[offset..];
- if token == "\n" {
- Some(WordBreakToken::Newline)
- } else if is_whitespace {
- Some(WordBreakToken::InlineWhitespace {
- token,
- grapheme_len,
- })
- } else {
- Some(WordBreakToken::Word {
- token,
- grapheme_len,
- })
- }
- } else {
- None
- }
- }
-}
-
-#[test]
-fn test_word_breaking_tokenizer() {
- let tests: &[(&str, &[WordBreakToken<'static>])] = &[
- ("", &[]),
- (" ", &[whitespace(" ", 2)]),
- ("Ʒ", &[word("Ʒ", 1)]),
- ("Ǽ", &[word("Ǽ", 1)]),
- ("⋑", &[word("⋑", 1)]),
- ("⋑⋑", &[word("⋑⋑", 2)]),
- (
- "原理,进而",
- &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)],
- ),
- (
- "hello world",
- &[word("hello", 5), whitespace(" ", 1), word("world", 5)],
- ),
- (
- "hello, world",
- &[word("hello,", 6), whitespace(" ", 1), word("world", 5)],
- ),
- (
- " hello world",
- &[
- whitespace(" ", 2),
- word("hello", 5),
- whitespace(" ", 1),
- word("world", 5),
- ],
- ),
- (
- "这是什么 \n 钢笔",
- &[
- word("这", 1),
- word("是", 1),
- word("什", 1),
- word("么", 1),
- whitespace(" ", 1),
- newline(),
- whitespace(" ", 1),
- word("钢", 1),
- word("笔", 1),
- ],
- ),
- (" mutton", &[whitespace(" ", 1), word("mutton", 6)]),
- ];
-
- fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
- WordBreakToken::Word {
- token,
- grapheme_len,
- }
- }
-
- fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
- WordBreakToken::InlineWhitespace {
- token,
- grapheme_len,
- }
- }
-
- fn newline() -> WordBreakToken<'static> {
- WordBreakToken::Newline
- }
-
- for (input, result) in tests {
- assert_eq!(
- WordBreakingTokenizer::new(input)
- .collect::<Vec<_>>()
- .as_slice(),
- *result,
- );
- }
-}
-
-fn wrap_with_prefix(
- line_prefix: String,
- unwrapped_text: String,
- wrap_column: usize,
- tab_size: NonZeroU32,
- preserve_existing_whitespace: bool,
-) -> String {
- let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size);
- let mut wrapped_text = String::new();
- let mut current_line = line_prefix.clone();
-
- let tokenizer = WordBreakingTokenizer::new(&unwrapped_text);
- let mut current_line_len = line_prefix_len;
- let mut in_whitespace = false;
- for token in tokenizer {
- let have_preceding_whitespace = in_whitespace;
- match token {
- WordBreakToken::Word {
- token,
- grapheme_len,
- } => {
- in_whitespace = false;
- if current_line_len + grapheme_len > wrap_column
- && current_line_len != line_prefix_len
- {
- wrapped_text.push_str(current_line.trim_end());
- wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- }
- current_line.push_str(token);
- current_line_len += grapheme_len;
- }
- WordBreakToken::InlineWhitespace {
- mut token,
- mut grapheme_len,
- } => {
- in_whitespace = true;
- if have_preceding_whitespace && !preserve_existing_whitespace {
- continue;
- }
- if !preserve_existing_whitespace {
- token = " ";
- grapheme_len = 1;
- }
- if current_line_len + grapheme_len > wrap_column {
- wrapped_text.push_str(current_line.trim_end());
- wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- } else if current_line_len != line_prefix_len || preserve_existing_whitespace {
- current_line.push_str(token);
- current_line_len += grapheme_len;
- }
- }
- WordBreakToken::Newline => {
- in_whitespace = true;
- if preserve_existing_whitespace {
- wrapped_text.push_str(current_line.trim_end());
- wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- } else if have_preceding_whitespace {
- continue;
- } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len
- {
- wrapped_text.push_str(current_line.trim_end());
- wrapped_text.push('\n');
- current_line.truncate(line_prefix.len());
- current_line_len = line_prefix_len;
- } else if current_line_len != line_prefix_len {
- current_line.push(' ');
- current_line_len += 1;
- }
- }
- }
- }
-
- if !current_line.is_empty() {
- wrapped_text.push_str(&current_line);
- }
- wrapped_text
-}
-
-#[test]
-fn test_wrap_with_prefix() {
- assert_eq!(
- wrap_with_prefix(
- "# ".to_string(),
- "abcdefg".to_string(),
- 4,
- NonZeroU32::new(4).unwrap(),
- false,
- ),
- "# abcdefg"
- );
- assert_eq!(
- wrap_with_prefix(
- "".to_string(),
- "\thello world".to_string(),
- 8,
- NonZeroU32::new(4).unwrap(),
- false,
- ),
- "hello\nworld"
- );
- assert_eq!(
- wrap_with_prefix(
- "// ".to_string(),
- "xx \nyy zz aa bb cc".to_string(),
- 12,
- NonZeroU32::new(4).unwrap(),
- false,
- ),
- "// xx yy zz\n// aa bb cc"
- );
- assert_eq!(
- wrap_with_prefix(
- String::new(),
- "这是什么 \n 钢笔".to_string(),
- 3,
- NonZeroU32::new(4).unwrap(),
- false,
- ),
- "这是什\n么 钢\n笔"
- );
-}
-
pub trait CollaborationHub {
fn collaborators<'a>(&self, cx: &'a App) -> &'a HashMap<PeerId, Collaborator>;
fn user_participant_indices<'a>(&self, cx: &'a App) -> &'a HashMap<u64, ParticipantIndex>;
@@ -7396,10 +7396,7 @@ impl Element for EditorElement {
editor.gutter_dimensions = gutter_dimensions;
editor.set_visible_line_count(bounds.size.height / line_height, window, cx);
- if matches!(
- editor.mode,
- EditorMode::AutoHeight { .. } | EditorMode::Minimap { .. }
- ) {
+ if matches!(editor.mode, EditorMode::Minimap { .. }) {
snapshot
} else {
let wrap_width_for = |column: u32| (column as f32 * em_advance).ceil();
@@ -9390,6 +9387,7 @@ fn compute_auto_height_layout(
let font_size = style.text.font_size.to_pixels(window.rem_size());
let line_height = style.text.line_height_in_pixels(window.rem_size());
let em_width = window.text_system().em_width(font_id, font_size).unwrap();
+ let em_advance = window.text_system().em_advance(font_id, font_size).unwrap();
let mut snapshot = editor.snapshot(window, cx);
let gutter_dimensions = snapshot
@@ -9406,10 +9404,18 @@ fn compute_auto_height_layout(
let overscroll = size(em_width, px(0.));
let editor_width = text_width - gutter_dimensions.margin - overscroll.width - em_width;
- if !matches!(editor.soft_wrap_mode(cx), SoftWrap::None) {
- if editor.set_wrap_width(Some(editor_width), cx) {
- snapshot = editor.snapshot(window, cx);
- }
+ let content_offset = point(gutter_dimensions.margin, Pixels::ZERO);
+ let editor_content_width = editor_width - content_offset.x;
+ let wrap_width_for = |column: u32| (column as f32 * em_advance).ceil();
+ let wrap_width = match editor.soft_wrap_mode(cx) {
+ SoftWrap::GitDiff => None,
+ SoftWrap::None => Some(wrap_width_for(MAX_LINE_LEN as u32 / 2)),
+ SoftWrap::EditorWidth => Some(editor_content_width),
+ SoftWrap::Column(column) => Some(wrap_width_for(column)),
+ SoftWrap::Bounded(column) => Some(editor_content_width.min(wrap_width_for(column))),
+ };
+ if editor.set_wrap_width(wrap_width, cx) {
+ snapshot = editor.snapshot(window, cx);
}
let scroll_height = (snapshot.max_point().row().next_row().0 as f32) * line_height;
@@ -54,6 +54,7 @@ use project::{
use serde::{Deserialize, Serialize};
use settings::{Settings as _, SettingsStore};
use std::future::Future;
+use std::num::NonZeroU32;
use std::path::{Path, PathBuf};
use std::{collections::HashSet, sync::Arc, time::Duration, usize};
use strum::{IntoEnumIterator, VariantNames};
@@ -62,7 +63,7 @@ use ui::{
Checkbox, ContextMenu, ElevationIndex, PopoverMenu, Scrollbar, ScrollbarState, SplitButton,
Tooltip, prelude::*,
};
-use util::{ResultExt, TryFutureExt, maybe};
+use util::{ResultExt, TryFutureExt, maybe, wrap_with_prefix};
use workspace::AppState;
use notifications::status_toast::{StatusToast, ToastIcon};
@@ -382,7 +383,6 @@ pub(crate) fn commit_message_editor(
commit_editor.set_show_gutter(false, cx);
commit_editor.set_show_wrap_guides(false, cx);
commit_editor.set_show_indent_guides(false, cx);
- commit_editor.set_hard_wrap(Some(72), cx);
let placeholder = placeholder.unwrap_or("Enter commit message".into());
commit_editor.set_placeholder_text(placeholder, cx);
commit_editor
@@ -1484,8 +1484,22 @@ impl GitPanel {
fn custom_or_suggested_commit_message(&self, cx: &mut Context<Self>) -> Option<String> {
let message = self.commit_editor.read(cx).text(cx);
+ let width = self
+ .commit_editor
+ .read(cx)
+ .buffer()
+ .read(cx)
+ .language_settings(cx)
+ .preferred_line_length as usize;
if !message.trim().is_empty() {
+ let message = wrap_with_prefix(
+ String::new(),
+ message,
+ width,
+ NonZeroU32::new(8).unwrap(), // tab size doesn't matter when prefix is empty
+ false,
+ );
return Some(message);
}
@@ -666,7 +666,7 @@ pub struct CodeLabel {
pub filter_range: Range<usize>,
}
-#[derive(Clone, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct LanguageConfig {
/// Human-readable name of the language.
pub name: LanguageName,
@@ -777,7 +777,7 @@ pub struct LanguageMatcher {
}
/// The configuration for JSX tag auto-closing.
-#[derive(Clone, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct JsxTagAutoCloseConfig {
/// The name of the node for a opening tag
pub open_tag_node_name: String,
@@ -810,7 +810,7 @@ pub struct JsxTagAutoCloseConfig {
}
/// The configuration for documentation block for this language.
-#[derive(Clone, Deserialize, JsonSchema)]
+#[derive(Clone, Debug, Deserialize, JsonSchema)]
pub struct DocumentationConfig {
/// A start tag of documentation block.
pub start: Arc<str>,
@@ -37,6 +37,8 @@ smol.workspace = true
take-until.workspace = true
tempfile.workspace = true
unicase.workspace = true
+unicode-script.workspace = true
+unicode-segmentation.workspace = true
util_macros = { workspace = true, optional = true }
walkdir.workspace = true
workspace-hack.workspace = true
@@ -14,6 +14,7 @@ use anyhow::Result;
use futures::Future;
use itertools::Either;
use regex::Regex;
+use std::num::NonZeroU32;
use std::sync::{LazyLock, OnceLock};
use std::{
borrow::Cow,
@@ -183,29 +184,208 @@ pub fn truncate_lines_to_byte_limit(s: &str, max_bytes: usize) -> &str {
truncate_to_byte_limit(s, max_bytes)
}
-#[test]
-fn test_truncate_lines_to_byte_limit() {
- let text = "Line 1\nLine 2\nLine 3\nLine 4";
+fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize {
+ let tab_size = tab_size.get() as usize;
+ let mut width = offset;
- // Limit that includes all lines
- assert_eq!(truncate_lines_to_byte_limit(text, 100), text);
+ for ch in text.chars() {
+ width += if ch == '\t' {
+ tab_size - (width % tab_size)
+ } else {
+ 1
+ };
+ }
- // Exactly the first line
- assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 1\n");
+ width - offset
+}
- // Limit between lines
- assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n");
- assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n");
+/// Tokenizes a string into runs of text that should stick together, or that is whitespace.
+struct WordBreakingTokenizer<'a> {
+ input: &'a str,
+}
- // Limit before first newline
- assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line ");
+impl<'a> WordBreakingTokenizer<'a> {
+ fn new(input: &'a str) -> Self {
+ Self { input }
+ }
+}
- // Test with non-ASCII characters
- let text_utf8 = "Line 1\nLíne 2\nLine 3";
- assert_eq!(
- truncate_lines_to_byte_limit(text_utf8, 15),
- "Line 1\nLíne 2\n"
- );
+fn is_char_ideographic(ch: char) -> bool {
+ use unicode_script::Script::*;
+ use unicode_script::UnicodeScript;
+ matches!(ch.script(), Han | Tangut | Yi)
+}
+
+fn is_grapheme_ideographic(text: &str) -> bool {
+ text.chars().any(is_char_ideographic)
+}
+
+fn is_grapheme_whitespace(text: &str) -> bool {
+ text.chars().any(|x| x.is_whitespace())
+}
+
+fn should_stay_with_preceding_ideograph(text: &str) -> bool {
+ text.chars().next().map_or(false, |ch| {
+ matches!(ch, '。' | '、' | ',' | '?' | '!' | ':' | ';' | '…')
+ })
+}
+
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+enum WordBreakToken<'a> {
+ Word { token: &'a str, grapheme_len: usize },
+ InlineWhitespace { token: &'a str, grapheme_len: usize },
+ Newline,
+}
+
+impl<'a> Iterator for WordBreakingTokenizer<'a> {
+ /// Yields a span, the count of graphemes in the token, and whether it was
+ /// whitespace. Note that it also breaks at word boundaries.
+ type Item = WordBreakToken<'a>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ use unicode_segmentation::UnicodeSegmentation;
+ if self.input.is_empty() {
+ return None;
+ }
+
+ let mut iter = self.input.graphemes(true).peekable();
+ let mut offset = 0;
+ let mut grapheme_len = 0;
+ if let Some(first_grapheme) = iter.next() {
+ let is_newline = first_grapheme == "\n";
+ let is_whitespace = is_grapheme_whitespace(first_grapheme);
+ offset += first_grapheme.len();
+ grapheme_len += 1;
+ if is_grapheme_ideographic(first_grapheme) && !is_whitespace {
+ if let Some(grapheme) = iter.peek().copied() {
+ if should_stay_with_preceding_ideograph(grapheme) {
+ offset += grapheme.len();
+ grapheme_len += 1;
+ }
+ }
+ } else {
+ let mut words = self.input[offset..].split_word_bound_indices().peekable();
+ let mut next_word_bound = words.peek().copied();
+ if next_word_bound.map_or(false, |(i, _)| i == 0) {
+ next_word_bound = words.next();
+ }
+ while let Some(grapheme) = iter.peek().copied() {
+ if next_word_bound.map_or(false, |(i, _)| i == offset) {
+ break;
+ };
+ if is_grapheme_whitespace(grapheme) != is_whitespace
+ || (grapheme == "\n") != is_newline
+ {
+ break;
+ };
+ offset += grapheme.len();
+ grapheme_len += 1;
+ iter.next();
+ }
+ }
+ let token = &self.input[..offset];
+ self.input = &self.input[offset..];
+ if token == "\n" {
+ Some(WordBreakToken::Newline)
+ } else if is_whitespace {
+ Some(WordBreakToken::InlineWhitespace {
+ token,
+ grapheme_len,
+ })
+ } else {
+ Some(WordBreakToken::Word {
+ token,
+ grapheme_len,
+ })
+ }
+ } else {
+ None
+ }
+ }
+}
+
+pub fn wrap_with_prefix(
+ line_prefix: String,
+ unwrapped_text: String,
+ wrap_column: usize,
+ tab_size: NonZeroU32,
+ preserve_existing_whitespace: bool,
+) -> String {
+ let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size);
+ let mut wrapped_text = String::new();
+ let mut current_line = line_prefix.clone();
+
+ let tokenizer = WordBreakingTokenizer::new(&unwrapped_text);
+ let mut current_line_len = line_prefix_len;
+ let mut in_whitespace = false;
+ for token in tokenizer {
+ let have_preceding_whitespace = in_whitespace;
+ match token {
+ WordBreakToken::Word {
+ token,
+ grapheme_len,
+ } => {
+ in_whitespace = false;
+ if current_line_len + grapheme_len > wrap_column
+ && current_line_len != line_prefix_len
+ {
+ wrapped_text.push_str(current_line.trim_end());
+ wrapped_text.push('\n');
+ current_line.truncate(line_prefix.len());
+ current_line_len = line_prefix_len;
+ }
+ current_line.push_str(token);
+ current_line_len += grapheme_len;
+ }
+ WordBreakToken::InlineWhitespace {
+ mut token,
+ mut grapheme_len,
+ } => {
+ in_whitespace = true;
+ if have_preceding_whitespace && !preserve_existing_whitespace {
+ continue;
+ }
+ if !preserve_existing_whitespace {
+ token = " ";
+ grapheme_len = 1;
+ }
+ if current_line_len + grapheme_len > wrap_column {
+ wrapped_text.push_str(current_line.trim_end());
+ wrapped_text.push('\n');
+ current_line.truncate(line_prefix.len());
+ current_line_len = line_prefix_len;
+ } else if current_line_len != line_prefix_len || preserve_existing_whitespace {
+ current_line.push_str(token);
+ current_line_len += grapheme_len;
+ }
+ }
+ WordBreakToken::Newline => {
+ in_whitespace = true;
+ if preserve_existing_whitespace {
+ wrapped_text.push_str(current_line.trim_end());
+ wrapped_text.push('\n');
+ current_line.truncate(line_prefix.len());
+ current_line_len = line_prefix_len;
+ } else if have_preceding_whitespace {
+ continue;
+ } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len
+ {
+ wrapped_text.push_str(current_line.trim_end());
+ wrapped_text.push('\n');
+ current_line.truncate(line_prefix.len());
+ current_line_len = line_prefix_len;
+ } else if current_line_len != line_prefix_len {
+ current_line.push(' ');
+ current_line_len += 1;
+ }
+ }
+ }
+ }
+
+ if !current_line.is_empty() {
+ wrapped_text.push_str(&current_line);
+ }
+ wrapped_text
}
pub fn post_inc<T: From<u8> + AddAssign<T> + Copy>(value: &mut T) -> T {
@@ -1302,4 +1482,161 @@ Line 3"#
(0..8).collect::<Vec<usize>>()
);
}
+
+ #[test]
+ fn test_truncate_lines_to_byte_limit() {
+ let text = "Line 1\nLine 2\nLine 3\nLine 4";
+
+ // Limit that includes all lines
+ assert_eq!(truncate_lines_to_byte_limit(text, 100), text);
+
+ // Exactly the first line
+ assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 1\n");
+
+ // Limit between lines
+ assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n");
+ assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n");
+
+ // Limit before first newline
+ assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line ");
+
+ // Test with non-ASCII characters
+ let text_utf8 = "Line 1\nLíne 2\nLine 3";
+ assert_eq!(
+ truncate_lines_to_byte_limit(text_utf8, 15),
+ "Line 1\nLíne 2\n"
+ );
+ }
+
+ #[test]
+ fn test_string_size_with_expanded_tabs() {
+ let nz = |val| NonZeroU32::new(val).unwrap();
+ assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0);
+ assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5);
+ assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9);
+ assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6);
+ assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8);
+ assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16);
+ assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8);
+ assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9);
+ }
+
+ #[test]
+ fn test_word_breaking_tokenizer() {
+ let tests: &[(&str, &[WordBreakToken<'static>])] = &[
+ ("", &[]),
+ (" ", &[whitespace(" ", 2)]),
+ ("Ʒ", &[word("Ʒ", 1)]),
+ ("Ǽ", &[word("Ǽ", 1)]),
+ ("⋑", &[word("⋑", 1)]),
+ ("⋑⋑", &[word("⋑⋑", 2)]),
+ (
+ "原理,进而",
+ &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)],
+ ),
+ (
+ "hello world",
+ &[word("hello", 5), whitespace(" ", 1), word("world", 5)],
+ ),
+ (
+ "hello, world",
+ &[word("hello,", 6), whitespace(" ", 1), word("world", 5)],
+ ),
+ (
+ " hello world",
+ &[
+ whitespace(" ", 2),
+ word("hello", 5),
+ whitespace(" ", 1),
+ word("world", 5),
+ ],
+ ),
+ (
+ "这是什么 \n 钢笔",
+ &[
+ word("这", 1),
+ word("是", 1),
+ word("什", 1),
+ word("么", 1),
+ whitespace(" ", 1),
+ newline(),
+ whitespace(" ", 1),
+ word("钢", 1),
+ word("笔", 1),
+ ],
+ ),
+ (" mutton", &[whitespace(" ", 1), word("mutton", 6)]),
+ ];
+
+ fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
+ WordBreakToken::Word {
+ token,
+ grapheme_len,
+ }
+ }
+
+ fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
+ WordBreakToken::InlineWhitespace {
+ token,
+ grapheme_len,
+ }
+ }
+
+ fn newline() -> WordBreakToken<'static> {
+ WordBreakToken::Newline
+ }
+
+ for (input, result) in tests {
+ assert_eq!(
+ WordBreakingTokenizer::new(input)
+ .collect::<Vec<_>>()
+ .as_slice(),
+ *result,
+ );
+ }
+ }
+
+ #[test]
+ fn test_wrap_with_prefix() {
+ assert_eq!(
+ wrap_with_prefix(
+ "# ".to_string(),
+ "abcdefg".to_string(),
+ 4,
+ NonZeroU32::new(4).unwrap(),
+ false,
+ ),
+ "# abcdefg"
+ );
+ assert_eq!(
+ wrap_with_prefix(
+ "".to_string(),
+ "\thello world".to_string(),
+ 8,
+ NonZeroU32::new(4).unwrap(),
+ false,
+ ),
+ "hello\nworld"
+ );
+ assert_eq!(
+ wrap_with_prefix(
+ "// ".to_string(),
+ "xx \nyy zz aa bb cc".to_string(),
+ 12,
+ NonZeroU32::new(4).unwrap(),
+ false,
+ ),
+ "// xx yy zz\n// aa bb cc"
+ );
+ assert_eq!(
+ wrap_with_prefix(
+ String::new(),
+ "这是什么 \n 钢笔".to_string(),
+ 3,
+ NonZeroU32::new(4).unwrap(),
+ false,
+ ),
+ "这是什\n么 钢\n笔"
+ );
+ }
}