From baeb7d27b8c1121fceb9908253103457ea1a7ddc Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 13:58:16 -0600 Subject: [PATCH 1/8] Clarify word movement function names and improve test coverage Co-Authored-By: Keith Simmons --- crates/editor/src/editor.rs | 13 ++--- crates/editor/src/movement.rs | 102 ++++++++++++++++++++++++++++++---- 2 files changed, 96 insertions(+), 19 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index 30888d8a408ada78170e6dd6a39979392e085fe4..c3858b434993820b6458825532d52ec8b09b734c 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -3480,7 +3480,7 @@ impl Editor { let mut selections = self.local_selections::(cx); for selection in &mut selections { let head = selection.head().to_display_point(&display_map); - let cursor = movement::prev_word_boundary(&display_map, head).to_point(&display_map); + let cursor = movement::previous_word_start(&display_map, head).to_point(&display_map); selection.start = cursor.clone(); selection.end = cursor; selection.reversed = false; @@ -3498,7 +3498,7 @@ impl Editor { let mut selections = self.local_selections::(cx); for selection in &mut selections { let head = selection.head().to_display_point(&display_map); - let cursor = movement::prev_word_boundary(&display_map, head).to_point(&display_map); + let cursor = movement::previous_word_start(&display_map, head).to_point(&display_map); selection.set_head(cursor); selection.goal = SelectionGoal::None; } @@ -3517,7 +3517,7 @@ impl Editor { if selection.is_empty() { let head = selection.head().to_display_point(&display_map); let cursor = - movement::prev_word_boundary(&display_map, head).to_point(&display_map); + movement::previous_word_start(&display_map, head).to_point(&display_map); selection.set_head(cursor); selection.goal = SelectionGoal::None; } @@ -3536,7 +3536,7 @@ impl Editor { let mut selections = self.local_selections::(cx); for selection in &mut selections { let head = selection.head().to_display_point(&display_map); - let cursor = movement::next_word_boundary(&display_map, head).to_point(&display_map); + let cursor = movement::next_word_end(&display_map, head).to_point(&display_map); selection.start = cursor; selection.end = cursor; selection.reversed = false; @@ -3554,7 +3554,7 @@ impl Editor { let mut selections = self.local_selections::(cx); for selection in &mut selections { let head = selection.head().to_display_point(&display_map); - let cursor = movement::next_word_boundary(&display_map, head).to_point(&display_map); + let cursor = movement::next_word_end(&display_map, head).to_point(&display_map); selection.set_head(cursor); selection.goal = SelectionGoal::None; } @@ -3572,8 +3572,7 @@ impl Editor { for selection in &mut selections { if selection.is_empty() { let head = selection.head().to_display_point(&display_map); - let cursor = - movement::next_word_boundary(&display_map, head).to_point(&display_map); + let cursor = movement::next_word_end(&display_map, head).to_point(&display_map); selection.set_head(cursor); selection.goal = SelectionGoal::None; } diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index 5951e2c20db8b9f675f0d6a25afb7e5590e8dc7b..a090d8dde2420c5073e5786847b446e768ebd149 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -132,7 +132,7 @@ pub fn line_end( } } -pub fn prev_word_boundary(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { +pub fn previous_word_start(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { let mut line_start = 0; if point.row() > 0 { if let Some(indent) = map.soft_wrap_indent(point.row() - 1) { @@ -171,7 +171,7 @@ pub fn prev_word_boundary(map: &DisplaySnapshot, mut point: DisplayPoint) -> Dis boundary } -pub fn next_word_boundary(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { +pub fn next_word_end(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { let mut prev_char_kind = None; for c in map.chars_at(point) { let char_kind = char_kind(c); @@ -297,6 +297,84 @@ mod tests { ); } + #[gpui::test] + fn test_previous_word_start(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + dbg!(&display_points); + assert_eq!( + previous_word_start(&snapshot, display_points[1]), + display_points[0] + ); + } + + assert("\n| |lorem", cx); + assert(" |lorem|", cx); + assert(" |lor|em", cx); + assert("\nlorem\n| |ipsum", cx); + assert("\n\n|\n|", cx); + assert(" |lorem |ipsum", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|-#$@|ipsum", cx); + assert("|lorem_|ipsum", cx); + } + + #[gpui::test] + fn test_next_word_end(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + next_word_end(&snapshot, display_points[0]), + display_points[1] + ); + } + + assert("\n| lorem|", cx); + assert(" |lorem|", cx); + assert(" lor|em|", cx); + assert(" lorem| |\nipsum\n", cx); + assert("\n|\n|\n\n", cx); + assert("lorem| ipsum| ", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|#$@-|ipsum", cx); + assert("lorem|_ipsum|", cx); + } + + // Returns a snapshot from text containing '|' character markers with the markers removed, and DisplayPoints for each one. + fn marked_snapshot( + text: &str, + cx: &mut gpui::MutableAppContext, + ) -> (DisplaySnapshot, Vec) { + let mut marked_offsets = Vec::new(); + let chunks = text.split('|'); + let mut text = String::new(); + + for chunk in chunks { + text.push_str(chunk); + marked_offsets.push(text.len()); + } + marked_offsets.pop(); + + let tab_size = 4; + let family_id = cx.font_cache().load_family(&["Helvetica"]).unwrap(); + let font_id = cx + .font_cache() + .select_font(family_id, &Default::default()) + .unwrap(); + let font_size = 14.0; + + let buffer = MultiBuffer::build_simple(&text, cx); + let display_map = cx + .add_model(|cx| DisplayMap::new(buffer, tab_size, font_id, font_size, None, 1, 1, cx)); + let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx)); + let marked_display_points = marked_offsets + .into_iter() + .map(|offset| offset.to_display_point(&snapshot)) + .collect(); + + (snapshot, marked_display_points) + } + #[gpui::test] fn test_prev_next_word_boundary_multibyte(cx: &mut gpui::MutableAppContext) { let tab_size = 4; @@ -312,44 +390,44 @@ mod tests { .add_model(|cx| DisplayMap::new(buffer, tab_size, font_id, font_size, None, 1, 1, cx)); let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx)); assert_eq!( - prev_word_boundary(&snapshot, DisplayPoint::new(0, 12)), + previous_word_start(&snapshot, DisplayPoint::new(0, 12)), DisplayPoint::new(0, 7) ); assert_eq!( - prev_word_boundary(&snapshot, DisplayPoint::new(0, 7)), + previous_word_start(&snapshot, DisplayPoint::new(0, 7)), DisplayPoint::new(0, 2) ); assert_eq!( - prev_word_boundary(&snapshot, DisplayPoint::new(0, 6)), + previous_word_start(&snapshot, DisplayPoint::new(0, 6)), DisplayPoint::new(0, 2) ); assert_eq!( - prev_word_boundary(&snapshot, DisplayPoint::new(0, 2)), + previous_word_start(&snapshot, DisplayPoint::new(0, 2)), DisplayPoint::new(0, 0) ); assert_eq!( - prev_word_boundary(&snapshot, DisplayPoint::new(0, 1)), + previous_word_start(&snapshot, DisplayPoint::new(0, 1)), DisplayPoint::new(0, 0) ); assert_eq!( - next_word_boundary(&snapshot, DisplayPoint::new(0, 0)), + next_word_end(&snapshot, DisplayPoint::new(0, 0)), DisplayPoint::new(0, 1) ); assert_eq!( - next_word_boundary(&snapshot, DisplayPoint::new(0, 1)), + next_word_end(&snapshot, DisplayPoint::new(0, 1)), DisplayPoint::new(0, 6) ); assert_eq!( - next_word_boundary(&snapshot, DisplayPoint::new(0, 2)), + next_word_end(&snapshot, DisplayPoint::new(0, 2)), DisplayPoint::new(0, 6) ); assert_eq!( - next_word_boundary(&snapshot, DisplayPoint::new(0, 6)), + next_word_end(&snapshot, DisplayPoint::new(0, 6)), DisplayPoint::new(0, 12) ); assert_eq!( - next_word_boundary(&snapshot, DisplayPoint::new(0, 7)), + next_word_end(&snapshot, DisplayPoint::new(0, 7)), DisplayPoint::new(0, 12) ); } From 210fa4c4431e8d00a4c7470ec44c385bf3a18d34 Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 14:14:55 -0600 Subject: [PATCH 2/8] Remove CharKind::Newline This is just a character, and so it seems clearer to refer to it specifically when we want to know if a character is a newline. There was only one case where we relied on Newline being different from Whitespace, and we special-cased that instance. Changing this actually makes us match the behavior of VS Code when double-clicking runs of multiple newlines. /cc @as-cii Co-Authored-By: Keith Simmons --- crates/editor/src/movement.rs | 20 ++++++++++---------- crates/editor/src/multi_buffer.rs | 4 ++-- crates/language/src/buffer.rs | 5 +---- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index a090d8dde2420c5073e5786847b446e768ebd149..9c87ca301675f5c580df969197681b09328088d4 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -151,17 +151,14 @@ pub fn previous_word_start(map: &DisplaySnapshot, mut point: DisplayPoint) -> Di let mut boundary = DisplayPoint::new(point.row(), 0); let mut column = 0; - let mut prev_char_kind = CharKind::Newline; + let mut prev_char_kind = CharKind::Whitespace; for c in map.chars_at(DisplayPoint::new(point.row(), 0)) { if column >= point.column() { break; } let char_kind = char_kind(c); - if char_kind != prev_char_kind - && char_kind != CharKind::Whitespace - && char_kind != CharKind::Newline - { + if char_kind != prev_char_kind && char_kind != CharKind::Whitespace && c != '\n' { *boundary.column_mut() = column; } @@ -179,10 +176,7 @@ pub fn next_word_end(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayP if c == '\n' { break; } - if prev_char_kind != char_kind - && prev_char_kind != CharKind::Whitespace - && prev_char_kind != CharKind::Newline - { + if prev_char_kind != char_kind && prev_char_kind != CharKind::Whitespace { break; } } @@ -441,7 +435,7 @@ mod tests { .select_font(family_id, &Default::default()) .unwrap(); let font_size = 14.0; - let buffer = MultiBuffer::build_simple("lorem ipsum dolor\n sit", cx); + let buffer = MultiBuffer::build_simple("lorem ipsum dolor\n sit\n\n\n\n", cx); let display_map = cx .add_model(|cx| DisplayMap::new(buffer, tab_size, font_id, font_size, None, 1, 1, cx)); let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx)); @@ -502,5 +496,11 @@ mod tests { surrounding_word(&snapshot, DisplayPoint::new(1, 7)), DisplayPoint::new(1, 4)..DisplayPoint::new(1, 7), ); + + // Don't consider runs of multiple newlines to be a "word" + assert_eq!( + surrounding_word(&snapshot, DisplayPoint::new(3, 0)), + DisplayPoint::new(3, 0)..DisplayPoint::new(3, 0), + ); } } diff --git a/crates/editor/src/multi_buffer.rs b/crates/editor/src/multi_buffer.rs index e145488d650cb49d44d6b3de3af9e6304f02609e..715548f8135bc05568873da6b782a3bfdf2d1112 100644 --- a/crates/editor/src/multi_buffer.rs +++ b/crates/editor/src/multi_buffer.rs @@ -1407,7 +1407,7 @@ impl MultiBufferSnapshot { ); for ch in prev_chars { - if Some(char_kind(ch)) == word_kind { + if Some(char_kind(ch)) == word_kind && ch != '\n' { start -= ch.len_utf8(); } else { break; @@ -1415,7 +1415,7 @@ impl MultiBufferSnapshot { } for ch in next_chars { - if Some(char_kind(ch)) == word_kind { + if Some(char_kind(ch)) == word_kind && ch != '\n' { end += ch.len_utf8(); } else { break; diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index a4fee82805845fc6aed39de37a441d0a66e55c02..763d08c05388b9a3986798030e9f027dd8fa40cc 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -271,7 +271,6 @@ pub(crate) struct DiagnosticEndpoint { #[derive(Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Debug)] pub enum CharKind { - Newline, Punctuation, Whitespace, Word, @@ -2259,9 +2258,7 @@ pub fn contiguous_ranges( } pub fn char_kind(c: char) -> CharKind { - if c == '\n' { - CharKind::Newline - } else if c.is_whitespace() { + if c.is_whitespace() { CharKind::Whitespace } else if c.is_alphanumeric() || c == '_' { CharKind::Word From 0c89ad3ac02ab27988dd82b474370fcc98e5ebbc Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 15:41:42 -0600 Subject: [PATCH 3/8] Make multi-byte and surrounding_word tests more readable Just merge multi-byte tests into the main word movement tests --- crates/editor/src/movement.rs | 246 ++++++++++------------------------ 1 file changed, 68 insertions(+), 178 deletions(-) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index 9c87ca301675f5c580df969197681b09328088d4..b5bafebe67e21552963fd2b567c1aab35a199258 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -222,6 +222,74 @@ mod tests { use crate::{Buffer, DisplayMap, MultiBuffer}; use language::Point; + #[gpui::test] + fn test_previous_word_start(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + dbg!(&display_points); + assert_eq!( + previous_word_start(&snapshot, display_points[1]), + display_points[0] + ); + } + + assert("\n| |lorem", cx); + assert(" |lorem|", cx); + assert(" |lor|em", cx); + assert("\nlorem\n| |ipsum", cx); + assert("\n\n|\n|", cx); + assert(" |lorem |ipsum", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|-#$@|ipsum", cx); + assert("|lorem_|ipsum", cx); + assert(" |defγ|", cx); + assert(" |bcΔ|", cx); + assert(" ab|——|cd", cx); + } + + #[gpui::test] + fn test_next_word_end(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + next_word_end(&snapshot, display_points[0]), + display_points[1] + ); + } + + assert("\n| lorem|", cx); + assert(" |lorem|", cx); + assert(" lor|em|", cx); + assert(" lorem| |\nipsum\n", cx); + assert("\n|\n|\n\n", cx); + assert("lorem| ipsum| ", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|#$@-|ipsum", cx); + assert("lorem|_ipsum|", cx); + assert(" |bcΔ|", cx); + assert(" ab|——|cd", cx); + } + + #[gpui::test] + fn test_surrounding_word(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + surrounding_word(&snapshot, display_points[1]), + display_points[0]..display_points[2] + ); + } + + assert("||lorem| ipsum", cx); + assert("|lo|rem| ipsum", cx); + assert("|lorem|| ipsum", cx); + assert("lorem| | |ipsum", cx); + assert("lorem\n|||\nipsum", cx); + assert("lorem\n||ipsum|", cx); + assert("lorem,|| |ipsum", cx); + assert("|lorem||, ipsum", cx); + } + #[gpui::test] fn test_move_up_and_down_with_excerpts(cx: &mut gpui::MutableAppContext) { let family_id = cx.font_cache().load_family(&["Helvetica"]).unwrap(); @@ -291,49 +359,6 @@ mod tests { ); } - #[gpui::test] - fn test_previous_word_start(cx: &mut gpui::MutableAppContext) { - fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { - let (snapshot, display_points) = marked_snapshot(marked_text, cx); - dbg!(&display_points); - assert_eq!( - previous_word_start(&snapshot, display_points[1]), - display_points[0] - ); - } - - assert("\n| |lorem", cx); - assert(" |lorem|", cx); - assert(" |lor|em", cx); - assert("\nlorem\n| |ipsum", cx); - assert("\n\n|\n|", cx); - assert(" |lorem |ipsum", cx); - assert("lorem|-|ipsum", cx); - assert("lorem|-#$@|ipsum", cx); - assert("|lorem_|ipsum", cx); - } - - #[gpui::test] - fn test_next_word_end(cx: &mut gpui::MutableAppContext) { - fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { - let (snapshot, display_points) = marked_snapshot(marked_text, cx); - assert_eq!( - next_word_end(&snapshot, display_points[0]), - display_points[1] - ); - } - - assert("\n| lorem|", cx); - assert(" |lorem|", cx); - assert(" lor|em|", cx); - assert(" lorem| |\nipsum\n", cx); - assert("\n|\n|\n\n", cx); - assert("lorem| ipsum| ", cx); - assert("lorem|-|ipsum", cx); - assert("lorem|#$@-|ipsum", cx); - assert("lorem|_ipsum|", cx); - } - // Returns a snapshot from text containing '|' character markers with the markers removed, and DisplayPoints for each one. fn marked_snapshot( text: &str, @@ -368,139 +393,4 @@ mod tests { (snapshot, marked_display_points) } - - #[gpui::test] - fn test_prev_next_word_boundary_multibyte(cx: &mut gpui::MutableAppContext) { - let tab_size = 4; - let family_id = cx.font_cache().load_family(&["Helvetica"]).unwrap(); - let font_id = cx - .font_cache() - .select_font(family_id, &Default::default()) - .unwrap(); - let font_size = 14.0; - - let buffer = MultiBuffer::build_simple("a bcΔ defγ hi—jk", cx); - let display_map = cx - .add_model(|cx| DisplayMap::new(buffer, tab_size, font_id, font_size, None, 1, 1, cx)); - let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx)); - assert_eq!( - previous_word_start(&snapshot, DisplayPoint::new(0, 12)), - DisplayPoint::new(0, 7) - ); - assert_eq!( - previous_word_start(&snapshot, DisplayPoint::new(0, 7)), - DisplayPoint::new(0, 2) - ); - assert_eq!( - previous_word_start(&snapshot, DisplayPoint::new(0, 6)), - DisplayPoint::new(0, 2) - ); - assert_eq!( - previous_word_start(&snapshot, DisplayPoint::new(0, 2)), - DisplayPoint::new(0, 0) - ); - assert_eq!( - previous_word_start(&snapshot, DisplayPoint::new(0, 1)), - DisplayPoint::new(0, 0) - ); - - assert_eq!( - next_word_end(&snapshot, DisplayPoint::new(0, 0)), - DisplayPoint::new(0, 1) - ); - assert_eq!( - next_word_end(&snapshot, DisplayPoint::new(0, 1)), - DisplayPoint::new(0, 6) - ); - assert_eq!( - next_word_end(&snapshot, DisplayPoint::new(0, 2)), - DisplayPoint::new(0, 6) - ); - assert_eq!( - next_word_end(&snapshot, DisplayPoint::new(0, 6)), - DisplayPoint::new(0, 12) - ); - assert_eq!( - next_word_end(&snapshot, DisplayPoint::new(0, 7)), - DisplayPoint::new(0, 12) - ); - } - - #[gpui::test] - fn test_surrounding_word(cx: &mut gpui::MutableAppContext) { - let tab_size = 4; - let family_id = cx.font_cache().load_family(&["Helvetica"]).unwrap(); - let font_id = cx - .font_cache() - .select_font(family_id, &Default::default()) - .unwrap(); - let font_size = 14.0; - let buffer = MultiBuffer::build_simple("lorem ipsum dolor\n sit\n\n\n\n", cx); - let display_map = cx - .add_model(|cx| DisplayMap::new(buffer, tab_size, font_id, font_size, None, 1, 1, cx)); - let snapshot = display_map.update(cx, |map, cx| map.snapshot(cx)); - - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 0)), - DisplayPoint::new(0, 0)..DisplayPoint::new(0, 5), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 2)), - DisplayPoint::new(0, 0)..DisplayPoint::new(0, 5), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 5)), - DisplayPoint::new(0, 0)..DisplayPoint::new(0, 5), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 6)), - DisplayPoint::new(0, 6)..DisplayPoint::new(0, 11), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 7)), - DisplayPoint::new(0, 6)..DisplayPoint::new(0, 11), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 11)), - DisplayPoint::new(0, 6)..DisplayPoint::new(0, 11), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 13)), - DisplayPoint::new(0, 11)..DisplayPoint::new(0, 14), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 14)), - DisplayPoint::new(0, 14)..DisplayPoint::new(0, 19), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 17)), - DisplayPoint::new(0, 14)..DisplayPoint::new(0, 19), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(0, 19)), - DisplayPoint::new(0, 14)..DisplayPoint::new(0, 19), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(1, 0)), - DisplayPoint::new(1, 0)..DisplayPoint::new(1, 4), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(1, 1)), - DisplayPoint::new(1, 0)..DisplayPoint::new(1, 4), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(1, 6)), - DisplayPoint::new(1, 4)..DisplayPoint::new(1, 7), - ); - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(1, 7)), - DisplayPoint::new(1, 4)..DisplayPoint::new(1, 7), - ); - - // Don't consider runs of multiple newlines to be a "word" - assert_eq!( - surrounding_word(&snapshot, DisplayPoint::new(3, 0)), - DisplayPoint::new(3, 0)..DisplayPoint::new(3, 0), - ); - } } From 5b548747050eb1c7a0c645027c35bb5e8a5a775e Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 16:01:22 -0600 Subject: [PATCH 4/8] Extract logic for scanning over character boundaries --- crates/editor/src/movement.rs | 101 ++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index b5bafebe67e21552963fd2b567c1aab35a199258..fe66a23c3dcbfecabfd804ee806ff9ea79667e64 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -132,64 +132,84 @@ pub fn line_end( } } -pub fn previous_word_start(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { +pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + find_boundary_reversed(map, point, |left, right| { + char_kind(left) != char_kind(right) && !right.is_whitespace() + }) +} + +pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + find_boundary(map, point, |left, right| { + char_kind(left) != char_kind(right) && !left.is_whitespace() + }) +} + +fn find_boundary( + map: &DisplaySnapshot, + mut start: DisplayPoint, + is_boundary: impl Fn(char, char) -> bool, +) -> DisplayPoint { + let mut prev_ch = None; + for ch in map.chars_at(start) { + if let Some(prev_ch) = prev_ch { + if ch == '\n' { + break; + } + if is_boundary(prev_ch, ch) { + break; + } + } + + if ch == '\n' { + *start.row_mut() += 1; + *start.column_mut() = 0; + } else { + *start.column_mut() += ch.len_utf8() as u32; + } + prev_ch = Some(ch); + } + map.clip_point(start, Bias::Right) +} + +fn find_boundary_reversed( + map: &DisplaySnapshot, + mut start: DisplayPoint, + is_boundary: impl Fn(char, char) -> bool, +) -> DisplayPoint { let mut line_start = 0; - if point.row() > 0 { - if let Some(indent) = map.soft_wrap_indent(point.row() - 1) { + if start.row() > 0 { + if let Some(indent) = map.soft_wrap_indent(start.row() - 1) { line_start = indent; } } - if point.column() == line_start { - if point.row() == 0 { + if start.column() == line_start { + if start.row() == 0 { return DisplayPoint::new(0, 0); } else { - let row = point.row() - 1; - point = map.clip_point(DisplayPoint::new(row, map.line_len(row)), Bias::Left); + let row = start.row() - 1; + start = map.clip_point(DisplayPoint::new(row, map.line_len(row)), Bias::Left); } } - let mut boundary = DisplayPoint::new(point.row(), 0); + let mut boundary = DisplayPoint::new(start.row(), 0); let mut column = 0; - let mut prev_char_kind = CharKind::Whitespace; - for c in map.chars_at(DisplayPoint::new(point.row(), 0)) { - if column >= point.column() { + let mut prev_ch = None; + for ch in map.chars_at(DisplayPoint::new(start.row(), 0)) { + if column >= start.column() { break; } - let char_kind = char_kind(c); - if char_kind != prev_char_kind && char_kind != CharKind::Whitespace && c != '\n' { - *boundary.column_mut() = column; - } - - prev_char_kind = char_kind; - column += c.len_utf8() as u32; - } - boundary -} - -pub fn next_word_end(map: &DisplaySnapshot, mut point: DisplayPoint) -> DisplayPoint { - let mut prev_char_kind = None; - for c in map.chars_at(point) { - let char_kind = char_kind(c); - if let Some(prev_char_kind) = prev_char_kind { - if c == '\n' { - break; - } - if prev_char_kind != char_kind && prev_char_kind != CharKind::Whitespace { - break; + if let Some(prev_ch) = prev_ch { + if is_boundary(prev_ch, ch) { + *boundary.column_mut() = column; } } - if c == '\n' { - *point.row_mut() += 1; - *point.column_mut() = 0; - } else { - *point.column_mut() += c.len_utf8() as u32; - } - prev_char_kind = Some(char_kind); + prev_ch = Some(ch); + column += ch.len_utf8() as u32; } - map.clip_point(point, Bias::Right) + boundary } pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool { @@ -226,7 +246,6 @@ mod tests { fn test_previous_word_start(cx: &mut gpui::MutableAppContext) { fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { let (snapshot, display_points) = marked_snapshot(marked_text, cx); - dbg!(&display_points); assert_eq!( previous_word_start(&snapshot, display_points[1]), display_points[0] From c0d05c82b7bf3ca3355f2cf82a76537f0338a7b4 Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 17:37:36 -0600 Subject: [PATCH 5/8] WIP: Start on previous_subword_start Co-Authored-By: Max Brunsfeld Co-Authored-By: Keith Simmons --- crates/editor/src/movement.rs | 98 +++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 28 deletions(-) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index fe66a23c3dcbfecabfd804ee806ff9ea79667e64..c8f217b57e0ff12d58174862287955c149c0e1d5 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -138,40 +138,22 @@ pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> Displa }) } +pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + find_boundary_reversed(map, point, |left, right| { + (char_kind(left) != char_kind(right) + || left == '_' && right != '_' + || left.is_lowercase() && right.is_uppercase()) + && !right.is_whitespace() + }) +} + pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { find_boundary(map, point, |left, right| { char_kind(left) != char_kind(right) && !left.is_whitespace() }) } -fn find_boundary( - map: &DisplaySnapshot, - mut start: DisplayPoint, - is_boundary: impl Fn(char, char) -> bool, -) -> DisplayPoint { - let mut prev_ch = None; - for ch in map.chars_at(start) { - if let Some(prev_ch) = prev_ch { - if ch == '\n' { - break; - } - if is_boundary(prev_ch, ch) { - break; - } - } - - if ch == '\n' { - *start.row_mut() += 1; - *start.column_mut() = 0; - } else { - *start.column_mut() += ch.len_utf8() as u32; - } - prev_ch = Some(ch); - } - map.clip_point(start, Bias::Right) -} - -fn find_boundary_reversed( +pub fn find_boundary_reversed( map: &DisplaySnapshot, mut start: DisplayPoint, is_boundary: impl Fn(char, char) -> bool, @@ -212,6 +194,33 @@ fn find_boundary_reversed( boundary } +pub fn find_boundary( + map: &DisplaySnapshot, + mut start: DisplayPoint, + is_boundary: impl Fn(char, char) -> bool, +) -> DisplayPoint { + let mut prev_ch = None; + for ch in map.chars_at(start) { + if let Some(prev_ch) = prev_ch { + if ch == '\n' { + break; + } + if is_boundary(prev_ch, ch) { + break; + } + } + + if ch == '\n' { + *start.row_mut() += 1; + *start.column_mut() = 0; + } else { + *start.column_mut() += ch.len_utf8() as u32; + } + prev_ch = Some(ch); + } + map.clip_point(start, Bias::Right) +} + pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool { let ix = map.clip_point(point, Bias::Left).to_offset(map, Bias::Left); let text = &map.buffer_snapshot; @@ -266,6 +275,39 @@ mod tests { assert(" ab|——|cd", cx); } + #[gpui::test] + fn test_previous_subword_start(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + previous_subword_start(&snapshot, display_points[1]), + display_points[0] + ); + } + + // Subword boundaries are respected + assert("lorem_|ip|sum", cx); + assert("lorem_|ipsum|", cx); + assert("|lorem_|ipsum", cx); + assert("lorem_|ipsum_|dolor", cx); + assert("lorem|Ip|sum", cx); + assert("lorem|Ipsum|", cx); + + // Word boundaries are still respected + assert("\n| |lorem", cx); + assert(" |lorem|", cx); + assert(" |lor|em", cx); + assert("\nlorem\n| |ipsum", cx); + assert("\n\n|\n|", cx); + assert(" |lorem |ipsum", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|-#$@|ipsum", cx); + assert(" |defγ|", cx); + assert(" bc|Δ|", cx); + assert(" |bcδ|", cx); + assert(" ab|——|cd", cx); + } + #[gpui::test] fn test_next_word_end(cx: &mut gpui::MutableAppContext) { fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { From e5a00d72f8a498e77e82b396c3b846b21ad18609 Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 20:02:08 -0600 Subject: [PATCH 6/8] Implement next_subword_end --- crates/editor/src/movement.rs | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index c8f217b57e0ff12d58174862287955c149c0e1d5..72567312e1accbdd13f304433ff6ab87073e9a81 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -153,6 +153,15 @@ pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint }) } +pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { + find_boundary(map, point, |left, right| { + (char_kind(left) != char_kind(right) + || left != '_' && right == '_' + || left.is_lowercase() && right.is_uppercase()) + && !left.is_whitespace() + }) +} + pub fn find_boundary_reversed( map: &DisplaySnapshot, mut start: DisplayPoint, @@ -331,6 +340,38 @@ mod tests { assert(" ab|——|cd", cx); } + #[gpui::test] + fn test_next_subword_end(cx: &mut gpui::MutableAppContext) { + fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + next_subword_end(&snapshot, display_points[0]), + display_points[1] + ); + } + + // Subword boundaries are respected + assert("lo|rem|_ipsum", cx); + assert("|lorem|_ipsum", cx); + assert("lorem|_ipsum|", cx); + assert("lorem|_ipsum|_dolor", cx); + assert("lo|rem|Ipsum", cx); + assert("lorem|Ipsum|Dolor", cx); + + // Word boundaries are still respected + assert("\n| lorem|", cx); + assert(" |lorem|", cx); + assert(" lor|em|", cx); + assert(" lorem| |\nipsum\n", cx); + assert("\n|\n|\n\n", cx); + assert("lorem| ipsum| ", cx); + assert("lorem|-|ipsum", cx); + assert("lorem|#$@-|ipsum", cx); + assert("lorem|_ipsum|", cx); + assert(" |bc|Δ", cx); + assert(" ab|——|cd", cx); + } + #[gpui::test] fn test_surrounding_word(cx: &mut gpui::MutableAppContext) { fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { From f70f4c77293b8cec39ad80b22d97310301244200 Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 21:07:56 -0600 Subject: [PATCH 7/8] Improve DisplayPoint Debug impl --- crates/editor/src/display_map.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/crates/editor/src/display_map.rs b/crates/editor/src/display_map.rs index dbadbb386eb2b5fed57f81d1a5bfc83d264ab04d..17209e5c74420a421d49c5e62a445661c44f8336 100644 --- a/crates/editor/src/display_map.rs +++ b/crates/editor/src/display_map.rs @@ -12,7 +12,7 @@ use gpui::{ Entity, ModelContext, ModelHandle, }; use language::{Point, Subscription as BufferSubscription}; -use std::{any::TypeId, ops::Range, sync::Arc}; +use std::{any::TypeId, fmt::Debug, ops::Range, sync::Arc}; use sum_tree::{Bias, TreeMap}; use tab_map::TabMap; use wrap_map::WrapMap; @@ -414,9 +414,19 @@ impl DisplaySnapshot { } } -#[derive(Copy, Clone, Debug, Default, Eq, Ord, PartialOrd, PartialEq)] +#[derive(Copy, Clone, Default, Eq, Ord, PartialOrd, PartialEq)] pub struct DisplayPoint(BlockPoint); +impl Debug for DisplayPoint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!( + "DisplayPoint({}, {})", + self.row(), + self.column() + )) + } +} + impl DisplayPoint { pub fn new(row: u32, column: u32) -> Self { Self(BlockPoint(Point::new(row, column))) @@ -426,7 +436,6 @@ impl DisplayPoint { Self::new(0, 0) } - #[cfg(test)] pub fn is_zero(&self) -> bool { self.0.is_zero() } From ee3e6049a3870108e6562e14186409ea8be3ca94 Mon Sep 17 00:00:00 2001 From: Nathan Sobo Date: Mon, 21 Mar 2022 21:24:52 -0600 Subject: [PATCH 8/8] Make boundary-finding methods wrap across newlines This requires word and subword methods to explicitly acknowledge that they want to stop at newlines, which I think actually increases clarity. It makes the boundary finding method more general and useful for external callers such as the forthcoming vim crate. --- crates/editor/src/movement.rs | 176 ++++++++++++++++++++++++---------- 1 file changed, 125 insertions(+), 51 deletions(-) diff --git a/crates/editor/src/movement.rs b/crates/editor/src/movement.rs index 72567312e1accbdd13f304433ff6ab87073e9a81..5e6dd8c55cfa6a325b010d926454388c5c10dfc8 100644 --- a/crates/editor/src/movement.rs +++ b/crates/editor/src/movement.rs @@ -133,101 +133,111 @@ pub fn line_end( } pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { - find_boundary_reversed(map, point, |left, right| { - char_kind(left) != char_kind(right) && !right.is_whitespace() + find_preceding_boundary(map, point, |left, right| { + (char_kind(left) != char_kind(right) && !right.is_whitespace()) || left == '\n' }) } pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { - find_boundary_reversed(map, point, |left, right| { - (char_kind(left) != char_kind(right) - || left == '_' && right != '_' - || left.is_lowercase() && right.is_uppercase()) - && !right.is_whitespace() + find_preceding_boundary(map, point, |left, right| { + let is_word_start = char_kind(left) != char_kind(right) && !right.is_whitespace(); + let is_subword_start = + left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase(); + is_word_start || is_subword_start || left == '\n' }) } pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { find_boundary(map, point, |left, right| { - char_kind(left) != char_kind(right) && !left.is_whitespace() + (char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n' }) } pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint { find_boundary(map, point, |left, right| { - (char_kind(left) != char_kind(right) - || left != '_' && right == '_' - || left.is_lowercase() && right.is_uppercase()) - && !left.is_whitespace() + let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace(); + let is_subword_end = + left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase(); + is_word_end || is_subword_end || right == '\n' }) } -pub fn find_boundary_reversed( +/// Scans for a boundary from the start of each line preceding the given end point until a boundary +/// is found, indicated by the given predicate returning true. The predicate is called with the +/// character to the left and right of the candidate boundary location, and will be called with `\n` +/// characters indicating the start or end of a line. If the predicate returns true multiple times +/// on a line, the *rightmost* boundary is returned. +pub fn find_preceding_boundary( map: &DisplaySnapshot, - mut start: DisplayPoint, - is_boundary: impl Fn(char, char) -> bool, + end: DisplayPoint, + mut is_boundary: impl FnMut(char, char) -> bool, ) -> DisplayPoint { - let mut line_start = 0; - if start.row() > 0 { - if let Some(indent) = map.soft_wrap_indent(start.row() - 1) { - line_start = indent; + let mut point = end; + loop { + *point.column_mut() = 0; + if point.row() > 0 { + if let Some(indent) = map.soft_wrap_indent(point.row() - 1) { + *point.column_mut() = indent; + } } - } - if start.column() == line_start { - if start.row() == 0 { - return DisplayPoint::new(0, 0); - } else { - let row = start.row() - 1; - start = map.clip_point(DisplayPoint::new(row, map.line_len(row)), Bias::Left); - } - } + let mut boundary = None; + let mut prev_ch = if point.is_zero() { None } else { Some('\n') }; + for ch in map.chars_at(point) { + if point >= end { + break; + } - let mut boundary = DisplayPoint::new(start.row(), 0); - let mut column = 0; - let mut prev_ch = None; - for ch in map.chars_at(DisplayPoint::new(start.row(), 0)) { - if column >= start.column() { - break; - } + if let Some(prev_ch) = prev_ch { + if is_boundary(prev_ch, ch) { + boundary = Some(point); + } + } - if let Some(prev_ch) = prev_ch { - if is_boundary(prev_ch, ch) { - *boundary.column_mut() = column; + if ch == '\n' { + break; } + + prev_ch = Some(ch); + *point.column_mut() += ch.len_utf8() as u32; } - prev_ch = Some(ch); - column += ch.len_utf8() as u32; + if let Some(boundary) = boundary { + return boundary; + } else if point.row() == 0 { + return DisplayPoint::zero(); + } else { + *point.row_mut() -= 1; + } } - boundary } +/// Scans for a boundary following the given start point until a boundary is found, indicated by the +/// given predicate returning true. The predicate is called with the character to the left and right +/// of the candidate boundary location, and will be called with `\n` characters indicating the start +/// or end of a line. pub fn find_boundary( map: &DisplaySnapshot, - mut start: DisplayPoint, - is_boundary: impl Fn(char, char) -> bool, + mut point: DisplayPoint, + mut is_boundary: impl FnMut(char, char) -> bool, ) -> DisplayPoint { let mut prev_ch = None; - for ch in map.chars_at(start) { + for ch in map.chars_at(point) { if let Some(prev_ch) = prev_ch { - if ch == '\n' { - break; - } if is_boundary(prev_ch, ch) { break; } } if ch == '\n' { - *start.row_mut() += 1; - *start.column_mut() = 0; + *point.row_mut() += 1; + *point.column_mut() = 0; } else { - *start.column_mut() += ch.len_utf8() as u32; + *point.column_mut() += ch.len_utf8() as u32; } prev_ch = Some(ch); } - map.clip_point(start, Bias::Right) + map.clip_point(point, Bias::Right) } pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool { @@ -271,7 +281,9 @@ mod tests { } assert("\n| |lorem", cx); + assert("|\n| lorem", cx); assert(" |lorem|", cx); + assert("| |lorem", cx); assert(" |lor|em", cx); assert("\nlorem\n| |ipsum", cx); assert("\n\n|\n|", cx); @@ -317,6 +329,37 @@ mod tests { assert(" ab|——|cd", cx); } + #[gpui::test] + fn test_find_preceding_boundary(cx: &mut gpui::MutableAppContext) { + fn assert( + marked_text: &str, + cx: &mut gpui::MutableAppContext, + is_boundary: impl FnMut(char, char) -> bool, + ) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + find_preceding_boundary(&snapshot, display_points[1], is_boundary), + display_points[0] + ); + } + + assert("abc|def\ngh\nij|k", cx, |left, right| { + left == 'c' && right == 'd' + }); + assert("abcdef\n|gh\nij|k", cx, |left, right| { + left == '\n' && right == 'g' + }); + let mut line_count = 0; + assert("abcdef\n|gh\nij|k", cx, |left, _| { + if left == '\n' { + line_count += 1; + line_count == 2 + } else { + false + } + }); + } + #[gpui::test] fn test_next_word_end(cx: &mut gpui::MutableAppContext) { fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) { @@ -372,6 +415,37 @@ mod tests { assert(" ab|——|cd", cx); } + #[gpui::test] + fn test_find_boundary(cx: &mut gpui::MutableAppContext) { + fn assert( + marked_text: &str, + cx: &mut gpui::MutableAppContext, + is_boundary: impl FnMut(char, char) -> bool, + ) { + let (snapshot, display_points) = marked_snapshot(marked_text, cx); + assert_eq!( + find_boundary(&snapshot, display_points[0], is_boundary), + display_points[1] + ); + } + + assert("abc|def\ngh\nij|k", cx, |left, right| { + left == 'j' && right == 'k' + }); + assert("ab|cdef\ngh\n|ijk", cx, |left, right| { + left == '\n' && right == 'i' + }); + let mut line_count = 0; + assert("abc|def\ngh\n|ijk", cx, |left, _| { + if left == '\n' { + line_count += 1; + line_count == 2 + } else { + false + } + }); + } + #[gpui::test] fn test_surrounding_word(cx: &mut gpui::MutableAppContext) { fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) {