Make boundary-finding methods wrap across newlines

Nathan Sobo created

This requires word and subword methods to explicitly acknowledge that they want to stop at newlines, which I think actually increases clarity. It makes the boundary finding method more general and useful for external callers such as the forthcoming vim crate.

Change summary

crates/editor/src/movement.rs | 176 ++++++++++++++++++++++++++----------
1 file changed, 125 insertions(+), 51 deletions(-)

Detailed changes

crates/editor/src/movement.rs 🔗

@@ -133,101 +133,111 @@ pub fn line_end(
 }
 
 pub fn previous_word_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
-    find_boundary_reversed(map, point, |left, right| {
-        char_kind(left) != char_kind(right) && !right.is_whitespace()
+    find_preceding_boundary(map, point, |left, right| {
+        (char_kind(left) != char_kind(right) && !right.is_whitespace()) || left == '\n'
     })
 }
 
 pub fn previous_subword_start(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
-    find_boundary_reversed(map, point, |left, right| {
-        (char_kind(left) != char_kind(right)
-            || left == '_' && right != '_'
-            || left.is_lowercase() && right.is_uppercase())
-            && !right.is_whitespace()
+    find_preceding_boundary(map, point, |left, right| {
+        let is_word_start = char_kind(left) != char_kind(right) && !right.is_whitespace();
+        let is_subword_start =
+            left == '_' && right != '_' || left.is_lowercase() && right.is_uppercase();
+        is_word_start || is_subword_start || left == '\n'
     })
 }
 
 pub fn next_word_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     find_boundary(map, point, |left, right| {
-        char_kind(left) != char_kind(right) && !left.is_whitespace()
+        (char_kind(left) != char_kind(right) && !left.is_whitespace()) || right == '\n'
     })
 }
 
 pub fn next_subword_end(map: &DisplaySnapshot, point: DisplayPoint) -> DisplayPoint {
     find_boundary(map, point, |left, right| {
-        (char_kind(left) != char_kind(right)
-            || left != '_' && right == '_'
-            || left.is_lowercase() && right.is_uppercase())
-            && !left.is_whitespace()
+        let is_word_end = (char_kind(left) != char_kind(right)) && !left.is_whitespace();
+        let is_subword_end =
+            left != '_' && right == '_' || left.is_lowercase() && right.is_uppercase();
+        is_word_end || is_subword_end || right == '\n'
     })
 }
 
-pub fn find_boundary_reversed(
+/// Scans for a boundary from the start of each line preceding the given end point until a boundary
+/// is found, indicated by the given predicate returning true. The predicate is called with the
+/// character to the left and right of the candidate boundary location, and will be called with `\n`
+/// characters indicating the start or end of a line. If the predicate returns true multiple times
+/// on a line, the *rightmost* boundary is returned.
+pub fn find_preceding_boundary(
     map: &DisplaySnapshot,
-    mut start: DisplayPoint,
-    is_boundary: impl Fn(char, char) -> bool,
+    end: DisplayPoint,
+    mut is_boundary: impl FnMut(char, char) -> bool,
 ) -> DisplayPoint {
-    let mut line_start = 0;
-    if start.row() > 0 {
-        if let Some(indent) = map.soft_wrap_indent(start.row() - 1) {
-            line_start = indent;
+    let mut point = end;
+    loop {
+        *point.column_mut() = 0;
+        if point.row() > 0 {
+            if let Some(indent) = map.soft_wrap_indent(point.row() - 1) {
+                *point.column_mut() = indent;
+            }
         }
-    }
 
-    if start.column() == line_start {
-        if start.row() == 0 {
-            return DisplayPoint::new(0, 0);
-        } else {
-            let row = start.row() - 1;
-            start = map.clip_point(DisplayPoint::new(row, map.line_len(row)), Bias::Left);
-        }
-    }
+        let mut boundary = None;
+        let mut prev_ch = if point.is_zero() { None } else { Some('\n') };
+        for ch in map.chars_at(point) {
+            if point >= end {
+                break;
+            }
 
-    let mut boundary = DisplayPoint::new(start.row(), 0);
-    let mut column = 0;
-    let mut prev_ch = None;
-    for ch in map.chars_at(DisplayPoint::new(start.row(), 0)) {
-        if column >= start.column() {
-            break;
-        }
+            if let Some(prev_ch) = prev_ch {
+                if is_boundary(prev_ch, ch) {
+                    boundary = Some(point);
+                }
+            }
 
-        if let Some(prev_ch) = prev_ch {
-            if is_boundary(prev_ch, ch) {
-                *boundary.column_mut() = column;
+            if ch == '\n' {
+                break;
             }
+
+            prev_ch = Some(ch);
+            *point.column_mut() += ch.len_utf8() as u32;
         }
 
-        prev_ch = Some(ch);
-        column += ch.len_utf8() as u32;
+        if let Some(boundary) = boundary {
+            return boundary;
+        } else if point.row() == 0 {
+            return DisplayPoint::zero();
+        } else {
+            *point.row_mut() -= 1;
+        }
     }
-    boundary
 }
 
+/// Scans for a boundary following the given start point until a boundary is found, indicated by the
+/// given predicate returning true. The predicate is called with the character to the left and right
+/// of the candidate boundary location, and will be called with `\n` characters indicating the start
+/// or end of a line.
 pub fn find_boundary(
     map: &DisplaySnapshot,
-    mut start: DisplayPoint,
-    is_boundary: impl Fn(char, char) -> bool,
+    mut point: DisplayPoint,
+    mut is_boundary: impl FnMut(char, char) -> bool,
 ) -> DisplayPoint {
     let mut prev_ch = None;
-    for ch in map.chars_at(start) {
+    for ch in map.chars_at(point) {
         if let Some(prev_ch) = prev_ch {
-            if ch == '\n' {
-                break;
-            }
             if is_boundary(prev_ch, ch) {
                 break;
             }
         }
 
         if ch == '\n' {
-            *start.row_mut() += 1;
-            *start.column_mut() = 0;
+            *point.row_mut() += 1;
+            *point.column_mut() = 0;
         } else {
-            *start.column_mut() += ch.len_utf8() as u32;
+            *point.column_mut() += ch.len_utf8() as u32;
         }
         prev_ch = Some(ch);
     }
-    map.clip_point(start, Bias::Right)
+    map.clip_point(point, Bias::Right)
 }
 
 pub fn is_inside_word(map: &DisplaySnapshot, point: DisplayPoint) -> bool {
@@ -271,7 +281,9 @@ mod tests {
         }
 
         assert("\n|   |lorem", cx);
+        assert("|\n|   lorem", cx);
         assert("    |lorem|", cx);
+        assert("|    |lorem", cx);
         assert("    |lor|em", cx);
         assert("\nlorem\n|   |ipsum", cx);
         assert("\n\n|\n|", cx);
@@ -317,6 +329,37 @@ mod tests {
         assert(" ab|——|cd", cx);
     }
 
+    #[gpui::test]
+    fn test_find_preceding_boundary(cx: &mut gpui::MutableAppContext) {
+        fn assert(
+            marked_text: &str,
+            cx: &mut gpui::MutableAppContext,
+            is_boundary: impl FnMut(char, char) -> bool,
+        ) {
+            let (snapshot, display_points) = marked_snapshot(marked_text, cx);
+            assert_eq!(
+                find_preceding_boundary(&snapshot, display_points[1], is_boundary),
+                display_points[0]
+            );
+        }
+
+        assert("abc|def\ngh\nij|k", cx, |left, right| {
+            left == 'c' && right == 'd'
+        });
+        assert("abcdef\n|gh\nij|k", cx, |left, right| {
+            left == '\n' && right == 'g'
+        });
+        let mut line_count = 0;
+        assert("abcdef\n|gh\nij|k", cx, |left, _| {
+            if left == '\n' {
+                line_count += 1;
+                line_count == 2
+            } else {
+                false
+            }
+        });
+    }
+
     #[gpui::test]
     fn test_next_word_end(cx: &mut gpui::MutableAppContext) {
         fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) {
@@ -372,6 +415,37 @@ mod tests {
         assert(" ab|——|cd", cx);
     }
 
+    #[gpui::test]
+    fn test_find_boundary(cx: &mut gpui::MutableAppContext) {
+        fn assert(
+            marked_text: &str,
+            cx: &mut gpui::MutableAppContext,
+            is_boundary: impl FnMut(char, char) -> bool,
+        ) {
+            let (snapshot, display_points) = marked_snapshot(marked_text, cx);
+            assert_eq!(
+                find_boundary(&snapshot, display_points[0], is_boundary),
+                display_points[1]
+            );
+        }
+
+        assert("abc|def\ngh\nij|k", cx, |left, right| {
+            left == 'j' && right == 'k'
+        });
+        assert("ab|cdef\ngh\n|ijk", cx, |left, right| {
+            left == '\n' && right == 'i'
+        });
+        let mut line_count = 0;
+        assert("abc|def\ngh\n|ijk", cx, |left, _| {
+            if left == '\n' {
+                line_count += 1;
+                line_count == 2
+            } else {
+                false
+            }
+        });
+    }
+
     #[gpui::test]
     fn test_surrounding_word(cx: &mut gpui::MutableAppContext) {
         fn assert(marked_text: &str, cx: &mut gpui::MutableAppContext) {