editor: Fix multi-line cursor expansion when multi-byte characters are involved (#51780)

Finn Eitreim and Kirill Bulatov created

Closes #51740 

The multi-line cursor expansion operates off of byte offsets, instead of
character offsets, so multi-byte characters like the umlaut cause the
multi-line cursors to be weirdly offset. To fix we just convert the
expansion logic to rely on utf16 characters instead of bytes.

before behavior:


https://github.com/user-attachments/assets/320e24e9-0fdd-4d16-a9e8-ca17c9e21ff2

after behavior: 


https://github.com/user-attachments/assets/c4f0334b-dffc-4530-91ee-577b4fab75dd

+ test to verify functionality.

Before you mark this PR as ready for review, make sure that you have:
- [x] Added a solid test coverage and/or screenshots from doing manual
testing
- [x] Done a self-review taking into account security and performance
aspects
- [x] Aligned any UI changes with the [UI
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)

Release Notes:

- editor: fixed multi-line cursor expansion dealing with multi-byte
characters.

---------

Co-authored-by: Kirill Bulatov <mail4score@gmail.com>

Change summary

crates/editor/src/editor.rs                |  15 +
crates/editor/src/editor_tests.rs          |  22 +++
crates/editor/src/selections_collection.rs |  25 +--
crates/multi_buffer/src/multi_buffer.rs    |   5 
crates/vim/src/helix/duplicate.rs          | 151 ++++++++++++++++++-----
5 files changed, 163 insertions(+), 55 deletions(-)

Detailed changes

crates/editor/src/editor.rs 🔗

@@ -15687,11 +15687,10 @@ impl Editor {
             display_map.max_point().row()
         };
 
-        // When `skip_soft_wrap` is true, we use buffer columns instead of pixel
+        // When `skip_soft_wrap` is true, we use UTF-16 columns instead of pixel
         // positions to place new selections, so we need to keep track of the
         // column range of the oldest selection in each group, because
         // intermediate selections may have been clamped to shorter lines.
-        // selections may have been clamped to shorter lines.
         let mut goal_columns_by_selection_id = if skip_soft_wrap {
             let mut map = HashMap::default();
             for group in state.groups.iter() {
@@ -15699,8 +15698,10 @@ impl Editor {
                     if let Some(oldest_selection) =
                         columnar_selections.iter().find(|s| s.id == *oldest_id)
                     {
-                        let start_col = oldest_selection.start.column;
-                        let end_col = oldest_selection.end.column;
+                        let snapshot = display_map.buffer_snapshot();
+                        let start_col =
+                            snapshot.point_to_point_utf16(oldest_selection.start).column;
+                        let end_col = snapshot.point_to_point_utf16(oldest_selection.end).column;
                         let goal_columns = start_col.min(end_col)..start_col.max(end_col);
                         for id in &group.stack {
                             map.insert(*id, goal_columns.clone());
@@ -15741,8 +15742,10 @@ impl Editor {
                         let goal_columns = goal_columns_by_selection_id
                             .remove(&selection.id)
                             .unwrap_or_else(|| {
-                                let start_col = selection.start.column;
-                                let end_col = selection.end.column;
+                                let snapshot = display_map.buffer_snapshot();
+                                let start_col =
+                                    snapshot.point_to_point_utf16(selection.start).column;
+                                let end_col = snapshot.point_to_point_utf16(selection.end).column;
                                 start_col.min(end_col)..start_col.max(end_col)
                             });
                         self.selections.find_next_columnar_selection_by_buffer_row(

crates/editor/src/editor_tests.rs 🔗

@@ -9546,6 +9546,28 @@ async fn test_add_selection_above_below_multi_cursor_existing_state(cx: &mut Tes
     ));
 }
 
+#[gpui::test]
+async fn test_add_selection_above_below_multibyte(cx: &mut TestAppContext) {
+    init_test(cx, |_| {});
+    let mut cx = EditorTestContext::new(cx).await;
+
+    // Cursor after "Häl" (byte column 4, char column 3) should align to
+    // char column 3 on the ASCII line below, not byte column 4.
+    cx.set_state(indoc!(
+        r#"Hälˇlö
+           Hallo"#
+    ));
+
+    cx.update_editor(|editor, window, cx| {
+        editor.add_selection_below(&Default::default(), window, cx);
+    });
+
+    cx.assert_editor_state(indoc!(
+        r#"Hälˇlö
+           Halˇlo"#
+    ));
+}
+
 #[gpui::test]
 async fn test_select_next(cx: &mut TestAppContext) {
     init_test(cx, |_| {});

crates/editor/src/selections_collection.rs 🔗

@@ -7,7 +7,7 @@ use std::{
 use collections::HashMap;
 use gpui::Pixels;
 use itertools::Itertools as _;
-use language::{Bias, Point, Selection, SelectionGoal};
+use language::{Bias, Point, PointUtf16, Selection, SelectionGoal};
 use multi_buffer::{MultiBufferDimension, MultiBufferOffset};
 use util::post_inc;
 
@@ -408,11 +408,11 @@ impl SelectionsCollection {
     }
 
     /// Attempts to build a selection in the provided buffer row using the
-    /// same buffer column range as specified.
+    /// same UTF-16 column range as specified.
     /// Returns `None` if the range is not empty but it starts past the line's
     /// length, meaning that the line isn't long enough to be contained within
     /// part of the provided range.
-    pub fn build_columnar_selection_from_buffer_columns(
+    fn build_columnar_selection_from_utf16_columns(
         &mut self,
         display_map: &DisplaySnapshot,
         buffer_row: u32,
@@ -420,23 +420,22 @@ impl SelectionsCollection {
         reversed: bool,
         text_layout_details: &TextLayoutDetails,
     ) -> Option<Selection<Point>> {
+        let snapshot = display_map.buffer_snapshot();
         let is_empty = positions.start == positions.end;
-        let line_len = display_map
-            .buffer_snapshot()
-            .line_len(multi_buffer::MultiBufferRow(buffer_row));
+        let line_len_utf16 = snapshot.line_len_utf16(multi_buffer::MultiBufferRow(buffer_row));
 
         let (start, end) = if is_empty {
-            let column = std::cmp::min(positions.start, line_len);
-            let point = Point::new(buffer_row, column);
+            let column = std::cmp::min(positions.start, line_len_utf16);
+            let point = snapshot.point_utf16_to_point(PointUtf16::new(buffer_row, column));
             (point, point)
         } else {
-            if positions.start >= line_len {
+            if positions.start >= line_len_utf16 {
                 return None;
             }
 
-            let start = Point::new(buffer_row, positions.start);
-            let end_column = std::cmp::min(positions.end, line_len);
-            let end = Point::new(buffer_row, end_column);
+            let start = snapshot.point_utf16_to_point(PointUtf16::new(buffer_row, positions.start));
+            let end_column = std::cmp::min(positions.end, line_len_utf16);
+            let end = snapshot.point_utf16_to_point(PointUtf16::new(buffer_row, end_column));
             (start, end)
         };
 
@@ -510,7 +509,7 @@ impl SelectionsCollection {
             row = new_row.row();
             let buffer_row = new_row.to_point(display_map).row;
 
-            if let Some(selection) = self.build_columnar_selection_from_buffer_columns(
+            if let Some(selection) = self.build_columnar_selection_from_utf16_columns(
                 display_map,
                 buffer_row,
                 goal_columns,

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -5188,6 +5188,11 @@ impl MultiBufferSnapshot {
         }
     }
 
+    pub fn line_len_utf16(&self, row: MultiBufferRow) -> u32 {
+        self.clip_point_utf16(Unclipped(PointUtf16::new(row.0, u32::MAX)), Bias::Left)
+            .column
+    }
+
     pub fn buffer_line_for_row(
         &self,
         row: MultiBufferRow,

crates/vim/src/helix/duplicate.rs 🔗

@@ -2,11 +2,19 @@ use std::ops::Range;
 
 use editor::{DisplayPoint, MultiBufferOffset, display_map::DisplaySnapshot};
 use gpui::Context;
+use language::PointUtf16;
+use multi_buffer::MultiBufferRow;
 use text::Bias;
 use ui::Window;
 
 use crate::Vim;
 
+#[derive(Copy, Clone)]
+enum Direction {
+    Above,
+    Below,
+}
+
 impl Vim {
     /// Creates a duplicate of every selection below it in the first place that has both its start
     /// and end
@@ -16,14 +24,7 @@ impl Vim {
         window: &mut Window,
         cx: &mut Context<Self>,
     ) {
-        self.duplicate_selections(
-            times,
-            window,
-            cx,
-            &|prev_point| *prev_point.row_mut() += 1,
-            &|prev_range, map| prev_range.end.row() >= map.max_point().row(),
-            false,
-        );
+        self.duplicate_selections(times, window, cx, Direction::Below);
     }
 
     /// Creates a duplicate of every selection above it in the first place that has both its start
@@ -34,14 +35,7 @@ impl Vim {
         window: &mut Window,
         cx: &mut Context<Self>,
     ) {
-        self.duplicate_selections(
-            times,
-            window,
-            cx,
-            &|prev_point| *prev_point.row_mut() = prev_point.row().0.saturating_sub(1),
-            &|prev_range, _| prev_range.start.row() == DisplayPoint::zero().row(),
-            true,
-        );
+        self.duplicate_selections(times, window, cx, Direction::Above);
     }
 
     fn duplicate_selections(
@@ -49,9 +43,7 @@ impl Vim {
         times: Option<usize>,
         window: &mut Window,
         cx: &mut Context<Self>,
-        advance_search: &dyn Fn(&mut DisplayPoint),
-        end_search: &dyn Fn(&Range<DisplayPoint>, &DisplaySnapshot) -> bool,
-        above: bool,
+        direction: Direction,
     ) {
         let times = times.unwrap_or(1);
         self.update_editor(cx, |_, editor, cx| {
@@ -59,7 +51,7 @@ impl Vim {
             let map = editor.display_snapshot(cx);
             let mut original_selections = editor.selections.all_display(&map);
             // The order matters, because it is recorded when the selections are added.
-            if above {
+            if matches!(direction, Direction::Above) {
                 original_selections.reverse();
             }
 
@@ -68,12 +60,9 @@ impl Vim {
                 selections.push(display_point_range_to_offset_range(&origin, &map));
                 let mut last_origin = origin;
                 for _ in 1..=times {
-                    if let Some(duplicate) = find_next_valid_duplicate_space(
-                        last_origin.clone(),
-                        &map,
-                        &advance_search,
-                        &end_search,
-                    ) {
+                    if let Some(duplicate) =
+                        find_next_valid_duplicate_space(last_origin.clone(), &map, direction)
+                    {
                         selections.push(display_point_range_to_offset_range(&duplicate, &map));
                         last_origin = duplicate;
                     } else {
@@ -90,22 +79,62 @@ impl Vim {
 }
 
 fn find_next_valid_duplicate_space(
-    mut origin: Range<DisplayPoint>,
+    origin: Range<DisplayPoint>,
     map: &DisplaySnapshot,
-    advance_search: &impl Fn(&mut DisplayPoint),
-    end_search: &impl Fn(&Range<DisplayPoint>, &DisplaySnapshot) -> bool,
+    direction: Direction,
 ) -> Option<Range<DisplayPoint>> {
-    while !end_search(&origin, map) {
-        advance_search(&mut origin.start);
-        advance_search(&mut origin.end);
+    let buffer = map.buffer_snapshot();
+    let start_col_utf16 = buffer
+        .point_to_point_utf16(origin.start.to_point(map))
+        .column;
+    let end_col_utf16 = buffer.point_to_point_utf16(origin.end.to_point(map)).column;
 
-        if map.clip_point(origin.start, Bias::Left) == origin.start
-            && map.clip_point(origin.end, Bias::Right) == origin.end
+    let mut candidate = origin;
+    loop {
+        match direction {
+            Direction::Below => {
+                if candidate.end.row() >= map.max_point().row() {
+                    return None;
+                }
+                *candidate.start.row_mut() += 1;
+                *candidate.end.row_mut() += 1;
+            }
+            Direction::Above => {
+                if candidate.start.row() == DisplayPoint::zero().row() {
+                    return None;
+                }
+                *candidate.start.row_mut() = candidate.start.row().0.saturating_sub(1);
+                *candidate.end.row_mut() = candidate.end.row().0.saturating_sub(1);
+            }
+        }
+
+        let start_row = DisplayPoint::new(candidate.start.row(), 0)
+            .to_point(map)
+            .row;
+        let end_row = DisplayPoint::new(candidate.end.row(), 0).to_point(map).row;
+
+        if start_col_utf16 > buffer.line_len_utf16(MultiBufferRow(start_row))
+            || end_col_utf16 > buffer.line_len_utf16(MultiBufferRow(end_row))
         {
-            return Some(origin);
+            continue;
+        }
+
+        let start_col = buffer
+            .point_utf16_to_point(PointUtf16::new(start_row, start_col_utf16))
+            .column;
+        let end_col = buffer
+            .point_utf16_to_point(PointUtf16::new(end_row, end_col_utf16))
+            .column;
+
+        let candidate_start = DisplayPoint::new(candidate.start.row(), start_col);
+        let candidate_end = DisplayPoint::new(candidate.end.row(), end_col);
+
+        if map.clip_point(candidate_start, Bias::Left) == candidate_start
+            && map.clip_point(candidate_end, Bias::Right) == candidate_end
+        {
+            return Some(candidate_start..candidate_end);
         }
     }
-    None
 }
 
 fn display_point_range_to_offset_range(
@@ -231,4 +260,54 @@ mod tests {
             Mode::HelixNormal,
         );
     }
+
+    #[gpui::test]
+    async fn test_selection_duplication_multiline_multibyte(cx: &mut gpui::TestAppContext) {
+        let mut cx = VimTestContext::new(cx, true).await;
+        cx.enable_helix();
+
+        // Multiline selection on rows with multibyte chars should preserve
+        // the visual column on both start and end rows.
+        cx.set_state(
+            indoc! {"
+            «H䡻llo
+            Hëllo
+            Hallo"},
+            Mode::HelixNormal,
+        );
+
+        cx.simulate_keystrokes("C");
+
+        cx.assert_state(
+            indoc! {"
+            «H䡻llo
+            «H롻llo
+            Hallo"},
+            Mode::HelixNormal,
+        );
+    }
+
+    #[gpui::test]
+    async fn test_selection_duplication_multibyte(cx: &mut gpui::TestAppContext) {
+        let mut cx = VimTestContext::new(cx, true).await;
+        cx.enable_helix();
+
+        // Selection on a line with multibyte chars should duplicate to the
+        // same character column on the next line, not skip it.
+        cx.set_state(
+            indoc! {"
+            H«äˇ»llo
+            Hallo"},
+            Mode::HelixNormal,
+        );
+
+        cx.simulate_keystrokes("C");
+
+        cx.assert_state(
+            indoc! {"
+            H«äˇ»llo
+            H«aˇ»llo"},
+            Mode::HelixNormal,
+        );
+    }
 }