vim/helix: Use grapheme count on replace (#51776)

Finn Eitreim and dino created

Update vim and helix replace to repeat based on grapheme count instead of
byte length or Unicode scalar count.

This fixes cases where a single visible character is made up of multiple
bytes or scalars, such as decomposed characters like `e\u{301}` and emoji.

Closes #51772 

Release Notes:

- Fixed vim/helix's replace action to repeat the replacement once per
grapheme instead of once per byte

---------

Co-authored-by: dino <dinojoaocosta@gmail.com>

Change summary

Cargo.lock                              |  1 
crates/multi_buffer/Cargo.toml          |  1 
crates/multi_buffer/src/multi_buffer.rs | 11 +++++
crates/vim/src/helix.rs                 | 56 ++++++++++++++------------
crates/vim/src/visual.rs                | 22 ++++++++++
5 files changed, 65 insertions(+), 26 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -10752,6 +10752,7 @@ dependencies = [
  "theme",
  "tracing",
  "tree-sitter",
+ "unicode-segmentation",
  "util",
  "zlog",
  "ztracing",

crates/multi_buffer/Cargo.toml 🔗

@@ -45,6 +45,7 @@ tree-sitter.workspace = true
 ztracing.workspace = true
 tracing.workspace = true
 util.workspace = true
+unicode-segmentation.workspace = true
 
 [dev-dependencies]
 buffer_diff = { workspace = true, features = ["test-support"] }

crates/multi_buffer/src/multi_buffer.rs 🔗

@@ -55,6 +55,7 @@ use text::{
     subscription::{Subscription, Topic},
 };
 use theme::SyntaxTheme;
+use unicode_segmentation::UnicodeSegmentation;
 use util::post_inc;
 use ztracing::instrument;
 
@@ -7243,6 +7244,16 @@ impl MultiBufferSnapshot {
         }
         excerpt_edits
     }
+
+    /// Returns the number of graphemes in `range`.
+    ///
+    /// This counts user-visible characters like `e\u{301}` as one.
+    pub fn grapheme_count_for_range(&self, range: &Range<MultiBufferOffset>) -> usize {
+        self.text_for_range(range.clone())
+            .collect::<String>()
+            .graphemes(true)
+            .count()
+    }
 }
 
 #[cfg(any(test, feature = "test-support"))]

crates/vim/src/helix.rs 🔗

@@ -711,38 +711,28 @@ impl Vim {
                 let display_map = editor.display_snapshot(cx);
                 let selections = editor.selections.all_display(&display_map);
 
-                // Store selection info for positioning after edit
-                let selection_info: Vec<_> = selections
-                    .iter()
-                    .map(|selection| {
-                        let range = selection.range();
-                        let start_offset = range.start.to_offset(&display_map, Bias::Left);
-                        let end_offset = range.end.to_offset(&display_map, Bias::Left);
-                        let was_empty = range.is_empty();
-                        let was_reversed = selection.reversed;
-                        (
-                            display_map.buffer_snapshot().anchor_before(start_offset),
-                            end_offset - start_offset,
-                            was_empty,
-                            was_reversed,
-                        )
-                    })
-                    .collect();
-
                 let mut edits = Vec::new();
+                let mut selection_info = Vec::new();
                 for selection in &selections {
                     let mut range = selection.range();
+                    let was_empty = range.is_empty();
+                    let was_reversed = selection.reversed;
 
-                    // For empty selections, extend to replace one character
-                    if range.is_empty() {
+                    if was_empty {
                         range.end = movement::saturating_right(&display_map, range.start);
                     }
 
                     let byte_range = range.start.to_offset(&display_map, Bias::Left)
                         ..range.end.to_offset(&display_map, Bias::Left);
 
+                    let snapshot = display_map.buffer_snapshot();
+                    let grapheme_count = snapshot.grapheme_count_for_range(&byte_range);
+                    let anchor = snapshot.anchor_before(byte_range.start);
+
+                    selection_info.push((anchor, grapheme_count, was_empty, was_reversed));
+
                     if !byte_range.is_empty() {
-                        let replacement_text = text.repeat(byte_range.end - byte_range.start);
+                        let replacement_text = text.repeat(grapheme_count);
                         edits.push((byte_range, replacement_text));
                     }
                 }
@@ -753,14 +743,12 @@ impl Vim {
                 let snapshot = editor.buffer().read(cx).snapshot(cx);
                 let ranges: Vec<_> = selection_info
                     .into_iter()
-                    .map(|(start_anchor, original_len, was_empty, was_reversed)| {
+                    .map(|(start_anchor, grapheme_count, was_empty, was_reversed)| {
                         let start_point = start_anchor.to_point(&snapshot);
                         if was_empty {
-                            // For cursor-only, collapse to start
                             start_point..start_point
                         } else {
-                            // For selections, span the replaced text
-                            let replacement_len = text.len() * original_len;
+                            let replacement_len = text.len() * grapheme_count;
                             let end_offset = start_anchor.to_offset(&snapshot) + replacement_len;
                             let end_point = snapshot.offset_to_point(end_offset);
                             if was_reversed {
@@ -2375,4 +2363,22 @@ mod test {
             Mode::Insert,
         );
     }
+
+    #[gpui::test]
+    async fn test_helix_replace_uses_graphemes(cx: &mut gpui::TestAppContext) {
+        let mut cx = VimTestContext::new(cx, true).await;
+        cx.enable_helix();
+
+        cx.set_state("«Hällöˇ» Wörld", Mode::HelixNormal);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("«11111ˇ» Wörld", Mode::HelixNormal);
+
+        cx.set_state("«e\u{301}ˇ»", Mode::HelixNormal);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("«1ˇ»", Mode::HelixNormal);
+
+        cx.set_state("«🙂ˇ»", Mode::HelixNormal);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("«1ˇ»", Mode::HelixNormal);
+    }
 }

crates/vim/src/visual.rs 🔗

@@ -788,7 +788,10 @@ impl Vim {
                     {
                         let range = row_range.start.to_offset(&display_map, Bias::Right)
                             ..row_range.end.to_offset(&display_map, Bias::Right);
-                        let text = text.repeat(range.end - range.start);
+                        let grapheme_count = display_map
+                            .buffer_snapshot()
+                            .grapheme_count_for_range(&range);
+                        let text = text.repeat(grapheme_count);
                         edits.push((range, text));
                     }
                 }
@@ -2017,4 +2020,21 @@ mod test {
         // would depend on the key bindings configured, but the actions
         // are now available for use
     }
+
+    #[gpui::test]
+    async fn test_visual_replace_uses_graphemes(cx: &mut gpui::TestAppContext) {
+        let mut cx = VimTestContext::new(cx, true).await;
+
+        cx.set_state("«Hällöˇ» Wörld", Mode::Visual);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("ˇ11111 Wörld", Mode::Normal);
+
+        cx.set_state("«e\u{301}ˇ»", Mode::Visual);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("ˇ1", Mode::Normal);
+
+        cx.set_state("«🙂ˇ»", Mode::Visual);
+        cx.simulate_keystrokes("r 1");
+        cx.assert_state("ˇ1", Mode::Normal);
+    }
 }