From 3684b5a42fad83266a9dcd87333f867c7d038ecf Mon Sep 17 00:00:00 2001 From: Finn Eitreim <48069764+feitreim@users.noreply.github.com> Date: Wed, 25 Mar 2026 19:19:55 -0400 Subject: [PATCH] vim/helix: Use grapheme count on replace (#51776) Update vim and helix replace to repeat based on grapheme count instead of byte length or Unicode scalar count. This fixes cases where a single visible character is made up of multiple bytes or scalars, such as decomposed characters like `e\u{301}` and emoji. Closes #51772 Release Notes: - Fixed vim/helix's replace action to take into consideration grapheme count --------- Co-authored-by: dino --- Cargo.lock | 1 + crates/multi_buffer/Cargo.toml | 1 + crates/multi_buffer/src/multi_buffer.rs | 11 +++++ crates/vim/src/helix.rs | 56 ++++++++++++++----------- crates/vim/src/visual.rs | 22 +++++++++- 5 files changed, 65 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3b1e7b48ad76de494fb13f10391957ccc604816..1d36cd7198bab5a0f529d71904ef28ae1c915d5e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10752,6 +10752,7 @@ dependencies = [ "theme", "tracing", "tree-sitter", + "unicode-segmentation", "util", "zlog", "ztracing", diff --git a/crates/multi_buffer/Cargo.toml b/crates/multi_buffer/Cargo.toml index 66c23101ab26ac6be58d482c752f366522bb9305..a06599999c8147dc464128ad8ab5e6bf5ad5755b 100644 --- a/crates/multi_buffer/Cargo.toml +++ b/crates/multi_buffer/Cargo.toml @@ -45,6 +45,7 @@ tree-sitter.workspace = true ztracing.workspace = true tracing.workspace = true util.workspace = true +unicode-segmentation.workspace = true [dev-dependencies] buffer_diff = { workspace = true, features = ["test-support"] } diff --git a/crates/multi_buffer/src/multi_buffer.rs b/crates/multi_buffer/src/multi_buffer.rs index a1739629c6af498eccb6ea90b8c866e9cc7946b4..7b5f0135f57269b7c787031120f6eb22b0caf549 100644 --- a/crates/multi_buffer/src/multi_buffer.rs +++ b/crates/multi_buffer/src/multi_buffer.rs @@ -55,6 +55,7 @@ use text::{ subscription::{Subscription, Topic}, }; use theme::SyntaxTheme; +use unicode_segmentation::UnicodeSegmentation; use util::post_inc; use ztracing::instrument; @@ -7243,6 +7244,16 @@ impl MultiBufferSnapshot { } excerpt_edits } + + /// Returns the number of graphemes in `range`. + /// + /// This counts user-visible characters like `e\u{301}` as one. + pub fn grapheme_count_for_range(&self, range: &Range) -> usize { + self.text_for_range(range.clone()) + .collect::() + .graphemes(true) + .count() + } } #[cfg(any(test, feature = "test-support"))] diff --git a/crates/vim/src/helix.rs b/crates/vim/src/helix.rs index 0db3b5a3fe533f9e21503c6904ee0f62764003fb..56241275b5d8fa6de3645c6d00361b29dc49d259 100644 --- a/crates/vim/src/helix.rs +++ b/crates/vim/src/helix.rs @@ -711,38 +711,28 @@ impl Vim { let display_map = editor.display_snapshot(cx); let selections = editor.selections.all_display(&display_map); - // Store selection info for positioning after edit - let selection_info: Vec<_> = selections - .iter() - .map(|selection| { - let range = selection.range(); - let start_offset = range.start.to_offset(&display_map, Bias::Left); - let end_offset = range.end.to_offset(&display_map, Bias::Left); - let was_empty = range.is_empty(); - let was_reversed = selection.reversed; - ( - display_map.buffer_snapshot().anchor_before(start_offset), - end_offset - start_offset, - was_empty, - was_reversed, - ) - }) - .collect(); - let mut edits = Vec::new(); + let mut selection_info = Vec::new(); for selection in &selections { let mut range = selection.range(); + let was_empty = range.is_empty(); + let was_reversed = selection.reversed; - // For empty selections, extend to replace one character - if range.is_empty() { + if was_empty { range.end = movement::saturating_right(&display_map, range.start); } let byte_range = range.start.to_offset(&display_map, Bias::Left) ..range.end.to_offset(&display_map, Bias::Left); + let snapshot = display_map.buffer_snapshot(); + let grapheme_count = snapshot.grapheme_count_for_range(&byte_range); + let anchor = snapshot.anchor_before(byte_range.start); + + selection_info.push((anchor, grapheme_count, was_empty, was_reversed)); + if !byte_range.is_empty() { - let replacement_text = text.repeat(byte_range.end - byte_range.start); + let replacement_text = text.repeat(grapheme_count); edits.push((byte_range, replacement_text)); } } @@ -753,14 +743,12 @@ impl Vim { let snapshot = editor.buffer().read(cx).snapshot(cx); let ranges: Vec<_> = selection_info .into_iter() - .map(|(start_anchor, original_len, was_empty, was_reversed)| { + .map(|(start_anchor, grapheme_count, was_empty, was_reversed)| { let start_point = start_anchor.to_point(&snapshot); if was_empty { - // For cursor-only, collapse to start start_point..start_point } else { - // For selections, span the replaced text - let replacement_len = text.len() * original_len; + let replacement_len = text.len() * grapheme_count; let end_offset = start_anchor.to_offset(&snapshot) + replacement_len; let end_point = snapshot.offset_to_point(end_offset); if was_reversed { @@ -2375,4 +2363,22 @@ mod test { Mode::Insert, ); } + + #[gpui::test] + async fn test_helix_replace_uses_graphemes(cx: &mut gpui::TestAppContext) { + let mut cx = VimTestContext::new(cx, true).await; + cx.enable_helix(); + + cx.set_state("«Hällöˇ» Wörld", Mode::HelixNormal); + cx.simulate_keystrokes("r 1"); + cx.assert_state("«11111ˇ» Wörld", Mode::HelixNormal); + + cx.set_state("«e\u{301}ˇ»", Mode::HelixNormal); + cx.simulate_keystrokes("r 1"); + cx.assert_state("«1ˇ»", Mode::HelixNormal); + + cx.set_state("«🙂ˇ»", Mode::HelixNormal); + cx.simulate_keystrokes("r 1"); + cx.assert_state("«1ˇ»", Mode::HelixNormal); + } } diff --git a/crates/vim/src/visual.rs b/crates/vim/src/visual.rs index 502aa756b67889b1171464fde11be08ff0ccd508..bc53167b158d26717b1aa629b764a78dfe4c0ddc 100644 --- a/crates/vim/src/visual.rs +++ b/crates/vim/src/visual.rs @@ -788,7 +788,10 @@ impl Vim { { let range = row_range.start.to_offset(&display_map, Bias::Right) ..row_range.end.to_offset(&display_map, Bias::Right); - let text = text.repeat(range.end - range.start); + let grapheme_count = display_map + .buffer_snapshot() + .grapheme_count_for_range(&range); + let text = text.repeat(grapheme_count); edits.push((range, text)); } } @@ -2017,4 +2020,21 @@ mod test { // would depend on the key bindings configured, but the actions // are now available for use } + + #[gpui::test] + async fn test_visual_replace_uses_graphemes(cx: &mut gpui::TestAppContext) { + let mut cx = VimTestContext::new(cx, true).await; + + cx.set_state("«Hällöˇ» Wörld", Mode::Visual); + cx.simulate_keystrokes("r 1"); + cx.assert_state("ˇ11111 Wörld", Mode::Normal); + + cx.set_state("«e\u{301}ˇ»", Mode::Visual); + cx.simulate_keystrokes("r 1"); + cx.assert_state("ˇ1", Mode::Normal); + + cx.set_state("«🙂ˇ»", Mode::Visual); + cx.simulate_keystrokes("r 1"); + cx.assert_state("ˇ1", Mode::Normal); + } }