Expand tabs correctly with multibyte characters

Max Brunsfeld and Nathan Sobo created

Co-Authored-By: Nathan Sobo <nathan@zed.dev>

Change summary

zed/src/editor/buffer/mod.rs      |   4 
zed/src/editor/buffer/rope.rs     |  46 ---------
zed/src/editor/buffer_view.rs     |  14 +-
zed/src/editor/display_map/mod.rs | 150 +++++++++++++++++++++++---------
4 files changed, 118 insertions(+), 96 deletions(-)

Detailed changes

zed/src/editor/buffer/mod.rs šŸ”—

@@ -643,11 +643,11 @@ impl Buffer {
         self.visible_text.chunks_in_range(start..end)
     }
 
-    pub fn chars(&self) -> rope::Chars {
+    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
         self.chars_at(0)
     }
 
-    pub fn chars_at<T: ToOffset>(&self, position: T) -> rope::Chars {
+    pub fn chars_at<T: ToOffset>(&self, position: T) -> impl Iterator<Item = char> + '_ {
         let offset = position.to_offset(self);
         self.visible_text.chars_at(offset)
     }

zed/src/editor/buffer/rope.rs šŸ”—

@@ -2,7 +2,7 @@ use super::Point;
 use crate::sum_tree::{self, SeekBias, SumTree};
 use arrayvec::ArrayString;
 use smallvec::SmallVec;
-use std::{cmp, iter::Skip, ops::Range, str};
+use std::{cmp, ops::Range, str};
 
 #[cfg(test)]
 const CHUNK_BASE: usize = 6;
@@ -107,12 +107,12 @@ impl Rope {
         Cursor::new(self, offset)
     }
 
-    pub fn chars(&self) -> Chars {
+    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
         self.chars_at(0)
     }
 
-    pub fn chars_at(&self, start: usize) -> Chars {
-        Chars::new(self, start)
+    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
+        self.chunks_in_range(start..self.len()).flat_map(str::chars)
     }
 
     pub fn chunks<'a>(&'a self) -> ChunksIter<'a> {
@@ -446,44 +446,6 @@ impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
     }
 }
 
-pub struct Chars<'a> {
-    cursor: sum_tree::Cursor<'a, Chunk, usize, usize>,
-    chars: Skip<str::Chars<'a>>,
-}
-
-impl<'a> Chars<'a> {
-    pub fn new(rope: &'a Rope, start: usize) -> Self {
-        let mut cursor = rope.chunks.cursor::<usize, usize>();
-        cursor.seek(&start, SeekBias::Left, &());
-        let chars = if let Some(chunk) = cursor.item() {
-            let ix = start - cursor.start();
-            assert_char_boundary(&chunk.0, ix);
-            cursor.next();
-            chunk.0.chars().skip(ix)
-        } else {
-            "".chars().skip(0)
-        };
-
-        Self { cursor, chars }
-    }
-}
-
-impl<'a> Iterator for Chars<'a> {
-    type Item = char;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if let Some(ch) = self.chars.next() {
-            Some(ch)
-        } else if let Some(chunk) = self.cursor.item() {
-            self.chars = chunk.0.chars().skip(0);
-            self.cursor.next();
-            Some(self.chars.next().unwrap())
-        } else {
-            None
-        }
-    }
-}
-
 fn find_split_ix(text: &str) -> usize {
     let mut ix = text.len() / 2;
     while !text.is_char_boundary(ix) {

zed/src/editor/buffer_view.rs šŸ”—

@@ -3207,7 +3207,7 @@ mod tests {
                 &[
                     DisplayPoint::new(0, 1)..DisplayPoint::new(0, 1),
                     DisplayPoint::new(3, 1)..DisplayPoint::new(3, 1),
-                    DisplayPoint::new(3, 2)..DisplayPoint::new(4, 2),
+                    DisplayPoint::new(3, 2)..DisplayPoint::new(4, 3),
                     DisplayPoint::new(5, 0)..DisplayPoint::new(5, 2),
                 ],
                 ctx,
@@ -3229,7 +3229,7 @@ mod tests {
             vec![
                 DisplayPoint::new(0, 1)..DisplayPoint::new(0, 1),
                 DisplayPoint::new(2, 1)..DisplayPoint::new(2, 1),
-                DisplayPoint::new(2, 2)..DisplayPoint::new(3, 2),
+                DisplayPoint::new(2, 2)..DisplayPoint::new(3, 3),
                 DisplayPoint::new(4, 0)..DisplayPoint::new(4, 2)
             ]
         );
@@ -3244,7 +3244,7 @@ mod tests {
             vec![
                 DisplayPoint::new(1, 1)..DisplayPoint::new(1, 1),
                 DisplayPoint::new(3, 1)..DisplayPoint::new(3, 1),
-                DisplayPoint::new(3, 2)..DisplayPoint::new(4, 2),
+                DisplayPoint::new(3, 2)..DisplayPoint::new(4, 3),
                 DisplayPoint::new(5, 0)..DisplayPoint::new(5, 2)
             ]
         );
@@ -3259,7 +3259,7 @@ mod tests {
             vec![
                 DisplayPoint::new(2, 1)..DisplayPoint::new(2, 1),
                 DisplayPoint::new(3, 1)..DisplayPoint::new(3, 1),
-                DisplayPoint::new(3, 2)..DisplayPoint::new(4, 2),
+                DisplayPoint::new(3, 2)..DisplayPoint::new(4, 3),
                 DisplayPoint::new(5, 0)..DisplayPoint::new(5, 2)
             ]
         );
@@ -3274,7 +3274,7 @@ mod tests {
             vec![
                 DisplayPoint::new(1, 1)..DisplayPoint::new(1, 1),
                 DisplayPoint::new(2, 1)..DisplayPoint::new(2, 1),
-                DisplayPoint::new(2, 2)..DisplayPoint::new(3, 2),
+                DisplayPoint::new(2, 2)..DisplayPoint::new(3, 3),
                 DisplayPoint::new(4, 0)..DisplayPoint::new(4, 2)
             ]
         );
@@ -3489,7 +3489,7 @@ mod tests {
                     DisplayPoint::new(0, 0)..DisplayPoint::new(0, 1),
                     DisplayPoint::new(0, 2)..DisplayPoint::new(0, 2),
                     DisplayPoint::new(1, 0)..DisplayPoint::new(1, 0),
-                    DisplayPoint::new(4, 2)..DisplayPoint::new(4, 2),
+                    DisplayPoint::new(4, 4)..DisplayPoint::new(4, 4),
                 ],
                 ctx,
             )
@@ -3511,7 +3511,7 @@ mod tests {
                 DisplayPoint::new(0, 1)..DisplayPoint::new(0, 1),
                 DisplayPoint::new(0, 2)..DisplayPoint::new(0, 2),
                 DisplayPoint::new(1, 0)..DisplayPoint::new(1, 0),
-                DisplayPoint::new(4, 2)..DisplayPoint::new(4, 2)
+                DisplayPoint::new(4, 4)..DisplayPoint::new(4, 4)
             ]
         );
 

zed/src/editor/display_map/mod.rs šŸ”—

@@ -143,16 +143,17 @@ impl DisplayMapSnapshot {
     }
 
     pub fn chunks_at<'a>(&'a self, point: DisplayPoint, app: &'a AppContext) -> Chunks<'a> {
-        let column = point.column() as usize;
-        let (point, _) = self.collapse_tabs(point, Bias::Left, app);
+        let (point, expanded_char_column, to_next_stop) =
+            self.collapse_tabs(point, Bias::Left, app);
         let fold_chunks = self
             .folds_snapshot
             .chunks_at(self.folds_snapshot.to_display_offset(point, app), app);
         Chunks {
             fold_chunks,
-            column,
+            column: expanded_char_column,
             tab_size: self.tab_size,
-            chunk: "",
+            chunk: &SPACES[0..to_next_stop],
+            skip_leading_tab: to_next_stop > 0,
         }
     }
 
@@ -178,15 +179,16 @@ impl DisplayMapSnapshot {
         mut point: DisplayPoint,
         bias: Bias,
         ctx: &AppContext,
-    ) -> (DisplayPoint, usize) {
+    ) -> (DisplayPoint, usize, usize) {
         let chars = self
             .folds_snapshot
             .chars_at(DisplayPoint(Point::new(point.row(), 0)), ctx);
         let expanded = point.column() as usize;
-        let (collapsed, to_next_stop) = collapse_tabs(chars, expanded, bias, self.tab_size);
+        let (collapsed, expanded_char_column, to_next_stop) =
+            collapse_tabs(chars, expanded, bias, self.tab_size);
         *point.column_mut() = collapsed as u32;
 
-        (point, to_next_stop)
+        (point, expanded_char_column, to_next_stop)
     }
 }
 
@@ -220,20 +222,20 @@ impl DisplayPoint {
 
     pub fn to_buffer_point(self, map: &DisplayMap, bias: Bias, ctx: &AppContext) -> Point {
         map.fold_map
-            .to_buffer_point(self.collapse_tabs(map, bias, ctx).0, ctx)
+            .to_buffer_point(self.collapse_tabs(map, bias, ctx), ctx)
     }
 
     pub fn to_buffer_offset(self, map: &DisplayMap, bias: Bias, ctx: &AppContext) -> usize {
         map.fold_map
-            .to_buffer_offset(self.collapse_tabs(&map, bias, ctx).0, ctx)
+            .to_buffer_offset(self.collapse_tabs(&map, bias, ctx), ctx)
     }
 
     fn expand_tabs(self, map: &DisplayMap, ctx: &AppContext) -> Self {
         map.snapshot(ctx).expand_tabs(self, ctx)
     }
 
-    fn collapse_tabs(self, map: &DisplayMap, bias: Bias, ctx: &AppContext) -> (Self, usize) {
-        map.snapshot(ctx).collapse_tabs(self, bias, ctx)
+    fn collapse_tabs(self, map: &DisplayMap, bias: Bias, ctx: &AppContext) -> Self {
+        map.snapshot(ctx).collapse_tabs(self, bias, ctx).0
     }
 }
 
@@ -255,23 +257,28 @@ impl Anchor {
     }
 }
 
+// Handles a tab width <= 16
+const SPACES: &'static str = "                ";
+
 pub struct Chunks<'a> {
     fold_chunks: fold_map::Chunks<'a>,
     chunk: &'a str,
     column: usize,
     tab_size: usize,
+    skip_leading_tab: bool,
 }
 
 impl<'a> Iterator for Chunks<'a> {
     type Item = &'a str;
 
     fn next(&mut self) -> Option<Self::Item> {
-        // Handles a tab width <= 16
-        const SPACES: &'static str = "                ";
-
         if self.chunk.is_empty() {
             if let Some(chunk) = self.fold_chunks.next() {
                 self.chunk = chunk;
+                if self.skip_leading_tab {
+                    self.chunk = &self.chunk[1..];
+                    self.skip_leading_tab = false;
+                }
             } else {
                 return None;
             }
@@ -303,15 +310,24 @@ impl<'a> Iterator for Chunks<'a> {
 }
 
 pub fn expand_tabs(chars: impl Iterator<Item = char>, column: usize, tab_size: usize) -> usize {
-    let mut expanded = 0;
-    for c in chars.take(column) {
+    let mut expanded_chars = 0;
+    let mut expanded_bytes = 0;
+    let mut collapsed_bytes = 0;
+    for c in chars {
+        if collapsed_bytes == column {
+            break;
+        }
         if c == '\t' {
-            expanded += tab_size - expanded % tab_size;
+            let tab_len = tab_size - expanded_chars % tab_size;
+            expanded_bytes += tab_len;
+            expanded_chars += tab_len;
         } else {
-            expanded += 1;
+            expanded_bytes += c.len_utf8();
+            expanded_chars += 1;
         }
+        collapsed_bytes += c.len_utf8();
     }
-    expanded
+    expanded_bytes
 }
 
 pub fn collapse_tabs(
@@ -319,33 +335,39 @@ pub fn collapse_tabs(
     column: usize,
     bias: Bias,
     tab_size: usize,
-) -> (usize, usize) {
-    let mut expanded = 0;
-    let mut collapsed = 0;
+) -> (usize, usize, usize) {
+    let mut expanded_bytes = 0;
+    let mut expanded_chars = 0;
+    let mut collapsed_bytes = 0;
     while let Some(c) = chars.next() {
-        if expanded == column {
+        if expanded_bytes == column {
             break;
         }
 
         if c == '\t' {
-            expanded += tab_size - (expanded % tab_size);
-            if expanded > column {
+            let tab_len = tab_size - (expanded_chars % tab_size);
+            expanded_chars += tab_len;
+            expanded_bytes += tab_len;
+            if expanded_bytes > column {
+                expanded_chars -= expanded_bytes - column;
                 return match bias {
-                    Bias::Left => (collapsed, expanded - column),
-                    Bias::Right => (collapsed + 1, 0),
+                    Bias::Left => (collapsed_bytes, expanded_chars, expanded_bytes - column),
+                    Bias::Right => (collapsed_bytes + 1, expanded_chars, 0),
                 };
             }
-            collapsed += 1;
         } else {
-            expanded += 1;
-            collapsed += 1;
+            expanded_chars += 1;
+            expanded_bytes += c.len_utf8();
         }
+        collapsed_bytes += c.len_utf8();
     }
-    (collapsed, 0)
+    (collapsed_bytes, expanded_chars, 0)
 }
 
 #[cfg(test)]
 mod tests {
+    use gpui::MutableAppContext;
+
     use super::*;
     use crate::test::*;
 
@@ -395,20 +417,58 @@ mod tests {
         assert_eq!(expand_tabs("\ta".chars(), 2, 4), 5);
     }
 
-    #[test]
-    fn test_collapse_tabs() {
-        assert_eq!(collapse_tabs("\t".chars(), 0, Bias::Left, 4), (0, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 0, Bias::Right, 4), (0, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 1, Bias::Left, 4), (0, 3));
-        assert_eq!(collapse_tabs("\t".chars(), 1, Bias::Right, 4), (1, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 2, Bias::Left, 4), (0, 2));
-        assert_eq!(collapse_tabs("\t".chars(), 2, Bias::Right, 4), (1, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 3, Bias::Left, 4), (0, 1));
-        assert_eq!(collapse_tabs("\t".chars(), 3, Bias::Right, 4), (1, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 4, Bias::Left, 4), (1, 0));
-        assert_eq!(collapse_tabs("\t".chars(), 4, Bias::Right, 4), (1, 0));
-        assert_eq!(collapse_tabs("\ta".chars(), 5, Bias::Left, 4), (2, 0));
-        assert_eq!(collapse_tabs("\ta".chars(), 5, Bias::Right, 4), (2, 0));
+    #[gpui::test]
+    fn test_tabs_with_multibyte_chars(app: &mut MutableAppContext) {
+        let text = "āœ…\t\tx\nα\t\nšŸ€Ī±\t\ty";
+        let buffer = app.add_model(|ctx| Buffer::new(0, text, ctx));
+        let ctx = app.as_ref();
+        let map = DisplayMap::new(buffer.clone(), 4, ctx);
+        assert_eq!(map.text(ctx), "āœ…       x\nα   \nšŸ€Ī±      y");
+
+        let point = Point::new(0, "āœ…\t\t".len() as u32);
+        let display_point = DisplayPoint::new(0, "āœ…       ".len() as u32);
+        assert_eq!(point.to_display_point(&map, ctx), display_point);
+        assert_eq!(display_point.to_buffer_point(&map, Bias::Left, ctx), point,);
+
+        let point = Point::new(1, "α\t".len() as u32);
+        let display_point = DisplayPoint::new(1, "α   ".len() as u32);
+        assert_eq!(point.to_display_point(&map, ctx), display_point);
+        assert_eq!(display_point.to_buffer_point(&map, Bias::Left, ctx), point,);
+
+        let point = Point::new(2, "šŸ€Ī±\t\t".len() as u32);
+        let display_point = DisplayPoint::new(2, "šŸ€Ī±      ".len() as u32);
+        assert_eq!(point.to_display_point(&map, ctx), display_point);
+        assert_eq!(display_point.to_buffer_point(&map, Bias::Left, ctx), point,);
+
+        // Display points inside of expanded tabs
+        assert_eq!(
+            DisplayPoint::new(0, "āœ…      ".len() as u32).to_buffer_point(&map, Bias::Right, ctx),
+            Point::new(0, "āœ…\t\t".len() as u32),
+        );
+        assert_eq!(
+            DisplayPoint::new(0, "āœ…      ".len() as u32).to_buffer_point(&map, Bias::Left, ctx),
+            Point::new(0, "āœ…\t".len() as u32),
+        );
+        assert_eq!(
+            map.snapshot(ctx)
+                .chunks_at(DisplayPoint::new(0, "āœ…      ".len() as u32), ctx)
+                .collect::<String>(),
+            " x\nα   \nšŸ€Ī±      y"
+        );
+        assert_eq!(
+            DisplayPoint::new(0, "āœ… ".len() as u32).to_buffer_point(&map, Bias::Right, ctx),
+            Point::new(0, "āœ…\t".len() as u32),
+        );
+        assert_eq!(
+            DisplayPoint::new(0, "āœ… ".len() as u32).to_buffer_point(&map, Bias::Left, ctx),
+            Point::new(0, "āœ…".len() as u32),
+        );
+        assert_eq!(
+            map.snapshot(ctx)
+                .chunks_at(DisplayPoint::new(0, "āœ… ".len() as u32), ctx)
+                .collect::<String>(),
+            "      x\nα   \nšŸ€Ī±      y"
+        );
     }
 
     #[gpui::test]