Get randomized tests passing in the presence of multibyte chars

Max Brunsfeld created

Change summary

zed/src/editor/buffer/mod.rs           | 31 ++++++++++++++-----
zed/src/editor/buffer/rope.rs          | 44 +++++++++++++++------------
zed/src/editor/display_map/fold_map.rs |  7 +++-
zed/src/util.rs                        | 33 +++++++++++++++++++-
4 files changed, 83 insertions(+), 32 deletions(-)

Detailed changes

zed/src/editor/buffer/mod.rs 🔗

@@ -2509,12 +2509,12 @@ mod tests {
                 for _i in 0..10 {
                     let (old_ranges, new_text, _) = buffer.randomly_mutate(rng, None);
                     for old_range in old_ranges.iter().rev() {
-                        reference_string = [
-                            &reference_string[0..old_range.start],
-                            new_text.as_str(),
-                            &reference_string[old_range.end..],
-                        ]
-                        .concat();
+                        reference_string = reference_string
+                            .chars()
+                            .take(old_range.start)
+                            .chain(new_text.chars())
+                            .chain(reference_string.chars().skip(old_range.end))
+                            .collect();
                     }
                     assert_eq!(buffer.text(), reference_string);
 
@@ -2549,7 +2549,12 @@ mod tests {
                         let range_sum = buffer.text_summary_for_range(start..end);
                         assert_eq!(range_sum.rightmost_point.column, *longest_column);
                         assert!(longest_rows.contains(&range_sum.rightmost_point.row));
-                        let range_text = &buffer.text()[start..end];
+                        let range_text = buffer
+                            .text()
+                            .chars()
+                            .skip(start)
+                            .take(end - start)
+                            .collect::<String>();
                         assert_eq!(range_sum.chars, range_text.chars().count());
                         assert_eq!(range_sum.bytes, range_text.len());
                     }
@@ -3458,9 +3463,17 @@ mod tests {
 
     fn line_lengths_in_range(buffer: &Buffer, range: Range<usize>) -> BTreeMap<u32, HashSet<u32>> {
         let mut lengths = BTreeMap::new();
-        for (row, line) in buffer.text()[range].lines().enumerate() {
+        for (row, line) in buffer
+            .text()
+            .chars()
+            .skip(range.start)
+            .take(range.len())
+            .collect::<String>()
+            .lines()
+            .enumerate()
+        {
             lengths
-                .entry(line.len() as u32)
+                .entry(line.chars().count() as u32)
                 .or_insert(HashSet::default())
                 .insert(row as u32);
         }

zed/src/editor/buffer/rope.rs 🔗

@@ -1,12 +1,13 @@
 use super::Point;
 use crate::sum_tree::{self, SeekBias, SumTree};
+use crate::util::byte_range_for_char_range;
 use anyhow::{anyhow, Result};
 use arrayvec::ArrayString;
 use smallvec::SmallVec;
-use std::{cmp, ops::Range, str};
+use std::{cmp, iter::Skip, ops::Range, str};
 
 #[cfg(test)]
-const CHUNK_BASE: usize = 2;
+const CHUNK_BASE: usize = 6;
 
 #[cfg(not(test))]
 const CHUNK_BASE: usize = 16;
@@ -55,10 +56,7 @@ impl Rope {
                     if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
                         last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
                     } else {
-                        let mut text = ArrayString::<[_; 4 * CHUNK_BASE]>::new();
-                        text.push_str(&last_chunk.0);
-                        text.push_str(&first_new_chunk_ref.0);
-
+                        let text = [last_chunk.0, first_new_chunk_ref.0].concat();
                         let mut midpoint = text.len() / 2;
                         while !text.is_char_boundary(midpoint) {
                             midpoint += 1;
@@ -83,10 +81,11 @@ impl Rope {
         #[cfg(test)]
         {
             // Ensure all chunks except maybe the last one are not underflowing.
+            // Allow some wiggle room for multibyte characters at chunk boundaries.
             let mut chunks = self.chunks.cursor::<(), ()>().peekable();
             while let Some(chunk) = chunks.next() {
                 if chunks.peek().is_some() {
-                    assert!(chunk.0.len() >= CHUNK_BASE);
+                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
                 }
             }
         }
@@ -190,7 +189,8 @@ impl<'a> Cursor<'a> {
         if let Some(start_chunk) = self.chunks.item() {
             let start_ix = self.offset - self.chunks.start();
             let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
-            slice.push(&start_chunk.0[start_ix..end_ix]);
+            let byte_range = byte_range_for_char_range(start_chunk.0, start_ix..end_ix);
+            slice.push(&start_chunk.0[byte_range]);
         }
 
         if end_offset > self.chunks.end() {
@@ -199,7 +199,9 @@ impl<'a> Cursor<'a> {
                 chunks: self.chunks.slice(&end_offset, SeekBias::Right, &()),
             });
             if let Some(end_chunk) = self.chunks.item() {
-                slice.push(&end_chunk.0[..end_offset - self.chunks.start()]);
+                let end_ix = end_offset - self.chunks.start();
+                let byte_range = byte_range_for_char_range(end_chunk.0, 0..end_ix);
+                slice.push(&end_chunk.0[byte_range]);
             }
         }
 
@@ -217,7 +219,7 @@ impl<'a> Cursor<'a> {
 }
 
 #[derive(Clone, Debug, Default)]
-struct Chunk(ArrayString<[u8; 2 * CHUNK_BASE]>);
+struct Chunk(ArrayString<[u8; 4 * CHUNK_BASE]>);
 
 impl Chunk {
     fn to_point(&self, target: usize) -> Point {
@@ -358,7 +360,7 @@ impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
 
 pub struct Chars<'a> {
     cursor: sum_tree::Cursor<'a, Chunk, usize, usize>,
-    chars: str::Chars<'a>,
+    chars: Skip<str::Chars<'a>>,
 }
 
 impl<'a> Chars<'a> {
@@ -368,9 +370,9 @@ impl<'a> Chars<'a> {
         let chars = if let Some(chunk) = cursor.item() {
             let ix = start - cursor.start();
             cursor.next();
-            chunk.0[ix..].chars()
+            chunk.0.chars().skip(ix)
         } else {
-            "".chars()
+            "".chars().skip(0)
         };
 
         Self { cursor, chars }
@@ -384,7 +386,7 @@ impl<'a> Iterator for Chars<'a> {
         if let Some(ch) = self.chars.next() {
             Some(ch)
         } else if let Some(chunk) = self.cursor.item() {
-            self.chars = chunk.0.chars();
+            self.chars = chunk.0.chars().skip(0);
             self.cursor.next();
             Some(self.chars.next().unwrap())
         } else {
@@ -422,9 +424,9 @@ mod tests {
             let mut expected = String::new();
             let mut actual = Rope::new();
             for _ in 0..operations {
-                let end_ix = rng.gen_range(0..=expected.len());
+                let end_ix = rng.gen_range(0..=expected.chars().count());
                 let start_ix = rng.gen_range(0..=end_ix);
-                let len = rng.gen_range(0..=20);
+                let len = rng.gen_range(0..=64);
                 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
 
                 let mut new_actual = Rope::new();
@@ -436,16 +438,20 @@ mod tests {
                 actual = new_actual;
 
                 let mut new_expected = String::new();
-                new_expected.push_str(&expected[..start_ix]);
+                new_expected.extend(expected.chars().take(start_ix));
                 new_expected.push_str(&new_text);
-                new_expected.push_str(&expected[end_ix..]);
+                new_expected.extend(expected.chars().skip(end_ix));
                 expected = new_expected;
 
                 assert_eq!(actual.text(), expected);
+                log::info!("text: {:?}", expected);
 
                 for _ in 0..5 {
                     let ix = rng.gen_range(0..=expected.len());
-                    assert_eq!(actual.chars_at(ix).collect::<String>(), expected[ix..]);
+                    assert_eq!(
+                        actual.chars_at(ix).collect::<String>(),
+                        expected.chars().skip(ix).collect::<String>()
+                    );
                 }
 
                 let mut point = Point::new(0, 0);

zed/src/editor/display_map/fold_map.rs 🔗

@@ -830,7 +830,7 @@ mod tests {
     #[gpui::test]
     fn test_random_folds(app: &mut gpui::MutableAppContext) {
         use crate::editor::ToPoint;
-        use crate::util::RandomCharIter;
+        use crate::util::{byte_range_for_char_range, RandomCharIter};
         use rand::prelude::*;
         use std::env;
 
@@ -905,7 +905,10 @@ mod tests {
                     expected_buffer_rows.extend((fold_end.row + 1..=next_row).rev());
                     next_row = fold_start.row;
 
-                    expected_text.replace_range(fold_range.start..fold_range.end, "…");
+                    expected_text.replace_range(
+                        byte_range_for_char_range(&expected_text, fold_range.start..fold_range.end),
+                        "…",
+                    );
                 }
                 expected_buffer_rows.extend((0..=next_row).rev());
                 expected_buffer_rows.reverse();

zed/src/util.rs 🔗

@@ -1,5 +1,20 @@
 use rand::prelude::*;
-use std::cmp::Ordering;
+use std::{cmp::Ordering, ops::Range};
+
+pub fn byte_range_for_char_range(text: impl AsRef<str>, char_range: Range<usize>) -> Range<usize> {
+    let text = text.as_ref();
+    let mut result = text.len()..text.len();
+    for (i, (offset, _)) in text.char_indices().enumerate() {
+        if i == char_range.start {
+            result.start = offset;
+        }
+        if i == char_range.end {
+            result.end = offset;
+            break;
+        }
+    }
+    result
+}
 
 pub fn post_inc(value: &mut usize) -> usize {
     let prev = *value;
@@ -44,7 +59,21 @@ impl<T: Rng> Iterator for RandomCharIter<T> {
     fn next(&mut self) -> Option<Self::Item> {
         if self.0.gen_bool(1.0 / 5.0) {
             Some('\n')
-        } else {
+        }
+        // two-byte greek letters
+        else if self.0.gen_bool(1.0 / 8.0) {
+            Some(std::char::from_u32(self.0.gen_range(('α' as u32)..('ω' as u32 + 1))).unwrap())
+        }
+        // three-byte characters
+        else if self.0.gen_bool(1.0 / 10.0) {
+            ['✋', '✅', '❌', '❎', '⭐'].choose(&mut self.0).cloned()
+        }
+        // four-byte characters
+        else if self.0.gen_bool(1.0 / 12.0) {
+            ['🍐', '🏀', '🍗', '🎉'].choose(&mut self.0).cloned()
+        }
+        // ascii letters
+        else {
             Some(self.0.gen_range(b'a'..b'z' + 1).into())
         }
     }