rope.rs

  1use super::Point;
  2use arrayvec::ArrayString;
  3use smallvec::SmallVec;
  4use std::{cmp, ops::Range, str};
  5use sum_tree::{Bias, SumTree};
  6
  7#[cfg(test)]
  8const CHUNK_BASE: usize = 6;
  9
 10#[cfg(not(test))]
 11const CHUNK_BASE: usize = 16;
 12
 13#[derive(Clone, Default, Debug)]
 14pub struct Rope {
 15    chunks: SumTree<Chunk>,
 16}
 17
 18impl Rope {
 19    pub fn new() -> Self {
 20        Self::default()
 21    }
 22
 23    pub fn append(&mut self, rope: Rope) {
 24        let mut chunks = rope.chunks.cursor::<()>();
 25        chunks.next(&());
 26        if let Some(chunk) = chunks.item() {
 27            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
 28                || chunk.0.len() < CHUNK_BASE
 29            {
 30                self.push(&chunk.0);
 31                chunks.next(&());
 32            }
 33        }
 34
 35        self.chunks.push_tree(chunks.suffix(&()), &());
 36        self.check_invariants();
 37    }
 38
 39    pub fn push(&mut self, text: &str) {
 40        let mut new_chunks = SmallVec::<[_; 16]>::new();
 41        let mut new_chunk = ArrayString::new();
 42        for ch in text.chars() {
 43            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
 44                new_chunks.push(Chunk(new_chunk));
 45                new_chunk = ArrayString::new();
 46            }
 47            new_chunk.push(ch);
 48        }
 49        if !new_chunk.is_empty() {
 50            new_chunks.push(Chunk(new_chunk));
 51        }
 52
 53        let mut new_chunks = new_chunks.into_iter();
 54        let mut first_new_chunk = new_chunks.next();
 55        self.chunks.update_last(
 56            |last_chunk| {
 57                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
 58                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
 59                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
 60                    } else {
 61                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
 62                        text.push_str(&last_chunk.0);
 63                        text.push_str(&first_new_chunk_ref.0);
 64                        let (left, right) = text.split_at(find_split_ix(&text));
 65                        last_chunk.0.clear();
 66                        last_chunk.0.push_str(left);
 67                        first_new_chunk_ref.0.clear();
 68                        first_new_chunk_ref.0.push_str(right);
 69                    }
 70                }
 71            },
 72            &(),
 73        );
 74
 75        self.chunks
 76            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 77        self.check_invariants();
 78    }
 79
 80    fn check_invariants(&self) {
 81        #[cfg(test)]
 82        {
 83            // Ensure all chunks except maybe the last one are not underflowing.
 84            // Allow some wiggle room for multibyte characters at chunk boundaries.
 85            let mut chunks = self.chunks.cursor::<()>().peekable();
 86            while let Some(chunk) = chunks.next() {
 87                if chunks.peek().is_some() {
 88                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 89                }
 90            }
 91        }
 92    }
 93
 94    pub fn summary(&self) -> TextSummary {
 95        self.chunks.summary()
 96    }
 97
 98    pub fn len(&self) -> usize {
 99        self.chunks.extent(&())
100    }
101
102    pub fn max_point(&self) -> Point {
103        self.chunks.extent(&())
104    }
105
106    pub fn cursor(&self, offset: usize) -> Cursor {
107        Cursor::new(self, offset)
108    }
109
110    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
111        self.chars_at(0)
112    }
113
114    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
115        self.chunks_in_range(start..self.len()).flat_map(str::chars)
116    }
117
118    pub fn bytes_at(&self, start: usize) -> impl Iterator<Item = u8> + '_ {
119        self.chunks_in_range(start..self.len()).flat_map(str::bytes)
120    }
121
122    pub fn chunks<'a>(&'a self) -> Chunks<'a> {
123        self.chunks_in_range(0..self.len())
124    }
125
126    pub fn chunks_in_range<'a>(&'a self, range: Range<usize>) -> Chunks<'a> {
127        Chunks::new(self, range)
128    }
129
130    pub fn to_point(&self, offset: usize) -> Point {
131        assert!(offset <= self.summary().bytes);
132        let mut cursor = self.chunks.cursor::<(usize, Point)>();
133        cursor.seek(&offset, Bias::Left, &());
134        let overshoot = offset - cursor.start().0;
135        cursor.start().1
136            + cursor
137                .item()
138                .map_or(Point::zero(), |chunk| chunk.to_point(overshoot))
139    }
140
141    pub fn to_offset(&self, point: Point) -> usize {
142        assert!(point <= self.summary().lines);
143        let mut cursor = self.chunks.cursor::<(Point, usize)>();
144        cursor.seek(&point, Bias::Left, &());
145        let overshoot = point - cursor.start().0;
146        cursor.start().1 + cursor.item().map_or(0, |chunk| chunk.to_offset(overshoot))
147    }
148
149    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
150        let mut cursor = self.chunks.cursor::<usize>();
151        cursor.seek(&offset, Bias::Left, &());
152        if let Some(chunk) = cursor.item() {
153            let mut ix = offset - cursor.start();
154            while !chunk.0.is_char_boundary(ix) {
155                match bias {
156                    Bias::Left => {
157                        ix -= 1;
158                        offset -= 1;
159                    }
160                    Bias::Right => {
161                        ix += 1;
162                        offset += 1;
163                    }
164                }
165            }
166            offset
167        } else {
168            self.summary().bytes
169        }
170    }
171
172    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
173        let mut cursor = self.chunks.cursor::<Point>();
174        cursor.seek(&point, Bias::Right, &());
175        if let Some(chunk) = cursor.item() {
176            let overshoot = point - cursor.start();
177            *cursor.start() + chunk.clip_point(overshoot, bias)
178        } else {
179            self.summary().lines
180        }
181    }
182}
183
184impl<'a> From<&'a str> for Rope {
185    fn from(text: &'a str) -> Self {
186        let mut rope = Self::new();
187        rope.push(text);
188        rope
189    }
190}
191
192impl Into<String> for Rope {
193    fn into(self) -> String {
194        self.chunks().collect()
195    }
196}
197
198pub struct Cursor<'a> {
199    rope: &'a Rope,
200    chunks: sum_tree::Cursor<'a, Chunk, usize>,
201    offset: usize,
202}
203
204impl<'a> Cursor<'a> {
205    pub fn new(rope: &'a Rope, offset: usize) -> Self {
206        let mut chunks = rope.chunks.cursor();
207        chunks.seek(&offset, Bias::Right, &());
208        Self {
209            rope,
210            chunks,
211            offset,
212        }
213    }
214
215    pub fn seek_forward(&mut self, end_offset: usize) {
216        debug_assert!(end_offset >= self.offset);
217
218        self.chunks.seek_forward(&end_offset, Bias::Right, &());
219        self.offset = end_offset;
220    }
221
222    pub fn slice(&mut self, end_offset: usize) -> Rope {
223        debug_assert!(
224            end_offset >= self.offset,
225            "cannot slice backwards from {} to {}",
226            self.offset,
227            end_offset
228        );
229
230        let mut slice = Rope::new();
231        if let Some(start_chunk) = self.chunks.item() {
232            let start_ix = self.offset - self.chunks.start();
233            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
234            slice.push(&start_chunk.0[start_ix..end_ix]);
235        }
236
237        if end_offset > self.chunks.end(&()) {
238            self.chunks.next(&());
239            slice.append(Rope {
240                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
241            });
242            if let Some(end_chunk) = self.chunks.item() {
243                let end_ix = end_offset - self.chunks.start();
244                slice.push(&end_chunk.0[..end_ix]);
245            }
246        }
247
248        self.offset = end_offset;
249        slice
250    }
251
252    pub fn summary(&mut self, end_offset: usize) -> TextSummary {
253        debug_assert!(end_offset >= self.offset);
254
255        let mut summary = TextSummary::default();
256        if let Some(start_chunk) = self.chunks.item() {
257            let start_ix = self.offset - self.chunks.start();
258            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
259            summary = TextSummary::from(&start_chunk.0[start_ix..end_ix]);
260        }
261
262        if end_offset > self.chunks.end(&()) {
263            self.chunks.next(&());
264            summary += &self.chunks.summary(&end_offset, Bias::Right, &());
265            if let Some(end_chunk) = self.chunks.item() {
266                let end_ix = end_offset - self.chunks.start();
267                summary += TextSummary::from(&end_chunk.0[..end_ix]);
268            }
269        }
270
271        summary
272    }
273
274    pub fn suffix(mut self) -> Rope {
275        self.slice(self.rope.chunks.extent(&()))
276    }
277
278    pub fn offset(&self) -> usize {
279        self.offset
280    }
281}
282
283pub struct Chunks<'a> {
284    chunks: sum_tree::Cursor<'a, Chunk, usize>,
285    range: Range<usize>,
286}
287
288impl<'a> Chunks<'a> {
289    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
290        let mut chunks = rope.chunks.cursor();
291        chunks.seek(&range.start, Bias::Right, &());
292        Self { chunks, range }
293    }
294
295    pub fn offset(&self) -> usize {
296        self.range.start.max(*self.chunks.start())
297    }
298
299    pub fn seek(&mut self, offset: usize) {
300        if offset >= self.chunks.end(&()) {
301            self.chunks.seek_forward(&offset, Bias::Right, &());
302        } else {
303            self.chunks.seek(&offset, Bias::Right, &());
304        }
305        self.range.start = offset;
306    }
307
308    pub fn peek(&self) -> Option<&'a str> {
309        if let Some(chunk) = self.chunks.item() {
310            let offset = *self.chunks.start();
311            if self.range.end > offset {
312                let start = self.range.start.saturating_sub(*self.chunks.start());
313                let end = self.range.end - self.chunks.start();
314                return Some(&chunk.0[start..chunk.0.len().min(end)]);
315            }
316        }
317        None
318    }
319}
320
321impl<'a> Iterator for Chunks<'a> {
322    type Item = &'a str;
323
324    fn next(&mut self) -> Option<Self::Item> {
325        let result = self.peek();
326        if result.is_some() {
327            self.chunks.next(&());
328        }
329        result
330    }
331}
332
333#[derive(Clone, Debug, Default)]
334struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
335
336impl Chunk {
337    fn to_point(&self, target: usize) -> Point {
338        let mut offset = 0;
339        let mut point = Point::new(0, 0);
340        for ch in self.0.chars() {
341            if offset >= target {
342                break;
343            }
344
345            if ch == '\n' {
346                point.row += 1;
347                point.column = 0;
348            } else {
349                point.column += ch.len_utf8() as u32;
350            }
351            offset += ch.len_utf8();
352        }
353        point
354    }
355
356    fn to_offset(&self, target: Point) -> usize {
357        let mut offset = 0;
358        let mut point = Point::new(0, 0);
359        for ch in self.0.chars() {
360            if point >= target {
361                if point > target {
362                    panic!("point {:?} is inside of character {:?}", target, ch);
363                }
364                break;
365            }
366
367            if ch == '\n' {
368                point.row += 1;
369                point.column = 0;
370            } else {
371                point.column += ch.len_utf8() as u32;
372            }
373            offset += ch.len_utf8();
374        }
375        offset
376    }
377
378    fn clip_point(&self, target: Point, bias: Bias) -> Point {
379        for (row, line) in self.0.split('\n').enumerate() {
380            if row == target.row as usize {
381                let mut column = target.column.min(line.len() as u32);
382                while !line.is_char_boundary(column as usize) {
383                    match bias {
384                        Bias::Left => column -= 1,
385                        Bias::Right => column += 1,
386                    }
387                }
388                return Point::new(row as u32, column);
389            }
390        }
391        unreachable!()
392    }
393}
394
395impl sum_tree::Item for Chunk {
396    type Summary = TextSummary;
397
398    fn summary(&self) -> Self::Summary {
399        TextSummary::from(self.0.as_str())
400    }
401}
402
403#[derive(Clone, Debug, Default, Eq, PartialEq)]
404pub struct TextSummary {
405    pub bytes: usize,
406    pub lines: Point,
407    pub first_line_chars: u32,
408    pub last_line_chars: u32,
409    pub longest_row: u32,
410    pub longest_row_chars: u32,
411}
412
413impl<'a> From<&'a str> for TextSummary {
414    fn from(text: &'a str) -> Self {
415        let mut lines = Point::new(0, 0);
416        let mut first_line_chars = 0;
417        let mut last_line_chars = 0;
418        let mut longest_row = 0;
419        let mut longest_row_chars = 0;
420        for c in text.chars() {
421            if c == '\n' {
422                lines.row += 1;
423                lines.column = 0;
424                last_line_chars = 0;
425            } else {
426                lines.column += c.len_utf8() as u32;
427                last_line_chars += 1;
428            }
429
430            if lines.row == 0 {
431                first_line_chars = last_line_chars;
432            }
433
434            if last_line_chars > longest_row_chars {
435                longest_row = lines.row;
436                longest_row_chars = last_line_chars;
437            }
438        }
439
440        TextSummary {
441            bytes: text.len(),
442            lines,
443            first_line_chars,
444            last_line_chars,
445            longest_row,
446            longest_row_chars,
447        }
448    }
449}
450
451impl sum_tree::Summary for TextSummary {
452    type Context = ();
453
454    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
455        *self += summary;
456    }
457}
458
459impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
460    fn add_assign(&mut self, other: &'a Self) {
461        let joined_chars = self.last_line_chars + other.first_line_chars;
462        if joined_chars > self.longest_row_chars {
463            self.longest_row = self.lines.row;
464            self.longest_row_chars = joined_chars;
465        }
466        if other.longest_row_chars > self.longest_row_chars {
467            self.longest_row = self.lines.row + other.longest_row;
468            self.longest_row_chars = other.longest_row_chars;
469        }
470
471        if self.lines.row == 0 {
472            self.first_line_chars += other.first_line_chars;
473        }
474
475        if other.lines.row == 0 {
476            self.last_line_chars += other.first_line_chars;
477        } else {
478            self.last_line_chars = other.last_line_chars;
479        }
480
481        self.bytes += other.bytes;
482        self.lines += &other.lines;
483    }
484}
485
486impl std::ops::AddAssign<Self> for TextSummary {
487    fn add_assign(&mut self, other: Self) {
488        *self += &other;
489    }
490}
491
492impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
493    fn add_summary(&mut self, summary: &'a TextSummary, _: &()) {
494        *self += summary.bytes;
495    }
496}
497
498impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
499    fn add_summary(&mut self, summary: &'a TextSummary, _: &()) {
500        *self += &summary.lines;
501    }
502}
503
504fn find_split_ix(text: &str) -> usize {
505    let mut ix = text.len() / 2;
506    while !text.is_char_boundary(ix) {
507        if ix < 2 * CHUNK_BASE {
508            ix += 1;
509        } else {
510            ix = (text.len() / 2) - 1;
511            break;
512        }
513    }
514    while !text.is_char_boundary(ix) {
515        ix -= 1;
516    }
517
518    debug_assert!(ix <= 2 * CHUNK_BASE);
519    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
520    ix
521}
522
523#[cfg(test)]
524mod tests {
525    use super::*;
526    use crate::random_char_iter::RandomCharIter;
527    use rand::prelude::*;
528    use std::env;
529    use Bias::{Left, Right};
530
531    #[test]
532    fn test_all_4_byte_chars() {
533        let mut rope = Rope::new();
534        let text = "🏀".repeat(256);
535        rope.push(&text);
536        assert_eq!(rope.text(), text);
537    }
538
539    #[gpui::test(iterations = 100)]
540    fn test_random(mut rng: StdRng) {
541        let operations = env::var("OPERATIONS")
542            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
543            .unwrap_or(10);
544
545        let mut expected = String::new();
546        let mut actual = Rope::new();
547        for _ in 0..operations {
548            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
549            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
550            let len = rng.gen_range(0..=64);
551            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
552
553            let mut new_actual = Rope::new();
554            let mut cursor = actual.cursor(0);
555            new_actual.append(cursor.slice(start_ix));
556            new_actual.push(&new_text);
557            cursor.seek_forward(end_ix);
558            new_actual.append(cursor.suffix());
559            actual = new_actual;
560
561            expected.replace_range(start_ix..end_ix, &new_text);
562
563            assert_eq!(actual.text(), expected);
564            log::info!("text: {:?}", expected);
565
566            for _ in 0..5 {
567                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
568                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
569                assert_eq!(
570                    actual.chunks_in_range(start_ix..end_ix).collect::<String>(),
571                    &expected[start_ix..end_ix]
572                );
573            }
574
575            let mut point = Point::new(0, 0);
576            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
577                assert_eq!(actual.to_point(ix), point, "to_point({})", ix);
578                assert_eq!(actual.to_offset(point), ix, "to_offset({:?})", point);
579                if ch == '\n' {
580                    point.row += 1;
581                    point.column = 0
582                } else {
583                    point.column += ch.len_utf8() as u32;
584                }
585            }
586
587            for _ in 0..5 {
588                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
589                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
590                assert_eq!(
591                    actual.cursor(start_ix).summary(end_ix),
592                    TextSummary::from(&expected[start_ix..end_ix])
593                );
594            }
595        }
596    }
597
598    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
599        while !text.is_char_boundary(offset) {
600            match bias {
601                Bias::Left => offset -= 1,
602                Bias::Right => offset += 1,
603            }
604        }
605        offset
606    }
607
608    impl Rope {
609        fn text(&self) -> String {
610            let mut text = String::new();
611            for chunk in self.chunks.cursor::<()>() {
612                text.push_str(&chunk.0);
613            }
614            text
615        }
616    }
617}