rope.rs

  1use super::Point;
  2use arrayvec::ArrayString;
  3use smallvec::SmallVec;
  4use std::{cmp, ops::Range, str};
  5use sum_tree::{Bias, SumTree};
  6
  7#[cfg(test)]
  8const CHUNK_BASE: usize = 6;
  9
 10#[cfg(not(test))]
 11const CHUNK_BASE: usize = 16;
 12
 13#[derive(Clone, Default, Debug)]
 14pub struct Rope {
 15    chunks: SumTree<Chunk>,
 16}
 17
 18impl Rope {
 19    pub fn new() -> Self {
 20        Self::default()
 21    }
 22
 23    pub fn append(&mut self, rope: Rope) {
 24        let mut chunks = rope.chunks.cursor::<()>();
 25        chunks.next(&());
 26        if let Some(chunk) = chunks.item() {
 27            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
 28                || chunk.0.len() < CHUNK_BASE
 29            {
 30                self.push(&chunk.0);
 31                chunks.next(&());
 32            }
 33        }
 34
 35        self.chunks.push_tree(chunks.suffix(&()), &());
 36        self.check_invariants();
 37    }
 38
 39    pub fn push(&mut self, text: &str) {
 40        let mut new_chunks = SmallVec::<[_; 16]>::new();
 41        let mut new_chunk = ArrayString::new();
 42        for ch in text.chars() {
 43            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
 44                new_chunks.push(Chunk(new_chunk));
 45                new_chunk = ArrayString::new();
 46            }
 47            new_chunk.push(ch);
 48        }
 49        if !new_chunk.is_empty() {
 50            new_chunks.push(Chunk(new_chunk));
 51        }
 52
 53        let mut new_chunks = new_chunks.into_iter();
 54        let mut first_new_chunk = new_chunks.next();
 55        self.chunks.update_last(
 56            |last_chunk| {
 57                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
 58                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
 59                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
 60                    } else {
 61                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
 62                        text.push_str(&last_chunk.0);
 63                        text.push_str(&first_new_chunk_ref.0);
 64                        let (left, right) = text.split_at(find_split_ix(&text));
 65                        last_chunk.0.clear();
 66                        last_chunk.0.push_str(left);
 67                        first_new_chunk_ref.0.clear();
 68                        first_new_chunk_ref.0.push_str(right);
 69                    }
 70                }
 71            },
 72            &(),
 73        );
 74
 75        self.chunks
 76            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 77        self.check_invariants();
 78    }
 79
 80    fn check_invariants(&self) {
 81        #[cfg(test)]
 82        {
 83            // Ensure all chunks except maybe the last one are not underflowing.
 84            // Allow some wiggle room for multibyte characters at chunk boundaries.
 85            let mut chunks = self.chunks.cursor::<()>().peekable();
 86            while let Some(chunk) = chunks.next() {
 87                if chunks.peek().is_some() {
 88                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 89                }
 90            }
 91        }
 92    }
 93
 94    pub fn summary(&self) -> TextSummary {
 95        self.chunks.summary()
 96    }
 97
 98    pub fn len(&self) -> usize {
 99        self.chunks.extent(&())
100    }
101
102    pub fn max_point(&self) -> Point {
103        self.chunks.extent(&())
104    }
105
106    pub fn cursor(&self, offset: usize) -> Cursor {
107        Cursor::new(self, offset)
108    }
109
110    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
111        self.chars_at(0)
112    }
113
114    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
115        self.chunks_in_range(start..self.len()).flat_map(str::chars)
116    }
117
118    pub fn bytes_at(&self, start: usize) -> impl Iterator<Item = u8> + '_ {
119        self.chunks_in_range(start..self.len()).flat_map(str::bytes)
120    }
121
122    pub fn chunks<'a>(&'a self) -> Chunks<'a> {
123        self.chunks_in_range(0..self.len())
124    }
125
126    pub fn chunks_in_range<'a>(&'a self, range: Range<usize>) -> Chunks<'a> {
127        Chunks::new(self, range)
128    }
129
130    pub fn to_point(&self, offset: usize) -> Point {
131        assert!(offset <= self.summary().bytes);
132        let mut cursor = self.chunks.cursor::<(usize, Point)>();
133        cursor.seek(&offset, Bias::Left, &());
134        let overshoot = offset - cursor.start().0;
135        cursor.start().1
136            + cursor
137                .item()
138                .map_or(Point::zero(), |chunk| chunk.to_point(overshoot))
139    }
140
141    pub fn to_offset(&self, point: Point) -> usize {
142        assert!(point <= self.summary().lines);
143        let mut cursor = self.chunks.cursor::<(Point, usize)>();
144        cursor.seek(&point, Bias::Left, &());
145        let overshoot = point - cursor.start().0;
146        cursor.start().1 + cursor.item().map_or(0, |chunk| chunk.to_offset(overshoot))
147    }
148
149    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
150        let mut cursor = self.chunks.cursor::<usize>();
151        cursor.seek(&offset, Bias::Left, &());
152        if let Some(chunk) = cursor.item() {
153            let mut ix = offset - cursor.start();
154            while !chunk.0.is_char_boundary(ix) {
155                match bias {
156                    Bias::Left => {
157                        ix -= 1;
158                        offset -= 1;
159                    }
160                    Bias::Right => {
161                        ix += 1;
162                        offset += 1;
163                    }
164                }
165            }
166            offset
167        } else {
168            self.summary().bytes
169        }
170    }
171
172    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
173        let mut cursor = self.chunks.cursor::<Point>();
174        cursor.seek(&point, Bias::Right, &());
175        if let Some(chunk) = cursor.item() {
176            let overshoot = point - cursor.start();
177            *cursor.start() + chunk.clip_point(overshoot, bias)
178        } else {
179            self.summary().lines
180        }
181    }
182}
183
184impl<'a> From<&'a str> for Rope {
185    fn from(text: &'a str) -> Self {
186        let mut rope = Self::new();
187        rope.push(text);
188        rope
189    }
190}
191
192impl Into<String> for Rope {
193    fn into(self) -> String {
194        self.chunks().collect()
195    }
196}
197
198pub struct Cursor<'a> {
199    rope: &'a Rope,
200    chunks: sum_tree::Cursor<'a, Chunk, usize>,
201    offset: usize,
202}
203
204impl<'a> Cursor<'a> {
205    pub fn new(rope: &'a Rope, offset: usize) -> Self {
206        let mut chunks = rope.chunks.cursor();
207        chunks.seek(&offset, Bias::Right, &());
208        Self {
209            rope,
210            chunks,
211            offset,
212        }
213    }
214
215    pub fn seek_forward(&mut self, end_offset: usize) {
216        debug_assert!(end_offset >= self.offset);
217
218        self.chunks.seek_forward(&end_offset, Bias::Right, &());
219        self.offset = end_offset;
220    }
221
222    pub fn slice(&mut self, end_offset: usize) -> Rope {
223        debug_assert!(
224            end_offset >= self.offset,
225            "cannot slice backwards from {} to {}",
226            self.offset,
227            end_offset
228        );
229
230        let mut slice = Rope::new();
231        if let Some(start_chunk) = self.chunks.item() {
232            let start_ix = self.offset - self.chunks.start();
233            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
234            slice.push(&start_chunk.0[start_ix..end_ix]);
235        }
236
237        if end_offset > self.chunks.end(&()) {
238            self.chunks.next(&());
239            slice.append(Rope {
240                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
241            });
242            if let Some(end_chunk) = self.chunks.item() {
243                let end_ix = end_offset - self.chunks.start();
244                slice.push(&end_chunk.0[..end_ix]);
245            }
246        }
247
248        self.offset = end_offset;
249        slice
250    }
251
252    pub fn summary(&mut self, end_offset: usize) -> TextSummary {
253        debug_assert!(end_offset >= self.offset);
254
255        let mut summary = TextSummary::default();
256        if let Some(start_chunk) = self.chunks.item() {
257            let start_ix = self.offset - self.chunks.start();
258            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
259            summary = TextSummary::from(&start_chunk.0[start_ix..end_ix]);
260        }
261
262        if end_offset > self.chunks.end(&()) {
263            self.chunks.next(&());
264            summary += &self.chunks.summary(&end_offset, Bias::Right, &());
265            if let Some(end_chunk) = self.chunks.item() {
266                let end_ix = end_offset - self.chunks.start();
267                summary += TextSummary::from(&end_chunk.0[..end_ix]);
268            }
269        }
270
271        self.offset = end_offset;
272        summary
273    }
274
275    pub fn suffix(mut self) -> Rope {
276        self.slice(self.rope.chunks.extent(&()))
277    }
278
279    pub fn offset(&self) -> usize {
280        self.offset
281    }
282}
283
284pub struct Chunks<'a> {
285    chunks: sum_tree::Cursor<'a, Chunk, usize>,
286    range: Range<usize>,
287}
288
289impl<'a> Chunks<'a> {
290    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
291        let mut chunks = rope.chunks.cursor();
292        chunks.seek(&range.start, Bias::Right, &());
293        Self { chunks, range }
294    }
295
296    pub fn offset(&self) -> usize {
297        self.range.start.max(*self.chunks.start())
298    }
299
300    pub fn seek(&mut self, offset: usize) {
301        if offset >= self.chunks.end(&()) {
302            self.chunks.seek_forward(&offset, Bias::Right, &());
303        } else {
304            self.chunks.seek(&offset, Bias::Right, &());
305        }
306        self.range.start = offset;
307    }
308
309    pub fn peek(&self) -> Option<&'a str> {
310        if let Some(chunk) = self.chunks.item() {
311            let offset = *self.chunks.start();
312            if self.range.end > offset {
313                let start = self.range.start.saturating_sub(*self.chunks.start());
314                let end = self.range.end - self.chunks.start();
315                return Some(&chunk.0[start..chunk.0.len().min(end)]);
316            }
317        }
318        None
319    }
320}
321
322impl<'a> Iterator for Chunks<'a> {
323    type Item = &'a str;
324
325    fn next(&mut self) -> Option<Self::Item> {
326        let result = self.peek();
327        if result.is_some() {
328            self.chunks.next(&());
329        }
330        result
331    }
332}
333
334#[derive(Clone, Debug, Default)]
335struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
336
337impl Chunk {
338    fn to_point(&self, target: usize) -> Point {
339        let mut offset = 0;
340        let mut point = Point::new(0, 0);
341        for ch in self.0.chars() {
342            if offset >= target {
343                break;
344            }
345
346            if ch == '\n' {
347                point.row += 1;
348                point.column = 0;
349            } else {
350                point.column += ch.len_utf8() as u32;
351            }
352            offset += ch.len_utf8();
353        }
354        point
355    }
356
357    fn to_offset(&self, target: Point) -> usize {
358        let mut offset = 0;
359        let mut point = Point::new(0, 0);
360        for ch in self.0.chars() {
361            if point >= target {
362                if point > target {
363                    panic!("point {:?} is inside of character {:?}", target, ch);
364                }
365                break;
366            }
367
368            if ch == '\n' {
369                point.row += 1;
370                point.column = 0;
371            } else {
372                point.column += ch.len_utf8() as u32;
373            }
374            offset += ch.len_utf8();
375        }
376        offset
377    }
378
379    fn clip_point(&self, target: Point, bias: Bias) -> Point {
380        for (row, line) in self.0.split('\n').enumerate() {
381            if row == target.row as usize {
382                let mut column = target.column.min(line.len() as u32);
383                while !line.is_char_boundary(column as usize) {
384                    match bias {
385                        Bias::Left => column -= 1,
386                        Bias::Right => column += 1,
387                    }
388                }
389                return Point::new(row as u32, column);
390            }
391        }
392        unreachable!()
393    }
394}
395
396impl sum_tree::Item for Chunk {
397    type Summary = TextSummary;
398
399    fn summary(&self) -> Self::Summary {
400        TextSummary::from(self.0.as_str())
401    }
402}
403
404#[derive(Clone, Debug, Default, Eq, PartialEq)]
405pub struct TextSummary {
406    pub bytes: usize,
407    pub lines: Point,
408    pub first_line_chars: u32,
409    pub last_line_chars: u32,
410    pub longest_row: u32,
411    pub longest_row_chars: u32,
412}
413
414impl<'a> From<&'a str> for TextSummary {
415    fn from(text: &'a str) -> Self {
416        let mut lines = Point::new(0, 0);
417        let mut first_line_chars = 0;
418        let mut last_line_chars = 0;
419        let mut longest_row = 0;
420        let mut longest_row_chars = 0;
421        for c in text.chars() {
422            if c == '\n' {
423                lines.row += 1;
424                lines.column = 0;
425                last_line_chars = 0;
426            } else {
427                lines.column += c.len_utf8() as u32;
428                last_line_chars += 1;
429            }
430
431            if lines.row == 0 {
432                first_line_chars = last_line_chars;
433            }
434
435            if last_line_chars > longest_row_chars {
436                longest_row = lines.row;
437                longest_row_chars = last_line_chars;
438            }
439        }
440
441        TextSummary {
442            bytes: text.len(),
443            lines,
444            first_line_chars,
445            last_line_chars,
446            longest_row,
447            longest_row_chars,
448        }
449    }
450}
451
452impl sum_tree::Summary for TextSummary {
453    type Context = ();
454
455    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
456        *self += summary;
457    }
458}
459
460impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
461    fn add_assign(&mut self, other: &'a Self) {
462        let joined_chars = self.last_line_chars + other.first_line_chars;
463        if joined_chars > self.longest_row_chars {
464            self.longest_row = self.lines.row;
465            self.longest_row_chars = joined_chars;
466        }
467        if other.longest_row_chars > self.longest_row_chars {
468            self.longest_row = self.lines.row + other.longest_row;
469            self.longest_row_chars = other.longest_row_chars;
470        }
471
472        if self.lines.row == 0 {
473            self.first_line_chars += other.first_line_chars;
474        }
475
476        if other.lines.row == 0 {
477            self.last_line_chars += other.first_line_chars;
478        } else {
479            self.last_line_chars = other.last_line_chars;
480        }
481
482        self.bytes += other.bytes;
483        self.lines += &other.lines;
484    }
485}
486
487impl std::ops::AddAssign<Self> for TextSummary {
488    fn add_assign(&mut self, other: Self) {
489        *self += &other;
490    }
491}
492
493impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
494    fn add_summary(&mut self, summary: &'a TextSummary, _: &()) {
495        *self += summary.bytes;
496    }
497}
498
499impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
500    fn add_summary(&mut self, summary: &'a TextSummary, _: &()) {
501        *self += &summary.lines;
502    }
503}
504
505fn find_split_ix(text: &str) -> usize {
506    let mut ix = text.len() / 2;
507    while !text.is_char_boundary(ix) {
508        if ix < 2 * CHUNK_BASE {
509            ix += 1;
510        } else {
511            ix = (text.len() / 2) - 1;
512            break;
513        }
514    }
515    while !text.is_char_boundary(ix) {
516        ix -= 1;
517    }
518
519    debug_assert!(ix <= 2 * CHUNK_BASE);
520    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
521    ix
522}
523
524#[cfg(test)]
525mod tests {
526    use super::*;
527    use crate::random_char_iter::RandomCharIter;
528    use rand::prelude::*;
529    use std::env;
530    use Bias::{Left, Right};
531
532    #[test]
533    fn test_all_4_byte_chars() {
534        let mut rope = Rope::new();
535        let text = "🏀".repeat(256);
536        rope.push(&text);
537        assert_eq!(rope.text(), text);
538    }
539
540    #[gpui::test(iterations = 100)]
541    fn test_random(mut rng: StdRng) {
542        let operations = env::var("OPERATIONS")
543            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
544            .unwrap_or(10);
545
546        let mut expected = String::new();
547        let mut actual = Rope::new();
548        for _ in 0..operations {
549            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
550            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
551            let len = rng.gen_range(0..=64);
552            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
553
554            let mut new_actual = Rope::new();
555            let mut cursor = actual.cursor(0);
556            new_actual.append(cursor.slice(start_ix));
557            new_actual.push(&new_text);
558            cursor.seek_forward(end_ix);
559            new_actual.append(cursor.suffix());
560            actual = new_actual;
561
562            expected.replace_range(start_ix..end_ix, &new_text);
563
564            assert_eq!(actual.text(), expected);
565            log::info!("text: {:?}", expected);
566
567            for _ in 0..5 {
568                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
569                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
570                assert_eq!(
571                    actual.chunks_in_range(start_ix..end_ix).collect::<String>(),
572                    &expected[start_ix..end_ix]
573                );
574            }
575
576            let mut point = Point::new(0, 0);
577            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
578                assert_eq!(actual.to_point(ix), point, "to_point({})", ix);
579                assert_eq!(actual.to_offset(point), ix, "to_offset({:?})", point);
580                if ch == '\n' {
581                    point.row += 1;
582                    point.column = 0
583                } else {
584                    point.column += ch.len_utf8() as u32;
585                }
586            }
587
588            for _ in 0..5 {
589                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
590                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
591                assert_eq!(
592                    actual.cursor(start_ix).summary(end_ix),
593                    TextSummary::from(&expected[start_ix..end_ix])
594                );
595            }
596        }
597    }
598
599    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
600        while !text.is_char_boundary(offset) {
601            match bias {
602                Bias::Left => offset -= 1,
603                Bias::Right => offset += 1,
604            }
605        }
606        offset
607    }
608
609    impl Rope {
610        fn text(&self) -> String {
611            let mut text = String::new();
612            for chunk in self.chunks.cursor::<()>() {
613                text.push_str(&chunk.0);
614            }
615            text
616        }
617    }
618}