rope.rs

   1use crate::PointUtf16;
   2
   3use super::Point;
   4use arrayvec::ArrayString;
   5use bromberg_sl2::{DigestString, HashMatrix};
   6use smallvec::SmallVec;
   7use std::{cmp, fmt, io, mem, ops::Range, str};
   8use sum_tree::{Bias, Dimension, SumTree};
   9
  10#[cfg(test)]
  11const CHUNK_BASE: usize = 6;
  12
  13#[cfg(not(test))]
  14const CHUNK_BASE: usize = 16;
  15
  16#[derive(Clone, Default, Debug)]
  17pub struct Rope {
  18    chunks: SumTree<Chunk>,
  19}
  20
  21impl Rope {
  22    pub fn new() -> Self {
  23        Self::default()
  24    }
  25
  26    pub fn append(&mut self, rope: Rope) {
  27        let mut chunks = rope.chunks.cursor::<()>();
  28        chunks.next(&());
  29        if let Some(chunk) = chunks.item() {
  30            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  31                || chunk.0.len() < CHUNK_BASE
  32            {
  33                self.push(&chunk.0);
  34                chunks.next(&());
  35            }
  36        }
  37
  38        self.chunks.push_tree(chunks.suffix(&()), &());
  39        self.check_invariants();
  40    }
  41
  42    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  43        let mut new_rope = Rope::new();
  44        let mut cursor = self.cursor(0);
  45        new_rope.append(cursor.slice(range.start));
  46        cursor.seek_forward(range.end);
  47        new_rope.push(text);
  48        new_rope.append(cursor.suffix());
  49        *self = new_rope;
  50    }
  51
  52    pub fn slice(&self, range: Range<usize>) -> Rope {
  53        let mut cursor = self.cursor(0);
  54        cursor.seek_forward(range.start);
  55        cursor.slice(range.end)
  56    }
  57
  58    pub fn push(&mut self, text: &str) {
  59        let mut new_chunks = SmallVec::<[_; 16]>::new();
  60        let mut new_chunk = ArrayString::new();
  61        let mut chars = text.chars().peekable();
  62        while let Some(mut ch) = chars.next() {
  63            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  64                new_chunks.push(Chunk(new_chunk));
  65                new_chunk = ArrayString::new();
  66            }
  67
  68            if ch == '\r' {
  69                ch = '\n';
  70                if chars.peek().copied() == Some('\n') {
  71                    chars.next();
  72                }
  73            }
  74            new_chunk.push(ch);
  75        }
  76        if !new_chunk.is_empty() {
  77            new_chunks.push(Chunk(new_chunk));
  78        }
  79
  80        let mut new_chunks = new_chunks.into_iter();
  81        let mut first_new_chunk = new_chunks.next();
  82        self.chunks.update_last(
  83            |last_chunk| {
  84                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
  85                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
  86                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
  87                    } else {
  88                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
  89                        text.push_str(&last_chunk.0);
  90                        text.push_str(&first_new_chunk_ref.0);
  91                        let (left, right) = text.split_at(find_split_ix(&text));
  92                        last_chunk.0.clear();
  93                        last_chunk.0.push_str(left);
  94                        first_new_chunk_ref.0.clear();
  95                        first_new_chunk_ref.0.push_str(right);
  96                    }
  97                }
  98            },
  99            &(),
 100        );
 101
 102        self.chunks
 103            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 104        self.check_invariants();
 105    }
 106
 107    pub fn push_front(&mut self, text: &str) {
 108        let suffix = mem::replace(self, Rope::from(text));
 109        self.append(suffix);
 110    }
 111
 112    fn check_invariants(&self) {
 113        #[cfg(test)]
 114        {
 115            // Ensure all chunks except maybe the last one are not underflowing.
 116            // Allow some wiggle room for multibyte characters at chunk boundaries.
 117            let mut chunks = self.chunks.cursor::<()>().peekable();
 118            while let Some(chunk) = chunks.next() {
 119                if chunks.peek().is_some() {
 120                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 121                }
 122            }
 123        }
 124    }
 125
 126    pub fn summary(&self) -> TextSummary {
 127        self.chunks.summary().text.clone()
 128    }
 129
 130    pub fn len(&self) -> usize {
 131        self.chunks.extent(&())
 132    }
 133
 134    pub fn max_point(&self) -> Point {
 135        self.chunks.extent(&())
 136    }
 137
 138    pub fn max_point_utf16(&self) -> PointUtf16 {
 139        self.chunks.extent(&())
 140    }
 141
 142    pub fn cursor(&self, offset: usize) -> Cursor {
 143        Cursor::new(self, offset)
 144    }
 145
 146    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 147        self.chars_at(0)
 148    }
 149
 150    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 151        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 152    }
 153
 154    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 155        self.reversed_chunks_in_range(0..start)
 156            .flat_map(|chunk| chunk.chars().rev())
 157    }
 158
 159    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 160        Bytes::new(self, range)
 161    }
 162
 163    pub fn chunks<'a>(&'a self) -> Chunks<'a> {
 164        self.chunks_in_range(0..self.len())
 165    }
 166
 167    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 168        Chunks::new(self, range, false)
 169    }
 170
 171    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 172        Chunks::new(self, range, true)
 173    }
 174
 175    pub fn offset_to_point(&self, offset: usize) -> Point {
 176        if offset >= self.summary().bytes {
 177            return self.summary().lines;
 178        }
 179        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 180        cursor.seek(&offset, Bias::Left, &());
 181        let overshoot = offset - cursor.start().0;
 182        cursor.start().1
 183            + cursor
 184                .item()
 185                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 186    }
 187
 188    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 189        if offset >= self.summary().bytes {
 190            return self.summary().lines_utf16;
 191        }
 192        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 193        cursor.seek(&offset, Bias::Left, &());
 194        let overshoot = offset - cursor.start().0;
 195        cursor.start().1
 196            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 197                chunk.offset_to_point_utf16(overshoot)
 198            })
 199    }
 200
 201    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 202        if point >= self.summary().lines {
 203            return self.summary().lines_utf16;
 204        }
 205        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 206        cursor.seek(&point, Bias::Left, &());
 207        let overshoot = point - cursor.start().0;
 208        cursor.start().1
 209            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 210                chunk.point_to_point_utf16(overshoot)
 211            })
 212    }
 213
 214    pub fn point_to_offset(&self, point: Point) -> usize {
 215        if point >= self.summary().lines {
 216            return self.summary().bytes;
 217        }
 218        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 219        cursor.seek(&point, Bias::Left, &());
 220        let overshoot = point - cursor.start().0;
 221        cursor.start().1
 222            + cursor
 223                .item()
 224                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 225    }
 226
 227    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 228        if point >= self.summary().lines_utf16 {
 229            return self.summary().bytes;
 230        }
 231        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 232        cursor.seek(&point, Bias::Left, &());
 233        let overshoot = point - cursor.start().0;
 234        cursor.start().1
 235            + cursor
 236                .item()
 237                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
 238    }
 239
 240    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 241        if point >= self.summary().lines_utf16 {
 242            return self.summary().lines;
 243        }
 244        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 245        cursor.seek(&point, Bias::Left, &());
 246        let overshoot = point - cursor.start().0;
 247        cursor.start().1
 248            + cursor
 249                .item()
 250                .map_or(Point::zero(), |chunk| chunk.point_utf16_to_point(overshoot))
 251    }
 252
 253    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 254        let mut cursor = self.chunks.cursor::<usize>();
 255        cursor.seek(&offset, Bias::Left, &());
 256        if let Some(chunk) = cursor.item() {
 257            let mut ix = offset - cursor.start();
 258            while !chunk.0.is_char_boundary(ix) {
 259                match bias {
 260                    Bias::Left => {
 261                        ix -= 1;
 262                        offset -= 1;
 263                    }
 264                    Bias::Right => {
 265                        ix += 1;
 266                        offset += 1;
 267                    }
 268                }
 269            }
 270            offset
 271        } else {
 272            self.summary().bytes
 273        }
 274    }
 275
 276    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 277        let mut cursor = self.chunks.cursor::<Point>();
 278        cursor.seek(&point, Bias::Right, &());
 279        if let Some(chunk) = cursor.item() {
 280            let overshoot = point - cursor.start();
 281            *cursor.start() + chunk.clip_point(overshoot, bias)
 282        } else {
 283            self.summary().lines
 284        }
 285    }
 286
 287    pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 {
 288        let mut cursor = self.chunks.cursor::<PointUtf16>();
 289        cursor.seek(&point, Bias::Right, &());
 290        if let Some(chunk) = cursor.item() {
 291            let overshoot = point - cursor.start();
 292            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 293        } else {
 294            self.summary().lines_utf16
 295        }
 296    }
 297
 298    pub fn line_len(&self, row: u32) -> u32 {
 299        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 300            .column
 301    }
 302
 303    pub fn fingerprint(&self) -> String {
 304        self.chunks.summary().fingerprint.to_hex()
 305    }
 306}
 307
 308impl<'a> From<&'a str> for Rope {
 309    fn from(text: &'a str) -> Self {
 310        let mut rope = Self::new();
 311        rope.push(text);
 312        rope
 313    }
 314}
 315
 316impl fmt::Display for Rope {
 317    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 318        for chunk in self.chunks() {
 319            write!(f, "{}", chunk)?;
 320        }
 321        Ok(())
 322    }
 323}
 324
 325pub struct Cursor<'a> {
 326    rope: &'a Rope,
 327    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 328    offset: usize,
 329}
 330
 331impl<'a> Cursor<'a> {
 332    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 333        let mut chunks = rope.chunks.cursor();
 334        chunks.seek(&offset, Bias::Right, &());
 335        Self {
 336            rope,
 337            chunks,
 338            offset,
 339        }
 340    }
 341
 342    pub fn seek_forward(&mut self, end_offset: usize) {
 343        debug_assert!(end_offset >= self.offset);
 344
 345        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 346        self.offset = end_offset;
 347    }
 348
 349    pub fn slice(&mut self, end_offset: usize) -> Rope {
 350        debug_assert!(
 351            end_offset >= self.offset,
 352            "cannot slice backwards from {} to {}",
 353            self.offset,
 354            end_offset
 355        );
 356
 357        let mut slice = Rope::new();
 358        if let Some(start_chunk) = self.chunks.item() {
 359            let start_ix = self.offset - self.chunks.start();
 360            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 361            slice.push(&start_chunk.0[start_ix..end_ix]);
 362        }
 363
 364        if end_offset > self.chunks.end(&()) {
 365            self.chunks.next(&());
 366            slice.append(Rope {
 367                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 368            });
 369            if let Some(end_chunk) = self.chunks.item() {
 370                let end_ix = end_offset - self.chunks.start();
 371                slice.push(&end_chunk.0[..end_ix]);
 372            }
 373        }
 374
 375        self.offset = end_offset;
 376        slice
 377    }
 378
 379    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 380        debug_assert!(end_offset >= self.offset);
 381
 382        let mut summary = D::default();
 383        if let Some(start_chunk) = self.chunks.item() {
 384            let start_ix = self.offset - self.chunks.start();
 385            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 386            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 387                &start_chunk.0[start_ix..end_ix],
 388            )));
 389        }
 390
 391        if end_offset > self.chunks.end(&()) {
 392            self.chunks.next(&());
 393            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 394            if let Some(end_chunk) = self.chunks.item() {
 395                let end_ix = end_offset - self.chunks.start();
 396                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 397                    &end_chunk.0[..end_ix],
 398                )));
 399            }
 400        }
 401
 402        self.offset = end_offset;
 403        summary
 404    }
 405
 406    pub fn suffix(mut self) -> Rope {
 407        self.slice(self.rope.chunks.extent(&()))
 408    }
 409
 410    pub fn offset(&self) -> usize {
 411        self.offset
 412    }
 413}
 414
 415pub struct Chunks<'a> {
 416    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 417    range: Range<usize>,
 418    reversed: bool,
 419}
 420
 421impl<'a> Chunks<'a> {
 422    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 423        let mut chunks = rope.chunks.cursor();
 424        if reversed {
 425            chunks.seek(&range.end, Bias::Left, &());
 426        } else {
 427            chunks.seek(&range.start, Bias::Right, &());
 428        }
 429        Self {
 430            chunks,
 431            range,
 432            reversed,
 433        }
 434    }
 435
 436    pub fn offset(&self) -> usize {
 437        if self.reversed {
 438            self.range.end.min(self.chunks.end(&()))
 439        } else {
 440            self.range.start.max(*self.chunks.start())
 441        }
 442    }
 443
 444    pub fn seek(&mut self, offset: usize) {
 445        let bias = if self.reversed {
 446            Bias::Left
 447        } else {
 448            Bias::Right
 449        };
 450
 451        if offset >= self.chunks.end(&()) {
 452            self.chunks.seek_forward(&offset, bias, &());
 453        } else {
 454            self.chunks.seek(&offset, bias, &());
 455        }
 456
 457        if self.reversed {
 458            self.range.end = offset;
 459        } else {
 460            self.range.start = offset;
 461        }
 462    }
 463
 464    pub fn peek(&self) -> Option<&'a str> {
 465        let chunk = self.chunks.item()?;
 466        if self.reversed && self.range.start >= self.chunks.end(&()) {
 467            return None;
 468        }
 469        let chunk_start = *self.chunks.start();
 470        if self.range.end <= chunk_start {
 471            return None;
 472        }
 473
 474        let start = self.range.start.saturating_sub(chunk_start);
 475        let end = self.range.end - chunk_start;
 476        Some(&chunk.0[start..chunk.0.len().min(end)])
 477    }
 478}
 479
 480impl<'a> Iterator for Chunks<'a> {
 481    type Item = &'a str;
 482
 483    fn next(&mut self) -> Option<Self::Item> {
 484        let result = self.peek();
 485        if result.is_some() {
 486            if self.reversed {
 487                self.chunks.prev(&());
 488            } else {
 489                self.chunks.next(&());
 490            }
 491        }
 492        result
 493    }
 494}
 495
 496pub struct Bytes<'a> {
 497    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 498    range: Range<usize>,
 499}
 500
 501impl<'a> Bytes<'a> {
 502    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
 503        let mut chunks = rope.chunks.cursor();
 504        chunks.seek(&range.start, Bias::Right, &());
 505        Self { chunks, range }
 506    }
 507
 508    pub fn peek(&self) -> Option<&'a [u8]> {
 509        let chunk = self.chunks.item()?;
 510        let chunk_start = *self.chunks.start();
 511        if self.range.end <= chunk_start {
 512            return None;
 513        }
 514
 515        let start = self.range.start.saturating_sub(chunk_start);
 516        let end = self.range.end - chunk_start;
 517        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 518    }
 519}
 520
 521impl<'a> Iterator for Bytes<'a> {
 522    type Item = &'a [u8];
 523
 524    fn next(&mut self) -> Option<Self::Item> {
 525        let result = self.peek();
 526        if result.is_some() {
 527            self.chunks.next(&());
 528        }
 529        result
 530    }
 531}
 532
 533impl<'a> io::Read for Bytes<'a> {
 534    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 535        if let Some(chunk) = self.peek() {
 536            let len = cmp::min(buf.len(), chunk.len());
 537            buf[..len].copy_from_slice(&chunk[..len]);
 538            self.range.start += len;
 539            if len == chunk.len() {
 540                self.chunks.next(&());
 541            }
 542            Ok(len)
 543        } else {
 544            Ok(0)
 545        }
 546    }
 547}
 548
 549#[derive(Clone, Debug, Default)]
 550struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 551
 552impl Chunk {
 553    fn offset_to_point(&self, target: usize) -> Point {
 554        let mut offset = 0;
 555        let mut point = Point::new(0, 0);
 556        for ch in self.0.chars() {
 557            if offset >= target {
 558                break;
 559            }
 560
 561            if ch == '\n' {
 562                point.row += 1;
 563                point.column = 0;
 564            } else {
 565                point.column += ch.len_utf8() as u32;
 566            }
 567            offset += ch.len_utf8();
 568        }
 569        point
 570    }
 571
 572    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 573        let mut offset = 0;
 574        let mut point = PointUtf16::new(0, 0);
 575        for ch in self.0.chars() {
 576            if offset >= target {
 577                break;
 578            }
 579
 580            if ch == '\n' {
 581                point.row += 1;
 582                point.column = 0;
 583            } else {
 584                point.column += ch.len_utf16() as u32;
 585            }
 586            offset += ch.len_utf8();
 587        }
 588        point
 589    }
 590
 591    fn point_to_offset(&self, target: Point) -> usize {
 592        let mut offset = 0;
 593        let mut point = Point::new(0, 0);
 594        for ch in self.0.chars() {
 595            if point >= target {
 596                if point > target {
 597                    panic!("point {:?} is inside of character {:?}", target, ch);
 598                }
 599                break;
 600            }
 601
 602            if ch == '\n' {
 603                point.row += 1;
 604                if point.row > target.row {
 605                    panic!(
 606                        "point {:?} is beyond the end of a line with length {}",
 607                        target, point.column
 608                    );
 609                }
 610                point.column = 0;
 611            } else {
 612                point.column += ch.len_utf8() as u32;
 613            }
 614            offset += ch.len_utf8();
 615        }
 616        offset
 617    }
 618
 619    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 620        let mut point = Point::zero();
 621        let mut point_utf16 = PointUtf16::new(0, 0);
 622        for ch in self.0.chars() {
 623            if point >= target {
 624                break;
 625            }
 626
 627            if ch == '\n' {
 628                point_utf16.row += 1;
 629                point_utf16.column = 0;
 630                point.row += 1;
 631                point.column = 0;
 632            } else {
 633                point_utf16.column += ch.len_utf16() as u32;
 634                point.column += ch.len_utf8() as u32;
 635            }
 636        }
 637        point_utf16
 638    }
 639
 640    fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
 641        let mut offset = 0;
 642        let mut point = PointUtf16::new(0, 0);
 643        for ch in self.0.chars() {
 644            if point >= target {
 645                if point > target {
 646                    panic!("point {:?} is inside of character {:?}", target, ch);
 647                }
 648                break;
 649            }
 650
 651            if ch == '\n' {
 652                point.row += 1;
 653                if point.row > target.row {
 654                    panic!(
 655                        "point {:?} is beyond the end of a line with length {}",
 656                        target, point.column
 657                    );
 658                }
 659                point.column = 0;
 660            } else {
 661                point.column += ch.len_utf16() as u32;
 662            }
 663            offset += ch.len_utf8();
 664        }
 665        offset
 666    }
 667
 668    fn point_utf16_to_point(&self, target: PointUtf16) -> Point {
 669        let mut point = Point::zero();
 670        let mut point_utf16 = PointUtf16::zero();
 671        for ch in self.0.chars() {
 672            if point_utf16 >= target {
 673                if point_utf16 > target {
 674                    panic!("point {:?} is inside of character {:?}", target, ch);
 675                }
 676                break;
 677            }
 678
 679            if ch == '\n' {
 680                point_utf16 += PointUtf16::new(1, 0);
 681                point += Point::new(1, 0);
 682            } else {
 683                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 684                point += Point::new(0, ch.len_utf8() as u32);
 685            }
 686        }
 687        point
 688    }
 689
 690    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 691        for (row, line) in self.0.split('\n').enumerate() {
 692            if row == target.row as usize {
 693                let mut column = target.column.min(line.len() as u32);
 694                while !line.is_char_boundary(column as usize) {
 695                    match bias {
 696                        Bias::Left => column -= 1,
 697                        Bias::Right => column += 1,
 698                    }
 699                }
 700                return Point::new(row as u32, column);
 701            }
 702        }
 703        unreachable!()
 704    }
 705
 706    fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 {
 707        for (row, line) in self.0.split('\n').enumerate() {
 708            if row == target.row as usize {
 709                let mut code_units = line.encode_utf16();
 710                let mut column = code_units.by_ref().take(target.column as usize).count();
 711                if char::decode_utf16(code_units).next().transpose().is_err() {
 712                    match bias {
 713                        Bias::Left => column -= 1,
 714                        Bias::Right => column += 1,
 715                    }
 716                }
 717                return PointUtf16::new(row as u32, column as u32);
 718            }
 719        }
 720        unreachable!()
 721    }
 722}
 723
 724impl sum_tree::Item for Chunk {
 725    type Summary = ChunkSummary;
 726
 727    fn summary(&self) -> Self::Summary {
 728        ChunkSummary::from(self.0.as_str())
 729    }
 730}
 731
 732#[derive(Clone, Debug, Default, Eq, PartialEq)]
 733pub struct ChunkSummary {
 734    text: TextSummary,
 735    fingerprint: HashMatrix,
 736}
 737
 738impl<'a> From<&'a str> for ChunkSummary {
 739    fn from(text: &'a str) -> Self {
 740        Self {
 741            text: TextSummary::from(text),
 742            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
 743        }
 744    }
 745}
 746
 747impl sum_tree::Summary for ChunkSummary {
 748    type Context = ();
 749
 750    fn add_summary(&mut self, summary: &Self, _: &()) {
 751        self.text += &summary.text;
 752        self.fingerprint = self.fingerprint * summary.fingerprint;
 753    }
 754}
 755
 756#[derive(Clone, Debug, Default, Eq, PartialEq)]
 757pub struct TextSummary {
 758    pub bytes: usize,
 759    pub lines: Point,
 760    pub lines_utf16: PointUtf16,
 761    pub first_line_chars: u32,
 762    pub last_line_chars: u32,
 763    pub longest_row: u32,
 764    pub longest_row_chars: u32,
 765}
 766
 767impl<'a> From<&'a str> for TextSummary {
 768    fn from(text: &'a str) -> Self {
 769        let mut lines = Point::new(0, 0);
 770        let mut lines_utf16 = PointUtf16::new(0, 0);
 771        let mut first_line_chars = 0;
 772        let mut last_line_chars = 0;
 773        let mut longest_row = 0;
 774        let mut longest_row_chars = 0;
 775        for c in text.chars() {
 776            if c == '\n' {
 777                lines += Point::new(1, 0);
 778                lines_utf16 += PointUtf16::new(1, 0);
 779                last_line_chars = 0;
 780            } else {
 781                lines.column += c.len_utf8() as u32;
 782                lines_utf16.column += c.len_utf16() as u32;
 783                last_line_chars += 1;
 784            }
 785
 786            if lines.row == 0 {
 787                first_line_chars = last_line_chars;
 788            }
 789
 790            if last_line_chars > longest_row_chars {
 791                longest_row = lines.row;
 792                longest_row_chars = last_line_chars;
 793            }
 794        }
 795
 796        TextSummary {
 797            bytes: text.len(),
 798            lines,
 799            lines_utf16,
 800            first_line_chars,
 801            last_line_chars,
 802            longest_row,
 803            longest_row_chars,
 804        }
 805    }
 806}
 807
 808impl sum_tree::Summary for TextSummary {
 809    type Context = ();
 810
 811    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
 812        *self += summary;
 813    }
 814}
 815
 816impl<'a> std::ops::Add<Self> for TextSummary {
 817    type Output = Self;
 818
 819    fn add(mut self, rhs: Self) -> Self::Output {
 820        self.add_assign(&rhs);
 821        self
 822    }
 823}
 824
 825impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 826    fn add_assign(&mut self, other: &'a Self) {
 827        let joined_chars = self.last_line_chars + other.first_line_chars;
 828        if joined_chars > self.longest_row_chars {
 829            self.longest_row = self.lines.row;
 830            self.longest_row_chars = joined_chars;
 831        }
 832        if other.longest_row_chars > self.longest_row_chars {
 833            self.longest_row = self.lines.row + other.longest_row;
 834            self.longest_row_chars = other.longest_row_chars;
 835        }
 836
 837        if self.lines.row == 0 {
 838            self.first_line_chars += other.first_line_chars;
 839        }
 840
 841        if other.lines.row == 0 {
 842            self.last_line_chars += other.first_line_chars;
 843        } else {
 844            self.last_line_chars = other.last_line_chars;
 845        }
 846
 847        self.bytes += other.bytes;
 848        self.lines += other.lines;
 849        self.lines_utf16 += other.lines_utf16;
 850    }
 851}
 852
 853impl std::ops::AddAssign<Self> for TextSummary {
 854    fn add_assign(&mut self, other: Self) {
 855        *self += &other;
 856    }
 857}
 858
 859pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
 860    fn from_text_summary(summary: &TextSummary) -> Self;
 861    fn add_assign(&mut self, other: &Self);
 862}
 863
 864impl<'a, D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
 865    fn from_text_summary(summary: &TextSummary) -> Self {
 866        (
 867            D1::from_text_summary(summary),
 868            D2::from_text_summary(summary),
 869        )
 870    }
 871
 872    fn add_assign(&mut self, other: &Self) {
 873        self.0.add_assign(&other.0);
 874        self.1.add_assign(&other.1);
 875    }
 876}
 877
 878impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
 879    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 880        *self += &summary.text;
 881    }
 882}
 883
 884impl TextDimension for TextSummary {
 885    fn from_text_summary(summary: &TextSummary) -> Self {
 886        summary.clone()
 887    }
 888
 889    fn add_assign(&mut self, other: &Self) {
 890        *self += other;
 891    }
 892}
 893
 894impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
 895    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 896        *self += summary.text.bytes;
 897    }
 898}
 899
 900impl TextDimension for usize {
 901    fn from_text_summary(summary: &TextSummary) -> Self {
 902        summary.bytes
 903    }
 904
 905    fn add_assign(&mut self, other: &Self) {
 906        *self += other;
 907    }
 908}
 909
 910impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
 911    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 912        *self += summary.text.lines;
 913    }
 914}
 915
 916impl TextDimension for Point {
 917    fn from_text_summary(summary: &TextSummary) -> Self {
 918        summary.lines
 919    }
 920
 921    fn add_assign(&mut self, other: &Self) {
 922        *self += other;
 923    }
 924}
 925
 926impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
 927    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 928        *self += summary.text.lines_utf16;
 929    }
 930}
 931
 932impl TextDimension for PointUtf16 {
 933    fn from_text_summary(summary: &TextSummary) -> Self {
 934        summary.lines_utf16
 935    }
 936
 937    fn add_assign(&mut self, other: &Self) {
 938        *self += other;
 939    }
 940}
 941
 942fn find_split_ix(text: &str) -> usize {
 943    let mut ix = text.len() / 2;
 944    while !text.is_char_boundary(ix) {
 945        if ix < 2 * CHUNK_BASE {
 946            ix += 1;
 947        } else {
 948            ix = (text.len() / 2) - 1;
 949            break;
 950        }
 951    }
 952    while !text.is_char_boundary(ix) {
 953        ix -= 1;
 954    }
 955
 956    debug_assert!(ix <= 2 * CHUNK_BASE);
 957    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
 958    ix
 959}
 960
 961#[cfg(test)]
 962mod tests {
 963    use super::*;
 964    use crate::random_char_iter::RandomCharIter;
 965    use rand::prelude::*;
 966    use std::{cmp::Ordering, env, io::Read};
 967    use Bias::{Left, Right};
 968
 969    #[test]
 970    fn test_all_4_byte_chars() {
 971        let mut rope = Rope::new();
 972        let text = "🏀".repeat(256);
 973        rope.push(&text);
 974        assert_eq!(rope.text(), text);
 975    }
 976
 977    #[test]
 978    fn test_clip() {
 979        let rope = Rope::from("🧘");
 980
 981        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
 982        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
 983        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
 984
 985        assert_eq!(
 986            rope.clip_point(Point::new(0, 1), Bias::Left),
 987            Point::new(0, 0)
 988        );
 989        assert_eq!(
 990            rope.clip_point(Point::new(0, 1), Bias::Right),
 991            Point::new(0, 4)
 992        );
 993        assert_eq!(
 994            rope.clip_point(Point::new(0, 5), Bias::Right),
 995            Point::new(0, 4)
 996        );
 997
 998        assert_eq!(
 999            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left),
1000            PointUtf16::new(0, 0)
1001        );
1002        assert_eq!(
1003            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right),
1004            PointUtf16::new(0, 2)
1005        );
1006        assert_eq!(
1007            rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
1008            PointUtf16::new(0, 2)
1009        );
1010    }
1011
1012    #[gpui::test(iterations = 100)]
1013    fn test_random_rope(mut rng: StdRng) {
1014        let operations = env::var("OPERATIONS")
1015            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1016            .unwrap_or(10);
1017
1018        let mut expected = String::new();
1019        let mut actual = Rope::new();
1020        for _ in 0..operations {
1021            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1022            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1023            let len = rng.gen_range(0..=64);
1024            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1025
1026            let mut new_actual = Rope::new();
1027            let mut cursor = actual.cursor(0);
1028            new_actual.append(cursor.slice(start_ix));
1029            new_actual.push(&new_text);
1030            cursor.seek_forward(end_ix);
1031            new_actual.append(cursor.suffix());
1032            actual = new_actual;
1033
1034            expected.replace_range(start_ix..end_ix, &new_text);
1035
1036            assert_eq!(actual.text(), expected);
1037            log::info!("text: {:?}", expected);
1038
1039            for _ in 0..5 {
1040                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1041                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1042
1043                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1044                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1045
1046                let mut actual_text = String::new();
1047                actual
1048                    .bytes_in_range(start_ix..end_ix)
1049                    .read_to_string(&mut actual_text)
1050                    .unwrap();
1051                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1052
1053                assert_eq!(
1054                    actual
1055                        .reversed_chunks_in_range(start_ix..end_ix)
1056                        .collect::<Vec<&str>>()
1057                        .into_iter()
1058                        .rev()
1059                        .collect::<String>(),
1060                    &expected[start_ix..end_ix]
1061                );
1062            }
1063
1064            let mut point = Point::new(0, 0);
1065            let mut point_utf16 = PointUtf16::new(0, 0);
1066            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1067                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1068                assert_eq!(
1069                    actual.offset_to_point_utf16(ix),
1070                    point_utf16,
1071                    "offset_to_point_utf16({})",
1072                    ix
1073                );
1074                assert_eq!(
1075                    actual.point_to_offset(point),
1076                    ix,
1077                    "point_to_offset({:?})",
1078                    point
1079                );
1080                assert_eq!(
1081                    actual.point_utf16_to_offset(point_utf16),
1082                    ix,
1083                    "point_utf16_to_offset({:?})",
1084                    point_utf16
1085                );
1086                if ch == '\n' {
1087                    point += Point::new(1, 0);
1088                    point_utf16 += PointUtf16::new(1, 0);
1089                } else {
1090                    point.column += ch.len_utf8() as u32;
1091                    point_utf16.column += ch.len_utf16() as u32;
1092                }
1093            }
1094
1095            let mut point_utf16 = PointUtf16::zero();
1096            for unit in expected.encode_utf16() {
1097                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1098                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1099                assert!(right_point >= left_point);
1100                // Ensure translating UTF-16 points to offsets doesn't panic.
1101                actual.point_utf16_to_offset(left_point);
1102                actual.point_utf16_to_offset(right_point);
1103
1104                if unit == b'\n' as u16 {
1105                    point_utf16 += PointUtf16::new(1, 0);
1106                } else {
1107                    point_utf16 += PointUtf16::new(0, 1);
1108                }
1109            }
1110
1111            for _ in 0..5 {
1112                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1113                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1114                assert_eq!(
1115                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1116                    TextSummary::from(&expected[start_ix..end_ix])
1117                );
1118            }
1119
1120            let mut expected_longest_rows = Vec::new();
1121            let mut longest_line_len = -1_isize;
1122            for (row, line) in expected.split('\n').enumerate() {
1123                let row = row as u32;
1124                assert_eq!(
1125                    actual.line_len(row),
1126                    line.len() as u32,
1127                    "invalid line len for row {}",
1128                    row
1129                );
1130
1131                let line_char_count = line.chars().count() as isize;
1132                match line_char_count.cmp(&longest_line_len) {
1133                    Ordering::Less => {}
1134                    Ordering::Equal => expected_longest_rows.push(row),
1135                    Ordering::Greater => {
1136                        longest_line_len = line_char_count;
1137                        expected_longest_rows.clear();
1138                        expected_longest_rows.push(row);
1139                    }
1140                }
1141            }
1142
1143            let longest_row = actual.summary().longest_row;
1144            assert!(
1145                expected_longest_rows.contains(&longest_row),
1146                "incorrect longest row {}. expected {:?} with length {}",
1147                longest_row,
1148                expected_longest_rows,
1149                longest_line_len,
1150            );
1151        }
1152    }
1153
1154    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1155        while !text.is_char_boundary(offset) {
1156            match bias {
1157                Bias::Left => offset -= 1,
1158                Bias::Right => offset += 1,
1159            }
1160        }
1161        offset
1162    }
1163
1164    impl Rope {
1165        fn text(&self) -> String {
1166            let mut text = String::new();
1167            for chunk in self.chunks.cursor::<()>() {
1168                text.push_str(&chunk.0);
1169            }
1170            text
1171        }
1172    }
1173}