rope.rs

   1use super::Point;
   2use crate::{OffsetUtf16, PointUtf16};
   3use arrayvec::ArrayString;
   4use bromberg_sl2::{DigestString, HashMatrix};
   5use smallvec::SmallVec;
   6use std::{cmp, fmt, io, mem, ops::Range, str};
   7use sum_tree::{Bias, Dimension, SumTree};
   8
   9#[cfg(test)]
  10const CHUNK_BASE: usize = 6;
  11
  12#[cfg(not(test))]
  13const CHUNK_BASE: usize = 16;
  14
  15#[derive(Clone, Default, Debug)]
  16pub struct Rope {
  17    chunks: SumTree<Chunk>,
  18}
  19
  20impl Rope {
  21    pub fn new() -> Self {
  22        Self::default()
  23    }
  24
  25    pub fn append(&mut self, rope: Rope) {
  26        let mut chunks = rope.chunks.cursor::<()>();
  27        chunks.next(&());
  28        if let Some(chunk) = chunks.item() {
  29            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  30                || chunk.0.len() < CHUNK_BASE
  31            {
  32                self.push(&chunk.0);
  33                chunks.next(&());
  34            }
  35        }
  36
  37        self.chunks.push_tree(chunks.suffix(&()), &());
  38        self.check_invariants();
  39    }
  40
  41    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  42        let mut new_rope = Rope::new();
  43        let mut cursor = self.cursor(0);
  44        new_rope.append(cursor.slice(range.start));
  45        cursor.seek_forward(range.end);
  46        new_rope.push(text);
  47        new_rope.append(cursor.suffix());
  48        *self = new_rope;
  49    }
  50
  51    pub fn slice(&self, range: Range<usize>) -> Rope {
  52        let mut cursor = self.cursor(0);
  53        cursor.seek_forward(range.start);
  54        cursor.slice(range.end)
  55    }
  56
  57    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
  58        //This would be more efficient with a forward advance after the first, but it's fine
  59        let start = self.point_to_offset(Point::new(range.start, 0));
  60        let end = self.point_to_offset(Point::new(range.end, 0));
  61        self.slice(start..end)
  62    }
  63
  64    pub fn push(&mut self, text: &str) {
  65        let mut new_chunks = SmallVec::<[_; 16]>::new();
  66        let mut new_chunk = ArrayString::new();
  67        for ch in text.chars() {
  68            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  69                new_chunks.push(Chunk(new_chunk));
  70                new_chunk = ArrayString::new();
  71            }
  72
  73            new_chunk.push(ch);
  74        }
  75        if !new_chunk.is_empty() {
  76            new_chunks.push(Chunk(new_chunk));
  77        }
  78
  79        let mut new_chunks = new_chunks.into_iter();
  80        let mut first_new_chunk = new_chunks.next();
  81        self.chunks.update_last(
  82            |last_chunk| {
  83                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
  84                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
  85                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
  86                    } else {
  87                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
  88                        text.push_str(&last_chunk.0);
  89                        text.push_str(&first_new_chunk_ref.0);
  90                        let (left, right) = text.split_at(find_split_ix(&text));
  91                        last_chunk.0.clear();
  92                        last_chunk.0.push_str(left);
  93                        first_new_chunk_ref.0.clear();
  94                        first_new_chunk_ref.0.push_str(right);
  95                    }
  96                }
  97            },
  98            &(),
  99        );
 100
 101        self.chunks
 102            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 103        self.check_invariants();
 104    }
 105
 106    pub fn push_front(&mut self, text: &str) {
 107        let suffix = mem::replace(self, Rope::from(text));
 108        self.append(suffix);
 109    }
 110
 111    fn check_invariants(&self) {
 112        #[cfg(test)]
 113        {
 114            // Ensure all chunks except maybe the last one are not underflowing.
 115            // Allow some wiggle room for multibyte characters at chunk boundaries.
 116            let mut chunks = self.chunks.cursor::<()>().peekable();
 117            while let Some(chunk) = chunks.next() {
 118                if chunks.peek().is_some() {
 119                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 120                }
 121            }
 122        }
 123    }
 124
 125    pub fn summary(&self) -> TextSummary {
 126        self.chunks.summary().text.clone()
 127    }
 128
 129    pub fn len(&self) -> usize {
 130        self.chunks.extent(&())
 131    }
 132
 133    pub fn is_empty(&self) -> bool {
 134        self.len() == 0
 135    }
 136
 137    pub fn max_point(&self) -> Point {
 138        self.chunks.extent(&())
 139    }
 140
 141    pub fn max_point_utf16(&self) -> PointUtf16 {
 142        self.chunks.extent(&())
 143    }
 144
 145    pub fn cursor(&self, offset: usize) -> Cursor {
 146        Cursor::new(self, offset)
 147    }
 148
 149    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 150        self.chars_at(0)
 151    }
 152
 153    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 154        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 155    }
 156
 157    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 158        self.reversed_chunks_in_range(0..start)
 159            .flat_map(|chunk| chunk.chars().rev())
 160    }
 161
 162    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 163        Bytes::new(self, range)
 164    }
 165
 166    pub fn chunks(&self) -> Chunks {
 167        self.chunks_in_range(0..self.len())
 168    }
 169
 170    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 171        Chunks::new(self, range, false)
 172    }
 173
 174    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 175        Chunks::new(self, range, true)
 176    }
 177
 178    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 179        if offset >= self.summary().len {
 180            return self.summary().len_utf16;
 181        }
 182        let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
 183        cursor.seek(&offset, Bias::Left, &());
 184        let overshoot = offset - cursor.start().0;
 185        cursor.start().1
 186            + cursor.item().map_or(Default::default(), |chunk| {
 187                chunk.offset_to_offset_utf16(overshoot)
 188            })
 189    }
 190
 191    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 192        if offset >= self.summary().len_utf16 {
 193            return self.summary().len;
 194        }
 195        let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
 196        cursor.seek(&offset, Bias::Left, &());
 197        let overshoot = offset - cursor.start().0;
 198        cursor.start().1
 199            + cursor.item().map_or(Default::default(), |chunk| {
 200                chunk.offset_utf16_to_offset(overshoot)
 201            })
 202    }
 203
 204    pub fn offset_to_point(&self, offset: usize) -> Point {
 205        if offset >= self.summary().len {
 206            return self.summary().lines;
 207        }
 208        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 209        cursor.seek(&offset, Bias::Left, &());
 210        let overshoot = offset - cursor.start().0;
 211        cursor.start().1
 212            + cursor
 213                .item()
 214                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 215    }
 216
 217    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 218        if offset >= self.summary().len {
 219            return self.summary().lines_utf16();
 220        }
 221        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 222        cursor.seek(&offset, Bias::Left, &());
 223        let overshoot = offset - cursor.start().0;
 224        cursor.start().1
 225            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 226                chunk.offset_to_point_utf16(overshoot)
 227            })
 228    }
 229
 230    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 231        if point >= self.summary().lines {
 232            return self.summary().lines_utf16();
 233        }
 234        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 235        cursor.seek(&point, Bias::Left, &());
 236        let overshoot = point - cursor.start().0;
 237        cursor.start().1
 238            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 239                chunk.point_to_point_utf16(overshoot)
 240            })
 241    }
 242
 243    pub fn point_to_offset(&self, point: Point) -> usize {
 244        if point >= self.summary().lines {
 245            return self.summary().len;
 246        }
 247        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 248        cursor.seek(&point, Bias::Left, &());
 249        let overshoot = point - cursor.start().0;
 250        cursor.start().1
 251            + cursor
 252                .item()
 253                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 254    }
 255
 256    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 257        if point >= self.summary().lines_utf16() {
 258            return self.summary().len;
 259        }
 260        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 261        cursor.seek(&point, Bias::Left, &());
 262        let overshoot = point - cursor.start().0;
 263        cursor.start().1
 264            + cursor
 265                .item()
 266                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
 267    }
 268
 269    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 270        if point >= self.summary().lines_utf16() {
 271            return self.summary().lines;
 272        }
 273        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 274        cursor.seek(&point, Bias::Left, &());
 275        let overshoot = point - cursor.start().0;
 276        cursor.start().1
 277            + cursor
 278                .item()
 279                .map_or(Point::zero(), |chunk| chunk.point_utf16_to_point(overshoot))
 280    }
 281
 282    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 283        let mut cursor = self.chunks.cursor::<usize>();
 284        cursor.seek(&offset, Bias::Left, &());
 285        if let Some(chunk) = cursor.item() {
 286            let mut ix = offset - cursor.start();
 287            while !chunk.0.is_char_boundary(ix) {
 288                match bias {
 289                    Bias::Left => {
 290                        ix -= 1;
 291                        offset -= 1;
 292                    }
 293                    Bias::Right => {
 294                        ix += 1;
 295                        offset += 1;
 296                    }
 297                }
 298            }
 299            offset
 300        } else {
 301            self.summary().len
 302        }
 303    }
 304
 305    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 306        let mut cursor = self.chunks.cursor::<OffsetUtf16>();
 307        cursor.seek(&offset, Bias::Right, &());
 308        if let Some(chunk) = cursor.item() {
 309            let overshoot = offset - cursor.start();
 310            *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
 311        } else {
 312            self.summary().len_utf16
 313        }
 314    }
 315
 316    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 317        let mut cursor = self.chunks.cursor::<Point>();
 318        cursor.seek(&point, Bias::Right, &());
 319        if let Some(chunk) = cursor.item() {
 320            let overshoot = point - cursor.start();
 321            *cursor.start() + chunk.clip_point(overshoot, bias)
 322        } else {
 323            self.summary().lines
 324        }
 325    }
 326
 327    pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 {
 328        let mut cursor = self.chunks.cursor::<PointUtf16>();
 329        cursor.seek(&point, Bias::Right, &());
 330        if let Some(chunk) = cursor.item() {
 331            let overshoot = point - cursor.start();
 332            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 333        } else {
 334            self.summary().lines_utf16()
 335        }
 336    }
 337
 338    pub fn line_len(&self, row: u32) -> u32 {
 339        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 340            .column
 341    }
 342
 343    pub fn fingerprint(&self) -> String {
 344        self.chunks.summary().fingerprint.to_hex()
 345    }
 346}
 347
 348impl<'a> From<&'a str> for Rope {
 349    fn from(text: &'a str) -> Self {
 350        let mut rope = Self::new();
 351        rope.push(text);
 352        rope
 353    }
 354}
 355
 356impl fmt::Display for Rope {
 357    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 358        for chunk in self.chunks() {
 359            write!(f, "{}", chunk)?;
 360        }
 361        Ok(())
 362    }
 363}
 364
 365pub struct Cursor<'a> {
 366    rope: &'a Rope,
 367    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 368    offset: usize,
 369}
 370
 371impl<'a> Cursor<'a> {
 372    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 373        let mut chunks = rope.chunks.cursor();
 374        chunks.seek(&offset, Bias::Right, &());
 375        Self {
 376            rope,
 377            chunks,
 378            offset,
 379        }
 380    }
 381
 382    pub fn seek_forward(&mut self, end_offset: usize) {
 383        debug_assert!(end_offset >= self.offset);
 384
 385        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 386        self.offset = end_offset;
 387    }
 388
 389    pub fn slice(&mut self, end_offset: usize) -> Rope {
 390        debug_assert!(
 391            end_offset >= self.offset,
 392            "cannot slice backwards from {} to {}",
 393            self.offset,
 394            end_offset
 395        );
 396
 397        let mut slice = Rope::new();
 398        if let Some(start_chunk) = self.chunks.item() {
 399            let start_ix = self.offset - self.chunks.start();
 400            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 401            slice.push(&start_chunk.0[start_ix..end_ix]);
 402        }
 403
 404        if end_offset > self.chunks.end(&()) {
 405            self.chunks.next(&());
 406            slice.append(Rope {
 407                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 408            });
 409            if let Some(end_chunk) = self.chunks.item() {
 410                let end_ix = end_offset - self.chunks.start();
 411                slice.push(&end_chunk.0[..end_ix]);
 412            }
 413        }
 414
 415        self.offset = end_offset;
 416        slice
 417    }
 418
 419    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 420        debug_assert!(end_offset >= self.offset);
 421
 422        let mut summary = D::default();
 423        if let Some(start_chunk) = self.chunks.item() {
 424            let start_ix = self.offset - self.chunks.start();
 425            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 426            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 427                &start_chunk.0[start_ix..end_ix],
 428            )));
 429        }
 430
 431        if end_offset > self.chunks.end(&()) {
 432            self.chunks.next(&());
 433            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 434            if let Some(end_chunk) = self.chunks.item() {
 435                let end_ix = end_offset - self.chunks.start();
 436                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 437                    &end_chunk.0[..end_ix],
 438                )));
 439            }
 440        }
 441
 442        self.offset = end_offset;
 443        summary
 444    }
 445
 446    pub fn suffix(mut self) -> Rope {
 447        self.slice(self.rope.chunks.extent(&()))
 448    }
 449
 450    pub fn offset(&self) -> usize {
 451        self.offset
 452    }
 453}
 454
 455pub struct Chunks<'a> {
 456    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 457    range: Range<usize>,
 458    reversed: bool,
 459}
 460
 461impl<'a> Chunks<'a> {
 462    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 463        let mut chunks = rope.chunks.cursor();
 464        if reversed {
 465            chunks.seek(&range.end, Bias::Left, &());
 466        } else {
 467            chunks.seek(&range.start, Bias::Right, &());
 468        }
 469        Self {
 470            chunks,
 471            range,
 472            reversed,
 473        }
 474    }
 475
 476    pub fn offset(&self) -> usize {
 477        if self.reversed {
 478            self.range.end.min(self.chunks.end(&()))
 479        } else {
 480            self.range.start.max(*self.chunks.start())
 481        }
 482    }
 483
 484    pub fn seek(&mut self, offset: usize) {
 485        let bias = if self.reversed {
 486            Bias::Left
 487        } else {
 488            Bias::Right
 489        };
 490
 491        if offset >= self.chunks.end(&()) {
 492            self.chunks.seek_forward(&offset, bias, &());
 493        } else {
 494            self.chunks.seek(&offset, bias, &());
 495        }
 496
 497        if self.reversed {
 498            self.range.end = offset;
 499        } else {
 500            self.range.start = offset;
 501        }
 502    }
 503
 504    pub fn peek(&self) -> Option<&'a str> {
 505        let chunk = self.chunks.item()?;
 506        if self.reversed && self.range.start >= self.chunks.end(&()) {
 507            return None;
 508        }
 509        let chunk_start = *self.chunks.start();
 510        if self.range.end <= chunk_start {
 511            return None;
 512        }
 513
 514        let start = self.range.start.saturating_sub(chunk_start);
 515        let end = self.range.end - chunk_start;
 516        Some(&chunk.0[start..chunk.0.len().min(end)])
 517    }
 518}
 519
 520impl<'a> Iterator for Chunks<'a> {
 521    type Item = &'a str;
 522
 523    fn next(&mut self) -> Option<Self::Item> {
 524        let result = self.peek();
 525        if result.is_some() {
 526            if self.reversed {
 527                self.chunks.prev(&());
 528            } else {
 529                self.chunks.next(&());
 530            }
 531        }
 532        result
 533    }
 534}
 535
 536pub struct Bytes<'a> {
 537    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 538    range: Range<usize>,
 539}
 540
 541impl<'a> Bytes<'a> {
 542    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
 543        let mut chunks = rope.chunks.cursor();
 544        chunks.seek(&range.start, Bias::Right, &());
 545        Self { chunks, range }
 546    }
 547
 548    pub fn peek(&self) -> Option<&'a [u8]> {
 549        let chunk = self.chunks.item()?;
 550        let chunk_start = *self.chunks.start();
 551        if self.range.end <= chunk_start {
 552            return None;
 553        }
 554
 555        let start = self.range.start.saturating_sub(chunk_start);
 556        let end = self.range.end - chunk_start;
 557        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 558    }
 559}
 560
 561impl<'a> Iterator for Bytes<'a> {
 562    type Item = &'a [u8];
 563
 564    fn next(&mut self) -> Option<Self::Item> {
 565        let result = self.peek();
 566        if result.is_some() {
 567            self.chunks.next(&());
 568        }
 569        result
 570    }
 571}
 572
 573impl<'a> io::Read for Bytes<'a> {
 574    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 575        if let Some(chunk) = self.peek() {
 576            let len = cmp::min(buf.len(), chunk.len());
 577            buf[..len].copy_from_slice(&chunk[..len]);
 578            self.range.start += len;
 579            if len == chunk.len() {
 580                self.chunks.next(&());
 581            }
 582            Ok(len)
 583        } else {
 584            Ok(0)
 585        }
 586    }
 587}
 588
 589#[derive(Clone, Debug, Default)]
 590struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 591
 592impl Chunk {
 593    fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
 594        let mut offset = 0;
 595        let mut offset_utf16 = OffsetUtf16(0);
 596        for ch in self.0.chars() {
 597            if offset >= target {
 598                break;
 599            }
 600
 601            offset += ch.len_utf8();
 602            offset_utf16.0 += ch.len_utf16();
 603        }
 604        offset_utf16
 605    }
 606
 607    fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 608        let mut offset_utf16 = OffsetUtf16(0);
 609        let mut offset = 0;
 610        for ch in self.0.chars() {
 611            if offset_utf16 >= target {
 612                break;
 613            }
 614
 615            offset += ch.len_utf8();
 616            offset_utf16.0 += ch.len_utf16();
 617        }
 618        offset
 619    }
 620
 621    fn offset_to_point(&self, target: usize) -> Point {
 622        let mut offset = 0;
 623        let mut point = Point::new(0, 0);
 624        for ch in self.0.chars() {
 625            if offset >= target {
 626                break;
 627            }
 628
 629            if ch == '\n' {
 630                point.row += 1;
 631                point.column = 0;
 632            } else {
 633                point.column += ch.len_utf8() as u32;
 634            }
 635            offset += ch.len_utf8();
 636        }
 637        point
 638    }
 639
 640    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 641        let mut offset = 0;
 642        let mut point = PointUtf16::new(0, 0);
 643        for ch in self.0.chars() {
 644            if offset >= target {
 645                break;
 646            }
 647
 648            if ch == '\n' {
 649                point.row += 1;
 650                point.column = 0;
 651            } else {
 652                point.column += ch.len_utf16() as u32;
 653            }
 654            offset += ch.len_utf8();
 655        }
 656        point
 657    }
 658
 659    fn point_to_offset(&self, target: Point) -> usize {
 660        let mut offset = 0;
 661        let mut point = Point::new(0, 0);
 662        for ch in self.0.chars() {
 663            if point >= target {
 664                if point > target {
 665                    panic!("point {:?} is inside of character {:?}", target, ch);
 666                }
 667                break;
 668            }
 669
 670            if ch == '\n' {
 671                point.row += 1;
 672                if point.row > target.row {
 673                    panic!(
 674                        "point {:?} is beyond the end of a line with length {}",
 675                        target, point.column
 676                    );
 677                }
 678                point.column = 0;
 679            } else {
 680                point.column += ch.len_utf8() as u32;
 681            }
 682            offset += ch.len_utf8();
 683        }
 684        offset
 685    }
 686
 687    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 688        let mut point = Point::zero();
 689        let mut point_utf16 = PointUtf16::new(0, 0);
 690        for ch in self.0.chars() {
 691            if point >= target {
 692                break;
 693            }
 694
 695            if ch == '\n' {
 696                point_utf16.row += 1;
 697                point_utf16.column = 0;
 698                point.row += 1;
 699                point.column = 0;
 700            } else {
 701                point_utf16.column += ch.len_utf16() as u32;
 702                point.column += ch.len_utf8() as u32;
 703            }
 704        }
 705        point_utf16
 706    }
 707
 708    fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
 709        let mut offset = 0;
 710        let mut point = PointUtf16::new(0, 0);
 711        for ch in self.0.chars() {
 712            if point >= target {
 713                if point > target {
 714                    panic!("point {:?} is inside of character {:?}", target, ch);
 715                }
 716                break;
 717            }
 718
 719            if ch == '\n' {
 720                point.row += 1;
 721                if point.row > target.row {
 722                    panic!(
 723                        "point {:?} is beyond the end of a line with length {}",
 724                        target, point.column
 725                    );
 726                }
 727                point.column = 0;
 728            } else {
 729                point.column += ch.len_utf16() as u32;
 730            }
 731            offset += ch.len_utf8();
 732        }
 733        offset
 734    }
 735
 736    fn point_utf16_to_point(&self, target: PointUtf16) -> Point {
 737        let mut point = Point::zero();
 738        let mut point_utf16 = PointUtf16::zero();
 739        for ch in self.0.chars() {
 740            if point_utf16 >= target {
 741                if point_utf16 > target {
 742                    panic!("point {:?} is inside of character {:?}", target, ch);
 743                }
 744                break;
 745            }
 746
 747            if ch == '\n' {
 748                point_utf16 += PointUtf16::new(1, 0);
 749                point += Point::new(1, 0);
 750            } else {
 751                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 752                point += Point::new(0, ch.len_utf8() as u32);
 753            }
 754        }
 755        point
 756    }
 757
 758    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 759        for (row, line) in self.0.split('\n').enumerate() {
 760            if row == target.row as usize {
 761                let mut column = target.column.min(line.len() as u32);
 762                while !line.is_char_boundary(column as usize) {
 763                    match bias {
 764                        Bias::Left => column -= 1,
 765                        Bias::Right => column += 1,
 766                    }
 767                }
 768                return Point::new(row as u32, column);
 769            }
 770        }
 771        unreachable!()
 772    }
 773
 774    fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 {
 775        for (row, line) in self.0.split('\n').enumerate() {
 776            if row == target.row as usize {
 777                let mut code_units = line.encode_utf16();
 778                let mut column = code_units.by_ref().take(target.column as usize).count();
 779                if char::decode_utf16(code_units).next().transpose().is_err() {
 780                    match bias {
 781                        Bias::Left => column -= 1,
 782                        Bias::Right => column += 1,
 783                    }
 784                }
 785                return PointUtf16::new(row as u32, column as u32);
 786            }
 787        }
 788        unreachable!()
 789    }
 790
 791    fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 792        let mut code_units = self.0.encode_utf16();
 793        let mut offset = code_units.by_ref().take(target.0 as usize).count();
 794        if char::decode_utf16(code_units).next().transpose().is_err() {
 795            match bias {
 796                Bias::Left => offset -= 1,
 797                Bias::Right => offset += 1,
 798            }
 799        }
 800        OffsetUtf16(offset)
 801    }
 802}
 803
 804impl sum_tree::Item for Chunk {
 805    type Summary = ChunkSummary;
 806
 807    fn summary(&self) -> Self::Summary {
 808        ChunkSummary::from(self.0.as_str())
 809    }
 810}
 811
 812#[derive(Clone, Debug, Default, Eq, PartialEq)]
 813pub struct ChunkSummary {
 814    text: TextSummary,
 815    fingerprint: HashMatrix,
 816}
 817
 818impl<'a> From<&'a str> for ChunkSummary {
 819    fn from(text: &'a str) -> Self {
 820        Self {
 821            text: TextSummary::from(text),
 822            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
 823        }
 824    }
 825}
 826
 827impl sum_tree::Summary for ChunkSummary {
 828    type Context = ();
 829
 830    fn add_summary(&mut self, summary: &Self, _: &()) {
 831        self.text += &summary.text;
 832        self.fingerprint = self.fingerprint * summary.fingerprint;
 833    }
 834}
 835
 836#[derive(Clone, Debug, Default, Eq, PartialEq)]
 837pub struct TextSummary {
 838    pub len: usize,
 839    pub len_utf16: OffsetUtf16,
 840    pub lines: Point,
 841    pub first_line_chars: u32,
 842    pub last_line_chars: u32,
 843    pub last_line_len_utf16: u32,
 844    pub longest_row: u32,
 845    pub longest_row_chars: u32,
 846}
 847
 848impl TextSummary {
 849    pub fn lines_utf16(&self) -> PointUtf16 {
 850        PointUtf16 {
 851            row: self.lines.row,
 852            column: self.last_line_len_utf16,
 853        }
 854    }
 855}
 856
 857impl<'a> From<&'a str> for TextSummary {
 858    fn from(text: &'a str) -> Self {
 859        let mut len_utf16 = OffsetUtf16(0);
 860        let mut lines = Point::new(0, 0);
 861        let mut first_line_chars = 0;
 862        let mut last_line_chars = 0;
 863        let mut last_line_len_utf16 = 0;
 864        let mut longest_row = 0;
 865        let mut longest_row_chars = 0;
 866        for c in text.chars() {
 867            len_utf16.0 += c.len_utf16();
 868
 869            if c == '\n' {
 870                lines += Point::new(1, 0);
 871                last_line_len_utf16 = 0;
 872                last_line_chars = 0;
 873            } else {
 874                lines.column += c.len_utf8() as u32;
 875                last_line_len_utf16 += c.len_utf16() as u32;
 876                last_line_chars += 1;
 877            }
 878
 879            if lines.row == 0 {
 880                first_line_chars = last_line_chars;
 881            }
 882
 883            if last_line_chars > longest_row_chars {
 884                longest_row = lines.row;
 885                longest_row_chars = last_line_chars;
 886            }
 887        }
 888
 889        TextSummary {
 890            len: text.len(),
 891            len_utf16,
 892            lines,
 893            first_line_chars,
 894            last_line_chars,
 895            last_line_len_utf16,
 896            longest_row,
 897            longest_row_chars,
 898        }
 899    }
 900}
 901
 902impl sum_tree::Summary for TextSummary {
 903    type Context = ();
 904
 905    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
 906        *self += summary;
 907    }
 908}
 909
 910impl std::ops::Add<Self> for TextSummary {
 911    type Output = Self;
 912
 913    fn add(mut self, rhs: Self) -> Self::Output {
 914        self.add_assign(&rhs);
 915        self
 916    }
 917}
 918
 919impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 920    fn add_assign(&mut self, other: &'a Self) {
 921        let joined_chars = self.last_line_chars + other.first_line_chars;
 922        if joined_chars > self.longest_row_chars {
 923            self.longest_row = self.lines.row;
 924            self.longest_row_chars = joined_chars;
 925        }
 926        if other.longest_row_chars > self.longest_row_chars {
 927            self.longest_row = self.lines.row + other.longest_row;
 928            self.longest_row_chars = other.longest_row_chars;
 929        }
 930
 931        if self.lines.row == 0 {
 932            self.first_line_chars += other.first_line_chars;
 933        }
 934
 935        if other.lines.row == 0 {
 936            self.last_line_chars += other.first_line_chars;
 937            self.last_line_len_utf16 += other.last_line_len_utf16;
 938        } else {
 939            self.last_line_chars = other.last_line_chars;
 940            self.last_line_len_utf16 = other.last_line_len_utf16;
 941        }
 942
 943        self.len += other.len;
 944        self.len_utf16 += other.len_utf16;
 945        self.lines += other.lines;
 946    }
 947}
 948
 949impl std::ops::AddAssign<Self> for TextSummary {
 950    fn add_assign(&mut self, other: Self) {
 951        *self += &other;
 952    }
 953}
 954
 955pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
 956    fn from_text_summary(summary: &TextSummary) -> Self;
 957    fn add_assign(&mut self, other: &Self);
 958}
 959
 960impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
 961    fn from_text_summary(summary: &TextSummary) -> Self {
 962        (
 963            D1::from_text_summary(summary),
 964            D2::from_text_summary(summary),
 965        )
 966    }
 967
 968    fn add_assign(&mut self, other: &Self) {
 969        self.0.add_assign(&other.0);
 970        self.1.add_assign(&other.1);
 971    }
 972}
 973
 974impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
 975    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 976        *self += &summary.text;
 977    }
 978}
 979
 980impl TextDimension for TextSummary {
 981    fn from_text_summary(summary: &TextSummary) -> Self {
 982        summary.clone()
 983    }
 984
 985    fn add_assign(&mut self, other: &Self) {
 986        *self += other;
 987    }
 988}
 989
 990impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
 991    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 992        *self += summary.text.len;
 993    }
 994}
 995
 996impl TextDimension for usize {
 997    fn from_text_summary(summary: &TextSummary) -> Self {
 998        summary.len
 999    }
1000
1001    fn add_assign(&mut self, other: &Self) {
1002        *self += other;
1003    }
1004}
1005
1006impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1007    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1008        *self += summary.text.len_utf16;
1009    }
1010}
1011
1012impl TextDimension for OffsetUtf16 {
1013    fn from_text_summary(summary: &TextSummary) -> Self {
1014        summary.len_utf16
1015    }
1016
1017    fn add_assign(&mut self, other: &Self) {
1018        *self += other;
1019    }
1020}
1021
1022impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1023    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1024        *self += summary.text.lines;
1025    }
1026}
1027
1028impl TextDimension for Point {
1029    fn from_text_summary(summary: &TextSummary) -> Self {
1030        summary.lines
1031    }
1032
1033    fn add_assign(&mut self, other: &Self) {
1034        *self += other;
1035    }
1036}
1037
1038impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1039    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1040        *self += summary.text.lines_utf16();
1041    }
1042}
1043
1044impl TextDimension for PointUtf16 {
1045    fn from_text_summary(summary: &TextSummary) -> Self {
1046        summary.lines_utf16()
1047    }
1048
1049    fn add_assign(&mut self, other: &Self) {
1050        *self += other;
1051    }
1052}
1053
1054fn find_split_ix(text: &str) -> usize {
1055    let mut ix = text.len() / 2;
1056    while !text.is_char_boundary(ix) {
1057        if ix < 2 * CHUNK_BASE {
1058            ix += 1;
1059        } else {
1060            ix = (text.len() / 2) - 1;
1061            break;
1062        }
1063    }
1064    while !text.is_char_boundary(ix) {
1065        ix -= 1;
1066    }
1067
1068    debug_assert!(ix <= 2 * CHUNK_BASE);
1069    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
1070    ix
1071}
1072
1073#[cfg(test)]
1074mod tests {
1075    use super::*;
1076    use crate::random_char_iter::RandomCharIter;
1077    use rand::prelude::*;
1078    use std::{cmp::Ordering, env, io::Read};
1079    use Bias::{Left, Right};
1080
1081    #[test]
1082    fn test_all_4_byte_chars() {
1083        let mut rope = Rope::new();
1084        let text = "🏀".repeat(256);
1085        rope.push(&text);
1086        assert_eq!(rope.text(), text);
1087    }
1088
1089    #[test]
1090    fn test_clip() {
1091        let rope = Rope::from("🧘");
1092
1093        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1094        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1095        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1096
1097        assert_eq!(
1098            rope.clip_point(Point::new(0, 1), Bias::Left),
1099            Point::new(0, 0)
1100        );
1101        assert_eq!(
1102            rope.clip_point(Point::new(0, 1), Bias::Right),
1103            Point::new(0, 4)
1104        );
1105        assert_eq!(
1106            rope.clip_point(Point::new(0, 5), Bias::Right),
1107            Point::new(0, 4)
1108        );
1109
1110        assert_eq!(
1111            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left),
1112            PointUtf16::new(0, 0)
1113        );
1114        assert_eq!(
1115            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right),
1116            PointUtf16::new(0, 2)
1117        );
1118        assert_eq!(
1119            rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
1120            PointUtf16::new(0, 2)
1121        );
1122
1123        assert_eq!(
1124            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1125            OffsetUtf16(0)
1126        );
1127        assert_eq!(
1128            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1129            OffsetUtf16(2)
1130        );
1131        assert_eq!(
1132            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1133            OffsetUtf16(2)
1134        );
1135    }
1136
1137    #[gpui::test(iterations = 100)]
1138    fn test_random_rope(mut rng: StdRng) {
1139        let operations = env::var("OPERATIONS")
1140            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1141            .unwrap_or(10);
1142
1143        let mut expected = String::new();
1144        let mut actual = Rope::new();
1145        for _ in 0..operations {
1146            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1147            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1148            let len = rng.gen_range(0..=64);
1149            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1150
1151            let mut new_actual = Rope::new();
1152            let mut cursor = actual.cursor(0);
1153            new_actual.append(cursor.slice(start_ix));
1154            new_actual.push(&new_text);
1155            cursor.seek_forward(end_ix);
1156            new_actual.append(cursor.suffix());
1157            actual = new_actual;
1158
1159            expected.replace_range(start_ix..end_ix, &new_text);
1160
1161            assert_eq!(actual.text(), expected);
1162            log::info!("text: {:?}", expected);
1163
1164            for _ in 0..5 {
1165                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1166                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1167
1168                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1169                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1170
1171                let mut actual_text = String::new();
1172                actual
1173                    .bytes_in_range(start_ix..end_ix)
1174                    .read_to_string(&mut actual_text)
1175                    .unwrap();
1176                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1177
1178                assert_eq!(
1179                    actual
1180                        .reversed_chunks_in_range(start_ix..end_ix)
1181                        .collect::<Vec<&str>>()
1182                        .into_iter()
1183                        .rev()
1184                        .collect::<String>(),
1185                    &expected[start_ix..end_ix]
1186                );
1187            }
1188
1189            let mut offset_utf16 = OffsetUtf16(0);
1190            let mut point = Point::new(0, 0);
1191            let mut point_utf16 = PointUtf16::new(0, 0);
1192            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1193                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1194                assert_eq!(
1195                    actual.offset_to_point_utf16(ix),
1196                    point_utf16,
1197                    "offset_to_point_utf16({})",
1198                    ix
1199                );
1200                assert_eq!(
1201                    actual.point_to_offset(point),
1202                    ix,
1203                    "point_to_offset({:?})",
1204                    point
1205                );
1206                assert_eq!(
1207                    actual.point_utf16_to_offset(point_utf16),
1208                    ix,
1209                    "point_utf16_to_offset({:?})",
1210                    point_utf16
1211                );
1212                assert_eq!(
1213                    actual.offset_to_offset_utf16(ix),
1214                    offset_utf16,
1215                    "offset_to_offset_utf16({:?})",
1216                    ix
1217                );
1218                assert_eq!(
1219                    actual.offset_utf16_to_offset(offset_utf16),
1220                    ix,
1221                    "offset_utf16_to_offset({:?})",
1222                    offset_utf16
1223                );
1224                if ch == '\n' {
1225                    point += Point::new(1, 0);
1226                    point_utf16 += PointUtf16::new(1, 0);
1227                } else {
1228                    point.column += ch.len_utf8() as u32;
1229                    point_utf16.column += ch.len_utf16() as u32;
1230                }
1231                offset_utf16.0 += ch.len_utf16();
1232            }
1233
1234            let mut offset_utf16 = OffsetUtf16(0);
1235            let mut point_utf16 = PointUtf16::zero();
1236            for unit in expected.encode_utf16() {
1237                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1238                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1239                assert!(right_offset >= left_offset);
1240                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1241                actual.offset_utf16_to_offset(left_offset);
1242                actual.offset_utf16_to_offset(right_offset);
1243
1244                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1245                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1246                assert!(right_point >= left_point);
1247                // Ensure translating UTF-16 points to offsets doesn't panic.
1248                actual.point_utf16_to_offset(left_point);
1249                actual.point_utf16_to_offset(right_point);
1250
1251                offset_utf16.0 += 1;
1252                if unit == b'\n' as u16 {
1253                    point_utf16 += PointUtf16::new(1, 0);
1254                } else {
1255                    point_utf16 += PointUtf16::new(0, 1);
1256                }
1257            }
1258
1259            for _ in 0..5 {
1260                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1261                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1262                assert_eq!(
1263                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1264                    TextSummary::from(&expected[start_ix..end_ix])
1265                );
1266            }
1267
1268            let mut expected_longest_rows = Vec::new();
1269            let mut longest_line_len = -1_isize;
1270            for (row, line) in expected.split('\n').enumerate() {
1271                let row = row as u32;
1272                assert_eq!(
1273                    actual.line_len(row),
1274                    line.len() as u32,
1275                    "invalid line len for row {}",
1276                    row
1277                );
1278
1279                let line_char_count = line.chars().count() as isize;
1280                match line_char_count.cmp(&longest_line_len) {
1281                    Ordering::Less => {}
1282                    Ordering::Equal => expected_longest_rows.push(row),
1283                    Ordering::Greater => {
1284                        longest_line_len = line_char_count;
1285                        expected_longest_rows.clear();
1286                        expected_longest_rows.push(row);
1287                    }
1288                }
1289            }
1290
1291            let longest_row = actual.summary().longest_row;
1292            assert!(
1293                expected_longest_rows.contains(&longest_row),
1294                "incorrect longest row {}. expected {:?} with length {}",
1295                longest_row,
1296                expected_longest_rows,
1297                longest_line_len,
1298            );
1299        }
1300    }
1301
1302    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1303        while !text.is_char_boundary(offset) {
1304            match bias {
1305                Bias::Left => offset -= 1,
1306                Bias::Right => offset += 1,
1307            }
1308        }
1309        offset
1310    }
1311
1312    impl Rope {
1313        fn text(&self) -> String {
1314            let mut text = String::new();
1315            for chunk in self.chunks.cursor::<()>() {
1316                text.push_str(&chunk.0);
1317            }
1318            text
1319        }
1320    }
1321}