rope.rs

   1mod offset_utf16;
   2mod point;
   3mod point_utf16;
   4
   5use arrayvec::ArrayString;
   6use bromberg_sl2::{DigestString, HashMatrix};
   7use smallvec::SmallVec;
   8use std::{cmp, fmt, io, mem, ops::Range, str};
   9use sum_tree::{Bias, Dimension, SumTree};
  10
  11pub use offset_utf16::OffsetUtf16;
  12pub use point::Point;
  13pub use point_utf16::PointUtf16;
  14
  15#[cfg(test)]
  16const CHUNK_BASE: usize = 6;
  17
  18#[cfg(not(test))]
  19const CHUNK_BASE: usize = 16;
  20
  21#[derive(Clone, Default, Debug)]
  22pub struct Rope {
  23    chunks: SumTree<Chunk>,
  24}
  25
  26impl Rope {
  27    pub fn new() -> Self {
  28        Self::default()
  29    }
  30
  31    pub fn append(&mut self, rope: Rope) {
  32        let mut chunks = rope.chunks.cursor::<()>();
  33        chunks.next(&());
  34        if let Some(chunk) = chunks.item() {
  35            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  36                || chunk.0.len() < CHUNK_BASE
  37            {
  38                self.push(&chunk.0);
  39                chunks.next(&());
  40            }
  41        }
  42
  43        self.chunks.push_tree(chunks.suffix(&()), &());
  44        self.check_invariants();
  45    }
  46
  47    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  48        let mut new_rope = Rope::new();
  49        let mut cursor = self.cursor(0);
  50        new_rope.append(cursor.slice(range.start));
  51        cursor.seek_forward(range.end);
  52        new_rope.push(text);
  53        new_rope.append(cursor.suffix());
  54        *self = new_rope;
  55    }
  56
  57    pub fn slice(&self, range: Range<usize>) -> Rope {
  58        let mut cursor = self.cursor(0);
  59        cursor.seek_forward(range.start);
  60        cursor.slice(range.end)
  61    }
  62
  63    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
  64        //This would be more efficient with a forward advance after the first, but it's fine
  65        let start = self.point_to_offset(Point::new(range.start, 0));
  66        let end = self.point_to_offset(Point::new(range.end, 0));
  67        self.slice(start..end)
  68    }
  69
  70    pub fn push(&mut self, text: &str) {
  71        let mut new_chunks = SmallVec::<[_; 16]>::new();
  72        let mut new_chunk = ArrayString::new();
  73        for ch in text.chars() {
  74            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  75                new_chunks.push(Chunk(new_chunk));
  76                new_chunk = ArrayString::new();
  77            }
  78
  79            new_chunk.push(ch);
  80        }
  81        if !new_chunk.is_empty() {
  82            new_chunks.push(Chunk(new_chunk));
  83        }
  84
  85        let mut new_chunks = new_chunks.into_iter();
  86        let mut first_new_chunk = new_chunks.next();
  87        self.chunks.update_last(
  88            |last_chunk| {
  89                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
  90                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
  91                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
  92                    } else {
  93                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
  94                        text.push_str(&last_chunk.0);
  95                        text.push_str(&first_new_chunk_ref.0);
  96                        let (left, right) = text.split_at(find_split_ix(&text));
  97                        last_chunk.0.clear();
  98                        last_chunk.0.push_str(left);
  99                        first_new_chunk_ref.0.clear();
 100                        first_new_chunk_ref.0.push_str(right);
 101                    }
 102                }
 103            },
 104            &(),
 105        );
 106
 107        self.chunks
 108            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 109        self.check_invariants();
 110    }
 111
 112    pub fn push_front(&mut self, text: &str) {
 113        let suffix = mem::replace(self, Rope::from(text));
 114        self.append(suffix);
 115    }
 116
 117    fn check_invariants(&self) {
 118        #[cfg(test)]
 119        {
 120            // Ensure all chunks except maybe the last one are not underflowing.
 121            // Allow some wiggle room for multibyte characters at chunk boundaries.
 122            let mut chunks = self.chunks.cursor::<()>().peekable();
 123            while let Some(chunk) = chunks.next() {
 124                if chunks.peek().is_some() {
 125                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 126                }
 127            }
 128        }
 129    }
 130
 131    pub fn summary(&self) -> TextSummary {
 132        self.chunks.summary().text.clone()
 133    }
 134
 135    pub fn len(&self) -> usize {
 136        self.chunks.extent(&())
 137    }
 138
 139    pub fn is_empty(&self) -> bool {
 140        self.len() == 0
 141    }
 142
 143    pub fn max_point(&self) -> Point {
 144        self.chunks.extent(&())
 145    }
 146
 147    pub fn max_point_utf16(&self) -> PointUtf16 {
 148        self.chunks.extent(&())
 149    }
 150
 151    pub fn cursor(&self, offset: usize) -> Cursor {
 152        Cursor::new(self, offset)
 153    }
 154
 155    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 156        self.chars_at(0)
 157    }
 158
 159    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 160        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 161    }
 162
 163    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 164        self.reversed_chunks_in_range(0..start)
 165            .flat_map(|chunk| chunk.chars().rev())
 166    }
 167
 168    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 169        Bytes::new(self, range)
 170    }
 171
 172    pub fn chunks(&self) -> Chunks {
 173        self.chunks_in_range(0..self.len())
 174    }
 175
 176    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 177        Chunks::new(self, range, false)
 178    }
 179
 180    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 181        Chunks::new(self, range, true)
 182    }
 183
 184    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 185        if offset >= self.summary().len {
 186            return self.summary().len_utf16;
 187        }
 188        let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
 189        cursor.seek(&offset, Bias::Left, &());
 190        let overshoot = offset - cursor.start().0;
 191        cursor.start().1
 192            + cursor.item().map_or(Default::default(), |chunk| {
 193                chunk.offset_to_offset_utf16(overshoot)
 194            })
 195    }
 196
 197    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 198        if offset >= self.summary().len_utf16 {
 199            return self.summary().len;
 200        }
 201        let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
 202        cursor.seek(&offset, Bias::Left, &());
 203        let overshoot = offset - cursor.start().0;
 204        cursor.start().1
 205            + cursor.item().map_or(Default::default(), |chunk| {
 206                chunk.offset_utf16_to_offset(overshoot)
 207            })
 208    }
 209
 210    pub fn offset_to_point(&self, offset: usize) -> Point {
 211        if offset >= self.summary().len {
 212            return self.summary().lines;
 213        }
 214        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 215        cursor.seek(&offset, Bias::Left, &());
 216        let overshoot = offset - cursor.start().0;
 217        cursor.start().1
 218            + cursor
 219                .item()
 220                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 221    }
 222
 223    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 224        if offset >= self.summary().len {
 225            return self.summary().lines_utf16();
 226        }
 227        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 228        cursor.seek(&offset, Bias::Left, &());
 229        let overshoot = offset - cursor.start().0;
 230        cursor.start().1
 231            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 232                chunk.offset_to_point_utf16(overshoot)
 233            })
 234    }
 235
 236    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 237        if point >= self.summary().lines {
 238            return self.summary().lines_utf16();
 239        }
 240        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 241        cursor.seek(&point, Bias::Left, &());
 242        let overshoot = point - cursor.start().0;
 243        cursor.start().1
 244            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 245                chunk.point_to_point_utf16(overshoot)
 246            })
 247    }
 248
 249    pub fn point_to_offset(&self, point: Point) -> usize {
 250        if point >= self.summary().lines {
 251            return self.summary().len;
 252        }
 253        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 254        cursor.seek(&point, Bias::Left, &());
 255        let overshoot = point - cursor.start().0;
 256        cursor.start().1
 257            + cursor
 258                .item()
 259                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 260    }
 261
 262    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 263        if point >= self.summary().lines_utf16() {
 264            return self.summary().len;
 265        }
 266        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 267        cursor.seek(&point, Bias::Left, &());
 268        let overshoot = point - cursor.start().0;
 269        cursor.start().1
 270            + cursor
 271                .item()
 272                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
 273    }
 274
 275    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 276        if point >= self.summary().lines_utf16() {
 277            return self.summary().lines;
 278        }
 279        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 280        cursor.seek(&point, Bias::Left, &());
 281        let overshoot = point - cursor.start().0;
 282        cursor.start().1
 283            + cursor
 284                .item()
 285                .map_or(Point::zero(), |chunk| chunk.point_utf16_to_point(overshoot))
 286    }
 287
 288    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 289        let mut cursor = self.chunks.cursor::<usize>();
 290        cursor.seek(&offset, Bias::Left, &());
 291        if let Some(chunk) = cursor.item() {
 292            let mut ix = offset - cursor.start();
 293            while !chunk.0.is_char_boundary(ix) {
 294                match bias {
 295                    Bias::Left => {
 296                        ix -= 1;
 297                        offset -= 1;
 298                    }
 299                    Bias::Right => {
 300                        ix += 1;
 301                        offset += 1;
 302                    }
 303                }
 304            }
 305            offset
 306        } else {
 307            self.summary().len
 308        }
 309    }
 310
 311    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 312        let mut cursor = self.chunks.cursor::<OffsetUtf16>();
 313        cursor.seek(&offset, Bias::Right, &());
 314        if let Some(chunk) = cursor.item() {
 315            let overshoot = offset - cursor.start();
 316            *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
 317        } else {
 318            self.summary().len_utf16
 319        }
 320    }
 321
 322    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 323        let mut cursor = self.chunks.cursor::<Point>();
 324        cursor.seek(&point, Bias::Right, &());
 325        if let Some(chunk) = cursor.item() {
 326            let overshoot = point - cursor.start();
 327            *cursor.start() + chunk.clip_point(overshoot, bias)
 328        } else {
 329            self.summary().lines
 330        }
 331    }
 332
 333    pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 {
 334        let mut cursor = self.chunks.cursor::<PointUtf16>();
 335        cursor.seek(&point, Bias::Right, &());
 336        if let Some(chunk) = cursor.item() {
 337            let overshoot = point - cursor.start();
 338            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 339        } else {
 340            self.summary().lines_utf16()
 341        }
 342    }
 343
 344    pub fn line_len(&self, row: u32) -> u32 {
 345        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 346            .column
 347    }
 348
 349    pub fn fingerprint(&self) -> String {
 350        self.chunks.summary().fingerprint.to_hex()
 351    }
 352}
 353
 354impl<'a> From<&'a str> for Rope {
 355    fn from(text: &'a str) -> Self {
 356        let mut rope = Self::new();
 357        rope.push(text);
 358        rope
 359    }
 360}
 361
 362impl fmt::Display for Rope {
 363    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 364        for chunk in self.chunks() {
 365            write!(f, "{}", chunk)?;
 366        }
 367        Ok(())
 368    }
 369}
 370
 371pub struct Cursor<'a> {
 372    rope: &'a Rope,
 373    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 374    offset: usize,
 375}
 376
 377impl<'a> Cursor<'a> {
 378    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 379        let mut chunks = rope.chunks.cursor();
 380        chunks.seek(&offset, Bias::Right, &());
 381        Self {
 382            rope,
 383            chunks,
 384            offset,
 385        }
 386    }
 387
 388    pub fn seek_forward(&mut self, end_offset: usize) {
 389        debug_assert!(end_offset >= self.offset);
 390
 391        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 392        self.offset = end_offset;
 393    }
 394
 395    pub fn slice(&mut self, end_offset: usize) -> Rope {
 396        debug_assert!(
 397            end_offset >= self.offset,
 398            "cannot slice backwards from {} to {}",
 399            self.offset,
 400            end_offset
 401        );
 402
 403        let mut slice = Rope::new();
 404        if let Some(start_chunk) = self.chunks.item() {
 405            let start_ix = self.offset - self.chunks.start();
 406            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 407            slice.push(&start_chunk.0[start_ix..end_ix]);
 408        }
 409
 410        if end_offset > self.chunks.end(&()) {
 411            self.chunks.next(&());
 412            slice.append(Rope {
 413                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 414            });
 415            if let Some(end_chunk) = self.chunks.item() {
 416                let end_ix = end_offset - self.chunks.start();
 417                slice.push(&end_chunk.0[..end_ix]);
 418            }
 419        }
 420
 421        self.offset = end_offset;
 422        slice
 423    }
 424
 425    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 426        debug_assert!(end_offset >= self.offset);
 427
 428        let mut summary = D::default();
 429        if let Some(start_chunk) = self.chunks.item() {
 430            let start_ix = self.offset - self.chunks.start();
 431            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 432            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 433                &start_chunk.0[start_ix..end_ix],
 434            )));
 435        }
 436
 437        if end_offset > self.chunks.end(&()) {
 438            self.chunks.next(&());
 439            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 440            if let Some(end_chunk) = self.chunks.item() {
 441                let end_ix = end_offset - self.chunks.start();
 442                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 443                    &end_chunk.0[..end_ix],
 444                )));
 445            }
 446        }
 447
 448        self.offset = end_offset;
 449        summary
 450    }
 451
 452    pub fn suffix(mut self) -> Rope {
 453        self.slice(self.rope.chunks.extent(&()))
 454    }
 455
 456    pub fn offset(&self) -> usize {
 457        self.offset
 458    }
 459}
 460
 461pub struct Chunks<'a> {
 462    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 463    range: Range<usize>,
 464    reversed: bool,
 465}
 466
 467impl<'a> Chunks<'a> {
 468    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 469        let mut chunks = rope.chunks.cursor();
 470        if reversed {
 471            chunks.seek(&range.end, Bias::Left, &());
 472        } else {
 473            chunks.seek(&range.start, Bias::Right, &());
 474        }
 475        Self {
 476            chunks,
 477            range,
 478            reversed,
 479        }
 480    }
 481
 482    pub fn offset(&self) -> usize {
 483        if self.reversed {
 484            self.range.end.min(self.chunks.end(&()))
 485        } else {
 486            self.range.start.max(*self.chunks.start())
 487        }
 488    }
 489
 490    pub fn seek(&mut self, offset: usize) {
 491        let bias = if self.reversed {
 492            Bias::Left
 493        } else {
 494            Bias::Right
 495        };
 496
 497        if offset >= self.chunks.end(&()) {
 498            self.chunks.seek_forward(&offset, bias, &());
 499        } else {
 500            self.chunks.seek(&offset, bias, &());
 501        }
 502
 503        if self.reversed {
 504            self.range.end = offset;
 505        } else {
 506            self.range.start = offset;
 507        }
 508    }
 509
 510    pub fn peek(&self) -> Option<&'a str> {
 511        let chunk = self.chunks.item()?;
 512        if self.reversed && self.range.start >= self.chunks.end(&()) {
 513            return None;
 514        }
 515        let chunk_start = *self.chunks.start();
 516        if self.range.end <= chunk_start {
 517            return None;
 518        }
 519
 520        let start = self.range.start.saturating_sub(chunk_start);
 521        let end = self.range.end - chunk_start;
 522        Some(&chunk.0[start..chunk.0.len().min(end)])
 523    }
 524}
 525
 526impl<'a> Iterator for Chunks<'a> {
 527    type Item = &'a str;
 528
 529    fn next(&mut self) -> Option<Self::Item> {
 530        let result = self.peek();
 531        if result.is_some() {
 532            if self.reversed {
 533                self.chunks.prev(&());
 534            } else {
 535                self.chunks.next(&());
 536            }
 537        }
 538        result
 539    }
 540}
 541
 542pub struct Bytes<'a> {
 543    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 544    range: Range<usize>,
 545}
 546
 547impl<'a> Bytes<'a> {
 548    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
 549        let mut chunks = rope.chunks.cursor();
 550        chunks.seek(&range.start, Bias::Right, &());
 551        Self { chunks, range }
 552    }
 553
 554    pub fn peek(&self) -> Option<&'a [u8]> {
 555        let chunk = self.chunks.item()?;
 556        let chunk_start = *self.chunks.start();
 557        if self.range.end <= chunk_start {
 558            return None;
 559        }
 560
 561        let start = self.range.start.saturating_sub(chunk_start);
 562        let end = self.range.end - chunk_start;
 563        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 564    }
 565}
 566
 567impl<'a> Iterator for Bytes<'a> {
 568    type Item = &'a [u8];
 569
 570    fn next(&mut self) -> Option<Self::Item> {
 571        let result = self.peek();
 572        if result.is_some() {
 573            self.chunks.next(&());
 574        }
 575        result
 576    }
 577}
 578
 579impl<'a> io::Read for Bytes<'a> {
 580    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 581        if let Some(chunk) = self.peek() {
 582            let len = cmp::min(buf.len(), chunk.len());
 583            buf[..len].copy_from_slice(&chunk[..len]);
 584            self.range.start += len;
 585            if len == chunk.len() {
 586                self.chunks.next(&());
 587            }
 588            Ok(len)
 589        } else {
 590            Ok(0)
 591        }
 592    }
 593}
 594
 595#[derive(Clone, Debug, Default)]
 596struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 597
 598impl Chunk {
 599    fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
 600        let mut offset = 0;
 601        let mut offset_utf16 = OffsetUtf16(0);
 602        for ch in self.0.chars() {
 603            if offset >= target {
 604                break;
 605            }
 606
 607            offset += ch.len_utf8();
 608            offset_utf16.0 += ch.len_utf16();
 609        }
 610        offset_utf16
 611    }
 612
 613    fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 614        let mut offset_utf16 = OffsetUtf16(0);
 615        let mut offset = 0;
 616        for ch in self.0.chars() {
 617            if offset_utf16 >= target {
 618                break;
 619            }
 620
 621            offset += ch.len_utf8();
 622            offset_utf16.0 += ch.len_utf16();
 623        }
 624        offset
 625    }
 626
 627    fn offset_to_point(&self, target: usize) -> Point {
 628        let mut offset = 0;
 629        let mut point = Point::new(0, 0);
 630        for ch in self.0.chars() {
 631            if offset >= target {
 632                break;
 633            }
 634
 635            if ch == '\n' {
 636                point.row += 1;
 637                point.column = 0;
 638            } else {
 639                point.column += ch.len_utf8() as u32;
 640            }
 641            offset += ch.len_utf8();
 642        }
 643        point
 644    }
 645
 646    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 647        let mut offset = 0;
 648        let mut point = PointUtf16::new(0, 0);
 649        for ch in self.0.chars() {
 650            if offset >= target {
 651                break;
 652            }
 653
 654            if ch == '\n' {
 655                point.row += 1;
 656                point.column = 0;
 657            } else {
 658                point.column += ch.len_utf16() as u32;
 659            }
 660            offset += ch.len_utf8();
 661        }
 662        point
 663    }
 664
 665    fn point_to_offset(&self, target: Point) -> usize {
 666        let mut offset = 0;
 667        let mut point = Point::new(0, 0);
 668        for ch in self.0.chars() {
 669            if point >= target {
 670                if point > target {
 671                    panic!("point {:?} is inside of character {:?}", target, ch);
 672                }
 673                break;
 674            }
 675
 676            if ch == '\n' {
 677                point.row += 1;
 678                if point.row > target.row {
 679                    panic!(
 680                        "point {:?} is beyond the end of a line with length {}",
 681                        target, point.column
 682                    );
 683                }
 684                point.column = 0;
 685            } else {
 686                point.column += ch.len_utf8() as u32;
 687            }
 688            offset += ch.len_utf8();
 689        }
 690        offset
 691    }
 692
 693    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 694        let mut point = Point::zero();
 695        let mut point_utf16 = PointUtf16::new(0, 0);
 696        for ch in self.0.chars() {
 697            if point >= target {
 698                break;
 699            }
 700
 701            if ch == '\n' {
 702                point_utf16.row += 1;
 703                point_utf16.column = 0;
 704                point.row += 1;
 705                point.column = 0;
 706            } else {
 707                point_utf16.column += ch.len_utf16() as u32;
 708                point.column += ch.len_utf8() as u32;
 709            }
 710        }
 711        point_utf16
 712    }
 713
 714    fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
 715        let mut offset = 0;
 716        let mut point = PointUtf16::new(0, 0);
 717        for ch in self.0.chars() {
 718            if point >= target {
 719                if point > target {
 720                    panic!("point {:?} is inside of character {:?}", target, ch);
 721                }
 722                break;
 723            }
 724
 725            if ch == '\n' {
 726                point.row += 1;
 727                if point.row > target.row {
 728                    panic!(
 729                        "point {:?} is beyond the end of a line with length {}",
 730                        target, point.column
 731                    );
 732                }
 733                point.column = 0;
 734            } else {
 735                point.column += ch.len_utf16() as u32;
 736            }
 737            offset += ch.len_utf8();
 738        }
 739        offset
 740    }
 741
 742    fn point_utf16_to_point(&self, target: PointUtf16) -> Point {
 743        let mut point = Point::zero();
 744        let mut point_utf16 = PointUtf16::zero();
 745        for ch in self.0.chars() {
 746            if point_utf16 >= target {
 747                if point_utf16 > target {
 748                    panic!("point {:?} is inside of character {:?}", target, ch);
 749                }
 750                break;
 751            }
 752
 753            if ch == '\n' {
 754                point_utf16 += PointUtf16::new(1, 0);
 755                point += Point::new(1, 0);
 756            } else {
 757                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 758                point += Point::new(0, ch.len_utf8() as u32);
 759            }
 760        }
 761        point
 762    }
 763
 764    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 765        for (row, line) in self.0.split('\n').enumerate() {
 766            if row == target.row as usize {
 767                let mut column = target.column.min(line.len() as u32);
 768                while !line.is_char_boundary(column as usize) {
 769                    match bias {
 770                        Bias::Left => column -= 1,
 771                        Bias::Right => column += 1,
 772                    }
 773                }
 774                return Point::new(row as u32, column);
 775            }
 776        }
 777        unreachable!()
 778    }
 779
 780    fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 {
 781        for (row, line) in self.0.split('\n').enumerate() {
 782            if row == target.row as usize {
 783                let mut code_units = line.encode_utf16();
 784                let mut column = code_units.by_ref().take(target.column as usize).count();
 785                if char::decode_utf16(code_units).next().transpose().is_err() {
 786                    match bias {
 787                        Bias::Left => column -= 1,
 788                        Bias::Right => column += 1,
 789                    }
 790                }
 791                return PointUtf16::new(row as u32, column as u32);
 792            }
 793        }
 794        unreachable!()
 795    }
 796
 797    fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 798        let mut code_units = self.0.encode_utf16();
 799        let mut offset = code_units.by_ref().take(target.0 as usize).count();
 800        if char::decode_utf16(code_units).next().transpose().is_err() {
 801            match bias {
 802                Bias::Left => offset -= 1,
 803                Bias::Right => offset += 1,
 804            }
 805        }
 806        OffsetUtf16(offset)
 807    }
 808}
 809
 810impl sum_tree::Item for Chunk {
 811    type Summary = ChunkSummary;
 812
 813    fn summary(&self) -> Self::Summary {
 814        ChunkSummary::from(self.0.as_str())
 815    }
 816}
 817
 818#[derive(Clone, Debug, Default, Eq, PartialEq)]
 819pub struct ChunkSummary {
 820    text: TextSummary,
 821    fingerprint: HashMatrix,
 822}
 823
 824impl<'a> From<&'a str> for ChunkSummary {
 825    fn from(text: &'a str) -> Self {
 826        Self {
 827            text: TextSummary::from(text),
 828            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
 829        }
 830    }
 831}
 832
 833impl sum_tree::Summary for ChunkSummary {
 834    type Context = ();
 835
 836    fn add_summary(&mut self, summary: &Self, _: &()) {
 837        self.text += &summary.text;
 838        self.fingerprint = self.fingerprint * summary.fingerprint;
 839    }
 840}
 841
 842#[derive(Clone, Debug, Default, Eq, PartialEq)]
 843pub struct TextSummary {
 844    pub len: usize,
 845    pub len_utf16: OffsetUtf16,
 846    pub lines: Point,
 847    pub first_line_chars: u32,
 848    pub last_line_chars: u32,
 849    pub last_line_len_utf16: u32,
 850    pub longest_row: u32,
 851    pub longest_row_chars: u32,
 852}
 853
 854impl TextSummary {
 855    pub fn lines_utf16(&self) -> PointUtf16 {
 856        PointUtf16 {
 857            row: self.lines.row,
 858            column: self.last_line_len_utf16,
 859        }
 860    }
 861}
 862
 863impl<'a> From<&'a str> for TextSummary {
 864    fn from(text: &'a str) -> Self {
 865        let mut len_utf16 = OffsetUtf16(0);
 866        let mut lines = Point::new(0, 0);
 867        let mut first_line_chars = 0;
 868        let mut last_line_chars = 0;
 869        let mut last_line_len_utf16 = 0;
 870        let mut longest_row = 0;
 871        let mut longest_row_chars = 0;
 872        for c in text.chars() {
 873            len_utf16.0 += c.len_utf16();
 874
 875            if c == '\n' {
 876                lines += Point::new(1, 0);
 877                last_line_len_utf16 = 0;
 878                last_line_chars = 0;
 879            } else {
 880                lines.column += c.len_utf8() as u32;
 881                last_line_len_utf16 += c.len_utf16() as u32;
 882                last_line_chars += 1;
 883            }
 884
 885            if lines.row == 0 {
 886                first_line_chars = last_line_chars;
 887            }
 888
 889            if last_line_chars > longest_row_chars {
 890                longest_row = lines.row;
 891                longest_row_chars = last_line_chars;
 892            }
 893        }
 894
 895        TextSummary {
 896            len: text.len(),
 897            len_utf16,
 898            lines,
 899            first_line_chars,
 900            last_line_chars,
 901            last_line_len_utf16,
 902            longest_row,
 903            longest_row_chars,
 904        }
 905    }
 906}
 907
 908impl sum_tree::Summary for TextSummary {
 909    type Context = ();
 910
 911    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
 912        *self += summary;
 913    }
 914}
 915
 916impl std::ops::Add<Self> for TextSummary {
 917    type Output = Self;
 918
 919    fn add(mut self, rhs: Self) -> Self::Output {
 920        self.add_assign(&rhs);
 921        self
 922    }
 923}
 924
 925impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 926    fn add_assign(&mut self, other: &'a Self) {
 927        let joined_chars = self.last_line_chars + other.first_line_chars;
 928        if joined_chars > self.longest_row_chars {
 929            self.longest_row = self.lines.row;
 930            self.longest_row_chars = joined_chars;
 931        }
 932        if other.longest_row_chars > self.longest_row_chars {
 933            self.longest_row = self.lines.row + other.longest_row;
 934            self.longest_row_chars = other.longest_row_chars;
 935        }
 936
 937        if self.lines.row == 0 {
 938            self.first_line_chars += other.first_line_chars;
 939        }
 940
 941        if other.lines.row == 0 {
 942            self.last_line_chars += other.first_line_chars;
 943            self.last_line_len_utf16 += other.last_line_len_utf16;
 944        } else {
 945            self.last_line_chars = other.last_line_chars;
 946            self.last_line_len_utf16 = other.last_line_len_utf16;
 947        }
 948
 949        self.len += other.len;
 950        self.len_utf16 += other.len_utf16;
 951        self.lines += other.lines;
 952    }
 953}
 954
 955impl std::ops::AddAssign<Self> for TextSummary {
 956    fn add_assign(&mut self, other: Self) {
 957        *self += &other;
 958    }
 959}
 960
 961pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
 962    fn from_text_summary(summary: &TextSummary) -> Self;
 963    fn add_assign(&mut self, other: &Self);
 964}
 965
 966impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
 967    fn from_text_summary(summary: &TextSummary) -> Self {
 968        (
 969            D1::from_text_summary(summary),
 970            D2::from_text_summary(summary),
 971        )
 972    }
 973
 974    fn add_assign(&mut self, other: &Self) {
 975        self.0.add_assign(&other.0);
 976        self.1.add_assign(&other.1);
 977    }
 978}
 979
 980impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
 981    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 982        *self += &summary.text;
 983    }
 984}
 985
 986impl TextDimension for TextSummary {
 987    fn from_text_summary(summary: &TextSummary) -> Self {
 988        summary.clone()
 989    }
 990
 991    fn add_assign(&mut self, other: &Self) {
 992        *self += other;
 993    }
 994}
 995
 996impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
 997    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 998        *self += summary.text.len;
 999    }
1000}
1001
1002impl TextDimension for usize {
1003    fn from_text_summary(summary: &TextSummary) -> Self {
1004        summary.len
1005    }
1006
1007    fn add_assign(&mut self, other: &Self) {
1008        *self += other;
1009    }
1010}
1011
1012impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1013    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1014        *self += summary.text.len_utf16;
1015    }
1016}
1017
1018impl TextDimension for OffsetUtf16 {
1019    fn from_text_summary(summary: &TextSummary) -> Self {
1020        summary.len_utf16
1021    }
1022
1023    fn add_assign(&mut self, other: &Self) {
1024        *self += other;
1025    }
1026}
1027
1028impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1029    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1030        *self += summary.text.lines;
1031    }
1032}
1033
1034impl TextDimension for Point {
1035    fn from_text_summary(summary: &TextSummary) -> Self {
1036        summary.lines
1037    }
1038
1039    fn add_assign(&mut self, other: &Self) {
1040        *self += other;
1041    }
1042}
1043
1044impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1045    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1046        *self += summary.text.lines_utf16();
1047    }
1048}
1049
1050impl TextDimension for PointUtf16 {
1051    fn from_text_summary(summary: &TextSummary) -> Self {
1052        summary.lines_utf16()
1053    }
1054
1055    fn add_assign(&mut self, other: &Self) {
1056        *self += other;
1057    }
1058}
1059
1060fn find_split_ix(text: &str) -> usize {
1061    let mut ix = text.len() / 2;
1062    while !text.is_char_boundary(ix) {
1063        if ix < 2 * CHUNK_BASE {
1064            ix += 1;
1065        } else {
1066            ix = (text.len() / 2) - 1;
1067            break;
1068        }
1069    }
1070    while !text.is_char_boundary(ix) {
1071        ix -= 1;
1072    }
1073
1074    debug_assert!(ix <= 2 * CHUNK_BASE);
1075    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
1076    ix
1077}
1078
1079#[cfg(test)]
1080mod tests {
1081    use super::*;
1082    use rand::prelude::*;
1083    use std::{cmp::Ordering, env, io::Read};
1084    use util::RandomCharIter;
1085    use Bias::{Left, Right};
1086
1087    #[test]
1088    fn test_all_4_byte_chars() {
1089        let mut rope = Rope::new();
1090        let text = "🏀".repeat(256);
1091        rope.push(&text);
1092        assert_eq!(rope.text(), text);
1093    }
1094
1095    #[test]
1096    fn test_clip() {
1097        let rope = Rope::from("🧘");
1098
1099        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1100        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1101        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1102
1103        assert_eq!(
1104            rope.clip_point(Point::new(0, 1), Bias::Left),
1105            Point::new(0, 0)
1106        );
1107        assert_eq!(
1108            rope.clip_point(Point::new(0, 1), Bias::Right),
1109            Point::new(0, 4)
1110        );
1111        assert_eq!(
1112            rope.clip_point(Point::new(0, 5), Bias::Right),
1113            Point::new(0, 4)
1114        );
1115
1116        assert_eq!(
1117            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left),
1118            PointUtf16::new(0, 0)
1119        );
1120        assert_eq!(
1121            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right),
1122            PointUtf16::new(0, 2)
1123        );
1124        assert_eq!(
1125            rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
1126            PointUtf16::new(0, 2)
1127        );
1128
1129        assert_eq!(
1130            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1131            OffsetUtf16(0)
1132        );
1133        assert_eq!(
1134            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1135            OffsetUtf16(2)
1136        );
1137        assert_eq!(
1138            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1139            OffsetUtf16(2)
1140        );
1141    }
1142
1143    #[gpui::test(iterations = 100)]
1144    fn test_random_rope(mut rng: StdRng) {
1145        let operations = env::var("OPERATIONS")
1146            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1147            .unwrap_or(10);
1148
1149        let mut expected = String::new();
1150        let mut actual = Rope::new();
1151        for _ in 0..operations {
1152            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1153            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1154            let len = rng.gen_range(0..=64);
1155            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1156
1157            let mut new_actual = Rope::new();
1158            let mut cursor = actual.cursor(0);
1159            new_actual.append(cursor.slice(start_ix));
1160            new_actual.push(&new_text);
1161            cursor.seek_forward(end_ix);
1162            new_actual.append(cursor.suffix());
1163            actual = new_actual;
1164
1165            expected.replace_range(start_ix..end_ix, &new_text);
1166
1167            assert_eq!(actual.text(), expected);
1168            log::info!("text: {:?}", expected);
1169
1170            for _ in 0..5 {
1171                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1172                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1173
1174                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1175                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1176
1177                let mut actual_text = String::new();
1178                actual
1179                    .bytes_in_range(start_ix..end_ix)
1180                    .read_to_string(&mut actual_text)
1181                    .unwrap();
1182                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1183
1184                assert_eq!(
1185                    actual
1186                        .reversed_chunks_in_range(start_ix..end_ix)
1187                        .collect::<Vec<&str>>()
1188                        .into_iter()
1189                        .rev()
1190                        .collect::<String>(),
1191                    &expected[start_ix..end_ix]
1192                );
1193            }
1194
1195            let mut offset_utf16 = OffsetUtf16(0);
1196            let mut point = Point::new(0, 0);
1197            let mut point_utf16 = PointUtf16::new(0, 0);
1198            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1199                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1200                assert_eq!(
1201                    actual.offset_to_point_utf16(ix),
1202                    point_utf16,
1203                    "offset_to_point_utf16({})",
1204                    ix
1205                );
1206                assert_eq!(
1207                    actual.point_to_offset(point),
1208                    ix,
1209                    "point_to_offset({:?})",
1210                    point
1211                );
1212                assert_eq!(
1213                    actual.point_utf16_to_offset(point_utf16),
1214                    ix,
1215                    "point_utf16_to_offset({:?})",
1216                    point_utf16
1217                );
1218                assert_eq!(
1219                    actual.offset_to_offset_utf16(ix),
1220                    offset_utf16,
1221                    "offset_to_offset_utf16({:?})",
1222                    ix
1223                );
1224                assert_eq!(
1225                    actual.offset_utf16_to_offset(offset_utf16),
1226                    ix,
1227                    "offset_utf16_to_offset({:?})",
1228                    offset_utf16
1229                );
1230                if ch == '\n' {
1231                    point += Point::new(1, 0);
1232                    point_utf16 += PointUtf16::new(1, 0);
1233                } else {
1234                    point.column += ch.len_utf8() as u32;
1235                    point_utf16.column += ch.len_utf16() as u32;
1236                }
1237                offset_utf16.0 += ch.len_utf16();
1238            }
1239
1240            let mut offset_utf16 = OffsetUtf16(0);
1241            let mut point_utf16 = PointUtf16::zero();
1242            for unit in expected.encode_utf16() {
1243                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1244                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1245                assert!(right_offset >= left_offset);
1246                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1247                actual.offset_utf16_to_offset(left_offset);
1248                actual.offset_utf16_to_offset(right_offset);
1249
1250                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1251                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1252                assert!(right_point >= left_point);
1253                // Ensure translating UTF-16 points to offsets doesn't panic.
1254                actual.point_utf16_to_offset(left_point);
1255                actual.point_utf16_to_offset(right_point);
1256
1257                offset_utf16.0 += 1;
1258                if unit == b'\n' as u16 {
1259                    point_utf16 += PointUtf16::new(1, 0);
1260                } else {
1261                    point_utf16 += PointUtf16::new(0, 1);
1262                }
1263            }
1264
1265            for _ in 0..5 {
1266                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1267                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1268                assert_eq!(
1269                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1270                    TextSummary::from(&expected[start_ix..end_ix])
1271                );
1272            }
1273
1274            let mut expected_longest_rows = Vec::new();
1275            let mut longest_line_len = -1_isize;
1276            for (row, line) in expected.split('\n').enumerate() {
1277                let row = row as u32;
1278                assert_eq!(
1279                    actual.line_len(row),
1280                    line.len() as u32,
1281                    "invalid line len for row {}",
1282                    row
1283                );
1284
1285                let line_char_count = line.chars().count() as isize;
1286                match line_char_count.cmp(&longest_line_len) {
1287                    Ordering::Less => {}
1288                    Ordering::Equal => expected_longest_rows.push(row),
1289                    Ordering::Greater => {
1290                        longest_line_len = line_char_count;
1291                        expected_longest_rows.clear();
1292                        expected_longest_rows.push(row);
1293                    }
1294                }
1295            }
1296
1297            let longest_row = actual.summary().longest_row;
1298            assert!(
1299                expected_longest_rows.contains(&longest_row),
1300                "incorrect longest row {}. expected {:?} with length {}",
1301                longest_row,
1302                expected_longest_rows,
1303                longest_line_len,
1304            );
1305        }
1306    }
1307
1308    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1309        while !text.is_char_boundary(offset) {
1310            match bias {
1311                Bias::Left => offset -= 1,
1312                Bias::Right => offset += 1,
1313            }
1314        }
1315        offset
1316    }
1317
1318    impl Rope {
1319        fn text(&self) -> String {
1320            let mut text = String::new();
1321            for chunk in self.chunks.cursor::<()>() {
1322                text.push_str(&chunk.0);
1323            }
1324            text
1325        }
1326    }
1327}